From 2b74fccadb87701eff91bf4ce315829fc3fd9e62 Mon Sep 17 00:00:00 2001 From: Sourabh Desai <sourabhdesai@gmail.com> Date: Sat, 20 Jan 2024 19:46:12 -0800 Subject: [PATCH] make hash a @property (#10163) * make hash a @property * fix tests by making hash property a required abstractmethod on basenode * update changelog --- CHANGELOG.md | 1 + llama_index/schema.py | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40fef716dc..6a85d218d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### New Features - `beautifulsoup4` dependency to new optional extra `html` (#10156) +- make `BaseNode.hash` an `@property` (#10163) ## [0.9.34] - 2024-01-19 diff --git a/llama_index/schema.py b/llama_index/schema.py index 7647045236..e3382bae13 100644 --- a/llama_index/schema.py +++ b/llama_index/schema.py @@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from dataclasses_json import DataClassJsonMixin from typing_extensions import Self -from llama_index.bridge.pydantic import BaseModel, Field, root_validator +from llama_index.bridge.pydantic import BaseModel, Field from llama_index.utils import SAMPLE_TEXT, truncate_text if TYPE_CHECKING: @@ -223,7 +223,6 @@ class BaseNode(BaseComponent): default_factory=dict, description="A mapping of relationships to other node information.", ) - hash: str = Field(default="", description="Hash of the node content.") @classmethod @abstractmethod @@ -242,6 +241,11 @@ class BaseNode(BaseComponent): def set_content(self, value: Any) -> None: """Set the content of the node.""" + @property + @abstractmethod + def hash(self) -> str: + """Get hash of node.""" + @property def node_id(self) -> str: return self.id_ @@ -382,16 +386,10 @@ class TextNode(BaseNode): def class_name(cls) -> str: return "TextNode" - @root_validator - def _check_hash(cls, values: dict) -> dict: - """Generate a hash to represent the node.""" - text = values.get("text", "") - metadata = values.get("metadata", {}) - doc_identity = str(text) + str(metadata) - values["hash"] = str( - sha256(doc_identity.encode("utf-8", "surrogatepass")).hexdigest() - ) - return values + @property + def hash(self) -> str: + doc_identity = str(self.text) + str(self.metadata) + return str(sha256(doc_identity.encode("utf-8", "surrogatepass")).hexdigest()) @classmethod def get_type(cls) -> str: -- GitLab