diff --git a/CHANGELOG.md b/CHANGELOG.md index 30da66c33f6d18678277880fa0d09e1f9ce86741..059971e5b8169b6bb9453b07496c6765e8bf76e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - Fix token counting for new openai client (#8981) - Fix small pydantic bug in postgres vector db (#8962) +- Fixed `chunk_overlap` and `doc_id` bugs in `HierarchicalNodeParser` (#8983) ## [0.9.2] - 2023-11-16 diff --git a/llama_index/node_parser/interface.py b/llama_index/node_parser/interface.py index 5e6bd53449a2863d904f7954238a17380b1aaa92..8c0c556f3e6bc6c995b21ca73a4da7edfadc2479 100644 --- a/llama_index/node_parser/interface.py +++ b/llama_index/node_parser/interface.py @@ -53,7 +53,7 @@ class NodeParser(TransformComponent, ABC): show_progress (bool): whether to show progress bar """ - doc_id_to_document = {doc.doc_id: doc for doc in documents} + doc_id_to_document = {doc.id_: doc for doc in documents} with self.callback_manager.event( CBEventType.NODE_PARSING, payload={EventPayload.DOCUMENTS: documents} diff --git a/llama_index/node_parser/relational/hierarchical.py b/llama_index/node_parser/relational/hierarchical.py index a3eef65c36c480e156ffd5dd96988c604ec3b77b..f3aaaf8ce7954e58ede402d3097235f8ab4a911b 100644 --- a/llama_index/node_parser/relational/hierarchical.py +++ b/llama_index/node_parser/relational/hierarchical.py @@ -78,6 +78,7 @@ class HierarchicalNodeParser(NodeParser): def from_defaults( cls, chunk_sizes: Optional[List[int]] = None, + chunk_overlap: int = 20, node_parser_ids: Optional[List[str]] = None, node_parser_map: Optional[Dict[str, NodeParser]] = None, include_metadata: bool = True, @@ -96,6 +97,7 @@ class HierarchicalNodeParser(NodeParser): node_parser_map[node_parser_id] = SentenceSplitter( chunk_size=chunk_size, callback_manager=callback_manager, + chunk_overlap=chunk_overlap, ) else: if chunk_sizes is not None: