From 175230f49bf7a5a94b4596ab0b98447b3bc1ff76 Mon Sep 17 00:00:00 2001
From: Logan <logan.markewich@live.com>
Date: Fri, 17 Nov 2023 11:03:08 -0600
Subject: [PATCH] fix hierarchical node parser bugs (#8983)

---
 CHANGELOG.md                                       | 1 +
 llama_index/node_parser/interface.py               | 2 +-
 llama_index/node_parser/relational/hierarchical.py | 2 ++
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 30da66c33f..059971e5b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@
 
 - Fix token counting for new openai client (#8981)
 - Fix small pydantic bug in postgres vector db (#8962)
+- Fix `chunk_overlap` and `doc_id` bugs in `HierarchicalNodeParser` (#8983)
 
 ## [0.9.2] - 2023-11-16
 
diff --git a/llama_index/node_parser/interface.py b/llama_index/node_parser/interface.py
index 5e6bd53449..8c0c556f3e 100644
--- a/llama_index/node_parser/interface.py
+++ b/llama_index/node_parser/interface.py
@@ -53,7 +53,7 @@ class NodeParser(TransformComponent, ABC):
             show_progress (bool): whether to show progress bar
 
         """
-        doc_id_to_document = {doc.doc_id: doc for doc in documents}
+        doc_id_to_document = {doc.id_: doc for doc in documents}
 
         with self.callback_manager.event(
             CBEventType.NODE_PARSING, payload={EventPayload.DOCUMENTS: documents}
diff --git a/llama_index/node_parser/relational/hierarchical.py b/llama_index/node_parser/relational/hierarchical.py
index a3eef65c36..f3aaaf8ce7 100644
--- a/llama_index/node_parser/relational/hierarchical.py
+++ b/llama_index/node_parser/relational/hierarchical.py
@@ -78,6 +78,7 @@ class HierarchicalNodeParser(NodeParser):
     def from_defaults(
         cls,
         chunk_sizes: Optional[List[int]] = None,
+        chunk_overlap: int = 20,
         node_parser_ids: Optional[List[str]] = None,
         node_parser_map: Optional[Dict[str, NodeParser]] = None,
         include_metadata: bool = True,
@@ -96,6 +97,7 @@ class HierarchicalNodeParser(NodeParser):
                 node_parser_map[node_parser_id] = SentenceSplitter(
                     chunk_size=chunk_size,
                     callback_manager=callback_manager,
+                    chunk_overlap=chunk_overlap,
                 )
         else:
             if chunk_sizes is not None:
-- 
GitLab