Skip to content
Snippets Groups Projects
Unverified commit 175230f4, authored by Logan and committed by GitHub
Browse files

fix hierarchical node parser bugs (#8983)

parent 7eeb750d
No related branches found
No related tags found
No related merge requests found
......@@ -10,6 +10,7 @@
- Fix token counting for new openai client (#8981)
- Fix small pydantic bug in postgres vector db (#8962)
- Fixed `chunk_overlap` and `doc_id` bugs in `HierarchicalNodeParser` (#8983)
## [0.9.2] - 2023-11-16
......
......@@ -53,7 +53,7 @@ class NodeParser(TransformComponent, ABC):
show_progress (bool): whether to show progress bar
"""
- doc_id_to_document = {doc.doc_id: doc for doc in documents}
+ doc_id_to_document = {doc.id_: doc for doc in documents}
with self.callback_manager.event(
CBEventType.NODE_PARSING, payload={EventPayload.DOCUMENTS: documents}
......
......@@ -78,6 +78,7 @@ class HierarchicalNodeParser(NodeParser):
def from_defaults(
cls,
chunk_sizes: Optional[List[int]] = None,
chunk_overlap: int = 20,
node_parser_ids: Optional[List[str]] = None,
node_parser_map: Optional[Dict[str, NodeParser]] = None,
include_metadata: bool = True,
......@@ -96,6 +97,7 @@ class HierarchicalNodeParser(NodeParser):
node_parser_map[node_parser_id] = SentenceSplitter(
chunk_size=chunk_size,
callback_manager=callback_manager,
chunk_overlap=chunk_overlap,
)
else:
if chunk_sizes is not None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment