Skip to content
Snippets Groups Projects
Unverified Commit 2ebdb236 authored by Haotian Zhang's avatar Haotian Zhang Committed by GitHub
Browse files

improve multi doc retrieval (#11346)

* improve multi doc retrieval

* cr

* cr

* cr

* cr
parent 272685f8
Branches
Tags
No related merge requests found
"""Node parser interface."""
from abc import ABC, abstractmethod
from typing import Any, Callable, List, Sequence
......
......@@ -249,7 +249,11 @@ class BaseElementNodeParser(NodeParser):
doc = Document(text="\n\n".join(list(buffer)))
return node_parser.get_nodes_from_documents([doc])
def get_nodes_from_elements(self, elements: List[Element]) -> List[BaseNode]:
def get_nodes_from_elements(
self,
elements: List[Element],
metadata_inherited: Optional[Dict[str, Any]] = None,
) -> List[BaseNode]:
"""Get nodes and mappings."""
from llama_index.core.node_parser import SentenceSplitter
......@@ -342,5 +346,8 @@ class BaseElementNodeParser(NodeParser):
nodes.extend(cur_text_nodes)
cur_text_el_buffer = []
# remove empty nodes
# remove empty nodes and keep the original metadata each node inherited from its parent node
for node in nodes:
if metadata_inherited:
node.metadata.update(metadata_inherited)
return [node for node in nodes if len(node.text) > 0]
......@@ -57,7 +57,7 @@ class MarkdownElementNodeParser(BaseElementNodeParser):
self.extract_table_summaries(table_elements)
# convert into nodes
# will return a list of Nodes and Index Nodes
return self.get_nodes_from_elements(elements)
return self.get_nodes_from_elements(elements, node.metadata)
def extract_elements(
self,
......
......@@ -92,7 +92,7 @@ class UnstructuredElementNodeParser(BaseElementNodeParser):
self.extract_table_summaries(table_elements)
# convert into nodes
# will return a list of Nodes and Index Nodes
return self.get_nodes_from_elements(elements)
return self.get_nodes_from_elements(elements, node.metadata)
def extract_elements(
self, text: str, table_filters: Optional[List[Callable]] = None, **kwargs: Any
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment