Skip to content
Snippets Groups Projects
Unverified Commit 99d2421b authored by Andrei Fajardo's avatar Andrei Fajardo Committed by GitHub
Browse files

Delete txtai.py from chatgpt-plugin-reader package directory (#10601)

parent c6caa133
No related branches found
No related tags found
No related merge requests found
"""txtai reader."""
from typing import Any, Dict, List
import numpy as np
from llama_index.readers.base import BaseReader
from llama_index.schema import Document
class TxtaiReader(BaseReader):
    """txtai reader.

    Retrieves documents through an existing in-memory txtai index.
    These documents can then be used in a downstream LlamaIndex data structure.
    If you wish to use txtai itself as an index to organize documents,
    insert documents, and perform queries on them, please use VectorStoreIndex
    with TxtaiVectorStore.

    Args:
        index (txtai.ann.ANN): A txtai Index object (required)

    """

    def __init__(self, index: Any):
        """Initialize with parameters.

        Args:
            index (Any): The txtai index object that ``load_data`` will search.

        Raises:
            ImportError: If the ``txtai`` package cannot be imported.

        """
        # Message text is kept verbatim (including the leading and trailing
        # newline); explicit "\n" literals keep code indentation out of it.
        import_err_msg = (
            "\n"
            "`txtai` package not found. For instructions on\n"
            "how to install `txtai` please visit\n"
            "https://neuml.github.io/txtai/install/\n"
        )
        try:
            # Imported only to verify that the dependency is installed.
            import txtai  # noqa
        except ImportError as err:
            # Chain explicitly so the underlying import failure stays visible.
            raise ImportError(import_err_msg) from err

        self._index = index

    def load_data(
        self,
        query: np.ndarray,
        id_to_text_map: Dict[str, str],
        k: int = 4,
        separate_documents: bool = True,
    ) -> List[Document]:
        """Load data from txtai index.

        Args:
            query (np.ndarray): A 2D numpy array of query vectors.
            id_to_text_map (Dict[str, str]): A map from ID's to text.
            k (int): Number of nearest neighbors to retrieve. Defaults to 4.
            separate_documents (Optional[bool]): Whether to return separate
                documents. Defaults to True.

        Returns:
            List[Document]: A list of documents. When ``separate_documents``
                is False, the list holds a single document whose text is all
                retrieved texts joined by blank lines.

        Raises:
            ValueError: If a returned ID is missing from ``id_to_text_map``.

        """
        search_result = self._index.search(query, k)

        documents = []
        # The result is iterated as one sequence of 2-tuples per query; only
        # the first element (the ID) is used.
        # NOTE(review): the second element is presumably a score - confirm.
        for query_result in search_result:
            for doc_id, _ in query_result:
                # IDs are normalised to str because the map is keyed by str.
                doc_id = str(doc_id)
                if doc_id not in id_to_text_map:
                    raise ValueError(
                        f"Document ID {doc_id} not found in id_to_text_map."
                    )
                documents.append(Document(text=id_to_text_map[doc_id]))

        if not separate_documents:
            # join all documents into one
            text = "\n\n".join(doc.get_content() for doc in documents)
            documents = [Document(text=text)]

        return documents
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment