From b9f8085ab5b41b52d23524aaa2b5f00bed5ff9b8 Mon Sep 17 00:00:00 2001 From: Logan <logan.markewich@live.com> Date: Tue, 14 May 2024 08:54:32 -0600 Subject: [PATCH] VectorStore -> BasePydanticVectorStore, also get/delete_nodes, clear() (#13439) --- docs/BUILD | 9 - .../examples/low_level/vector_store.ipynb | 21 ++- .../llama_index/core/bridge/langchain.py | 107 ++++++----- .../core/indices/document_summary/base.py | 4 +- .../core/indices/multi_modal/base.py | 10 +- .../core/indices/multi_modal/retriever.py | 6 +- .../core/indices/vector_store/base.py | 6 +- .../core/langchain_helpers/__init__.py | 2 +- .../llama_index/core/llms/loading.py | 4 +- .../llama_index/core/llms/utils.py | 4 +- .../core/node_parser/text/langchain.py | 11 +- .../core/postprocessor/rankGPT_rerank.py | 2 +- .../llama_index/core/program/utils.py | 2 +- .../core/prompts/default_prompt_selectors.py | 2 +- .../core/storage/storage_context.py | 25 ++- .../llama_index/core/vector_stores/simple.py | 93 ++++++++-- .../llama_index/core/vector_stores/types.py | 45 +++++ llama-index-core/pyproject.toml | 45 +---- .../tests/output_parsers/test_base.py | 2 +- llama-index-core/tests/text_splitter/BUILD | 1 + .../tests/text_splitter/test_code_splitter.py | 1 + llama-index-core/tests/tools/test_base.py | 2 +- .../tests/tools/test_ondemand_loader.py | 2 +- .../tests/vector_stores/test_simple.py | 18 ++ .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_azure_openai.py | 19 -- .../tests/test_embeddings_azure_openai.py | 7 - .../llama-index-embeddings-cohere/BUILD | 5 - .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_fireworks_embedding.py | 7 - .../llama-index-embeddings-gemini/tests/BUILD | 2 +- .../llama-index-embeddings-google/tests/BUILD | 2 +- .../llama-index-embeddings-octoai/tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_embeddings_octoai.py | 7 - .../llama-index-embeddings-openai/tests/BUILD | 4 +- .../tests/test_openai_utils.py | 55 ------ 
.../llama-index-llms-cohere/pyproject.toml | 2 +- .../llms/llama-index-llms-cohere/tests/BUILD | 4 +- .../llms/llama-index-llms-gemini/tests/BUILD | 2 +- .../llms/llama-index-llms-palm/tests/BUILD | 2 +- .../tests/BUILD | 2 +- .../llama-index-readers-guru/pyproject.toml | 3 +- .../llama-index-readers-pebblo/pyproject.toml | 4 +- .../tests/test_readers_pebblo.py | 2 + .../pyproject.toml | 3 +- .../alibabacloud_opensearch/base.py | 16 +- .../pyproject.toml | 2 +- ...t_vector_stores_alibabacloud_opensearch.py | 4 +- .../llama_index/vector_stores/awadb/base.py | 29 ++- .../pyproject.toml | 2 +- .../tests/test_vector_stores_awadb.py | 4 +- .../vector_stores/awsdocdb/base.py | 27 ++- .../pyproject.toml | 2 +- .../llama_index/vector_stores/bagel/base.py | 13 +- .../pyproject.toml | 2 +- .../tests/test_vector_stores_bagel.py | 4 +- .../vector_stores/baiduvectordb/base.py | 41 +++-- .../pyproject.toml | 2 +- .../tests/test_vector_stores_baiduvectordb.py | 4 +- .../vector_stores/chatgpt_plugin/base.py | 18 +- .../pyproject.toml | 2 +- .../test_vector_stores_chatgpt_plugin.py | 4 +- .../llama_index/vector_stores/chroma/base.py | 61 +++++- .../tests/test_chromadb.py | 59 +++++- .../vector_stores/dashvector/base.py | 16 +- .../pyproject.toml | 2 +- .../tests/test_vector_stores_dashvector.py | 4 +- .../vector_stores/dynamodb/base.py | 16 +- .../pyproject.toml | 2 +- .../tests/test_vector_stores_dynamodb.py | 4 +- .../llama_index/vector_stores/epsilla/base.py | 16 +- .../pyproject.toml | 2 +- .../tests/test_vector_stores_epsilla.py | 4 +- .../tests/BUILD | 2 +- .../llama_index/vector_stores/jaguar/base.py | 15 +- .../pyproject.toml | 2 +- .../tests/test_vector_stores_jaguar.py | 4 +- .../llama_index/vector_stores/metal/base.py | 31 +++- .../pyproject.toml | 2 +- .../tests/test_vector_stores_metal.py | 4 +- .../llama_index/vector_stores/myscale/base.py | 54 ++++-- .../pyproject.toml | 2 +- .../tests/test_vector_stores_myscale.py | 4 +- .../vector_stores/neo4jvector/base.py | 49 
+++-- .../pyproject.toml | 2 +- .../tests/test_vector_stores_neo4jvector.py | 4 +- .../llama_index/vector_stores/neptune/base.py | 34 +++- .../pyproject.toml | 2 +- .../tests/test_vector_stores_neptune.py | 13 +- .../llama_index/vector_stores/qdrant/base.py | 174 +++++++++++++++++- .../pyproject.toml | 1 + .../tests/BUILD | 4 + .../tests/conftest.py | 38 ++++ .../tests/test_vector_stores_qdrant.py | 53 ++++++ .../vector_stores/rocksetdb/base.py | 40 ++-- .../pyproject.toml | 2 +- .../tests/test_vector_stores_rocksetdb.py | 4 +- .../vector_stores/singlestoredb/base.py | 43 +++-- .../pyproject.toml | 2 +- .../tests/test_vector_stores_singlestoredb.py | 4 +- .../llama_index/vector_stores/tair/base.py | 23 ++- .../pyproject.toml | 2 +- .../tests/test_vector_stores_tair.py | 4 +- .../vector_stores/tencentvectordb/base.py | 57 +++--- .../pyproject.toml | 2 +- .../test_vector_stores_tencentvectordb.py | 4 +- .../vector_stores/timescalevector/base.py | 29 ++- .../pyproject.toml | 2 +- .../vector_stores/typesense/base.py | 20 +- .../pyproject.toml | 2 +- .../tests/test_vector_stores_typesense.py | 4 +- .../llama_index/vector_stores/upstash/base.py | 19 +- .../pyproject.toml | 2 +- .../tests/test_vector_stores_upstash.py | 4 +- .../llama_index/vector_stores/vearch/base.py | 66 ++++--- .../pyproject.toml | 2 +- .../tests/test_vector_stores_vearch.py | 4 +- .../llama_index/vector_stores/vespa/base.py | 75 +++++--- .../pyproject.toml | 2 +- .../llama_index/vector_stores/zep/base.py | 25 ++- .../pyproject.toml | 2 +- .../tests/test_vector_stores_zep.py | 4 +- .../tests/test_query_engine.py | 8 - .../tests/BUILD | 4 +- .../llama_index/packs/raptor/base.py | 6 +- 128 files changed, 1266 insertions(+), 616 deletions(-) delete mode 100644 docs/BUILD delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/BUILD delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/__init__.py delete mode 100644 
llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/test_azure_openai.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/test_embeddings_azure_openai.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/BUILD delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/__init__.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/test_fireworks_embedding.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/BUILD delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/__init__.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/test_embeddings_octoai.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-openai/tests/test_openai_utils.py create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/conftest.py diff --git a/docs/BUILD b/docs/BUILD deleted file mode 100644 index 589a38260..000000000 --- a/docs/BUILD +++ /dev/null @@ -1,9 +0,0 @@ -python_sources() - -poetry_requirements( - name="poetry", -) - -python_requirements( - name="reqs", -) diff --git a/docs/docs/examples/low_level/vector_store.ipynb b/docs/docs/examples/low_level/vector_store.ipynb index 90ea8a8e9..6750e7e2b 100644 --- a/docs/docs/examples/low_level/vector_store.ipynb +++ b/docs/docs/examples/low_level/vector_store.ipynb @@ -166,7 +166,7 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.core.vector_stores.types import VectorStore\n", + "from llama_index.core.vector_stores.types import BasePydanticVectorStore\n", "from llama_index.core.vector_stores import (\n", " VectorStoreQuery,\n", " VectorStoreQueryResult,\n", @@ -176,7 +176,7 @@ "import os\n", "\n", "\n", - "class BaseVectorStore(VectorStore):\n", + 
"class BaseVectorStore(BasePydanticVectorStore):\n", " \"\"\"Simple custom Vector Store.\n", "\n", " Stores documents in a simple in-memory dict.\n", @@ -311,14 +311,14 @@ "metadata": {}, "outputs": [], "source": [ + "from llama_index.core.bridge.pydantic import Field\n", + "\n", + "\n", "class VectorStore2(BaseVectorStore):\n", " \"\"\"VectorStore2 (add/get/delete implemented).\"\"\"\n", "\n", " stores_text: bool = True\n", - "\n", - " def __init__(self) -> None:\n", - " \"\"\"Init params.\"\"\"\n", - " self.node_dict: Dict[str, BaseNode] = {}\n", + " node_dict: Dict[str, BaseNode] = Field(default_factory=dict)\n", "\n", " def get(self, text_id: str) -> List[float]:\n", " \"\"\"Get embedding.\"\"\"\n", @@ -469,6 +469,9 @@ "metadata": {}, "outputs": [], "source": [ + "from typing import cast\n", + "\n", + "\n", "class VectorStore3A(VectorStore2):\n", " \"\"\"Implements semantic/dense search.\"\"\"\n", "\n", @@ -485,7 +488,7 @@ "\n", " similarities, node_ids = get_top_k_embeddings(\n", " query_embedding,\n", - " embeddings,\n", + " doc_embeddings,\n", " doc_ids,\n", " similarity_top_k=query.similarity_top_k,\n", " )\n", @@ -891,9 +894,9 @@ ], "metadata": { "kernelspec": { - "display_name": "llama_index_v2", + "display_name": "venv", "language": "python", - "name": "llama_index_v2" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/llama-index-core/llama_index/core/bridge/langchain.py b/llama-index-core/llama_index/core/bridge/langchain.py index ac888d0ec..645ce94af 100644 --- a/llama-index-core/llama_index/core/bridge/langchain.py +++ b/llama-index-core/llama_index/core/bridge/langchain.py @@ -1,60 +1,85 @@ -import langchain -from langchain.agents import AgentExecutor, AgentType, initialize_agent +import langchain # pants: no-infer-dep +from langchain.agents import ( + AgentExecutor, + AgentType, + initialize_agent, +) # pants: no-infer-dep # agents and tools -from langchain.agents.agent_toolkits.base import BaseToolkit -from 
langchain.base_language import BaseLanguageModel +from langchain.agents.agent_toolkits.base import BaseToolkit # pants: no-infer-dep +from langchain.base_language import BaseLanguageModel # pants: no-infer-dep # callback -from langchain.callbacks.base import BaseCallbackHandler, BaseCallbackManager -from langchain.chains.prompt_selector import ConditionalPromptSelector, is_chat_model -from langchain.chat_models.base import BaseChatModel -from langchain.docstore.document import Document -from langchain.memory import ChatMessageHistory, ConversationBufferMemory +from langchain.callbacks.base import ( + BaseCallbackHandler, + BaseCallbackManager, +) # pants: no-infer-dep +from langchain.chains.prompt_selector import ( + ConditionalPromptSelector, + is_chat_model, +) # pants: no-infer-dep +from langchain.chat_models.base import BaseChatModel # pants: no-infer-dep +from langchain.docstore.document import Document # pants: no-infer-dep +from langchain.memory import ( + ChatMessageHistory, + ConversationBufferMemory, +) # pants: no-infer-dep # chat and memory -from langchain.memory.chat_memory import BaseChatMemory -from langchain.output_parsers import ResponseSchema +from langchain.memory.chat_memory import BaseChatMemory # pants: no-infer-dep +from langchain.output_parsers import ResponseSchema # pants: no-infer-dep # prompts -from langchain.prompts import PromptTemplate -from langchain.prompts.chat import ( - AIMessagePromptTemplate, - BaseMessagePromptTemplate, - ChatPromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -) +from langchain.prompts import PromptTemplate # pants: no-infer-dep +from langchain.prompts.chat import ( # pants: no-infer-dep + AIMessagePromptTemplate, # pants: no-infer-dep + BaseMessagePromptTemplate, # pants: no-infer-dep + ChatPromptTemplate, # pants: no-infer-dep + HumanMessagePromptTemplate, # pants: no-infer-dep + SystemMessagePromptTemplate, # pants: no-infer-dep +) # pants: no-infer-dep # schema -from 
langchain.schema import ( - AIMessage, - BaseMemory, - BaseMessage, - BaseOutputParser, - ChatGeneration, - ChatMessage, - FunctionMessage, - HumanMessage, - LLMResult, - SystemMessage, -) +from langchain.schema import ( # pants: no-infer-dep + AIMessage, # pants: no-infer-dep + BaseMemory, # pants: no-infer-dep + BaseMessage, # pants: no-infer-dep + BaseOutputParser, # pants: no-infer-dep + ChatGeneration, # pants: no-infer-dep + ChatMessage, # pants: no-infer-dep + FunctionMessage, # pants: no-infer-dep + HumanMessage, # pants: no-infer-dep + LLMResult, # pants: no-infer-dep + SystemMessage, # pants: no-infer-dep +) # pants: no-infer-dep # embeddings -from langchain.schema.embeddings import Embeddings -from langchain.schema.prompt_template import BasePromptTemplate +from langchain.schema.embeddings import Embeddings # pants: no-infer-dep +from langchain.schema.prompt_template import BasePromptTemplate # pants: no-infer-dep # input & output -from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter -from langchain.tools import BaseTool, StructuredTool, Tool -from langchain_community.chat_models import ChatAnyscale, ChatOpenAI -from langchain_community.embeddings import ( - HuggingFaceBgeEmbeddings, - HuggingFaceEmbeddings, -) +from langchain.text_splitter import ( + RecursiveCharacterTextSplitter, + TextSplitter, +) # pants: no-infer-dep +from langchain.tools import BaseTool, StructuredTool, Tool # pants: no-infer-dep +from langchain_community.chat_models import ( + ChatAnyscale, + ChatOpenAI, +) # pants: no-infer-dep +from langchain_community.embeddings import ( # pants: no-infer-dep + HuggingFaceBgeEmbeddings, # pants: no-infer-dep + HuggingFaceEmbeddings, # pants: no-infer-dep +) # pants: no-infer-dep # LLMs -from langchain_community.llms import AI21, BaseLLM, Cohere, FakeListLLM, OpenAI +from langchain_community.llms import ( + AI21, + BaseLLM, + Cohere, + FakeListLLM, + OpenAI, +) # pants: no-infer-dep __all__ = [ "langchain", diff 
--git a/llama-index-core/llama_index/core/indices/document_summary/base.py b/llama-index-core/llama_index/core/indices/document_summary/base.py index 427192cf6..09ce35d42 100644 --- a/llama-index-core/llama_index/core/indices/document_summary/base.py +++ b/llama-index-core/llama_index/core/indices/document_summary/base.py @@ -40,7 +40,7 @@ from llama_index.core.settings import ( from llama_index.core.storage.docstore.types import RefDocInfo from llama_index.core.storage.storage_context import StorageContext from llama_index.core.utils import get_tqdm_iterable -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore logger = logging.getLogger(__name__) @@ -115,7 +115,7 @@ class DocumentSummaryIndex(BaseIndex[IndexDocumentSummary]): ) @property - def vector_store(self) -> VectorStore: + def vector_store(self) -> BasePydanticVectorStore: return self._vector_store def as_retriever( diff --git a/llama-index-core/llama_index/core/indices/multi_modal/base.py b/llama-index-core/llama_index/core/indices/multi_modal/base.py index 0f2669c49..e9be18585 100644 --- a/llama-index-core/llama_index/core/indices/multi_modal/base.py +++ b/llama-index-core/llama_index/core/indices/multi_modal/base.py @@ -33,7 +33,7 @@ from llama_index.core.vector_stores.simple import ( DEFAULT_VECTOR_STORE, SimpleVectorStore, ) -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore logger = logging.getLogger(__name__) @@ -63,7 +63,7 @@ class MultiModalVectorStoreIndex(VectorStoreIndex): # Image-related kwargs # image_vector_store going to be deprecated. 
image_store can be passed from storage_context # keep image_vector_store here for backward compatibility - image_vector_store: Optional[VectorStore] = None, + image_vector_store: Optional[BasePydanticVectorStore] = None, image_embed_model: EmbedType = "clip:ViT-B/32", is_image_to_text: bool = False, # is_image_vector_store_empty is used to indicate whether image_vector_store is empty @@ -112,7 +112,7 @@ class MultiModalVectorStoreIndex(VectorStoreIndex): ) @property - def image_vector_store(self) -> VectorStore: + def image_vector_store(self) -> BasePydanticVectorStore: return self._image_vector_store @property @@ -164,12 +164,12 @@ class MultiModalVectorStoreIndex(VectorStoreIndex): @classmethod def from_vector_store( cls, - vector_store: VectorStore, + vector_store: BasePydanticVectorStore, embed_model: Optional[EmbedType] = None, # deprecated service_context: Optional[ServiceContext] = None, # Image-related kwargs - image_vector_store: Optional[VectorStore] = None, + image_vector_store: Optional[BasePydanticVectorStore] = None, image_embed_model: EmbedType = "clip", **kwargs: Any, ) -> "VectorStoreIndex": diff --git a/llama-index-core/llama_index/core/indices/multi_modal/retriever.py b/llama-index-core/llama_index/core/indices/multi_modal/retriever.py index a5bca739b..470d924e2 100644 --- a/llama-index-core/llama_index/core/indices/multi_modal/retriever.py +++ b/llama-index-core/llama_index/core/indices/multi_modal/retriever.py @@ -25,7 +25,7 @@ from llama_index.core.settings import ( ) from llama_index.core.vector_stores.types import ( MetadataFilters, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -225,7 +225,7 @@ class MultiModalVectorIndexRetriever(MultiModalRetriever): self, query_bundle_with_embeddings: QueryBundle, similarity_top_k: int, - vector_store: VectorStore, + vector_store: BasePydanticVectorStore, ) -> List[NodeWithScore]: query = self._build_vector_store_query( 
query_bundle_with_embeddings, similarity_top_k @@ -347,7 +347,7 @@ class MultiModalVectorIndexRetriever(MultiModalRetriever): self, query_bundle_with_embeddings: QueryBundle, similarity_top_k: int, - vector_store: VectorStore, + vector_store: BasePydanticVectorStore, ) -> List[NodeWithScore]: query = self._build_vector_store_query( query_bundle_with_embeddings, similarity_top_k diff --git a/llama-index-core/llama_index/core/indices/vector_store/base.py b/llama-index-core/llama_index/core/indices/vector_store/base.py index d4a30ff7c..0d69857c6 100644 --- a/llama-index-core/llama_index/core/indices/vector_store/base.py +++ b/llama-index-core/llama_index/core/indices/vector_store/base.py @@ -26,7 +26,7 @@ from llama_index.core.settings import Settings, embed_model_from_settings_or_con from llama_index.core.storage.docstore.types import RefDocInfo from llama_index.core.storage.storage_context import StorageContext from llama_index.core.utils import iter_batch -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore logger = logging.getLogger(__name__) @@ -87,7 +87,7 @@ class VectorStoreIndex(BaseIndex[IndexDict]): @classmethod def from_vector_store( cls, - vector_store: VectorStore, + vector_store: BasePydanticVectorStore, embed_model: Optional[EmbedType] = None, # deprecated service_context: Optional[ServiceContext] = None, @@ -110,7 +110,7 @@ class VectorStoreIndex(BaseIndex[IndexDict]): ) @property - def vector_store(self) -> VectorStore: + def vector_store(self) -> BasePydanticVectorStore: return self._vector_store def as_retriever(self, **kwargs: Any) -> BaseRetriever: diff --git a/llama-index-core/llama_index/core/langchain_helpers/__init__.py b/llama-index-core/llama_index/core/langchain_helpers/__init__.py index 8b8e08068..795a41f5a 100644 --- a/llama-index-core/llama_index/core/langchain_helpers/__init__.py +++ b/llama-index-core/llama_index/core/langchain_helpers/__init__.py @@ 
-1,7 +1,7 @@ """Init file for langchain helpers.""" try: - import langchain # noqa + import langchain # noqa # pants: no-infer-dep except ImportError: raise ImportError( "langchain not installed. " diff --git a/llama-index-core/llama_index/core/llms/loading.py b/llama-index-core/llama_index/core/llms/loading.py index 7271115f1..005a69c89 100644 --- a/llama-index-core/llama_index/core/llms/loading.py +++ b/llama-index-core/llama_index/core/llms/loading.py @@ -13,14 +13,14 @@ RECOGNIZED_LLMS: Dict[str, Type[LLM]] = { try: from llama_index.llms.openai import OpenAI # pants: no-infer-dep - RECOGNIZED_LLMS[OpenAI.class_name()] = OpenAI + RECOGNIZED_LLMS[OpenAI.class_name()] = OpenAI # pants: no-infer-dep except ImportError: pass try: from llama_index.llms.azure_openai import AzureOpenAI # pants: no-infer-dep - RECOGNIZED_LLMS[AzureOpenAI.class_name()] = AzureOpenAI + RECOGNIZED_LLMS[AzureOpenAI.class_name()] = AzureOpenAI # pants: no-infer-dep except ImportError: pass diff --git a/llama-index-core/llama_index/core/llms/utils.py b/llama-index-core/llama_index/core/llms/utils.py index 1c87dd38c..ae0bab6c4 100644 --- a/llama-index-core/llama_index/core/llms/utils.py +++ b/llama-index-core/llama_index/core/llms/utils.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING, Optional, Union if TYPE_CHECKING: - from langchain.base_language import BaseLanguageModel + from langchain.base_language import BaseLanguageModel # pants: no-infer-dep import os @@ -19,7 +19,7 @@ def resolve_llm( from llama_index.core.settings import Settings try: - from langchain.base_language import BaseLanguageModel + from langchain.base_language import BaseLanguageModel # pants: no-infer-dep except ImportError: BaseLanguageModel = None # type: ignore diff --git a/llama-index-core/llama_index/core/node_parser/text/langchain.py b/llama-index-core/llama_index/core/node_parser/text/langchain.py index 5a02799d5..79dd4951a 100644 --- a/llama-index-core/llama_index/core/node_parser/text/langchain.py +++ 
b/llama-index-core/llama_index/core/node_parser/text/langchain.py @@ -7,7 +7,9 @@ from llama_index.core.node_parser.node_utils import default_id_func from llama_index.core.schema import Document if TYPE_CHECKING: - from langchain.text_splitter import TextSplitter as LC_TextSplitter + from langchain.text_splitter import ( + TextSplitter as LC_TextSplitter, + ) # pants: no-infer-dep class LangchainNodeParser(TextSplitter): @@ -28,13 +30,6 @@ class LangchainNodeParser(TextSplitter): id_func: Optional[Callable[[int, Document], str]] = None, ): """Initialize with parameters.""" - try: - from langchain.text_splitter import TextSplitter as LC_TextSplitter # noqa - except ImportError: - raise ImportError( - "Could not run `from langchain.text_splitter import TextSplitter`, " - "please run `pip install langchain`" - ) id_func = id_func or default_id_func super().__init__( diff --git a/llama-index-core/llama_index/core/postprocessor/rankGPT_rerank.py b/llama-index-core/llama_index/core/postprocessor/rankGPT_rerank.py index 67f12dd7d..4295c460a 100644 --- a/llama-index-core/llama_index/core/postprocessor/rankGPT_rerank.py +++ b/llama-index-core/llama_index/core/postprocessor/rankGPT_rerank.py @@ -15,7 +15,7 @@ logger.setLevel(logging.WARNING) def get_default_llm() -> LLM: - from llama_index.llms.openai import OpenAI + from llama_index.llms.openai import OpenAI # pants: no-infer-dep return OpenAI(model="gpt-3.5-turbo-16k") diff --git a/llama-index-core/llama_index/core/program/utils.py b/llama-index-core/llama_index/core/program/utils.py index 37679216a..18735e280 100644 --- a/llama-index-core/llama_index/core/program/utils.py +++ b/llama-index-core/llama_index/core/program/utils.py @@ -44,7 +44,7 @@ def get_program_for_llm( # we fall back to the LLM program try: - from llama_index.llms.openai import OpenAI + from llama_index.llms.openai import OpenAI # pants: no-infer-dep if isinstance(llm, OpenAI): from llama_index.program.openai import ( diff --git 
a/llama-index-core/llama_index/core/prompts/default_prompt_selectors.py b/llama-index-core/llama_index/core/prompts/default_prompt_selectors.py index 23a9d1191..957ba6c43 100644 --- a/llama-index-core/llama_index/core/prompts/default_prompt_selectors.py +++ b/llama-index-core/llama_index/core/prompts/default_prompt_selectors.py @@ -21,7 +21,7 @@ try: COHERE_REFINE_TEMPLATE, COHERE_TREE_SUMMARIZE_TEMPLATE, COHERE_REFINE_TABLE_CONTEXT_PROMPT, - ) + ) # pants: no-infer-dep except ImportError: COHERE_QA_TEMPLATE = None COHERE_REFINE_TEMPLATE = None diff --git a/llama-index-core/llama_index/core/storage/storage_context.py b/llama-index-core/llama_index/core/storage/storage_context.py index 5dd78801b..b24b9f647 100644 --- a/llama-index-core/llama_index/core/storage/storage_context.py +++ b/llama-index-core/llama_index/core/storage/storage_context.py @@ -38,7 +38,6 @@ from llama_index.core.vector_stores.simple import ( ) from llama_index.core.vector_stores.types import ( BasePydanticVectorStore, - VectorStore, ) DEFAULT_PERSIST_DIR = "./storage" @@ -54,14 +53,14 @@ class StorageContext: indices, and vectors. 
It contains the following: - docstore: BaseDocumentStore - index_store: BaseIndexStore - - vector_store: VectorStore + - vector_store: BasePydanticVectorStore - graph_store: GraphStore """ docstore: BaseDocumentStore index_store: BaseIndexStore - vector_stores: Dict[str, VectorStore] + vector_stores: Dict[str, BasePydanticVectorStore] graph_store: GraphStore @classmethod @@ -69,11 +68,9 @@ class StorageContext: cls, docstore: Optional[BaseDocumentStore] = None, index_store: Optional[BaseIndexStore] = None, - vector_store: Optional[Union[VectorStore, BasePydanticVectorStore]] = None, - image_store: Optional[VectorStore] = None, - vector_stores: Optional[ - Dict[str, Union[VectorStore, BasePydanticVectorStore]] - ] = None, + vector_store: Optional[BasePydanticVectorStore] = None, + image_store: Optional[BasePydanticVectorStore] = None, + vector_stores: Optional[Dict[str, BasePydanticVectorStore]] = None, graph_store: Optional[GraphStore] = None, persist_dir: Optional[str] = None, fs: Optional[fsspec.AbstractFileSystem] = None, @@ -83,9 +80,9 @@ class StorageContext: Args: docstore (Optional[BaseDocumentStore]): document store index_store (Optional[BaseIndexStore]): index store - vector_store (Optional[VectorStore]): vector store + vector_store (Optional[BasePydanticVectorStore]): vector store graph_store (Optional[GraphStore]): graph store - image_store (Optional[VectorStore]): image store + image_store (Optional[BasePydanticVectorStore]): image store """ if persist_dir is None: @@ -214,7 +211,7 @@ class StorageContext: index_store = SimpleIndexStore.from_dict(save_dict[INDEX_STORE_KEY]) graph_store = SimpleGraphStore.from_dict(save_dict[GRAPH_STORE_KEY]) - vector_stores: Dict[str, VectorStore] = {} + vector_stores: Dict[str, BasePydanticVectorStore] = {} for key, vector_store_dict in save_dict[VECTOR_STORE_KEY].items(): vector_stores[key] = SimpleVectorStore.from_dict(vector_store_dict) @@ -226,10 +223,12 @@ class StorageContext: ) @property - def vector_store(self) 
-> VectorStore: + def vector_store(self) -> BasePydanticVectorStore: """Backwrds compatibility for vector_store property.""" return self.vector_stores[DEFAULT_VECTOR_STORE] - def add_vector_store(self, vector_store: VectorStore, namespace: str) -> None: + def add_vector_store( + self, vector_store: BasePydanticVectorStore, namespace: str + ) -> None: """Add a vector store to the storage context.""" self.vector_stores[namespace] = vector_store diff --git a/llama-index-core/llama_index/core/vector_stores/simple.py b/llama-index-core/llama_index/core/vector_stores/simple.py index bd4d564ef..9b7272447 100644 --- a/llama-index-core/llama_index/core/vector_stores/simple.py +++ b/llama-index-core/llama_index/core/vector_stores/simple.py @@ -8,6 +8,7 @@ from typing import Any, Callable, Dict, List, Mapping, Optional, cast import fsspec from dataclasses_json import DataClassJsonMixin +from llama_index.core.bridge.pydantic import Field, PrivateAttr from llama_index.core.indices.query.embedding_utils import ( get_top_k_embeddings, get_top_k_embeddings_learner, @@ -18,9 +19,9 @@ from llama_index.core.utils import concat_dirs from llama_index.core.vector_stores.types import ( DEFAULT_PERSIST_DIR, DEFAULT_PERSIST_FNAME, + BasePydanticVectorStore, MetadataFilters, FilterCondition, - VectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -95,7 +96,7 @@ class SimpleVectorStoreData(DataClassJsonMixin): metadata_dict: Dict[str, Any] = field(default_factory=dict) -class SimpleVectorStore(VectorStore): +class SimpleVectorStore(BasePydanticVectorStore): """Simple Vector Store. In this vector store, embeddings are stored within a simple, in-memory dictionary. 
@@ -108,6 +109,9 @@ class SimpleVectorStore(VectorStore): stores_text: bool = False + data: SimpleVectorStoreData = Field(default_factory=SimpleVectorStoreData) + _fs: fsspec.AbstractFileSystem = PrivateAttr() + def __init__( self, data: Optional[SimpleVectorStoreData] = None, @@ -115,7 +119,7 @@ class SimpleVectorStore(VectorStore): **kwargs: Any, ) -> None: """Initialize params.""" - self._data = data or SimpleVectorStoreData() + super().__init__(data=data or SimpleVectorStoreData()) self._fs = fs or fsspec.filesystem("file") @classmethod @@ -142,11 +146,11 @@ class SimpleVectorStore(VectorStore): cls, persist_dir: str = DEFAULT_PERSIST_DIR, fs: Optional[fsspec.AbstractFileSystem] = None, - ) -> Dict[str, VectorStore]: + ) -> Dict[str, BasePydanticVectorStore]: """Load from namespaced persist dir.""" listing_fn = os.listdir if fs is None else fs.listdir - vector_stores: Dict[str, VectorStore] = {} + vector_stores: Dict[str, BasePydanticVectorStore] = {} try: for fname in listing_fn(persist_dir): @@ -176,14 +180,32 @@ class SimpleVectorStore(VectorStore): return vector_stores + @classmethod + def class_name(cls) -> str: + """Class name.""" + return "SimpleVectorStore" + @property def client(self) -> None: """Get client.""" return + @property + def _data(self) -> SimpleVectorStoreData: + """Backwards compatibility.""" + return self.data + def get(self, text_id: str) -> List[float]: """Get embedding.""" - return self._data.embedding_dict[text_id] + return self.data.embedding_dict[text_id] + + def get_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + ) -> List[BaseNode]: + """Get nodes.""" + raise NotImplementedError("SimpleVectorStore does not store nodes directly.") def add( self, @@ -192,14 +214,14 @@ class SimpleVectorStore(VectorStore): ) -> List[str]: """Add nodes to index.""" for node in nodes: - self._data.embedding_dict[node.node_id] = node.get_embedding() - self._data.text_id_to_ref_doc_id[node.node_id] = 
node.ref_doc_id or "None" + self.data.embedding_dict[node.node_id] = node.get_embedding() + self.data.text_id_to_ref_doc_id[node.node_id] = node.ref_doc_id or "None" metadata = node_to_metadata_dict( node, remove_text=True, flat_metadata=False ) metadata.pop("_node_content", None) - self._data.metadata_dict[node.node_id] = metadata + self.data.metadata_dict[node.node_id] = metadata return [node.node_id for node in nodes] def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: @@ -211,18 +233,49 @@ class SimpleVectorStore(VectorStore): """ text_ids_to_delete = set() - for text_id, ref_doc_id_ in self._data.text_id_to_ref_doc_id.items(): + for text_id, ref_doc_id_ in self.data.text_id_to_ref_doc_id.items(): if ref_doc_id == ref_doc_id_: text_ids_to_delete.add(text_id) for text_id in text_ids_to_delete: - del self._data.embedding_dict[text_id] - del self._data.text_id_to_ref_doc_id[text_id] + del self.data.embedding_dict[text_id] + del self.data.text_id_to_ref_doc_id[text_id] # Handle metadata_dict not being present in stores that were persisted # without metadata, or, not being present for nodes stored # prior to metadata functionality. 
- if self._data.metadata_dict is not None: - self._data.metadata_dict.pop(text_id, None) + if self.data.metadata_dict is not None: + self.data.metadata_dict.pop(text_id, None) + + def delete_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + **delete_kwargs: Any, + ) -> None: + filter_fn = _build_metadata_filter_fn( + lambda node_id: self.data.metadata_dict[node_id], filters + ) + + if node_ids is not None: + node_id_set = set(node_ids) + + def node_filter_fn(node_id: str) -> bool: + return node_id in node_id_set and filter_fn(node_id) + + else: + + def node_filter_fn(node_id: str) -> bool: + return filter_fn(node_id) + + for node_id in list(self.data.embedding_dict.keys()): + if node_filter_fn(node_id): + del self.data.embedding_dict[node_id] + del self.data.text_id_to_ref_doc_id[node_id] + self.data.metadata_dict.pop(node_id, None) + + def clear(self) -> None: + """Clear the store.""" + self.data = SimpleVectorStoreData() def query( self, @@ -233,8 +286,8 @@ class SimpleVectorStore(VectorStore): # Prevent metadata filtering on stores that were persisted without metadata. if ( query.filters is not None - and self._data.embedding_dict - and not self._data.metadata_dict + and self.data.embedding_dict + and not self.data.metadata_dict ): raise ValueError( "Cannot filter stores that were persisted without metadata. " @@ -242,7 +295,7 @@ class SimpleVectorStore(VectorStore): ) # Prefilter nodes based on the query filter and node ID restrictions. 
query_filter_fn = _build_metadata_filter_fn( - lambda node_id: self._data.metadata_dict[node_id], query.filters + lambda node_id: self.data.metadata_dict[node_id], query.filters ) if query.node_ids is not None: @@ -259,7 +312,7 @@ class SimpleVectorStore(VectorStore): node_ids = [] embeddings = [] # TODO: consolidate with get_query_text_embedding_similarities - for node_id, embedding in self._data.embedding_dict.items(): + for node_id, embedding in self.data.embedding_dict.items(): if node_filter_fn(node_id) and query_filter_fn(node_id): node_ids.append(node_id) embeddings.append(embedding) @@ -306,7 +359,7 @@ class SimpleVectorStore(VectorStore): fs.makedirs(dirpath) with fs.open(persist_path, "w") as f: - json.dump(self._data.to_dict(), f) + json.dump(self.data.to_dict(), f) @classmethod def from_persist_path( @@ -331,4 +384,4 @@ class SimpleVectorStore(VectorStore): return cls(data) def to_dict(self) -> dict: - return self._data.to_dict() + return self.data.to_dict() diff --git a/llama-index-core/llama_index/core/vector_stores/types.py b/llama-index-core/llama_index/core/vector_stores/types.py index 3a4952c07..9345b5046 100644 --- a/llama-index-core/llama_index/core/vector_stores/types.py +++ b/llama-index-core/llama_index/core/vector_stores/types.py @@ -316,11 +316,30 @@ class BasePydanticVectorStore(BaseComponent, ABC): stores_text: bool is_embedding_query: bool = True + class Config: + arbitrary_types_allowed = True + @property @abstractmethod def client(self) -> Any: """Get client.""" + def get_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + ) -> List[BaseNode]: + """Get nodes from vector store.""" + raise NotImplementedError("get_nodes not implemented") + + async def aget_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + ) -> List[BaseNode]: + """Asynchronously get nodes from vector store.""" + return self.get_nodes(node_ids, filters) + @abstractmethod 
def add( self, @@ -353,6 +372,32 @@ class BasePydanticVectorStore(BaseComponent, ABC): """ self.delete(ref_doc_id, **delete_kwargs) + def delete_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + **delete_kwargs: Any, + ) -> None: + """Delete nodes from vector store.""" + raise NotImplementedError("delete_nodes not implemented") + + async def adelete_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + **delete_kwargs: Any, + ) -> None: + """Asynchronously delete nodes from vector store.""" + self.delete_nodes(node_ids, filters) + + def clear(self) -> None: + """Clear all nodes from configured vector store.""" + raise NotImplementedError("clear not implemented") + + async def aclear(self) -> None: + """Asynchronously clear all nodes from configured vector store.""" + self.clear() + @abstractmethod def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: """Query vector store.""" diff --git a/llama-index-core/pyproject.toml b/llama-index-core/pyproject.toml index cd2fdd700..d0aa3df51 100644 --- a/llama-index-core/pyproject.toml +++ b/llama-index-core/pyproject.toml @@ -47,12 +47,10 @@ version = "0.10.36" [tool.poetry.dependencies] SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"} -beautifulsoup4 = {optional = true, version = "^4.12.2"} dataclasses-json = "*" deprecated = ">=1.2.9.3" fsspec = ">=2023.5.0" httpx = "*" -langchain = {optional = true, version = ">=0.0.303"} nest-asyncio = "^1.5.8" nltk = "^3.8.1" numpy = "*" @@ -64,21 +62,10 @@ tiktoken = ">=0.3.3" typing-extensions = ">=4.5.0" typing-inspect = ">=0.8.0" requests = ">=2.31.0" # Pin to avoid CVE-2023-32681 in requests 2.3 to 2.30 -gradientai = {optional = true, version = ">=1.4.0"} -asyncpg = {optional = true, version = "^0.29.0"} -pgvector = {optional = true, version = "^0.2.4"} -optimum = {extras = ["onnxruntime"], optional = true, version = "^1.13.2"} -sentencepiece = 
{optional = true, version = "^0.1.99"} -transformers = {extras = ["torch"], optional = true, version = "^4.33.1"} -guidance = {optional = true, version = "^0.0.64"} -lm-format-enforcer = {optional = true, version = "^0.4.3"} jsonpath-ng = {optional = true, version = "^1.6.0"} -rank-bm25 = {optional = true, version = "^0.2.2"} -scikit-learn = {optional = true, version = "*"} spacy = {optional = true, version = "^3.7.1"} aiohttp = "^3.8.6" networkx = ">=3.0" -psycopg2-binary = {optional = true, version = "^2.9.9"} dirtyjson = "^1.0.8" tqdm = "^4.66.1" pillow = ">=9.0.0" @@ -86,49 +73,18 @@ PyYAML = ">=6.0.1" llamaindex-py-client = "^0.1.18" wrapt = "*" -[tool.poetry.extras] -gradientai = [ - "gradientai", -] -html = [ - "beautifulsoup4", -] -langchain = [ - "langchain", -] -local_models = [ - "optimum", - "sentencepiece", - "transformers", -] -postgres = [ - "asyncpg", - "pgvector", - "psycopg2-binary", -] -query_tools = [ - "guidance", - "jsonpath-ng", - "lm-format-enforcer", - "rank-bm25", - "scikit-learn", - "spacy", -] - [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = ">=23.7.0,<=24.3.0"} boto3 = "1.33.6" # needed for tests botocore = ">=1.33.13" codespell = {extras = ["toml"], version = ">=v2.2.6"} docker = "^7.0.0" -google-ai-generativelanguage = {python = ">=3.9,<3.12", version = "^0.4.0"} ipython = "8.10.0" jupyter = "^1.0.0" motor = "^3.3.2" mypy = "0.991" pre-commit = "3.2.0" pylint = "2.15.10" -pymongo = "^4.5.0" # needed for tests pypdf = "*" pytest = "7.2.1" pytest-asyncio = "0.21.0" @@ -136,6 +92,7 @@ pytest-dotenv = "0.5.2" pytest-mock = "3.11.1" rake-nltk = "1.0.6" ruff = "0.0.292" +tree-sitter = "0.20.0" # 0.22 seems to break for now tree-sitter-languages = "1.9.1" types-Deprecated = ">=0.1.0" types-PyYAML = "^6.0.12.12" diff --git a/llama-index-core/tests/output_parsers/test_base.py b/llama-index-core/tests/output_parsers/test_base.py index 6fb0defd2..23d96ac7f 100644 --- 
a/llama-index-core/tests/output_parsers/test_base.py +++ b/llama-index-core/tests/output_parsers/test_base.py @@ -5,7 +5,7 @@ import pytest from llama_index.core.output_parsers.langchain import LangchainOutputParser try: - import langchain + import langchain # pants: no-infer-dep from llama_index.core.bridge.langchain import ( BaseOutputParser as LCOutputParser, ) diff --git a/llama-index-core/tests/text_splitter/BUILD b/llama-index-core/tests/text_splitter/BUILD index d20ffc39f..bd8c21321 100644 --- a/llama-index-core/tests/text_splitter/BUILD +++ b/llama-index-core/tests/text_splitter/BUILD @@ -4,4 +4,5 @@ python_test_utils( python_tests( name="tests", + dependencies=["llama-index-core/pyproject.toml:poetry#tree-sitter"] ) diff --git a/llama-index-core/tests/text_splitter/test_code_splitter.py b/llama-index-core/tests/text_splitter/test_code_splitter.py index 6ebde6e1d..a16cf5897 100644 --- a/llama-index-core/tests/text_splitter/test_code_splitter.py +++ b/llama-index-core/tests/text_splitter/test_code_splitter.py @@ -2,6 +2,7 @@ import os from typing import List +import tree_sitter # noqa from llama_index.core.schema import Document, MetadataMode, TextNode from llama_index.core.text_splitter import CodeSplitter diff --git a/llama-index-core/tests/tools/test_base.py b/llama-index-core/tests/tools/test_base.py index 62c957b60..fec2b9a09 100644 --- a/llama-index-core/tests/tools/test_base.py +++ b/llama-index-core/tests/tools/test_base.py @@ -7,7 +7,7 @@ from llama_index.core.bridge.pydantic import BaseModel from llama_index.core.tools.function_tool import FunctionTool try: - import langchain + import langchain # pants: no-infer-dep except ImportError: langchain = None # type: ignore diff --git a/llama-index-core/tests/tools/test_ondemand_loader.py b/llama-index-core/tests/tools/test_ondemand_loader.py index e6bb26bef..30ee946a9 100644 --- a/llama-index-core/tests/tools/test_ondemand_loader.py +++ b/llama-index-core/tests/tools/test_ondemand_loader.py @@ -5,7 +5,7 
@@ from typing import List import pytest try: - import langchain + import langchain # pants: no-infer-dep except ImportError: langchain = None # type: ignore diff --git a/llama-index-core/tests/vector_stores/test_simple.py b/llama-index-core/tests/vector_stores/test_simple.py index 213d3fd2e..00f098429 100644 --- a/llama-index-core/tests/vector_stores/test_simple.py +++ b/llama-index-core/tests/vector_stores/test_simple.py @@ -181,3 +181,21 @@ class SimpleVectorStoreTest(unittest.TestCase): ) result = simple_vector_store.query(query) self.assertEqual(len(result.ids), 0) + + def test_clear(self) -> None: + simple_vector_store = SimpleVectorStore() + simple_vector_store.add(_node_embeddings_for_test()) + simple_vector_store.clear() + query = VectorStoreQuery(query_embedding=[1.0, 1.0], similarity_top_k=3) + result = simple_vector_store.query(query) + self.assertEqual(len(result.ids), 0) + + def test_delete_nodes(self) -> None: + simple_vector_store = SimpleVectorStore() + simple_vector_store.add(_node_embeddings_for_test()) + simple_vector_store.delete_nodes( + [_NODE_ID_WEIGHT_1_RANK_A, _NODE_ID_WEIGHT_2_RANK_C] + ) + query = VectorStoreQuery(query_embedding=[1.0, 1.0], similarity_top_k=3) + result = simple_vector_store.query(query) + self.assertEqual(result.ids, [_NODE_ID_WEIGHT_3_RANK_C]) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/BUILD deleted file mode 100644 index dabf212d7..000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git 
a/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/test_azure_openai.py b/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/test_azure_openai.py deleted file mode 100644 index 38fc4bf79..000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/test_azure_openai.py +++ /dev/null @@ -1,19 +0,0 @@ -from unittest.mock import MagicMock, patch - -import httpx -from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding - - -@patch("llama_index.embeddings.azure_openai.base.AzureOpenAI") -def test_custom_http_client(azure_openai_mock: MagicMock) -> None: - """ - Verify that a custom http_client set for AzureOpenAIEmbedding. - Should get passed on to the implementation from OpenAI. - """ - custom_http_client = httpx.Client() - embedding = AzureOpenAIEmbedding(http_client=custom_http_client, api_key="mock") - embedding._get_client() - azure_openai_mock.assert_called() - kwargs = azure_openai_mock.call_args.kwargs - assert "http_client" in kwargs - assert kwargs["http_client"] == custom_http_client diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/test_embeddings_azure_openai.py b/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/test_embeddings_azure_openai.py deleted file mode 100644 index f0ca7021f..000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-azure-openai/tests/test_embeddings_azure_openai.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.base.embeddings.base import BaseEmbedding -from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding - - -def test_azure_openai_embedding_class(): - names_of_base_classes = [b.__name__ for b in AzureOpenAIEmbedding.__mro__] - assert BaseEmbedding.__name__ in names_of_base_classes diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-cohere/BUILD 
b/llama-index-integrations/embeddings/llama-index-embeddings-cohere/BUILD index aea235c0c..0896ca890 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-cohere/BUILD +++ b/llama-index-integrations/embeddings/llama-index-embeddings-cohere/BUILD @@ -1,8 +1,3 @@ poetry_requirements( name="poetry", ) - -python_requirement( - requirements=[], - dependencies=["!llama-index-core:poetry#langchain","!llama-index-integrations/llms/llama-index-llms-cohere:poetry#cohere"] -) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/BUILD deleted file mode 100644 index dabf212d7..000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/test_fireworks_embedding.py b/llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/test_fireworks_embedding.py deleted file mode 100644 index be882c0f5..000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-fireworks/tests/test_fireworks_embedding.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.base.embeddings.base import BaseEmbedding -from llama_index.embeddings.fireworks import FireworksEmbedding - - -def test_fireworks_class(): - emb = FireworksEmbedding() - assert isinstance(emb, BaseEmbedding) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-gemini/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-gemini/tests/BUILD index 619cac15f..a1431c235 100644 --- 
a/llama-index-integrations/embeddings/llama-index-embeddings-gemini/tests/BUILD +++ b/llama-index-integrations/embeddings/llama-index-embeddings-gemini/tests/BUILD @@ -1,3 +1,3 @@ python_tests( - interpreter_constraints=["==3.9.*", "==3.10.*"], + interpreter_constraints=["==3.10.*"], ) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-google/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-google/tests/BUILD index 619cac15f..a1431c235 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-google/tests/BUILD +++ b/llama-index-integrations/embeddings/llama-index-embeddings-google/tests/BUILD @@ -1,3 +1,3 @@ python_tests( - interpreter_constraints=["==3.9.*", "==3.10.*"], + interpreter_constraints=["==3.10.*"], ) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/BUILD deleted file mode 100644 index dabf212d7..000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/test_embeddings_octoai.py b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/test_embeddings_octoai.py deleted file mode 100644 index 01dcbe518..000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/tests/test_embeddings_octoai.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.base.embeddings.base import BaseEmbedding -from llama_index.embeddings.octoai import OctoAIEmbedding - - -def test_fireworks_class(): - emb = OctoAIEmbedding() - assert isinstance(emb, BaseEmbedding) diff --git 
a/llama-index-integrations/embeddings/llama-index-embeddings-openai/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-openai/tests/BUILD index dabf212d7..0feb181a9 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-openai/tests/BUILD +++ b/llama-index-integrations/embeddings/llama-index-embeddings-openai/tests/BUILD @@ -1 +1,3 @@ -python_tests() +python_tests( + dependencies=["llama-index-integrations/embeddings/llama-index-embeddings-huggingface/llama_index/embeddings/huggingface/base.py"] +) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-openai/tests/test_openai_utils.py b/llama-index-integrations/embeddings/llama-index-embeddings-openai/tests/test_openai_utils.py deleted file mode 100644 index ee7d92afd..000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-openai/tests/test_openai_utils.py +++ /dev/null @@ -1,55 +0,0 @@ -from pytest import MonkeyPatch -from typing import Any, Dict - -from llama_index.core.embeddings.mock_embed_model import MockEmbedding -from llama_index.core.embeddings.utils import resolve_embed_model -from llama_index.embeddings.huggingface import ( - HuggingFaceEmbedding, -) -from llama_index.embeddings.openai import OpenAIEmbedding - - -def mock_hf_embeddings(self: Any, *args: Any, **kwargs: Dict[str, Any]) -> Any: - """Mock HuggingFaceEmbeddings.""" - super(HuggingFaceEmbedding, self).__init__( - model_name="fake", - tokenizer_name="fake", - model="fake", - tokenizer="fake", - ) - return - - -def mock_openai_embeddings(self: Any, *args: Any, **kwargs: Dict[str, Any]) -> Any: - """Mock OpenAIEmbedding.""" - super(OpenAIEmbedding, self).__init__( - api_key="fake", api_base="fake", api_version="fake" - ) - return - - -def test_resolve_embed_model(monkeypatch: MonkeyPatch) -> None: - monkeypatch.setattr( - "llama_index.embeddings.huggingface.HuggingFaceEmbedding.__init__", - mock_hf_embeddings, - ) - monkeypatch.setattr( - 
"llama_index.embeddings.openai.OpenAIEmbedding.__init__", - mock_openai_embeddings, - ) - - # Test None - embed_model = resolve_embed_model(None) - assert isinstance(embed_model, MockEmbedding) - - # Test str - embed_model = resolve_embed_model("local") - assert isinstance(embed_model, HuggingFaceEmbedding) - - # Test LCEmbeddings - embed_model = resolve_embed_model(HuggingFaceEmbedding()) - assert isinstance(embed_model, HuggingFaceEmbedding) - - # Test BaseEmbedding - embed_model = resolve_embed_model(OpenAIEmbedding()) - assert isinstance(embed_model, OpenAIEmbedding) diff --git a/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml index 1c41c1934..2ec643b58 100644 --- a/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-cohere/pyproject.toml @@ -32,7 +32,7 @@ version = "0.2.0" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.10.36" -cohere = "^5.1.2" +cohere = "^5.1.1" [tool.poetry.group.dev.dependencies] ipython = "8.10.0" diff --git a/llama-index-integrations/llms/llama-index-llms-cohere/tests/BUILD b/llama-index-integrations/llms/llama-index-llms-cohere/tests/BUILD index dabf212d7..a1431c235 100644 --- a/llama-index-integrations/llms/llama-index-llms-cohere/tests/BUILD +++ b/llama-index-integrations/llms/llama-index-llms-cohere/tests/BUILD @@ -1 +1,3 @@ -python_tests() +python_tests( + interpreter_constraints=["==3.10.*"], +) diff --git a/llama-index-integrations/llms/llama-index-llms-gemini/tests/BUILD b/llama-index-integrations/llms/llama-index-llms-gemini/tests/BUILD index 619cac15f..a1431c235 100644 --- a/llama-index-integrations/llms/llama-index-llms-gemini/tests/BUILD +++ b/llama-index-integrations/llms/llama-index-llms-gemini/tests/BUILD @@ -1,3 +1,3 @@ python_tests( - interpreter_constraints=["==3.9.*", "==3.10.*"], + interpreter_constraints=["==3.10.*"], ) diff --git 
a/llama-index-integrations/llms/llama-index-llms-palm/tests/BUILD b/llama-index-integrations/llms/llama-index-llms-palm/tests/BUILD index 619cac15f..a1431c235 100644 --- a/llama-index-integrations/llms/llama-index-llms-palm/tests/BUILD +++ b/llama-index-integrations/llms/llama-index-llms-palm/tests/BUILD @@ -1,3 +1,3 @@ python_tests( - interpreter_constraints=["==3.9.*", "==3.10.*"], + interpreter_constraints=["==3.10.*"], ) diff --git a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-gemini/tests/BUILD b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-gemini/tests/BUILD index 619cac15f..a1431c235 100644 --- a/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-gemini/tests/BUILD +++ b/llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-gemini/tests/BUILD @@ -1,3 +1,3 @@ python_tests( - interpreter_constraints=["==3.9.*", "==3.10.*"], + interpreter_constraints=["==3.10.*"], ) diff --git a/llama-index-integrations/readers/llama-index-readers-guru/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-guru/pyproject.toml index 4bb0101e2..6314b3a34 100644 --- a/llama-index-integrations/readers/llama-index-readers-guru/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-guru/pyproject.toml @@ -29,11 +29,12 @@ license = "MIT" maintainers = ["mcclain-thiel"] name = "llama-index-readers-guru" readme = "README.md" -version = "0.1.3" +version = "0.1.4" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.10.1" +bs4 = "*" [tool.poetry.group.dev.dependencies] ipython = "8.10.0" diff --git a/llama-index-integrations/readers/llama-index-readers-pebblo/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-pebblo/pyproject.toml index 8575ac253..97260b3d6 100644 --- a/llama-index-integrations/readers/llama-index-readers-pebblo/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-pebblo/pyproject.toml @@ 
-30,12 +30,12 @@ license = "MIT" name = "llama-index-readers-pebblo" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.1.0" +version = "0.1.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.10.0" -llama-index = "^0.10.20" +langchain = ">=0.0.303" requests = "^2" [tool.poetry.group.dev.dependencies] diff --git a/llama-index-integrations/readers/llama-index-readers-pebblo/tests/test_readers_pebblo.py b/llama-index-integrations/readers/llama-index-readers-pebblo/tests/test_readers_pebblo.py index 1d9fa6a14..14e08a9a9 100644 --- a/llama-index-integrations/readers/llama-index-readers-pebblo/tests/test_readers_pebblo.py +++ b/llama-index-integrations/readers/llama-index-readers-pebblo/tests/test_readers_pebblo.py @@ -1,5 +1,7 @@ import pytest import os + +import langchain # noqa from llama_index.core.readers.base import BaseReader from llama_index.readers.pebblo import PebbloSafeReader from pathlib import Path diff --git a/llama-index-integrations/readers/llama-index-readers-wordlift/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-wordlift/pyproject.toml index b8d0c9f7c..527d3531d 100644 --- a/llama-index-integrations/readers/llama-index-readers-wordlift/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-wordlift/pyproject.toml @@ -29,13 +29,14 @@ license = "MIT" maintainers = ["msftwarelab"] name = "llama-index-readers-wordlift" readme = "README.md" -version = "0.1.3" +version = "0.1.4" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" llama-index-core = "^0.10.1" langchain = "^0.1.4" graphql-core = "^3.2.3" +bs4 = "*" [tool.poetry.group.dev.dependencies] ipython = "8.10.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/llama_index/vector_stores/alibabacloud_opensearch/base.py 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/llama_index/vector_stores/alibabacloud_opensearch/base.py index e8d53298c..34a0ca044 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/llama_index/vector_stores/alibabacloud_opensearch/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/llama_index/vector_stores/alibabacloud_opensearch/base.py @@ -5,13 +5,13 @@ import logging import asyncio from typing import Any, List, Dict, Optional +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, TextNode - from llama_index.core.vector_stores.types import ( MetadataFilters, FilterOperator, FilterCondition, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -149,7 +149,7 @@ class AlibabaCloudOpenSearchConfig: return getattr(self, item) -class AlibabaCloudOpenSearchStore(VectorStore): +class AlibabaCloudOpenSearchStore(BasePydanticVectorStore): """The AlibabaCloud OpenSearch Vector Store. 
In this vector store we store the text, its embedding and its metadata @@ -188,8 +188,13 @@ class AlibabaCloudOpenSearchStore(VectorStore): stores_text: bool = True flat_metadata: bool = True + _client: Any = PrivateAttr() + _config: AlibabaCloudOpenSearchConfig = PrivateAttr() + def __init__(self, config: AlibabaCloudOpenSearchConfig) -> None: """Initialize params.""" + super().__init__() + self._config = config self._client = client.Client( models.Config( @@ -200,6 +205,11 @@ class AlibabaCloudOpenSearchStore(VectorStore): ) ) + @classmethod + def class_name(cls) -> str: + """Class name.""" + return "AlibabaCloudOpenSearchStore" + @property def client(self) -> Any: """Get client.""" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/pyproject.toml index 3bfa1f960..18a3f1083 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-alibabacloud-opensearch" readme = "README.md" -version = "0.1.0" +version = "0.1.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/tests/test_vector_stores_alibabacloud_opensearch.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/tests/test_vector_stores_alibabacloud_opensearch.py index 76ac57f22..de9564fd7 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/tests/test_vector_stores_alibabacloud_opensearch.py +++ 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-alibabacloud-opensearch/tests/test_vector_stores_alibabacloud_opensearch.py @@ -1,4 +1,4 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.alibabacloud_opensearch import ( AlibabaCloudOpenSearchStore, ) @@ -6,4 +6,4 @@ from llama_index.vector_stores.alibabacloud_opensearch import ( def test_class(): names_of_base_classes = [b.__name__ for b in AlibabaCloudOpenSearchStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/llama_index/vector_stores/awadb/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/llama_index/vector_stores/awadb/base.py index 45ad491b4..bacb965ae 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/llama_index/vector_stores/awadb/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/llama_index/vector_stores/awadb/base.py @@ -8,9 +8,10 @@ import logging import uuid from typing import Any, List, Optional, Set +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -23,7 +24,7 @@ from llama_index.core.vector_stores.utils import ( logger = logging.getLogger(__name__) -class AwaDBVectorStore(VectorStore): +class AwaDBVectorStore(BasePydanticVectorStore): """AwaDB vector store. In this vector store, embeddings are stored within a AwaDB table. 
@@ -46,10 +47,12 @@ class AwaDBVectorStore(VectorStore): stores_text: bool = True DEFAULT_TABLE_NAME = "llamaindex_awadb" + _awadb_client: Any = PrivateAttr() + @property def client(self) -> Any: """Get AwaDB client.""" - return self.awadb_client + return self._awadb_client def __init__( self, @@ -70,21 +73,27 @@ class AwaDBVectorStore(VectorStore): Returns: None. """ + super().__init__() + import_err_msg = "`awadb` package not found, please run `pip install awadb`" try: import awadb except ImportError: raise ImportError(import_err_msg) if log_and_data_dir is not None: - self.awadb_client = awadb.Client(log_and_data_dir) + self._awadb_client = awadb.Client(log_and_data_dir) else: - self.awadb_client = awadb.Client() + self._awadb_client = awadb.Client() if table_name == self.DEFAULT_TABLE_NAME: table_name += "_" table_name += str(uuid.uuid4()).split("-")[-1] - self.awadb_client.Create(table_name) + self._awadb_client.Create(table_name) + + @classmethod + def class_name(cls) -> str: + return "AwaDBVectorStore" def add( self, @@ -99,7 +108,7 @@ class AwaDBVectorStore(VectorStore): Returns: Added node ids """ - if not self.awadb_client: + if not self._awadb_client: raise ValueError("AwaDB client not initialized") embeddings = [] @@ -116,7 +125,7 @@ class AwaDBVectorStore(VectorStore): ids.append(node.node_id) texts.append(node.get_content(metadata_mode=MetadataMode.NONE) or "") - self.awadb_client.AddTexts( + self._awadb_client.AddTexts( "embedding_text", "text_embedding", texts, @@ -141,7 +150,7 @@ class AwaDBVectorStore(VectorStore): return ids: List[str] = [] ids.append(ref_doc_id) - self.awadb_client.Delete(ids) + self._awadb_client.Delete(ids) def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: """Query index for top k most similar nodes. 
@@ -158,7 +167,7 @@ class AwaDBVectorStore(VectorStore): meta_filters[filter.key] = filter.value not_include_fields: Set[str] = {"text_embedding"} - results = self.awadb_client.Search( + results = self._awadb_client.Search( query=query.query_embedding, topn=query.similarity_top_k, meta_filter=meta_filters, diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/pyproject.toml index 3ac9a0865..80c65f07f 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-awadb" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/tests/test_vector_stores_awadb.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/tests/test_vector_stores_awadb.py index ad53b289d..b36936eff 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/tests/test_vector_stores_awadb.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-awadb/tests/test_vector_stores_awadb.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.awadb import AwaDBVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in AwaDBVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/llama_index/vector_stores/awsdocdb/base.py 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/llama_index/vector_stores/awsdocdb/base.py index 22774ed11..94b411ec5 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/llama_index/vector_stores/awsdocdb/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/llama_index/vector_stores/awsdocdb/base.py @@ -8,10 +8,11 @@ from enum import Enum from typing import Any, Dict, List, Optional, cast import numpy as np +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.vector_stores.types import ( MetadataFilters, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -21,6 +22,8 @@ from llama_index.core.vector_stores.utils import ( node_to_metadata_dict, ) +from pymongo import MongoClient + logger = logging.getLogger(__name__) @@ -105,7 +108,7 @@ class DocDbIndex: # an index that does not exist) -class AWSDocDbVectorStore(VectorStore): +class AWSDocDbVectorStore(BasePydanticVectorStore): """AWS DocumentDB Vector Store. To use, you should have both: @@ -120,6 +123,16 @@ class AWSDocDbVectorStore(VectorStore): stores_text: bool = True flat_metadata: bool = True + _docdb_client: MongoClient = PrivateAttr() + _similarity_score: AWSDocDbVectorStoreSimilarityType = PrivateAttr() + _collection: Any = PrivateAttr() + _embedding_key: str = PrivateAttr() + _id_key: str = PrivateAttr() + _text_key: str = PrivateAttr() + _metadata_key: str = PrivateAttr() + _insert_kwargs: Dict = PrivateAttr() + _index_crud: DocDbIndex = PrivateAttr() + def __init__( self, docdb_client: Optional[Any] = None, @@ -148,11 +161,7 @@ class AWSDocDbVectorStore(VectorStore): the metadata for each document. insert_kwargs: The kwargs used during `insert`. 
""" - import_err_msg = "`pymongo` package not found, please run `pip install pymongo`" - try: - from pymongo import MongoClient - except ImportError: - raise ImportError(import_err_msg) + super().__init__() if docdb_client is not None: self._docdb_client = cast(MongoClient, docdb_client) @@ -167,6 +176,10 @@ class AWSDocDbVectorStore(VectorStore): self._insert_kwargs = insert_kwargs or {} self._index_crud = DocDbIndex(index_name, self._embedding_key, self._collection) + @classmethod + def class_name(cls) -> str: + return "AWSDocDbVectorStore" + def add( self, nodes: List[BaseNode], diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/pyproject.toml index ceecd8c59..68bf28471 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-awsdocdb" readme = "README.md" -version = "0.1.4" +version = "0.1.5" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/llama_index/vector_stores/bagel/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/llama_index/vector_stores/bagel/base.py index 7390bec65..55b9fcc80 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/llama_index/vector_stores/bagel/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/llama_index/vector_stores/bagel/base.py @@ -2,10 +2,11 @@ import logging import math from typing import Any, List +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.vector_stores.types import ( MetadataFilters, - 
VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -28,7 +29,7 @@ def _to_bagel_filter(standard_filters: MetadataFilters) -> dict: return filters -class BagelVectorStore(VectorStore): +class BagelVectorStore(BasePydanticVectorStore): """Vector store for Bagel. Examples: @@ -56,6 +57,8 @@ class BagelVectorStore(VectorStore): stores_text: bool = True flat_metadata: bool = True + _collection: Any = PrivateAttr() + def __init__(self, collection: Any, **kwargs: Any) -> None: """ Initialize BagelVectorStore. @@ -64,6 +67,8 @@ class BagelVectorStore(VectorStore): collection: Bagel collection. **kwargs: Additional arguments. """ + super().__init__() + try: from bagel.api.Cluster import Cluster except ImportError: @@ -74,6 +79,10 @@ class BagelVectorStore(VectorStore): self._collection = collection + @classmethod + def class_name(cls) -> str: + return "BagelVectorStore" + def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]: """ Add a list of nodes with embeddings to the vector store. 
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/pyproject.toml index 1ed6cf029..7be8b046e 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-bagel" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/tests/test_vector_stores_bagel.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/tests/test_vector_stores_bagel.py index 7e5bbe656..dc5f432aa 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/tests/test_vector_stores_bagel.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-bagel/tests/test_vector_stores_bagel.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.bagel import BagelVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in BagelVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/llama_index/vector_stores/baiduvectordb/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/llama_index/vector_stores/baiduvectordb/base.py index 4a8a00e41..62c1d9153 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/llama_index/vector_stores/baiduvectordb/base.py +++ 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/llama_index/vector_stores/baiduvectordb/base.py @@ -4,6 +4,7 @@ import json import time from typing import Any, Dict, List, Optional +from llama_index.core.bridge.pydantic import Field, PrivateAttr from llama_index.core.schema import ( BaseNode, NodeRelationship, @@ -12,7 +13,7 @@ from llama_index.core.schema import ( ) from llama_index.core.vector_stores.types import ( MetadataFilters, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -120,7 +121,7 @@ class TableParams: self.filter_fields = filter_fields -class BaiduVectorDB(VectorStore): +class BaiduVectorDB(BasePydanticVectorStore): """Baidu VectorDB as a vector store. In order to use this you need to have a database instance. @@ -135,7 +136,12 @@ class BaiduVectorDB(VectorStore): table_params (Optional[TableParams]): The table parameters for BaiduVectorDB """ - user_defined_fields: List[TableField] = [] + user_defined_fields: List[TableField] = Field(default_factory=list) + batch_size: int + + _vdb_client: Any = PrivateAttr() + _database: Any = PrivateAttr() + _table: Any = PrivateAttr() def __init__( self, @@ -148,11 +154,14 @@ class BaiduVectorDB(VectorStore): **kwargs: Any, ): """Init params.""" + super().__init__( + user_defined_fields=table_params.filter_fields, + batch_size=batch_size, + ) + self._init_client(endpoint, account, api_key) self._create_database_if_not_exists(database_name) self._create_table(table_params) - self.batch_size = batch_size - self.user_defined_fields = table_params.filter_fields @classmethod def class_name(cls) -> str: @@ -190,15 +199,15 @@ class BaiduVectorDB(VectorStore): endpoint=endpoint, connection_timeout_in_mills=DEFAULT_TIMEOUT_IN_MILLS, ) - self.vdb_client = pymochow.MochowClient(config) + self._vdb_client = pymochow.MochowClient(config) def _create_database_if_not_exists(self, database_name: str) -> None: - db_list = self.vdb_client.list_databases() + 
db_list = self._vdb_client.list_databases() if database_name in [db.database_name for db in db_list]: - self.database = self.vdb_client.database(database_name) + self._database = self._vdb_client.database(database_name) else: - self.database = self.vdb_client.create_database(database_name) + self._database = self._vdb_client.create_database(database_name) def _create_table(self, table_params: TableParams) -> None: import pymochow @@ -207,9 +216,9 @@ class BaiduVectorDB(VectorStore): raise ValueError(VALUE_NONE_ERROR.format("table_params")) try: - self.table = self.database.describe_table(table_params.table_name) + self._table = self._database.describe_table(table_params.table_name) if table_params.drop_exists: - self.database.drop_table(table_params.table_name) + self._database.drop_table(table_params.table_name) # wait db release resource time.sleep(5) self._create_table_in_db(table_params) @@ -264,7 +273,7 @@ class BaiduVectorDB(VectorStore): indexes.append(SecondaryIndex(index_name=index_name, field=field.name)) schema = Schema(fields=fields, indexes=indexes) - self.table = self.database.create_table( + self._table = self._database.create_table( table_name=table_params.table_name, replication=table_params.replication, partition=Partition(partition_num=table_params.partition), @@ -364,12 +373,12 @@ class BaiduVectorDB(VectorStore): rows = [] if len(rows) > 0: - self.table.upsert(rows=rows) + self._table.upsert(rows=rows) - self.table.rebuild_index(INDEX_VECTOR) + self._table.rebuild_index(INDEX_VECTOR) while True: time.sleep(2) - index = self.table.describe_index(INDEX_VECTOR) + index = self._table.describe_index(INDEX_VECTOR) if index.state == IndexState.NORMAL: break @@ -406,7 +415,7 @@ class BaiduVectorDB(VectorStore): params=HNSWSearchParams(ef=DEFAULT_HNSW_EF, limit=query.similarity_top_k), filter=search_filter, ) - res = self.table.search(anns=anns, retrieve_vector=True) + res = self._table.search(anns=anns, retrieve_vector=True) rows = res.rows if rows is 
None or len(rows) == 0: return VectorStoreQueryResult(nodes=[], similarities=[], ids=[]) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/pyproject.toml index 1e0059305..f8002baa1 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/pyproject.toml @@ -30,7 +30,7 @@ license = "MIT" name = "llama-index-vector-stores-baiduvectordb" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.1.0" +version = "0.1.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/tests/test_vector_stores_baiduvectordb.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/tests/test_vector_stores_baiduvectordb.py index 8371b5534..99228664d 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/tests/test_vector_stores_baiduvectordb.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb/tests/test_vector_stores_baiduvectordb.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.baiduvectordb import BaiduVectorDB def test_class(): names_of_base_classes = [b.__name__ for b in BaiduVectorDB.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/base.py 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/base.py index fc3a8855f..0dbe5355c 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/base.py @@ -4,6 +4,8 @@ import os from typing import Any, Dict, List, Optional import requests + +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import ( BaseNode, MetadataMode, @@ -13,7 +15,7 @@ from llama_index.core.schema import ( ) from llama_index.core.utils import get_tqdm_iterable from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -53,7 +55,7 @@ def convert_docs_to_json(nodes: List[BaseNode]) -> List[Dict]: return docs -class ChatGPTRetrievalPluginClient(VectorStore): +class ChatGPTRetrievalPluginClient(BasePydanticVectorStore): """ChatGPT Retrieval Plugin Client. In this client, we make use of the endpoints defined by ChatGPT. 
@@ -68,6 +70,12 @@ class ChatGPTRetrievalPluginClient(VectorStore): stores_text: bool = True is_embedding_query: bool = False + _endpoint_url: str = PrivateAttr() + _bearer_token: Optional[str] = PrivateAttr() + _retries: Optional[Retry] = PrivateAttr() + _batch_size: int = PrivateAttr() + _s: requests.Session = PrivateAttr() + def __init__( self, endpoint_url: str, @@ -77,6 +85,8 @@ class ChatGPTRetrievalPluginClient(VectorStore): **kwargs: Any, ) -> None: """Initialize params.""" + super().__init__() + self._endpoint_url = endpoint_url self._bearer_token = bearer_token or os.getenv("BEARER_TOKEN") self._retries = retries @@ -85,6 +95,10 @@ class ChatGPTRetrievalPluginClient(VectorStore): self._s = requests.Session() self._s.mount("http://", HTTPAdapter(max_retries=self._retries)) + @classmethod + def class_name(cls) -> str: + return "ChatGPTRetrievalPluginClient" + @property def client(self) -> None: """Get client.""" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/pyproject.toml index aef252afb..c0feb7ec5 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-chatgpt-plugin" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/test_vector_stores_chatgpt_plugin.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/test_vector_stores_chatgpt_plugin.py index ed9d774e4..a119f0263 100644 --- 
a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/test_vector_stores_chatgpt_plugin.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/test_vector_stores_chatgpt_plugin.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.chatgpt_plugin import ChatGPTRetrievalPluginClient def test_class(): names_of_base_classes = [b.__name__ for b in ChatGPTRetrievalPluginClient.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/llama_index/vector_stores/chroma/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/llama_index/vector_stores/chroma/base.py index d47dc4a76..5a6f00884 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/llama_index/vector_stores/chroma/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/llama_index/vector_stores/chroma/base.py @@ -144,7 +144,7 @@ class ChromaVectorStore(BasePydanticVectorStore): persist_dir: Optional[str] collection_kwargs: Dict[str, Any] = Field(default_factory=dict) - _collection: Any = PrivateAttr() + _collection: Collection = PrivateAttr() def __init__( self, @@ -231,6 +231,32 @@ class ChromaVectorStore(BasePydanticVectorStore): def class_name(cls) -> str: return "ChromaVectorStore" + def get_nodes( + self, + node_ids: Optional[List[str]], + filters: Optional[List[MetadataFilters]] = None, + ) -> List[BaseNode]: + """Get nodes from index. 
+ + Args: + node_ids (List[str]): list of node ids + filters (List[MetadataFilters]): list of metadata filters + + """ + if not self._collection: + raise ValueError("Collection not initialized") + + node_ids = node_ids or [] + + if filters: + where = _to_chroma_filter(filters) + else: + where = {} + + result = self._get(None, where=where, ids=node_ids) + + return result.nodes + def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]: """Add nodes to index. @@ -282,6 +308,35 @@ class ChromaVectorStore(BasePydanticVectorStore): """ self._collection.delete(where={"document_id": ref_doc_id}) + def delete_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[List[MetadataFilters]] = None, + ) -> None: + """Delete nodes from index. + + Args: + node_ids (List[str]): list of node ids + filters (List[MetadataFilters]): list of metadata filters + + """ + if not self._collection: + raise ValueError("Collection not initialized") + + node_ids = node_ids or [] + + if filters: + where = _to_chroma_filter(filters) + else: + where = {} + + self._collection.delete(ids=node_ids, where=where) + + def clear(self) -> None: + """Clear the collection.""" + ids = self._collection.get()["ids"] + self._collection.delete(ids=ids) + @property def client(self) -> Any: """Return client.""" @@ -367,7 +422,9 @@ class ChromaVectorStore(BasePydanticVectorStore): return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids) - def _get(self, limit: int, where: dict, **kwargs) -> VectorStoreQueryResult: + def _get( + self, limit: Optional[int], where: dict, **kwargs + ) -> VectorStoreQueryResult: results = self._collection.get( limit=limit, where=where, diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/tests/test_chromadb.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/tests/test_chromadb.py index 6404452c3..050cfbecd 100644 --- 
a/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/tests/test_chromadb.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-chroma/tests/test_chromadb.py @@ -12,11 +12,6 @@ from llama_index.core.vector_stores.types import ( FilterCondition, ) -## -# Run tests -# cd tests/vector_stores -# pytest test_chromadb.py - PARAMS: Dict[str, str] = { "host": os.environ.get("CHROMADB_HOST", "localhost"), @@ -196,3 +191,57 @@ async def test_add_to_chromadb_and_query_by_metafilters_only( res.nodes[0].get_content() == "I was taught that the way of progress was neither swift nor easy." ) + + +def test_get_nodes( + vector_store: ChromaVectorStore, node_embeddings: List[TextNode] +) -> None: + vector_store.add(node_embeddings) + res = vector_store.get_nodes( + node_ids=[ + "c330d77f-90bd-4c51-9ed2-57d8d693b3b0", + "c3d1e1dd-8fb4-4b8f-b7ea-7fa96038d39d", + "c3ew11cd-8fb4-4b8f-b7ea-7fa96038d39d", + ] + ) + assert len(res) == 3 + assert res[0].get_content() == "lorem ipsum" + assert res[1].get_content() == "lorem ipsum" + assert res[2].get_content() == "lorem ipsum" + + +def test_delete_nodes( + vector_store: ChromaVectorStore, node_embeddings: List[TextNode] +) -> None: + vector_store.add(node_embeddings) + vector_store.delete_nodes( + node_ids=[ + "c330d77f-90bd-4c51-9ed2-57d8d693b3b0", + "c3d1e1dd-8fb4-4b8f-b7ea-7fa96038d39d", + ] + ) + res = vector_store.get_nodes( + node_ids=[ + "c330d77f-90bd-4c51-9ed2-57d8d693b3b0", + "c3d1e1dd-8fb4-4b8f-b7ea-7fa96038d39d", + "c3ew11cd-8fb4-4b8f-b7ea-7fa96038d39d", + ] + ) + assert len(res) == 1 + assert res[0].get_content() == "lorem ipsum" + assert res[0].id_ == "c3ew11cd-8fb4-4b8f-b7ea-7fa96038d39d" + + +def test_clear( + vector_store: ChromaVectorStore, node_embeddings: List[TextNode] +) -> None: + vector_store.add(node_embeddings) + vector_store.clear() + res = vector_store.get_nodes( + node_ids=[ + "c330d77f-90bd-4c51-9ed2-57d8d693b3b0", + "c3d1e1dd-8fb4-4b8f-b7ea-7fa96038d39d", + 
"c3ew11cd-8fb4-4b8f-b7ea-7fa96038d39d", + ] + ) + assert len(res) == 0 diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/llama_index/vector_stores/dashvector/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/llama_index/vector_stores/dashvector/base.py index b91ea867c..a38b43e87 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/llama_index/vector_stores/dashvector/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/llama_index/vector_stores/dashvector/base.py @@ -3,10 +3,11 @@ import logging from typing import Any, List, Optional, cast +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.vector_stores.types import ( MetadataFilters, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -42,7 +43,7 @@ def _to_dashvector_filter( return " and ".join(filters) -class DashVectorStore(VectorStore): +class DashVectorStore(BasePydanticVectorStore): """Dash Vector Store. 
In this vector store, embeddings and docs are stored within a @@ -77,6 +78,10 @@ class DashVectorStore(VectorStore): stores_text: bool = True flat_metadata: bool = True + _support_sparse_vector: bool = PrivateAttr() + _encoder: Optional[Any] = PrivateAttr() + _collection: Optional[Any] = PrivateAttr() + def __init__( self, collection: Optional[Any] = None, @@ -84,6 +89,8 @@ class DashVectorStore(VectorStore): encoder: Optional[Any] = None, ) -> None: """Initialize params.""" + super().__init__() + try: import dashvector except ImportError: @@ -108,6 +115,11 @@ class DashVectorStore(VectorStore): if collection is not None: self._collection = cast(dashvector.Collection, collection) + @classmethod + def class_name(cls) -> str: + """Get class name.""" + return "DashVectorStore" + def add( self, nodes: List[BaseNode], diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/pyproject.toml index b65968ee3..05ad6406a 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-dashvector" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/tests/test_vector_stores_dashvector.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/tests/test_vector_stores_dashvector.py index cf253034f..4eb402f61 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/tests/test_vector_stores_dashvector.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-dashvector/tests/test_vector_stores_dashvector.py @@ -1,7 +1,7 @@ 
-from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.dashvector import DashVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in DashVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py index 1421df155..cdf77e4f3 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py @@ -5,13 +5,14 @@ from __future__ import annotations from logging import getLogger from typing import Any, Dict, List, cast +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.indices.query.embedding_utils import ( get_top_k_embeddings, get_top_k_embeddings_learner, ) from llama_index.core.schema import BaseNode from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -29,7 +30,7 @@ LEARNER_MODES = { } -class DynamoDBVectorStore(VectorStore): +class DynamoDBVectorStore(BasePydanticVectorStore): """DynamoDB Vector Store. In this vector store, embeddings are stored within dynamodb table. 
@@ -51,10 +52,17 @@ class DynamoDBVectorStore(VectorStore): stores_text: bool = False + _kvstore: DynamoDBKVStore = PrivateAttr() + _collection_embedding: str = PrivateAttr() + _collection_text_id_to_doc_id: str = PrivateAttr() + _key_value: str = PrivateAttr() + def __init__( self, dynamodb_kvstore: DynamoDBKVStore, namespace: str | None = None ) -> None: """Initialize params.""" + super().__init__() + self._kvstore = dynamodb_kvstore namespace = namespace or DEFAULT_NAMESPACE self._collection_embedding = f"{namespace}/embedding" @@ -69,6 +77,10 @@ class DynamoDBVectorStore(VectorStore): dynamodb_kvstore = DynamoDBKVStore.from_table_name(table_name=table_name) return cls(dynamodb_kvstore=dynamodb_kvstore, namespace=namespace) + @classmethod + def class_name(cls) -> str: + return "DynamoDBVectorStore" + @property def client(self) -> None: """Get client.""" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/pyproject.toml index 549db6ff7..676a01377 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-dynamodb" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/tests/test_vector_stores_dynamodb.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/tests/test_vector_stores_dynamodb.py index 21b9473b8..a71b3b05a 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/tests/test_vector_stores_dynamodb.py +++ 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/tests/test_vector_stores_dynamodb.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.dynamodb import DynamoDBVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in DynamoDBVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/llama_index/vector_stores/epsilla/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/llama_index/vector_stores/epsilla/base.py index 49b9e8e16..c7511c1d5 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/llama_index/vector_stores/epsilla/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/llama_index/vector_stores/epsilla/base.py @@ -3,10 +3,11 @@ import logging from typing import Any, List, Optional +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.vector_stores.types import ( DEFAULT_PERSIST_DIR, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -24,7 +25,7 @@ from pyepsilla import vectordb logger = logging.getLogger(__name__) -class EpsillaVectorStore(VectorStore): +class EpsillaVectorStore(BasePydanticVectorStore): """The Epsilla Vector Store. 
In this vector store we store the text, its embedding and @@ -69,6 +70,10 @@ class EpsillaVectorStore(VectorStore): stores_text = True flat_metadata: bool = False + _client: vectordb.Client = PrivateAttr() + _collection_name: str = PrivateAttr() + _collection_created: bool = PrivateAttr() + def __init__( self, client: Any, @@ -80,6 +85,8 @@ class EpsillaVectorStore(VectorStore): **kwargs: Any, ) -> None: """Init params.""" + super().__init__() + if not isinstance(client, vectordb.Client): raise TypeError( f"client should be an instance of pyepsilla.vectordb.Client, " @@ -115,6 +122,11 @@ class EpsillaVectorStore(VectorStore): if self._collection_name not in table_list and dimension is not None: self._create_collection(dimension) + @classmethod + def class_name(cls) -> str: + return "EpsillaVectorStore" + + @property def client(self) -> Any: """Return the Epsilla client.""" return self._client diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/pyproject.toml index cb87a61e2..9147cc16b 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-epsilla" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/tests/test_vector_stores_epsilla.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/tests/test_vector_stores_epsilla.py index b5fde5ab4..ce415bb6f 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/tests/test_vector_stores_epsilla.py +++ 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-epsilla/tests/test_vector_stores_epsilla.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.epsilla import EpsillaVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in EpsillaVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-google/tests/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-google/tests/BUILD index 619cac15f..a1431c235 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-google/tests/BUILD +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-google/tests/BUILD @@ -1,3 +1,3 @@ python_tests( - interpreter_constraints=["==3.9.*", "==3.10.*"], + interpreter_constraints=["==3.10.*"], ) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py index d3541bb15..272289e46 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py @@ -17,9 +17,11 @@ import logging from typing import Any, List, Optional, Tuple, Union, cast from jaguardb_http_client.JaguarHttpClient import JaguarHttpClient + +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, Document, TextNode from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ 
-27,7 +29,7 @@ from llama_index.core.vector_stores.types import ( logger = logging.getLogger(__name__) -class JaguarVectorStore(VectorStore): +class JaguarVectorStore(BasePydanticVectorStore): """Jaguar vector store. See http://www.jaguardb.com @@ -51,6 +53,14 @@ class JaguarVectorStore(VectorStore): stores_text: bool = True + _pod: str = PrivateAttr() + _store: str = PrivateAttr() + _vector_index: str = PrivateAttr() + _vector_type: str = PrivateAttr() + _vector_dimension: int = PrivateAttr() + _jag: JaguarHttpClient = PrivateAttr() + _token: str = PrivateAttr() + def __init__( self, pod: str, @@ -70,6 +80,7 @@ class JaguarVectorStore(VectorStore): vector_dimension: int: dimension of the vector index url: str: URL end point of jaguar http server """ + super().__init__() self._pod = pod self._store = store self._vector_index = vector_index diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/pyproject.toml index 415003e5c..b64e33daa 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-jaguar" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/tests/test_vector_stores_jaguar.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/tests/test_vector_stores_jaguar.py index 8f8f17745..04705bb09 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/tests/test_vector_stores_jaguar.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/tests/test_vector_stores_jaguar.py @@ -1,7 +1,7 @@ -from 
llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.jaguar import JaguarVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in JaguarVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/base.py index e0e5710c3..387389ec3 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/base.py @@ -5,7 +5,7 @@ import metal_sdk # noqa from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.vector_stores.types import ( MetadataFilters, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -29,7 +29,7 @@ def _to_metal_filters(standard_filters: MetadataFilters) -> list: return filters -class MetalVectorStore(VectorStore): +class MetalVectorStore(BasePydanticVectorStore): """Metal Vector Store. 
Examples: @@ -52,6 +52,15 @@ class MetalVectorStore(VectorStore): ``` """ + stores_text: bool = True + flat_metadata: bool = False + is_embedding_query: bool = True + + api_key: str + client_id: str + index_id: str + metal_client: Metal + def __init__( self, api_key: str, @@ -59,14 +68,16 @@ class MetalVectorStore(VectorStore): index_id: str, ): """Init params.""" - self.api_key = api_key - self.client_id = client_id - self.index_id = index_id - - self.metal_client = Metal(api_key, client_id, index_id) - self.stores_text = True - self.flat_metadata = False - self.is_embedding_query = True + super().__init__( + api_key=api_key, + client_id=client_id, + index_id=index_id, + metal_client=Metal(api_key, client_id, index_id), + ) + + @classmethod + def class_name(cls) -> str: + return "MetalVectorStore" def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: if query.filters is not None: diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/pyproject.toml index 51c0ba218..0542e39ac 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-metal" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/test_vector_stores_metal.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/test_vector_stores_metal.py index 02553d43f..dcd9b4255 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/test_vector_stores_metal.py +++ 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/test_vector_stores_metal.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.metal import MetalVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in MetalVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/llama_index/vector_stores/myscale/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/llama_index/vector_stores/myscale/base.py index e31d30bb4..8956042c2 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/llama_index/vector_stores/myscale/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/llama_index/vector_stores/myscale/base.py @@ -8,6 +8,7 @@ import json import logging from typing import Any, Dict, List, Optional, cast +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import ( BaseNode, MetadataMode, @@ -17,7 +18,7 @@ from llama_index.core.schema import ( ) from llama_index.core.utils import iter_batch from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -31,7 +32,7 @@ from llama_index.readers.myscale.base import ( logger = logging.getLogger(__name__) -class MyScaleVectorStore(VectorStore): +class MyScaleVectorStore(BasePydanticVectorStore): """MyScale Vector Store. 
In this vector store, embeddings and docs are stored within an existing @@ -79,12 +80,17 @@ class MyScaleVectorStore(VectorStore): """ stores_text: bool = True - _index_existed: bool = False metadata_column: str = "metadata" AMPLIFY_RATIO_LE5 = 100 AMPLIFY_RATIO_GT5 = 20 AMPLIFY_RATIO_GT50 = 10 + _index_existed: bool = PrivateAttr(False) + _client: Any = PrivateAttr() + _config: MyScaleSettings = PrivateAttr() + _column_config: Dict = PrivateAttr() + _dim: int = PrivateAttr() + def __init__( self, myscale_client: Optional[Any] = None, @@ -103,6 +109,8 @@ class MyScaleVectorStore(VectorStore): `clickhouse_connect` package not found, please run `pip install clickhouse-connect` """ + super().__init__() + try: from clickhouse_connect.driver.httpclient import HttpClient except ImportError: @@ -112,7 +120,7 @@ class MyScaleVectorStore(VectorStore): raise ValueError("Missing MyScale client!") self._client = cast(HttpClient, myscale_client) - self.config = MyScaleSettings( + self._config = MyScaleSettings( table=table, database=database, index_type=index_type, @@ -124,7 +132,7 @@ class MyScaleVectorStore(VectorStore): ) # schema column name, type, and construct format method - self.column_config: Dict = { + self._column_config: Dict = { "id": {"type": "String", "extract_func": lambda x: x.node_id}, "doc_id": {"type": "String", "extract_func": lambda x: x.ref_doc_id}, "text": { @@ -150,6 +158,11 @@ class MyScaleVectorStore(VectorStore): if embed_dims is not None: self._create_index(embed_dims) + @classmethod + def class_name(cls) -> str: + """Get class name.""" + return "MyScaleVectorStore" + @property def client(self) -> Any: """Get client.""" @@ -157,19 +170,20 @@ class MyScaleVectorStore(VectorStore): def _create_index(self, dimension: int) -> None: index_params = ( - ", " + ",".join([f"'{k}={v}'" for k, v in self.config.index_params.items()]) - if self.config.index_params + ", " + + ",".join([f"'{k}={v}'" for k, v in self._config.index_params.items()]) + if 
self._config.index_params else "" ) schema_ = f""" - CREATE TABLE IF NOT EXISTS {self.config.database}.{self.config.table}( - {",".join([f'{k} {v["type"]}' for k, v in self.column_config.items()])}, + CREATE TABLE IF NOT EXISTS {self._config.database}.{self._config.table}( + {",".join([f'{k} {v["type"]}' for k, v in self._column_config.items()])}, CONSTRAINT vector_length CHECK length(vector) = {dimension}, - VECTOR INDEX {self.config.table}_index vector TYPE - {self.config.index_type}('metric_type={self.config.metric}'{index_params}) + VECTOR INDEX {self._config.table}_index vector TYPE + {self._config.index_type}('metric_type={self._config.metric}'{index_params}) ) ENGINE = MergeTree ORDER BY id """ - self.dim = dimension + self._dim = dimension self._client.command("SET allow_experimental_object_type=1") self._client.command(schema_) self._index_existed = True @@ -183,14 +197,14 @@ class MyScaleVectorStore(VectorStore): item_value_str = ",".join( [ f"'{column['extract_func'](item)}'" - for column in self.column_config.values() + for column in self._column_config.values() ] ) _data.append(f"({item_value_str})") return f""" INSERT INTO TABLE - {self.config.database}.{self.config.table}({",".join(self.column_config.keys())}) + {self._config.database}.{self._config.table}({",".join(self._column_config.keys())}) VALUES {','.join(_data)} """ @@ -199,7 +213,7 @@ class MyScaleVectorStore(VectorStore): self, stage_one_sql: str, query_str: str, similarity_top_k: int ) -> str: terms_pattern = [f"(?i){x}" for x in query_str.split(" ")] - column_keys = self.column_config.keys() + column_keys = self._column_config.keys() return ( f"SELECT {','.join(filter(lambda k: k != 'vector', column_keys))}, " f"dist FROM ({stage_one_sql}) tempt " @@ -241,7 +255,7 @@ class MyScaleVectorStore(VectorStore): if not self._index_existed: self._create_index(len(nodes[0].get_embedding())) - for result_batch in iter_batch(nodes, self.config.batch_size): + for result_batch in iter_batch(nodes, 
self._config.batch_size): insert_statement = self._build_insert_statement(values=result_batch) self._client.command(insert_statement) @@ -256,14 +270,14 @@ class MyScaleVectorStore(VectorStore): """ self._client.command( - f"DELETE FROM {self.config.database}.{self.config.table} " + f"DELETE FROM {self._config.database}.{self._config.table} " f"where doc_id='{ref_doc_id}'" ) def drop(self) -> None: """Drop MyScale Index and table.""" self._client.command( - f"DROP TABLE IF EXISTS {self.config.database}.{self.config.table}" + f"DROP TABLE IF EXISTS {self._config.database}.{self._config.table}" ) def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: @@ -285,7 +299,7 @@ class MyScaleVectorStore(VectorStore): ) # build query sql - query_statement = self.config.build_query_statement( + query_statement = self._config.build_query_statement( query_embed=query_embedding, where_str=where_str, limit=query.similarity_top_k, @@ -297,7 +311,7 @@ class MyScaleVectorStore(VectorStore): if query.similarity_top_k > 50: amplify_ratio = self.AMPLIFY_RATIO_GT50 query_statement = self._build_hybrid_search_statement( - self.config.build_query_statement( + self._config.build_query_statement( query_embed=query_embedding, where_str=where_str, limit=query.similarity_top_k * amplify_ratio, diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/pyproject.toml index 7a4970df2..1b5743146 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-myscale" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git 
a/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/tests/test_vector_stores_myscale.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/tests/test_vector_stores_myscale.py index c44b916fc..e2b2d0124 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/tests/test_vector_stores_myscale.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-myscale/tests/test_vector_stores_myscale.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.myscale import MyScaleVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in MyScaleVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/llama_index/vector_stores/neo4jvector/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/llama_index/vector_stores/neo4jvector/base.py index 61c48d73a..c69f1d901 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/llama_index/vector_stores/neo4jvector/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/llama_index/vector_stores/neo4jvector/base.py @@ -2,9 +2,11 @@ from typing import Any, Dict, List, Optional, Tuple import logging import neo4j + +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, FilterOperator, @@ -178,7 +180,7 @@ def construct_metadata_filter(filters: MetadataFilters): return (" AND ".join(collected_snippets[0]), collected_snippets[1]) -class 
Neo4jVectorStore(VectorStore): +class Neo4jVectorStore(BasePydanticVectorStore): """Neo4j Vector Store. Examples: @@ -200,6 +202,21 @@ class Neo4jVectorStore(VectorStore): stores_text: bool = True flat_metadata = True + distance_strategy: str + index_name: str + keyword_index_name: str + hybrid_search: bool + node_label: str + embedding_node_property: str + text_node_property: str + retrieval_query: str + embedding_dimension: int + + _driver: neo4j.GraphDatabase.driver = PrivateAttr() + _database: str = PrivateAttr() + _support_metadata_filter: bool = PrivateAttr() + _is_enterprise: bool = PrivateAttr() + def __init__( self, username: str, @@ -217,6 +234,18 @@ class Neo4jVectorStore(VectorStore): retrieval_query: str = "", **kwargs: Any, ) -> None: + super().__init__( + distance_strategy=distance_strategy, + index_name=index_name, + keyword_index_name=keyword_index_name, + hybrid_search=hybrid_search, + node_label=node_label, + embedding_node_property=embedding_node_property, + text_node_property=text_node_property, + retrieval_query=retrieval_query, + embedding_dimension=embedding_dimension, + ) + if distance_strategy not in ["cosine", "euclidean"]: raise ValueError("distance_strategy must be either 'euclidean' or 'cosine'") @@ -251,16 +280,6 @@ class Neo4jVectorStore(VectorStore): [index_name, node_label, embedding_node_property, text_node_property], ) - self.distance_strategy = distance_strategy - self.index_name = index_name - self.keyword_index_name = keyword_index_name - self.hybrid_search = hybrid_search - self.node_label = node_label - self.embedding_node_property = embedding_node_property - self.text_node_property = text_node_property - self.retrieval_query = retrieval_query - self.embedding_dimension = embedding_dimension - index_already_exists = self.retrieve_existing_index() if not index_already_exists: self.create_new_index() @@ -301,9 +320,9 @@ class Neo4jVectorStore(VectorStore): # Flag for metadata filtering metadata_target_version = (5, 18, 0) if 
version_tuple < metadata_target_version: - self.support_metadata_filter = False + self._support_metadata_filter = False else: - self.support_metadata_filter = True + self._support_metadata_filter = True # Flag for enterprise self._is_enterprise = db_data[0]["edition"] == "enterprise" @@ -458,7 +477,7 @@ class Neo4jVectorStore(VectorStore): def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: if query.filters: # Verify that 5.18 or later is used - if not self.support_metadata_filter: + if not self._support_metadata_filter: raise ValueError( "Metadata filtering is only supported in " "Neo4j version 5.18 or greater" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/pyproject.toml index f495b8ece..86201d71f 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-neo4jvector" readme = "README.md" -version = "0.1.4" +version = "0.1.5" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/tests/test_vector_stores_neo4jvector.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/tests/test_vector_stores_neo4jvector.py index 814f59f30..d10191f94 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/tests/test_vector_stores_neo4jvector.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-neo4jvector/tests/test_vector_stores_neo4jvector.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from 
llama_index.vector_stores.neo4jvector import Neo4jVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in Neo4jVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/llama_index/vector_stores/neptune/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/llama_index/vector_stores/neptune/base.py index 06c343e48..0b2d18e5d 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/llama_index/vector_stores/neptune/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/llama_index/vector_stores/neptune/base.py @@ -3,9 +3,11 @@ from typing import Any, Dict, List, Optional import json import logging from typing import Union + +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -36,10 +38,19 @@ class NeptuneVectorQueryException(Exception): return self.details -class NeptuneAnalyticsVectorStore(VectorStore): +class NeptuneAnalyticsVectorStore(BasePydanticVectorStore): stores_text: bool = True flat_metadata = True + node_label: str + graph_identifier: str + embedding_dimension: int + text_node_property: str + hybrid_search: bool + retrieval_query: Optional[str] + + _client: Any = PrivateAttr() + def __init__( self, graph_identifier: str, @@ -54,12 +65,15 @@ class NeptuneAnalyticsVectorStore(VectorStore): **kwargs: Any, ) -> None: """Create a new Neptune Analytics graph wrapper instance.""" - self.node_label = node_label - self.graph_identifier = graph_identifier - self.embedding_dimension = embedding_dimension - self.text_node_property = text_node_property - self.hybrid_search = hybrid_search - 
self.retrieval_query = retrieval_query + super().__init__( + graph_identifier=graph_identifier, + embedding_dimension=embedding_dimension, + node_label=node_label, + text_node_property=text_node_property, + hybrid_search=hybrid_search, + retrieval_query=retrieval_query, + ) + try: if client is not None: self._client = client @@ -119,6 +133,10 @@ class NeptuneAnalyticsVectorStore(VectorStore): f"Vector search index does not exist for the Neptune Analytics graph." ) + @classmethod + def class_name(cls) -> str: + return "NeptuneAnalyticsVectorStore" + def database_query( self, query: str, params: Optional[dict] = None ) -> List[Dict[str, Any]]: diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/pyproject.toml index 77500cf6d..b4ea1e16a 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/pyproject.toml @@ -30,7 +30,7 @@ license = "MIT" name = "llama-index-vector-stores-neptune" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.1.0" +version = "0.1.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/tests/test_vector_stores_neptune.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/tests/test_vector_stores_neptune.py index 9ef3fa726..4d40bf06b 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/tests/test_vector_stores_neptune.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-neptune/tests/test_vector_stores_neptune.py @@ -1,12 +1,7 @@ -from unittest.mock import MagicMock, patch - -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from 
llama_index.vector_stores.neptune import NeptuneAnalyticsVectorStore -@patch("llama_index.vector_stores.neptune.NeptuneAnalyticsVectorStore") -def test_neptune_analytics_vector_store(MockNeptuneAnalyticsGraphStore: MagicMock): - instance: NeptuneAnalyticsVectorStore = ( - MockNeptuneAnalyticsGraphStore.return_value() - ) - assert isinstance(instance, VectorStore) +def test_neptune_analytics_vector_store(): + names_of_base_classes = [b.__name__ for b in NeptuneAnalyticsVectorStore.__mro__] + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py index 4cbfe3997..c70db5410 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py @@ -111,8 +111,8 @@ class QdrantVectorStore(BasePydanticVectorStore): client_kwargs: dict = Field(default_factory=dict) enable_hybrid: bool - _client: Any = PrivateAttr() - _aclient: Any = PrivateAttr() + _client: qdrant_client.QdrantClient = PrivateAttr() + _aclient: qdrant_client.AsyncQdrantClient = PrivateAttr() _collection_initialized: bool = PrivateAttr() _sparse_doc_fn: Optional[SparseEncoderCallable] = PrivateAttr() _sparse_query_fn: Optional[SparseEncoderCallable] = PrivateAttr() @@ -273,6 +273,86 @@ class QdrantVectorStore(BasePydanticVectorStore): return points, ids + def get_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + ) -> List[BaseNode]: + """ + Get nodes from the index. + + Args: + node_ids (Optional[List[str]]): List of node IDs to retrieve. + filters (Optional[MetadataFilters]): Metadata filters to apply. 
+ + Returns: + List[BaseNode]: List of nodes retrieved from the index. + """ + should = [] + if node_ids is not None: + should = [ + HasIdCondition( + has_id=node_ids, + ) + ] + + if filters is not None: + filter = self._build_subfilter(filters) + if filter.should is None: + filter.should = should + else: + filter.should.extend(should) + else: + filter = Filter(should=should) + + response = self._client.scroll( + collection_name=self.collection_name, + limit=9999, + scroll_filter=filter, + ) + + return self.parse_to_query_result(response[0]).nodes + + async def aget_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + ) -> List[BaseNode]: + """ + Asynchronous method to get nodes from the index. + + Args: + node_ids (Optional[List[str]]): List of node IDs to retrieve. + filters (Optional[MetadataFilters]): Metadata filters to apply. + + Returns: + List[BaseNode]: List of nodes retrieved from the index. + """ + should = [] + if node_ids is not None: + should = [ + HasIdCondition( + has_id=node_ids, + ) + ] + + if filters is not None: + filter = self._build_subfilter(filters) + if filter.should is None: + filter.should = should + else: + filter.should.extend(should) + else: + filter = Filter(should=should) + + response = await self._aclient.scroll( + collection_name=self.collection_name, + limit=9999, + scroll_filter=filter, + ) + + return self.parse_to_query_result(response[0]).nodes + def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]: """ Add nodes to index. @@ -374,6 +454,90 @@ class QdrantVectorStore(BasePydanticVectorStore): ), ) + def delete_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + **delete_kwargs: Any, + ) -> None: + """ + Delete nodes using node_ids. + + Args: + node_ids (Optional[List[str]]): List of node IDs to delete. + filters (Optional[MetadataFilters]): Metadata filters to apply. 
+ """ + should = [] + if node_ids is not None: + should = [ + HasIdCondition( + has_id=node_ids, + ) + ] + + if filters is not None: + filter = self._build_subfilter(filters) + if filter.should is None: + filter.should = should + else: + filter.should.extend(should) + else: + filter = Filter(should=should) + + self._client.delete( + collection_name=self.collection_name, + points_selector=filter, + ) + + async def adelete_nodes( + self, + node_ids: Optional[List[str]] = None, + filters: Optional[MetadataFilters] = None, + **delete_kwargs: Any, + ) -> None: + """ + Asynchronous method to delete nodes using node_ids. + + Args: + node_ids (Optional[List[str]]): List of node IDs to delete. + filters (Optional[MetadataFilters]): Metadata filters to apply. + """ + should = [] + if node_ids is not None: + should = [ + HasIdCondition( + has_id=node_ids, + ) + ] + + if filters is not None: + filter = self._build_subfilter(filters) + if filter.should is None: + filter.should = should + else: + filter.should.extend(should) + else: + filter = Filter(should=should) + + await self._aclient.delete( + collection_name=self.collection_name, + points_selector=filter, + ) + + def clear(self) -> None: + """ + Clear the index. + """ + self._client.delete_collection(collection_name=self.collection_name) + self._collection_initialized = False + + async def aclear(self) -> None: + """ + Asynchronous method to clear the index. 
+ """ + await self._aclient.delete_collection(collection_name=self.collection_name) + self._collection_initialized = False + @property def client(self) -> Any: """Return the Qdrant client.""" @@ -766,8 +930,12 @@ class QdrantVectorStore(BasePydanticVectorStore): relationships=relationships, ) nodes.append(node) - similarities.append(point.score) ids.append(str(point.id)) + try: + similarities.append(point.score) + except AttributeError: + # certain requests do not return a score + similarities.append(1.0) return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml index a41bd93a9..c2d814669 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml @@ -46,6 +46,7 @@ mypy = "0.991" pre-commit = "3.2.0" pylint = "2.15.10" pytest = "7.2.1" +pytest-asyncio = "*" pytest-mock = "3.11.1" ruff = "0.0.292" tree-sitter-languages = "^1.8.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/BUILD index dabf212d7..45d59ac82 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/BUILD +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/BUILD @@ -1 +1,5 @@ python_tests() + +python_test_utils( + name="test_utils", +) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/conftest.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/conftest.py new file mode 100644 index 000000000..a472bdce8 --- /dev/null +++ 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/conftest.py @@ -0,0 +1,38 @@ +from llama_index.core.schema import NodeRelationship, RelatedNodeInfo, TextNode +from llama_index.vector_stores.qdrant import QdrantVectorStore +import qdrant_client +import pytest_asyncio + + +@pytest_asyncio.fixture +async def vector_store() -> QdrantVectorStore: + client = qdrant_client.QdrantClient(":memory:") + aclient = qdrant_client.AsyncQdrantClient(":memory:") + vector_store = QdrantVectorStore("test", client=client, aclient=aclient) + + nodes = [ + TextNode( + text="test1", + id_="11111111-1111-1111-1111-111111111111", + embedding=[1.0, 0.0], + relationships={NodeRelationship.SOURCE: RelatedNodeInfo(node_id="test-0")}, + ), + TextNode( + text="test2", + id_="22222222-2222-2222-2222-222222222222", + embedding=[0.0, 1.0], + relationships={NodeRelationship.SOURCE: RelatedNodeInfo(node_id="test-0")}, + ), + TextNode( + text="test3", + id_="33333333-3333-3333-3333-333333333333", + embedding=[1.0, 1.0], + ), + ] + + vector_store.add(nodes) + + # in-memory client does not share data between instances + await vector_store.async_add(nodes) + + return vector_store diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py index f069f7836..2cca8ac3c 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py @@ -1,7 +1,60 @@ from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.qdrant import QdrantVectorStore +import pytest def test_class(): names_of_base_classes = [b.__name__ for b in QdrantVectorStore.__mro__] assert BasePydanticVectorStore.__name__ in 
names_of_base_classes + + +def test_delete__and_get_nodes(vector_store: QdrantVectorStore) -> None: + vector_store.delete_nodes(node_ids=["11111111-1111-1111-1111-111111111111"]) + + existing_nodes = vector_store.get_nodes( + node_ids=[ + "11111111-1111-1111-1111-111111111111", + "22222222-2222-2222-2222-222222222222", + "33333333-3333-3333-3333-333333333333", + ] + ) + assert len(existing_nodes) == 2 + + +def test_clear(vector_store: QdrantVectorStore) -> None: + vector_store.clear() + with pytest.raises(ValueError, match="Collection test not found"): + vector_store.get_nodes( + node_ids=[ + "11111111-1111-1111-1111-111111111111", + "22222222-2222-2222-2222-222222222222", + "33333333-3333-3333-3333-333333333333", + ] + ) + + +@pytest.mark.asyncio() +async def test_adelete_and_aget(vector_store: QdrantVectorStore) -> None: + await vector_store.adelete_nodes(node_ids=["11111111-1111-1111-1111-111111111111"]) + + existing_nodes = await vector_store.aget_nodes( + node_ids=[ + "11111111-1111-1111-1111-111111111111", + "22222222-2222-2222-2222-222222222222", + "33333333-3333-3333-3333-333333333333", + ] + ) + assert len(existing_nodes) == 2 + + +@pytest.mark.asyncio() +async def test_aclear(vector_store: QdrantVectorStore) -> None: + await vector_store.aclear() + with pytest.raises(ValueError, match="Collection test not found"): + await vector_store.aget_nodes( + node_ids=[ + "11111111-1111-1111-1111-111111111111", + "22222222-2222-2222-2222-222222222222", + "33333333-3333-3333-3333-333333333333", + ] + ) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/llama_index/vector_stores/rocksetdb/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/llama_index/vector_stores/rocksetdb/base.py index e976061c6..af2c1b892 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/llama_index/vector_stores/rocksetdb/base.py +++ 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/llama_index/vector_stores/rocksetdb/base.py @@ -9,7 +9,7 @@ from typing import Any, List, Type, TypeVar import rockset from llama_index.core.schema import BaseNode from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -56,7 +56,7 @@ def _get_client(api_key: str | None, api_server: str | None, client: Any | None) return client -class RocksetVectorStore(VectorStore): +class RocksetVectorStore(BasePydanticVectorStore): """Rockset Vector Store. Examples: @@ -86,6 +86,16 @@ class RocksetVectorStore(VectorStore): EUCLIDEAN_DIST = "EUCLIDEAN_DIST" DOT_PRODUCT = "DOT_PRODUCT" + rockset: ModuleType + rs: Any + workspace: str + collection: str + text_key: str + embedding_col: str + metadata_col: str + distance_func: DistanceFunc + distance_order: str + def __init__( self, collection: str, @@ -117,16 +127,18 @@ class RocksetVectorStore(VectorStore): vector relationship (default: RocksetVectorStore.DistanceFunc.COSINE_SIM) """ - self.rockset = _get_rockset() - self.rs = _get_client(api_key, api_server, client) - self.workspace = workspace - self.collection = collection - self.text_key = text_key - self.embedding_col = embedding_col - self.metadata_col = metadata_col - self.distance_func = distance_func - self.distance_order = ( - "ASC" if distance_func is distance_func.EUCLIDEAN_DIST else "DESC" + super().__init__( + rockset=_get_rockset(), + rs=_get_client(api_key, api_server, client), + collection=collection, + text_key=text_key, + embedding_col=embedding_col, + metadata_col=metadata_col, + workspace=workspace, + distance_func=distance_func, + distance_order=( + "ASC" if distance_func is distance_func.EUCLIDEAN_DIST else "DESC" + ), ) try: @@ -136,6 +148,10 @@ class RocksetVectorStore(VectorStore): # rockset version < 2.1.0 pass + @classmethod + def class_name(cls) -> str: + return "RocksetVectorStore" + @property 
def client(self) -> Any: return self.rs diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/pyproject.toml index 763cb1b3a..4db97eed2 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-rocksetdb" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/tests/test_vector_stores_rocksetdb.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/tests/test_vector_stores_rocksetdb.py index 92d0f2459..08c737ed3 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/tests/test_vector_stores_rocksetdb.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-rocksetdb/tests/test_vector_stores_rocksetdb.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.rocksetdb import RocksetVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in RocksetVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/llama_index/vector_stores/singlestoredb/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/llama_index/vector_stores/singlestoredb/base.py index ca3de1111..bff4d0d87 100644 --- 
a/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/llama_index/vector_stores/singlestoredb/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/llama_index/vector_stores/singlestoredb/base.py @@ -4,7 +4,7 @@ from typing import Any, List, Optional, Sequence from llama_index.core.schema import BaseNode, MetadataMode from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -19,7 +19,7 @@ import singlestoredb as s2 logger = logging.getLogger(__name__) -class SingleStoreVectorStore(VectorStore): +class SingleStoreVectorStore(BasePydanticVectorStore): """SingleStore vector store. This vector store stores embeddings within a SingleStore database table. @@ -80,6 +80,16 @@ class SingleStoreVectorStore(VectorStore): stores_text: bool = True flat_metadata: bool = True + table_name: str + content_field: str + metadata_field: str + vector_field: str + pool_size: int + max_overflow: int + timeout: float + connection_kwargs: dict + connection_pool: QueuePool + def __init__( self, table_name: str = "embeddings", @@ -92,20 +102,21 @@ class SingleStoreVectorStore(VectorStore): **kwargs: Any, ) -> None: """Init params.""" - self.table_name = table_name - self.content_field = content_field - self.metadata_field = metadata_field - self.vector_field = vector_field - self.pool_size = pool_size - self.max_overflow = max_overflow - self.timeout = timeout - - self.connection_kwargs = kwargs - self.connection_pool = QueuePool( - self._get_connection, - pool_size=self.pool_size, - max_overflow=self.max_overflow, - timeout=self.timeout, + super().__init__( + table_name=table_name, + content_field=content_field, + metadata_field=metadata_field, + vector_field=vector_field, + pool_size=pool_size, + max_overflow=max_overflow, + timeout=timeout, + connection_kwargs=kwargs, + connection_pool=QueuePool( + self._get_connection, + pool_size=pool_size, 
+ max_overflow=max_overflow, + timeout=timeout, + ), ) self._create_table() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/pyproject.toml index 2de8876ae..de46c068b 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-singlestoredb" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/tests/test_vector_stores_singlestoredb.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/tests/test_vector_stores_singlestoredb.py index 0b55469bf..d64518d48 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/tests/test_vector_stores_singlestoredb.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-singlestoredb/tests/test_vector_stores_singlestoredb.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.singlestoredb import SingleStoreVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in SingleStoreVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/llama_index/vector_stores/tair/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/llama_index/vector_stores/tair/base.py index 441795064..b4975d52c 100644 --- 
a/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/llama_index/vector_stores/tair/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/llama_index/vector_stores/tair/base.py @@ -6,6 +6,7 @@ An index that is built on top of Alibaba Cloud's Tair database. import logging from typing import Any, Dict, List, Optional +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import ( BaseNode, MetadataMode, @@ -15,7 +16,7 @@ from llama_index.core.schema import ( ) from llama_index.core.vector_stores.types import ( MetadataFilters, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -36,7 +37,7 @@ def _to_filter_expr(filters: MetadataFilters) -> str: return "&&".join(conditions) -class TairVectorStore(VectorStore): +class TairVectorStore(BasePydanticVectorStore): """Initialize TairVectorStore. Two index types are available: FLAT & HNSW. @@ -83,6 +84,15 @@ class TairVectorStore(VectorStore): stores_node = True flat_metadata = False + _tair_client: Tair = PrivateAttr() + _index_name: str = PrivateAttr() + _index_type: str = PrivateAttr() + _metric_type: str = PrivateAttr() + _overwrite: bool = PrivateAttr() + _index_args: Dict[str, Any] = PrivateAttr() + _query_args: Dict[str, Any] = PrivateAttr() + _dim: int = PrivateAttr() + def __init__( self, tair_url: str, @@ -117,6 +127,11 @@ class TairVectorStore(VectorStore): self._index_args = {"ef_construct": ef_construct, "M": M} self._query_args = {"ef_search": ef_search} + @classmethod + def class_name(cls) -> str: + """Class name.""" + return "TairVectorStore" + @property def client(self) -> "Tair": """Return the Tair client instance.""" @@ -136,7 +151,7 @@ class TairVectorStore(VectorStore): return [] # set vector dim for creation if index doesn't exist - self.dim = len(nodes[0].get_embedding()) + self._dim = len(nodes[0].get_embedding()) if self._index_exists(): if self._overwrite: @@ -251,7 +266,7 @@ class 
TairVectorStore(VectorStore): _logger.info(f"Creating index {self._index_name}") self._tair_client.tvs_create_index( self._index_name, - self.dim, + self._dim, distance_type=self._metric_type, index_type=self._index_type, data_type=tairvector.DataType.Float32, diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/pyproject.toml index e72c4a9d5..71fa44459 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-tair" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/tests/test_vector_stores_tair.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/tests/test_vector_stores_tair.py index 6a067f79f..f7af56985 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/tests/test_vector_stores_tair.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tair/tests/test_vector_stores_tair.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.tair import TairVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in TairVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/llama_index/vector_stores/tencentvectordb/base.py 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/llama_index/vector_stores/tencentvectordb/base.py index f593d96f1..ffe736ef7 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/llama_index/vector_stores/tencentvectordb/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/llama_index/vector_stores/tencentvectordb/base.py @@ -7,6 +7,7 @@ An index that is built with Tencent Vector Database. import json from typing import Any, Dict, List, Optional +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import ( BaseNode, NodeRelationship, @@ -14,7 +15,7 @@ from llama_index.core.schema import ( TextNode, ) from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -130,8 +131,8 @@ class CollectionParams: vector_params: Optional[Dict] = None, filter_fields: Optional[List[FilterField]] = [], ): - self.collection_name = collection_name - self.collection_description = collection_description + self._collection_name = collection_name + self._collection_description = collection_description self.dimension = dimension self.shard = shard self.replicas = replicas @@ -139,10 +140,10 @@ class CollectionParams: self.metric_type = metric_type self.vector_params = vector_params self.drop_exists = drop_exists - self.filter_fields = filter_fields or [] + self._filter_fields = filter_fields or [] -class TencentVectorDB(VectorStore): +class TencentVectorDB(BasePydanticVectorStore): """Tencent Vector Store. In this vector store, embeddings and docs are stored within a Collection. 
@@ -177,6 +178,12 @@ class TencentVectorDB(VectorStore): stores_text: bool = True filter_fields: List[FilterField] = [] + batch_size: int + _tencent_client: Any = PrivateAttr() + _database: Any = PrivateAttr() + _collection: Any = PrivateAttr() + _filter_fields: List[FilterField] = PrivateAttr() + def __init__( self, url: str, @@ -189,17 +196,17 @@ class TencentVectorDB(VectorStore): **kwargs: Any, ): """Init params.""" + super().__init__(batch_size=batch_size) self._init_client(url, username, key, read_consistency) self._create_database_if_not_exists(database_name) self._create_collection(database_name, collection_params) self._init_filter_fields() - self.batch_size = batch_size def _init_filter_fields(self) -> None: - fields = vars(self.collection).get("indexes", []) + fields = vars(self._collection).get("indexes", []) for field in fields: if field["fieldName"] not in [FIELD_ID, DEFAULT_DOC_ID_KEY, FIELD_VECTOR]: - self.filter_fields.append( + self._filter_fields.append( FilterField(name=field["fieldName"], data_type=field["fieldType"]) ) @@ -247,7 +254,7 @@ class TencentVectorDB(VectorStore): VALUE_RANGE_ERROR.format(READ_CONSISTENCY, READ_CONSISTENCY_VALUES) ) - self.tencent_client = tcvectordb.VectorDBClient( + self._tencent_client = tcvectordb.VectorDBClient( url=url, username=username, key=key, @@ -256,12 +263,12 @@ class TencentVectorDB(VectorStore): ) def _create_database_if_not_exists(self, database_name: str) -> None: - db_list = self.tencent_client.list_databases() + db_list = self._tencent_client.list_databases() if database_name in [db.database_name for db in db_list]: - self.database = self.tencent_client.database(database_name) + self._database = self._tencent_client.database(database_name) else: - self.database = self.tencent_client.create_database(database_name) + self._database = self._tencent_client.create_database(database_name) def _create_collection( self, database_name: str, collection_params: CollectionParams @@ -277,9 +284,9 @@ class 
TencentVectorDB(VectorStore): raise ValueError(VALUE_NONE_ERROR.format("collection_params")) try: - self.collection = self.database.describe_collection(collection_name) + self._collection = self._database.describe_collection(collection_name) if collection_params.drop_exists: - self.database.drop_collection(collection_name) + self._database.drop_collection(collection_name) self._create_collection_in_db( collection_name, collection_description, collection_params ) @@ -333,7 +340,7 @@ class TencentVectorDB(VectorStore): for field in collection_params.filter_fields: index.add(field.to_vdb_filter()) - self.collection = self.database.create_collection( + self._collection = self._database.create_collection( name=collection_name, shard=collection_params.shard, replicas=collection_params.replicas, @@ -412,7 +419,7 @@ class TencentVectorDB(VectorStore): @property def client(self) -> Any: """Get client.""" - return self.tencent_client + return self._tencent_client def add( self, @@ -435,7 +442,7 @@ class TencentVectorDB(VectorStore): document.__dict__[DEFAULT_DOC_ID_KEY] = node.ref_doc_id if node.metadata is not None: document.__dict__[FIELD_METADATA] = json.dumps(node.metadata) - for field in self.filter_fields: + for field in self._filter_fields: v = node.metadata.get(field.name) if field.match_value(v): document.__dict__[field.name] = v @@ -446,13 +453,13 @@ class TencentVectorDB(VectorStore): ids.append(node.node_id) if len(entries) >= self.batch_size: - self.collection.upsert( + self._collection.upsert( documents=entries, build_index=True, timeout=DEFAULT_TIMEOUT ) entries = [] if len(entries) > 0: - self.collection.upsert( + self._collection.upsert( documents=entries, build_index=True, timeout=DEFAULT_TIMEOUT ) @@ -472,16 +479,18 @@ class TencentVectorDB(VectorStore): from tcvectordb.model.document import Filter delete_ids = ref_doc_id if isinstance(ref_doc_id, list) else [ref_doc_id] - self.collection.delete(filter=Filter(Filter.In(DEFAULT_DOC_ID_KEY, delete_ids))) + 
self._collection.delete( + filter=Filter(Filter.In(DEFAULT_DOC_ID_KEY, delete_ids)) + ) def query_by_ids(self, ids: List[str]) -> List[Dict]: - return self.collection.query(document_ids=ids, limit=len(ids)) + return self._collection.query(document_ids=ids, limit=len(ids)) def truncate(self) -> None: - self.database.truncate_collection(self.collection.collection_name) + self._database.truncate_collection(self._collection.collection_name) def describe_collection(self) -> Any: - return self.database.describe_collection(self.collection.collection_name) + return self._database.describe_collection(self._collection.collection_name) def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: """Query index for top k most similar nodes. @@ -502,7 +511,7 @@ class TencentVectorDB(VectorStore): using filter: `doc_id in (query.doc_ids)` """ search_filter = self._to_vdb_filter(query, **kwargs) - results = self.collection.search( + results = self._collection.search( vectors=[query.query_embedding], limit=query.similarity_top_k, retrieve_vector=True, diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/pyproject.toml index e6ecaf345..fef446ff9 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-tencentvectordb" readme = "README.md" -version = "0.1.3" +version = "0.1.4" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/tests/test_vector_stores_tencentvectordb.py 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/tests/test_vector_stores_tencentvectordb.py index 6138a9aa4..d194c73eb 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/tests/test_vector_stores_tencentvectordb.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tencentvectordb/tests/test_vector_stores_tencentvectordb.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.tencentvectordb import TencentVectorDB def test_class(): names_of_base_classes = [b.__name__ for b in TencentVectorDB.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-timescalevector/llama_index/vector_stores/timescalevector/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-timescalevector/llama_index/vector_stores/timescalevector/base.py index 24d410b34..2f6c90bc3 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-timescalevector/llama_index/vector_stores/timescalevector/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-timescalevector/llama_index/vector_stores/timescalevector/base.py @@ -3,11 +3,12 @@ import uuid from datetime import timedelta from typing import Any, Dict, List, Optional +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.constants import DEFAULT_EMBEDDING_DIM from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.vector_stores.types import ( MetadataFilters, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -26,7 +27,7 @@ class IndexType(enum.Enum): PGVECTOR_HNSW = 3 -class TimescaleVectorStore(VectorStore): 
+class TimescaleVectorStore(BasePydanticVectorStore): """Timescale vector store. Examples: @@ -50,6 +51,14 @@ class TimescaleVectorStore(VectorStore): stores_text = True flat_metadata = False + service_url: str + table_name: str + num_dimensions: int + time_partition_interval: Optional[timedelta] + + _sync_client: client.Sync = PrivateAttr() + _async_client: client.Async = PrivateAttr() + def __init__( self, service_url: str, @@ -57,14 +66,22 @@ class TimescaleVectorStore(VectorStore): num_dimensions: int = DEFAULT_EMBEDDING_DIM, time_partition_interval: Optional[timedelta] = None, ) -> None: - self.service_url = service_url - self.table_name: str = table_name.lower() - self.num_dimensions = num_dimensions - self.time_partition_interval = time_partition_interval + table_name = table_name.lower() + + super().__init__( + service_url=service_url, + table_name=table_name, + num_dimensions=num_dimensions, + time_partition_interval=time_partition_interval, + ) self._create_clients() self._create_tables() + @classmethod + def class_name(cls) -> str: + return "TimescaleVectorStore" + async def close(self) -> None: self._sync_client.close() await self._async_client.close() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-timescalevector/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-timescalevector/pyproject.toml index 127589680..7a8702383 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-timescalevector/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-timescalevector/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-timescalevector" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/llama_index/vector_stores/typesense/base.py 
b/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/llama_index/vector_stores/typesense/base.py index ed6e52f0b..07e9496be 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/llama_index/vector_stores/typesense/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/llama_index/vector_stores/typesense/base.py @@ -7,11 +7,12 @@ An index that is built on top of an existing vector store. import logging from typing import Any, Callable, List, Optional, cast +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.utils import get_tokenizer from llama_index.core.vector_stores.types import ( MetadataFilters, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -34,7 +35,7 @@ DEFAULT_BATCH_SIZE = 100 DEFAULT_METADATA_KEY = "metadata" -class TypesenseVectorStore(VectorStore): +class TypesenseVectorStore(BasePydanticVectorStore): """Typesense Vector Store. 
In this vector store, embeddings and docs are stored within a @@ -72,6 +73,14 @@ class TypesenseVectorStore(VectorStore): is_embedding_query: bool = False flat_metadata: bool = False + _tokenizer: Callable[[str], List] = PrivateAttr() + _text_key: str = PrivateAttr() + _collection_name: str = PrivateAttr() + _collection: Any = PrivateAttr() + _batch_size: int = PrivateAttr() + _metadata_key: str = PrivateAttr() + _client: typesense.Client = PrivateAttr() + def __init__( self, client: Any, @@ -83,6 +92,8 @@ class TypesenseVectorStore(VectorStore): **kwargs: Any, ) -> None: """Initialize params.""" + super().__init__() + if client is not None: if not isinstance(client, typesense.Client): raise ValueError( @@ -97,6 +108,11 @@ class TypesenseVectorStore(VectorStore): self._batch_size = batch_size self._metadata_key = metadata_key + @classmethod + def class_name(cls) -> str: + """Class name.""" + return "TypesenseVectorStore" + @property def client(self) -> Any: """Return Typesense client.""" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/pyproject.toml index 304c01b05..bf670dbea 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-typesense" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/tests/test_vector_stores_typesense.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/tests/test_vector_stores_typesense.py index 7da11f688..5975e286c 100644 --- 
a/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/tests/test_vector_stores_typesense.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-typesense/tests/test_vector_stores_typesense.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.typesense import TypesenseVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in TypesenseVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/llama_index/vector_stores/upstash/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/llama_index/vector_stores/upstash/base.py index 5cfea6ae4..5c8e81266 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/llama_index/vector_stores/upstash/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/llama_index/vector_stores/upstash/base.py @@ -9,10 +9,11 @@ https://upstash.com/docs/vector/overall/getstarted import logging from typing import Any, List +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode from llama_index.core.utils import iter_batch from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -24,6 +25,7 @@ from llama_index.core.vector_stores.utils import ( metadata_dict_to_node, node_to_metadata_dict, ) +from upstash_vector import Index logger = logging.getLogger(__name__) @@ -86,7 +88,7 @@ def _to_upstash_filters(filters: MetadataFilters) -> str: # print(combined_filters) -class UpstashVectorStore(VectorStore): +class UpstashVectorStore(BasePydanticVectorStore): 
"""Upstash Vector Store. Examples: @@ -106,6 +108,9 @@ class UpstashVectorStore(VectorStore): stores_text: bool = True flat_metadata: bool = False + batch_size: int + _index: Index = PrivateAttr() + @classmethod def class_name(cls) -> str: return "UpstashVectorStore" @@ -129,15 +134,7 @@ class UpstashVectorStore(VectorStore): Raises: ImportError: If the upstash-vector python package is not installed. """ - self.batch_size = batch_size - - try: - from upstash_vector import Index - except ImportError: - raise ImportError( - "Could not import upstash_vector.Index, Please install it with `pip install upstash-vector`" - ) - + super().__init__(batch_size=batch_size) self._index = Index(url=url, token=token) def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]: diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/pyproject.toml index 8c0737e75..8996434e1 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-upstash" readme = "README.md" -version = "0.1.3" +version = "0.1.4" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/tests/test_vector_stores_upstash.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/tests/test_vector_stores_upstash.py index 4c0596005..0cd35a4a4 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/tests/test_vector_stores_upstash.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/tests/test_vector_stores_upstash.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from 
llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.upstash import UpstashVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in UpstashVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/llama_index/vector_stores/vearch/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/llama_index/vector_stores/vearch/base.py index e0c41f2b3..2cc6e1cce 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/llama_index/vector_stores/vearch/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/llama_index/vector_stores/vearch/base.py @@ -5,9 +5,10 @@ from typing import Any, Iterable, List, Optional import numpy as np +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -16,11 +17,14 @@ from llama_index.core.vector_stores.utils import ( metadata_dict_to_node, node_to_metadata_dict, ) +import vearch_cluster logger = logging.getLogger(__name__) +_DEFAULT_TABLE_NAME = "llama_index_vearch" +_DEFAULT_CLUSTER_DB_NAME = "llama_index_vearch_client_db" -class VearchVectorStore(VectorStore): +class VearchVectorStore(BasePydanticVectorStore): """ Vearch vector store: embeddings are stored within a Vearch table. 
@@ -34,8 +38,11 @@ class VearchVectorStore(VectorStore): flat_metadata: bool = True stores_text: bool = True - _DEFAULT_TABLE_NAME = "llama_index_vearch" - _DEFAULT_CLUSTER_DB_NAME = "llama_index_vearch_client_db" + + using_db_name: str + using_table_name: str + url: str + _vearch: vearch_cluster.VearchCluster = PrivateAttr() def __init__( self, @@ -45,33 +52,34 @@ class VearchVectorStore(VectorStore): **kwargs: Any, ) -> None: """Initialize vearch vector store.""" - try: - import vearch_cluster - except ImportError: - raise ValueError( - "Could not import suitable python package." - "Please install it with `pip install vearch_cluster." - ) - if path_or_url is None: raise ValueError("Please input url of cluster") + if not db_name: - db_name = self._DEFAULT_CLUSTER_DB_NAME + db_name = _DEFAULT_CLUSTER_DB_NAME db_name += "_" db_name += str(uuid.uuid4()).split("-")[-1] - self.using_db_name = db_name - self.url = path_or_url - self.vearch = vearch_cluster.VearchCluster(path_or_url) + if not table_name: - table_name = self._DEFAULT_TABLE_NAME + table_name = _DEFAULT_TABLE_NAME table_name += "_" table_name += str(uuid.uuid4()).split("-")[-1] - self.using_table_name = table_name + + super().__init__( + using_db_name=db_name, + using_table_name=table_name, + url=path_or_url, + ) + self._vearch = vearch_cluster.VearchCluster(path_or_url) + + @classmethod + def class_name(cls) -> str: + return "VearchVectorStore" @property def client(self) -> Any: """Get client.""" - return self.vearch + return self._vearch def _get_matadata_field(self, metadatas: Optional[List[dict]] = None) -> None: field_list = [] @@ -105,12 +113,12 @@ class VearchVectorStore(VectorStore): if embeddings is None: raise ValueError("embeddings is None") self._get_matadata_field(metadatas) - dbs_list = self.vearch.list_dbs() + dbs_list = self._vearch.list_dbs() if self.using_db_name not in dbs_list: - create_db_code = self.vearch.create_db(self.using_db_name) + create_db_code = 
self._vearch.create_db(self.using_db_name) if not create_db_code: raise ValueError("create db failed!!!") - space_list = self.vearch.list_spaces(self.using_db_name) + space_list = self._vearch.list_spaces(self.using_db_name) if self.using_table_name not in space_list: create_space_code = self._create_space(len(embeddings[0])) if not create_space_code: @@ -128,14 +136,14 @@ class VearchVectorStore(VectorStore): profiles["text_embedding"] = { "feature": (embed_np / np.linalg.norm(embed_np)).tolist() } - insert_res = self.vearch.insert_one( + insert_res = self._vearch.insert_one( self.using_db_name, self.using_table_name, profiles ) if insert_res["status"] == 200: docid.append(insert_res["_id"]) continue else: - retry_insert = self.vearch.insert_one( + retry_insert = self._vearch.insert_one( self.using_db_name, self.using_table_name, profiles ) docid.append(retry_insert["_id"]) @@ -184,14 +192,14 @@ class VearchVectorStore(VectorStore): tmp_proer[item["field"]] = {"type": type_dict[item["type"]]} space_config["properties"] = tmp_proer - return self.vearch.create_space(self.using_db_name, space_config) + return self._vearch.create_space(self.using_db_name, space_config) def add( self, nodes: List[BaseNode], **add_kwargs: Any, ) -> List[str]: - if not self.vearch: + if not self._vearch: raise ValueError("Vearch Engine is not initialized") embeddings = [] @@ -234,7 +242,7 @@ class VearchVectorStore(VectorStore): for filter_ in query.filters.legacy_filters(): meta_filters[filter_.key] = filter_.value if self.flag: - meta_field_list = self.vearch.get_space( + meta_field_list = self._vearch.get_space( self.using_db_name, self.using_table_name ) meta_field_list.remove("text_embedding") @@ -255,7 +263,7 @@ class VearchVectorStore(VectorStore): "size": k, "fields": meta_field_list, } - query_result = self.vearch.search( + query_result = self._vearch.search( self.using_db_name, self.using_table_name, query_data ) res = query_result["hits"]["hits"] @@ -321,7 +329,7 @@ class 
VearchVectorStore(VectorStore): }, "size": 10000, } - self.vearch.delete_by_query( + self._vearch.delete_by_query( self, self.using_db_name, self.using_table_name, queries ) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/pyproject.toml index 340fde2d3..f35ac990d 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/pyproject.toml @@ -30,7 +30,7 @@ license = "MIT" name = "llama-index-vector-stores-vearch" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.1.0" +version = "0.1.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/tests/test_vector_stores_vearch.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/tests/test_vector_stores_vearch.py index 60b897a69..9136147d4 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/tests/test_vector_stores_vearch.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-vearch/tests/test_vector_stores_vearch.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.vearch import VearchVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in VearchVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-vespa/llama_index/vector_stores/vespa/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-vespa/llama_index/vector_stores/vespa/base.py index 74ec8d657..4040e404e 100644 --- 
a/llama-index-integrations/vector_stores/llama-index-vector-stores-vespa/llama_index/vector_stores/vespa/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-vespa/llama_index/vector_stores/vespa/base.py @@ -2,10 +2,10 @@ from typing import Any, List, Optional, Callable - +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode from llama_index.core.vector_stores.types import ( - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -45,7 +45,7 @@ def callback(response: VespaResponse, id: str): ) -class VespaVectorStore(VectorStore): +class VespaVectorStore(BasePydanticVectorStore): """ Vespa vector store. @@ -104,6 +104,23 @@ class VespaVectorStore(VectorStore): is_embedding_query: bool = False flat_metadata: bool = True + application_package: ApplicationPackage + deployment_target: str + default_schema_name: str + namespace: str + embeddings_outside_vespa: bool + port: int + url: Optional[str] + groupname: Optional[str] + tenant: Optional[str] + application: Optional[str] + key_location: Optional[str] + key_content: Optional[str] + auth_client_token_id: Optional[str] + kwargs: dict + + _app: Vespa = PrivateAttr() + def __init__( self, application_package: ApplicationPackage = hybrid_template, @@ -131,29 +148,36 @@ class VespaVectorStore(VectorStore): "Using default hybrid template. Please make sure that the Vespa application is set up with the correct schema and rank profile." 
) # Initialize all parameters - self.application_package = application_package - self.deployment_target = deployment_target - self.default_schema_name = default_schema_name - self.namespace = namespace - self.embeddings_outside_vespa = embeddings_outside_vespa - self.port = port - self.url = url - self.groupname = groupname - self.tenant = tenant - self.application = application - self.key_location = key_location - self.key_content = key_content - self.auth_client_token_id = auth_client_token_id - self.kwargs = kwargs + super().__init__( + application_package=application_package, + namespace=namespace, + default_schema_name=default_schema_name, + deployment_target=deployment_target, + port=port, + embeddings_outside_vespa=embeddings_outside_vespa, + url=url, + groupname=groupname, + tenant=tenant, + application=application, + key_location=key_location, + key_content=key_content, + auth_client_token_id=auth_client_token_id, + kwargs=kwargs, + ) + if self.url is None: - self.app = self._deploy() + self._app = self._deploy() else: - self.app = self._try_get_running_app() + self._app = self._try_get_running_app() + + @classmethod + def class_name(cls) -> str: + return "VespaVectorStore" @property def client(self) -> Vespa: """Get client.""" - return self.app + return self._app def _try_get_running_app(self) -> Vespa: app = Vespa(url=f"{self.url}:{self.port}") @@ -226,7 +250,7 @@ class VespaVectorStore(VectorStore): data_to_insert.append(entry) ids.append(node.node_id) - self.app.feed_iterable( + self._app.feed_iterable( data_to_insert, schema=schema or self.default_schema_name, namespace=self.namespace, @@ -256,7 +280,7 @@ class VespaVectorStore(VectorStore): total_timeout (int): Total timeout for all requests kwargs (Any): Additional kwargs for Vespa application """ - semaphore = asyncio.Semaphore(max_concurrent_requests) + semaphore = asyncio.Semaphore(num_concurrent_requests) ids = [] data_to_insert = [] for node in nodes: @@ -277,9 +301,10 @@ class 
VespaVectorStore(VectorStore): data_to_insert.append(entry) ids.append(node.node_id) - async with self.app.asyncio( + async with self._app.asyncio( connections=max_connections, total_timeout=total_timeout ) as async_app: + tasks = [] for doc in data_to_insert: async with semaphore: task = asyncio.create_task( @@ -308,7 +333,7 @@ class VespaVectorStore(VectorStore): """ Delete nodes using with ref_doc_id. """ - response: VespaResponse = self.app.delete_data( + response: VespaResponse = self._app.delete_data( schema=self.default_schema_name, namespace=namespace or self.namespace, data_id=ref_doc_id, @@ -449,7 +474,7 @@ class VespaVectorStore(VectorStore): vector_top_k=vector_top_k, ) logger.info(f"Vespa Query body:\n {body}") - with self.app.syncio() as session: + with self._app.syncio() as session: response = session.query( body=body, ) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-vespa/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-vespa/pyproject.toml index 9830a544a..0ad6eb674 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-vespa/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-vespa/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-vespa" readme = "README.md" -version = "0.0.1" +version = "0.0.2" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/llama_index/vector_stores/zep/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/llama_index/vector_stores/zep/base.py index 17fdec948..33bf75d3d 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/llama_index/vector_stores/zep/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/llama_index/vector_stores/zep/base.py @@ -2,10 +2,11 @@ import logging from typing 
import Any, Dict, List, Optional, Tuple, Union import zep_python +from llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode, TextNode from llama_index.core.vector_stores.types import ( MetadataFilters, - VectorStore, + BasePydanticVectorStore, VectorStoreQuery, VectorStoreQueryResult, ) @@ -20,7 +21,7 @@ from zep_python.document import DocumentCollection logger = logging.getLogger(__name__) -class ZepVectorStore(VectorStore): +class ZepVectorStore(BasePydanticVectorStore): """Zep Vector Store for storing and retrieving embeddings. Zep supports both normalized and non-normalized embeddings. Cosine similarity is @@ -57,6 +58,9 @@ class ZepVectorStore(VectorStore): stores_text = True flat_metadata = False + _client: ZepClient = PrivateAttr() + _collection: DocumentCollection = PrivateAttr() + def __init__( self, collection_name: str, @@ -69,13 +73,13 @@ class ZepVectorStore(VectorStore): **kwargs: Any, ) -> None: """Init params.""" + super().__init__() + self._client = ZepClient(base_url=api_url, api_key=api_key) - self._collection: Union[DocumentCollection, None] = None + collection: Union[DocumentCollection, None] = None try: - self._collection = self._client.document.get_collection( - name=collection_name - ) + collection = self._client.document.get_collection(name=collection_name) except zep_python.NotFoundError: if embedding_dimensions is None: raise ValueError( @@ -87,7 +91,7 @@ class ZepVectorStore(VectorStore): f"will try creating one with dimensions={embedding_dimensions}" ) - self._collection = self._client.document.add_collection( + collection = self._client.document.add_collection( name=collection_name, embedding_dimensions=embedding_dimensions, is_auto_embedded=is_auto_embedded, @@ -95,6 +99,13 @@ class ZepVectorStore(VectorStore): metadata=collection_metadata, ) + assert collection is not None + self._collection = collection + + @classmethod + def class_name(cls) -> str: + return "ZepVectorStore" 
+ @property def client(self) -> Any: """Get client.""" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/pyproject.toml index 7d8de98be..ffb0063aa 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-zep" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/tests/test_vector_stores_zep.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/tests/test_vector_stores_zep.py index f6ada5330..58a58f93c 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/tests/test_vector_stores_zep.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-zep/tests/test_vector_stores_zep.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.zep import ZepVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in ZepVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_query_engine.py b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_query_engine.py index b389d5054..ee0b0ce90 100644 --- a/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_query_engine.py +++ b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_query_engine.py @@ -78,14 +78,6 @@ def test_query_by_item_name( assert len(results.response) >= 1 and 
results.response != "None" -def test_get_tool(code_hierarchy_nodes: Sequence[BaseNode]) -> None: - """Test querying the index by signature.""" - index = CodeHierarchyKeywordQueryEngine(nodes=code_hierarchy_nodes) - query = "CodeHierarchyNodeParser" - results = index.as_langchain_tool().run(query) - assert len(results) >= 1 and results != "None" - - def test_query_by_all_uuids(code_hierarchy_nodes: Sequence[BaseNode]) -> None: """Test querying the index by signature.""" index = CodeHierarchyKeywordQueryEngine(nodes=code_hierarchy_nodes) diff --git a/llama-index-packs/llama-index-packs-cohere-citation-chat/tests/BUILD b/llama-index-packs/llama-index-packs-cohere-citation-chat/tests/BUILD index dabf212d7..a1431c235 100644 --- a/llama-index-packs/llama-index-packs-cohere-citation-chat/tests/BUILD +++ b/llama-index-packs/llama-index-packs-cohere-citation-chat/tests/BUILD @@ -1 +1,3 @@ -python_tests() +python_tests( + interpreter_constraints=["==3.10.*"], +) diff --git a/llama-index-packs/llama-index-packs-raptor/llama_index/packs/raptor/base.py b/llama-index-packs/llama-index-packs-raptor/llama_index/packs/raptor/base.py index 4dc1834b8..e2d8e8fe0 100644 --- a/llama-index-packs/llama-index-packs-raptor/llama_index/packs/raptor/base.py +++ b/llama-index-packs/llama-index-packs-raptor/llama_index/packs/raptor/base.py @@ -27,7 +27,7 @@ from llama_index.core.schema import ( from llama_index.core.vector_stores.types import ( MetadataFilter, MetadataFilters, - VectorStore, + BasePydanticVectorStore, ) from llama_index.packs.raptor.clustering import get_clusters @@ -112,7 +112,7 @@ class RaptorRetriever(BaseRetriever): similarity_top_k: int = 2, llm: Optional[LLM] = None, embed_model: Optional[BaseEmbedding] = None, - vector_store: Optional[VectorStore] = None, + vector_store: Optional[BasePydanticVectorStore] = None, transformations: Optional[List[TransformComponent]] = None, summary_module: Optional[SummaryModule] = None, existing_index: Optional[VectorStoreIndex] = None, @@ 
-338,7 +338,7 @@ class RaptorPack(BaseLlamaPack): documents: List[BaseNode], llm: Optional[LLM] = None, embed_model: Optional[BaseEmbedding] = None, - vector_store: Optional[VectorStore] = None, + vector_store: Optional[BasePydanticVectorStore] = None, similarity_top_k: int = 2, mode: QueryModes = "collapsed", verbose: bool = True, -- GitLab