diff --git a/docs/examples/vector_stores/AzureAISearchIndexDemo.ipynb b/docs/examples/vector_stores/AzureAISearchIndexDemo.ipynb index e8f44d20891867f18d822bebd57cdaedd6d03b46..9362be78e1e1bdf29fe29125cd3d5383f8c31caf 100644 --- a/docs/examples/vector_stores/AzureAISearchIndexDemo.ipynb +++ b/docs/examples/vector_stores/AzureAISearchIndexDemo.ipynb @@ -39,7 +39,11 @@ "outputs": [], "source": [ "!pip install llama-index\n", - "!pip install wget" + "!pip install wget\n", + "%pip install llama-index-vector-stores-azureaisearch\n", + "%pip install azure-search-documents==11.4.0\n", + "%pip install llama-index-embeddings-azure-openai\n", + "%pip install llama-index-llms-azure-openai" ] }, { @@ -54,15 +58,16 @@ "from azure.search.documents import SearchClient\n", "from azure.search.documents.indexes import SearchIndexClient\n", "from IPython.display import Markdown, display\n", - "from llama_index import (\n", - " ServiceContext,\n", + "from llama_index.core import (\n", " SimpleDirectoryReader,\n", " StorageContext,\n", " VectorStoreIndex,\n", ")\n", - "from llama_index.embeddings import AzureOpenAIEmbedding\n", - "from llama_index.llms import AzureOpenAI\n", - "from llama_index.vector_stores import AzureAISearchVectorStore\n", + "from llama_index.core.settings import Settings\n", + "\n", + "from llama_index.llms.azure_openai import AzureOpenAI\n", + "from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding\n", + "from llama_index.vector_stores.azureaisearch import AzureAISearchVectorStore\n", "from llama_index.vector_stores.azureaisearch import (\n", " IndexManagement,\n", " MetadataIndexFieldType,\n", @@ -187,6 +192,8 @@ " embedding_dimensionality=1536,\n", " metadata_string_field_key=\"metadata\",\n", " doc_id_field_key=\"doc_id\",\n", + " language_analyzer=\"en.lucene\",\n", + " vector_algorithm_type=\"exhaustiveKnn\",\n", ")" ] }, { @@ -216,13 +223,12 @@ "source": [ "# Load documents\n", "documents = SimpleDirectoryReader(\"../data/paul_graham/\").load_data()\n", - "\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n", - "service_context = ServiceContext.from_defaults(\n", - " llm=llm, embed_model=embed_model\n", - ")\n", + "\n", + "Settings.llm = llm\n", + "Settings.embed_model = embed_model\n", "index = VectorStoreIndex.from_documents(\n", - " documents, storage_context=storage_context, service_context=service_context\n", + " documents, storage_context=storage_context\n", ")" ] }, @@ -316,11 +322,9 @@ "outputs": [], "source": [ "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n", - "service_context = ServiceContext.from_defaults(\n", - " llm=llm, embed_model=embed_model\n", - ")\n", "index = VectorStoreIndex.from_documents(\n", - " [], storage_context=storage_context, service_context=service_context\n", + " [],\n", + " storage_context=storage_context,\n", ")" ] }, @@ -441,7 +445,7 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index import Document\n", + "from llama_index.core import Document\n", "\n", "index.insert_nodes([Document(text=\"The sky is indigo today\")])" ] @@ -482,7 +486,7 @@ "metadata": {}, "outputs": [], "source": [ - "from llama_index.schema import TextNode\n", + "from llama_index.core.schema import TextNode\n", "\n", "nodes = [\n", " TextNode(\n", @@ -534,7 +538,10 @@ } ], "source": [ - "from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters\n", + "from llama_index.core.vector_stores.types import (\n", + " MetadataFilters,\n", + " ExactMatchFilter,\n", + ")\n", "\n", "\n", "filters = MetadataFilters(\n", @@ -611,7 +618,7 @@ } ], "source": [ - "from llama_index.vector_stores.types import VectorStoreQueryMode\n", + "from llama_index.core.vector_stores.types import VectorStoreQueryMode\n", "\n", "default_retriever = index.as_retriever(\n", " vector_store_query_mode=VectorStoreQueryMode.DEFAULT\n", @@ -665,7 +672,7 @@ } ], "source": [ - "from llama_index.vector_stores.types import VectorStoreQueryMode\n", + "from 
llama_index.core.vector_stores.types import VectorStoreQueryMode\n", "\n", "hybrid_retriever = index.as_retriever(\n", " vector_store_query_mode=VectorStoreQueryMode.HYBRID\n", diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/__init__.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/__init__.py index 232ec980430d1e64bc84b8099c6ac77d8060ce3e..d801dfd6f165d80c5ceaa3f715246849225c730b 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/__init__.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/__init__.py @@ -2,6 +2,12 @@ from llama_index.vector_stores.azureaisearch.base import ( AzureAISearchVectorStore, CognitiveSearchVectorStore, IndexManagement, + MetadataIndexFieldType, ) -__all__ = ["AzureAISearchVectorStore", "CognitiveSearchVectorStore", "IndexManagement"] +__all__ = [ + "AzureAISearchVectorStore", + "CognitiveSearchVectorStore", + "IndexManagement", + "MetadataIndexFieldType", +] diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py index b81e5e5512761a7104f1d29070a88fd92ffd395d..18a82a0f485d2e5a4fac0aa659e14ff666c78dbb 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py @@ -4,12 +4,13 @@ import json import logging from enum import auto from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast - +from 
llama_index.core.bridge.pydantic import PrivateAttr from llama_index.core.schema import BaseNode, MetadataMode, TextNode + from llama_index.core.vector_stores.types import ( ExactMatchFilter, + BasePydanticVectorStore, MetadataFilters, - VectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult, @@ -20,6 +21,9 @@ from llama_index.core.vector_stores.utils import ( node_to_metadata_dict, ) +from azure.search.documents.indexes import SearchIndexClient +from azure.search.documents import SearchClient + logger = logging.getLogger(__name__) @@ -45,10 +49,24 @@ class IndexManagement(int, enum.Enum): CREATE_IF_NOT_EXISTS = auto() -class AzureAISearchVectorStore(VectorStore): +class AzureAISearchVectorStore(BasePydanticVectorStore): stores_text: bool = True flat_metadata: bool = True + _index_client: SearchIndexClient = PrivateAttr() + _search_client: SearchClient = PrivateAttr() + _embedding_dimensionality: int = PrivateAttr() + _language_analyzer: str = PrivateAttr() + _field_mapping: Dict[str, str] = PrivateAttr() + _index_management: IndexManagement = PrivateAttr() + _index_mapping: Callable[ + [Dict[str, str], Dict[str, Any]], Dict[str, str] + ] = PrivateAttr() + _metadata_to_index_field_map: Dict[ + str, Tuple[str, MetadataIndexFieldType] + ] = PrivateAttr() + _vector_profile_name: str = PrivateAttr() + def _normalise_metadata_to_index_fields( self, filterable_metadata_field_keys: Union[ @@ -143,14 +161,14 @@ class AzureAISearchVectorStore(VectorStore): SearchableField( name=self._field_mapping["chunk"], type="Edm.String", - analyzer_name=self.language_analyzer, + analyzer_name=self._language_analyzer, ), SearchField( name=self._field_mapping["embedding"], type=SearchFieldDataType.Collection(SearchFieldDataType.Single), searchable=True, - vector_search_dimensions=self.embedding_dimensionality, - vector_search_profile_name=self.vector_profile_name, + vector_search_dimensions=self._embedding_dimensionality, + 
vector_search_profile_name=self._vector_profile_name, ), SimpleField(name=self._field_mapping["metadata"], type="Edm.String"), SimpleField( @@ -308,18 +326,19 @@ class AzureAISearchVectorStore(VectorStore): self._index_client: SearchIndexClient = cast(SearchIndexClient, None) self._search_client: SearchClient = cast(SearchClient, None) - self.embedding_dimensionality = embedding_dimensionality + self._embedding_dimensionality = embedding_dimensionality if vector_algorithm_type == "exhaustiveKnn": - self.vector_profile_name = "myExhaustiveKnnProfile" + self._vector_profile_name = "myExhaustiveKnnProfile" elif vector_algorithm_type == "hnsw": - self.vector_profile_name = "myHnswProfile" + self._vector_profile_name = "myHnswProfile" else: raise ValueError( "Only 'exhaustiveKnn' and 'hnsw' are supported for vector_algorithm_type" ) - self.language_analyzer = language_analyzer + self._language_analyzer = language_analyzer + # Validate search_or_index_client if search_or_index_client is not None: if isinstance(search_or_index_client, SearchIndexClient): @@ -394,6 +413,8 @@ class AzureAISearchVectorStore(VectorStore): if self._index_management == IndexManagement.VALIDATE_INDEX: self._validate_index(index_name) + super().__init__() + @property def client(self) -> Any: """Get client.""" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/pyproject.toml index db2698e58d185de7975c546a9beee9933711cca7..d67e58f397cef2b0f7cdbe086b1f9d45ac4e248a 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/pyproject.toml @@ -24,7 +24,7 @@ description = "llama-index vector_stores azureaisearch integration" license = "MIT" name = "llama-index-vector-stores-azureaisearch" readme = "README.md" -version = "0.1.2" +version = 
"0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<3.12" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/tests/test_vector_stores_cogsearch.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/tests/test_vector_stores_cogsearch.py index 20a43bfa674a956bf439a9fefd692d48a4d46390..4c9cca538ad8f398d2a4ac738bd788b7b125e544 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/tests/test_vector_stores_cogsearch.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/tests/test_vector_stores_cogsearch.py @@ -1,7 +1,7 @@ -from llama_index.core.vector_stores.types import VectorStore +from llama_index.core.vector_stores.types import BasePydanticVectorStore from llama_index.vector_stores.azureaisearch import AzureAISearchVectorStore def test_class(): names_of_base_classes = [b.__name__ for b in AzureAISearchVectorStore.__mro__] - assert VectorStore.__name__ in names_of_base_classes + assert BasePydanticVectorStore.__name__ in names_of_base_classes