diff --git a/docs/examples/vector_stores/UpstashVectorDemo.ipynb b/docs/examples/vector_stores/UpstashVectorDemo.ipynb index 7a388e6b990e9f3d7278fa73c1ef71b3002f4e77..32ba1d9a20f4c74ac06c7e65c7f00514060128b5 100644 --- a/docs/examples/vector_stores/UpstashVectorDemo.ipynb +++ b/docs/examples/vector_stores/UpstashVectorDemo.ipynb @@ -157,6 +157,81 @@ "res2 = query_engine.query(\"What is the author's opinion on startups?\")\n", "print(textwrap.fill(str(res2), 100))" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Metadata Filtering\n", + "\n", + "You can pass `MetadataFilters` with your `VectorStoreQuery` to filter the nodes returned from Upstash vector store." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from llama_index.vector_stores.upstash import UpstashVectorStore\n", + "from llama_index.core.vector_stores.types import (\n", + " MetadataFilter,\n", + " MetadataFilters,\n", + " FilterOperator,\n", + ")\n", + "\n", + "vector_store = UpstashVectorStore(\n", + " url=os.environ.get(\"UPSTASH_VECTOR_URL\") or \"\",\n", + " token=os.environ.get(\"UPSTASH_VECTOR_TOKEN\") or \"\",\n", + ")\n", + "\n", + "index = VectorStoreIndex.from_vector_store(vector_store=vector_store)\n", + "\n", + "filters = MetadataFilters(\n", + " filters=[\n", + " MetadataFilter(\n", + " key=\"author\", value=\"Marie Curie\", operator=FilterOperator.EQ\n", + " )\n", + " ],\n", + ")\n", + "\n", + "retriever = index.as_retriever(filters=filters)\n", + "\n", + "retriever.retrieve(\"What is inception about?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also combine multiple `MetadataFilters` with `AND` or `OR` condition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.vector_stores import FilterOperator, FilterCondition\n", + "\n", + "filters = 
def _transform_upstash_filter_operator(operator: str) -> str:
    """
    Translate a standard metadata filter operator to the Upstash filter syntax.

    Args:
        operator: A ``FilterOperator`` member, or the plain string that
            compares equal to one.

    Returns:
        The operator token used inside an Upstash filter expression.

    Raises:
        ValueError: If the operator has no Upstash equivalent.
    """
    # Keyed by the enum *value* so that both enum members and their plain
    # string values resolve to the same entry.
    upstash_operators = {
        FilterOperator.EQ.value: "=",
        FilterOperator.GT.value: ">",
        FilterOperator.LT.value: "<",
        FilterOperator.NE.value: "!=",
        FilterOperator.GTE.value: ">=",
        FilterOperator.LTE.value: "<=",
        FilterOperator.IN.value: "IN",
        FilterOperator.NIN.value: "NOT IN",
        FilterOperator.CONTAINS.value: "CONTAINS",
    }
    key = operator.value if isinstance(operator, FilterOperator) else operator
    try:
        return upstash_operators[key]
    except (KeyError, TypeError):
        raise ValueError(f"Filter operator {operator} not supported") from None


def _to_upstash_literal(value: object) -> str:
    """
    Render a single Python value as a literal for an Upstash filter expression.

    Strings are quoted, booleans are lower-cased, everything else goes
    through ``str``.

    Raises:
        ValueError: If a string contains both quote characters and therefore
            cannot be quoted unambiguously.
    """
    # bool must be tested before anything numeric: str(True) is "True".
    # NOTE(review): Upstash's documented examples use lowercase true/false —
    # confirm against the filter syntax reference.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, str):
        if "'" not in value:
            return f"'{value}'"
        # A single quote inside the value would terminate the literal early
        # (and let the value inject filter syntax); switch quote style.
        # NOTE(review): relies on Upstash accepting double-quoted strings.
        if '"' not in value:
            return f'"{value}"'
        raise ValueError(
            f"Filter value {value!r} contains both single and double quotes "
            "and cannot be expressed as an Upstash string literal"
        )
    return str(value)


def _to_upstash_filter_string(filter: MetadataFilter) -> str:
    """
    Convert one ``MetadataFilter`` into an Upstash filter clause.

    Args:
        filter: The filter whose ``key``, ``operator`` and ``value`` are
            rendered as ``"<key> <operator> <value>"``.

    Returns:
        The clause, e.g. ``"rating >= 4.3"`` or
        ``"theme IN ('Fiction', 'Horror')"``.

    Raises:
        ValueError: If the operator is unsupported or a value cannot be
            rendered as a literal.
    """
    operator_str = _transform_upstash_filter_operator(filter.operator)
    value = filter.value

    if filter.operator in [FilterOperator.IN, FilterOperator.NIN]:
        # A bare scalar (notably a string) must not be iterated element by
        # element; treat it as a one-item collection.
        members = value if isinstance(value, (list, tuple)) else [value]
        value_str = ", ".join(_to_upstash_literal(member) for member in members)
        return f"{filter.key} {operator_str} ({value_str})"

    return f"{filter.key} {operator_str} {_to_upstash_literal(value)}"


def _to_upstash_filters(filters: MetadataFilters) -> str:
    """
    Convert a ``MetadataFilters`` group into one Upstash filter expression.

    Args:
        filters: The filter group; a falsy value (e.g. ``None``) means
            "no filtering".

    Returns:
        The individual clauses joined by the group's condition (``AND`` /
        ``OR``), or an empty string when there is nothing to filter on.
    """
    if not filters:
        return ""

    clauses = []
    for metadata_filter in filters.filters:
        if isinstance(metadata_filter, MetadataFilters):
            # Nested group: parenthesise so its condition cannot mix with
            # the outer one.
            nested = _to_upstash_filters(metadata_filter)
            if nested:
                clauses.append(f"({nested})")
        else:
            clauses.append(_to_upstash_filter_string(metadata_filter))

    # Fall back to AND when the condition was explicitly set to None.
    condition_str = filters.condition.value.upper() if filters.condition else "AND"
    return f" {condition_str} ".join(clauses)
@@ -118,14 +177,15 @@ class UpstashVectorStore(VectorStore): if query.mode != VectorStoreQueryMode.DEFAULT: raise ValueError(f"Query mode {query.mode} not supported") - if query.filters: - raise ValueError("Metadata filtering not supported") + # if query.filters: + # raise ValueError("Metadata filtering not supported") res = self.client.query( vector=query.query_embedding, top_k=query.similarity_top_k, include_vectors=True, include_metadata=True, + filter=_to_upstash_filters(query.filters), ) top_k_nodes = [] diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/pyproject.toml index 296d66efa87d173f812f0d7c336cbcdcd0bf6739..8c0737e7511e69bedd929aea501fffddab578cd6 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-vector-stores-upstash" readme = "README.md" -version = "0.1.2" +version = "0.1.3" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/tests/test_upstash.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/tests/test_upstash.py index b091ed1b6fb1b47dfccf311e3546969b9f686006..f20b633c0a44b4ab3f8539e35e8f91381153e677 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/tests/test_upstash.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-upstash/tests/test_upstash.py @@ -3,8 +3,14 @@ from importlib.util import find_spec from typing import List import pytest -from llama_index.core.schema import TextNode -from llama_index.core.vector_stores.types import VectorStoreQuery +from llama_index.core.schema import NodeRelationship, RelatedNodeInfo, TextNode +from 
def text_nodes() -> List[TextNode]:
    """
    Build the six sample nodes used by the Upstash vector store tests.

    Each node carries a source relationship, a small metadata dict and a
    1536-dimensional embedding made by repeating a 3-value pattern 512 times.
    """
    # (text, node id, source node id, metadata, 3-value embedding pattern)
    node_specs = [
        (
            "lorem ipsum",
            "c330d77f-90bd-4c51-9ed2-57d8d693b3b0",
            "test-0",
            {"author": "Stephen King", "theme": "Friendship", "rating": 4.1},
            [1.0, 0.0, 0.0],
        ),
        (
            "lorem ipsum",
            "c3d1e1dd-8fb4-4b8f-b7ea-7fa96038d39d",
            "test-1",
            {"director": "Francis Ford Coppola", "theme": "Mafia", "rating": 3.3},
            [0.0, 1.0, 0.0],
        ),
        (
            "lorem ipsum",
            "c3ew11cd-8fb4-4b8f-b7ea-7fa96038d39d",
            "test-2",
            {"director": "Christopher Nolan", "rating": 4.3, "theme": "Action"},
            [0.0, 0.0, 1.0],
        ),
        (
            "I was taught that the way of progress was neither swift nor easy.",
            "0b31ae71-b797-4e88-8495-031371a7752e",
            "text-3",
            {"author": "Marie Curie", "rating": 2.3},
            [0.0, 0.0, 0.9],
        ),
        (
            "The important thing is not to stop questioning."
            " Curiosity has its own reason for existing.",
            "bd2e080b-159a-4030-acc3-d98afd2ba49b",
            "text-4",
            {"author": "Albert Einstein", "rating": 4.8},
            [0.0, 0.0, 0.5],
        ),
        (
            "I am no bird; and no net ensnares me;"
            " I am a free human being with an independent will.",
            "f658de3b-8cef-4d1c-8bed-9a263c907251",
            "text-5",
            {"author": "Charlotte Bronte", "rating": 1.5},
            [0.0, 0.0, 0.3],
        ),
    ]

    return [
        TextNode(
            text=body,
            id_=node_id,
            relationships={
                NodeRelationship.SOURCE: RelatedNodeInfo(node_id=source_id)
            },
            metadata=meta,
            embedding=pattern * 512,
        )
        for body, node_id, source_id, meta, pattern in node_specs
    ]
@pytest.mark.skipif(not upstash_installed, reason="upstash-vector not installed")
def test_upstash_vector_filtering_eq(
    upstash_vector_store: UpstashVectorStore, text_nodes: List[TextNode]
) -> None:
    """An EQ filter on ``author`` yields exactly the Marie Curie node."""
    author_is_curie = MetadataFilter(
        key="author", value="Marie Curie", operator=FilterOperator.EQ
    )
    filters = MetadataFilters(filters=[author_is_curie])

    upstash_vector_store.add(nodes=text_nodes)
    query = VectorStoreQuery(
        query_embedding=[0.1] * 1536,
        filters=filters,
        similarity_top_k=1,
    )
    result = upstash_vector_store.query(query)

    assert len(result.nodes) == 1
    expected_text = "I was taught that the way of progress was neither swift nor easy."
    assert result.nodes[0].get_content() == expected_text


@pytest.mark.skipif(not upstash_installed, reason="upstash-vector not installed")
def test_upstash_vector_filtering_gte(
    upstash_vector_store: UpstashVectorStore, text_nodes: List[TextNode]
) -> None:
    """A GTE filter on ``rating`` only returns nodes rated at least 4.3."""
    minimum_rating = MetadataFilter(
        key="rating", value=4.3, operator=FilterOperator.GTE
    )
    filters = MetadataFilters(filters=[minimum_rating])

    upstash_vector_store.add(nodes=text_nodes)
    query = VectorStoreQuery(query_embedding=[0.1] * 1536, filters=filters)
    result = upstash_vector_store.query(query)

    assert result.nodes
    for hit in result.nodes:
        assert hit.metadata["rating"] >= 4.3


@pytest.mark.skipif(not upstash_installed, reason="upstash-vector not installed")
def test_upstash_vector_filtering_in(
    upstash_vector_store: UpstashVectorStore, text_nodes: List[TextNode]
) -> None:
    """An IN filter on ``theme`` only returns nodes with a listed theme."""
    values_contained = ["Friendship", "Mafia"]
    theme_in_values = MetadataFilter(
        key="theme", value=values_contained, operator=FilterOperator.IN
    )
    filters = MetadataFilters(filters=[theme_in_values])

    upstash_vector_store.add(nodes=text_nodes)
    query = VectorStoreQuery(query_embedding=[0.1] * 1536, filters=filters)
    result = upstash_vector_store.query(query)

    assert result.nodes
    for hit in result.nodes:
        assert hit.metadata["theme"] in values_contained


@pytest.mark.skipif(not upstash_installed, reason="upstash-vector not installed")
def test_upstash_vector_filtering_composite(
    upstash_vector_store: UpstashVectorStore, text_nodes: List[TextNode]
) -> None:
    """Two filters combined with AND narrow the result to one known node."""
    low_rating = MetadataFilter(key="rating", value=3, operator=FilterOperator.LT)
    author_is_bronte = MetadataFilter(
        key="author", value="Charlotte Bronte", operator=FilterOperator.EQ
    )
    filters = MetadataFilters(
        filters=[low_rating, author_is_bronte],
        condition=FilterCondition.AND,
    )

    upstash_vector_store.add(nodes=text_nodes)
    query = VectorStoreQuery(query_embedding=[0.1] * 1536, filters=filters)
    result = upstash_vector_store.query(query)

    assert len(result.nodes) == 1
    assert result.nodes[0].node_id == "f658de3b-8cef-4d1c-8bed-9a263c907251"