diff --git a/docs/community/integrations/vector_stores.md b/docs/community/integrations/vector_stores.md index 9d6ef1ad9b488bbb88b6ab3d6e512a25bebaa2ac..d7406108820d69cf61a2cfd721b35e855190143d 100644 --- a/docs/community/integrations/vector_stores.md +++ b/docs/community/integrations/vector_stores.md @@ -34,6 +34,7 @@ as the storage backend for `VectorStoreIndex`. - Redis (`RedisVectorStore`). [Installation](https://redis.io/docs/getting-started/installation/). - Supabase (`SupabaseVectorStore`). [Quickstart](https://supabase.github.io/vecs/api/). - TimeScale (`TimescaleVectorStore`). [Installation](https://github.com/timescale/python-vector). +- Upstash (`UpstashVectorStore`). [Quickstart](https://upstash.com/docs/vector/overall/getstarted) - Weaviate (`WeaviateVectorStore`). [Installation](https://weaviate.io/developers/weaviate/installation). [Python Client](https://weaviate.io/developers/weaviate/client-libraries/python). - Zep (`ZepVectorStore`). [Installation](https://docs.getzep.com/deployment/quickstart/). [Python Client](https://docs.getzep.com/sdk/). - Zilliz (`MilvusVectorStore`). 
[Quickstart](https://zilliz.com/doc/quick_start) @@ -568,6 +569,14 @@ vector_store = TimescaleVectorStore.from_params( ) ``` +**Upstash** + +```python +from llama_index.vector_stores import UpstashVectorStore + +vector_store = UpstashVectorStore(url="YOUR_URL", token="YOUR_TOKEN") +``` + **Weaviate** ```python @@ -780,6 +789,7 @@ maxdepth: 1 ../../examples/vector_stores/TairIndexDemo.ipynb ../../examples/vector_stores/TencentVectorDBIndexDemo.ipynb ../../examples/vector_stores/Timescalevector.ipynb +../../examples/vector_stores/UpstashVectorDemo.ipynb ../../examples/vector_stores/WeaviateIndexDemo.ipynb ../../examples/vector_stores/WeaviateIndexDemo-Hybrid.ipynb ../../examples/vector_stores/ZepIndexDemo.ipynb diff --git a/docs/examples/vector_stores/UpstashVectorDemo.ipynb b/docs/examples/vector_stores/UpstashVectorDemo.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..293146d07620424145fa786c4138710ae3e3cd23 --- /dev/null +++ b/docs/examples/vector_stores/UpstashVectorDemo.ipynb @@ -0,0 +1,241 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Upstash Vector Store\n", + "\n", + "We're going to look at how to use LlamaIndex to interface with Upstash Vector!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-index in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (0.9.35)\n", + "Requirement already satisfied: upstash-vector in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (0.2.0)\n", + "Requirement already satisfied: SQLAlchemy>=1.4.49 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index) (2.0.25)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (3.9.1)\n", + "Requirement already satisfied: dataclasses-json in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (0.6.3)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (1.2.14)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (2023.12.2)\n", + "Requirement already satisfied: httpx in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (0.25.2)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (1.6.0)\n", + "Requirement already satisfied: networkx>=3.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (3.1)\n", + "Requirement already satisfied: 
nltk<4.0.0,>=3.8.1 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (3.8.1)\n", + "Requirement already satisfied: numpy in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (1.24.4)\n", + "Requirement already satisfied: openai>=1.1.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (1.9.0)\n", + "Requirement already satisfied: pandas in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (2.0.3)\n", + "Requirement already satisfied: requests>=2.31.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (2.31.0)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.2.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (8.2.3)\n", + "Requirement already satisfied: tiktoken>=0.3.3 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (0.5.2)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (4.9.0)\n", + "Requirement already satisfied: typing-inspect>=0.8.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from llama-index) (0.9.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index) (23.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index) 
(6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index) (1.9.4)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index) (1.4.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index) (1.3.1)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index) (4.0.3)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from deprecated>=1.2.9.3->llama-index) (1.16.0)\n", + "Requirement already satisfied: anyio in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from httpx->llama-index) (4.2.0)\n", + "Requirement already satisfied: certifi in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from httpx->llama-index) (2023.11.17)\n", + "Requirement already satisfied: httpcore==1.* in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from httpx->llama-index) (1.0.2)\n", + "Requirement already satisfied: idna in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from httpx->llama-index) (3.6)\n", + "Requirement already satisfied: sniffio in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from httpx->llama-index) (1.3.0)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in 
/home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from httpcore==1.*->httpx->llama-index) (0.14.0)\n", + "Requirement already satisfied: click in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama-index) (8.1.7)\n", + "Requirement already satisfied: joblib in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama-index) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama-index) (2023.12.25)\n", + "Requirement already satisfied: tqdm in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from nltk<4.0.0,>=3.8.1->llama-index) (4.66.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from openai>=1.1.0->llama-index) (1.9.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from openai>=1.1.0->llama-index) (1.10.14)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from requests>=2.31.0->llama-index) (3.3.2)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from requests>=2.31.0->llama-index) (1.26.18)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index) (3.0.3)\n", + "Requirement already satisfied: 
mypy-extensions>=0.3.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from typing-inspect>=0.8.0->llama-index) (1.0.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from dataclasses-json->llama-index) (3.20.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from pandas->llama-index) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from pandas->llama-index) (2023.3.post1)\n", + "Requirement already satisfied: tzdata>=2022.1 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from pandas->llama-index) (2023.4)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from anyio->httpx->llama-index) (1.2.0)\n", + "Requirement already satisfied: packaging>=17.0 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index) (23.2)\n", + "Requirement already satisfied: six>=1.5 in /home/meshan/.cache/pypoetry/virtualenvs/llama-index-g12BQvjh-py3.8/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->llama-index) (1.16.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + 
"source": [ + "! pip install llama-index upstash-vector" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index import VectorStoreIndex, SimpleDirectoryReader\n", + "from llama_index.vector_stores import UpstashVectorStore\n", + "from llama_index.storage.storage_context import StorageContext\n", + "import textwrap\n", + "import openai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup the OpenAI API\n", + "openai.api_key = \"sk-...\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-02-03 20:04:25-- https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 75042 (73K) [text/plain]\n", + "Saving to: ‘data/paul_graham/paul_graham_essay.txt’\n", + "\n", + "data/paul_graham/pa 100%[===================>] 73.28K --.-KB/s in 0.01s \n", + "\n", + "2024-02-03 20:04:25 (5.96 MB/s) - ‘data/paul_graham/paul_graham_essay.txt’ saved [75042/75042]\n", + "\n" + ] + } + ], + "source": [ + "# Download data\n", + "! mkdir -p 'data/paul_graham/'\n", + "! 
wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we can load the documents using the LlamaIndex SimpleDirectoryReader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Documents: 1\n" + ] + } + ], + "source": [ + "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()\n", + "\n", + "print(\"# Documents:\", len(documents))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To create an index on Upstash, visit https://console.upstash.com/vector, create an index with 1536 dimensions and `Cosine` distance metric. Copy the URL and token below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vector_store = UpstashVectorStore(url=\"https://...\", token=\"...\")\n", + "\n", + "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n", + "index = VectorStoreIndex.from_documents(\n", + " documents, storage_context=storage_context\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we've successfully created an index and populated it with vectors from the essay! The data will take a second to index and then it'll be ready for querying." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The author learned that the study of philosophy in college did not live up to their expectations.\n", + "They found that other fields took up most of the space of ideas, leaving little room for what they\n", + "perceived as the ultimate truths that philosophy was supposed to explore. 
As a result, they decided\n", + "to switch to studying AI.\n", + "\n", + "\n", + "The author's opinion on startups is that they are in need of help and support, especially in the\n", + "beginning stages. The author believes that founders of startups are often helpless and face various\n", + "challenges, such as getting incorporated and understanding the intricacies of running a company. The\n", + "author's investment firm, Y Combinator, aims to provide seed funding and comprehensive support to\n", + "startups, offering them the guidance and resources they need to succeed.\n" + ] + } + ], + "source": [ + "query_engine = index.as_query_engine()\n", + "res1 = query_engine.query(\"What did the author learn?\")\n", + "print(textwrap.fill(str(res1), 100))\n", + "\n", + "print(\"\\n\")\n", + "\n", + "res2 = query_engine.query(\"What is the author's opinion on startups?\")\n", + "print(textwrap.fill(str(res2), 100))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/module_guides/storing/customization.md b/docs/module_guides/storing/customization.md index 36f6cb5dce89714fa6a1637ef72caee1b8f3bac9..f0f3af8cdfc9933b0ac7d65e8cfdfb3e402f96b2 100644 --- a/docs/module_guides/storing/customization.md +++ b/docs/module_guides/storing/customization.md @@ -100,6 +100,7 @@ The vector stores that support this practice are: - PineconeVectorStore - QdrantVectorStore - RedisVectorStore +- UpstashVectorStore - WeaviateVectorStore A small example using Pinecone is below: diff --git a/docs/module_guides/storing/vector_stores.md b/docs/module_guides/storing/vector_stores.md index 
26fd3944e72953b653a95431cbd6986b9dae75a8..156d1d84a82b237ca2d349a1ce6b14bea60f8972 100644 --- a/docs/module_guides/storing/vector_stores.md +++ b/docs/module_guides/storing/vector_stores.md @@ -49,6 +49,7 @@ We are actively adding more integrations and improving feature coverage for each | TencentVectorDB | cloud | ✓ | ✓ | ✓ | ✓ | | | Timescale | | ✓ | | ✓ | ✓ | ✓ | | Typesense | self-hosted / cloud | ✓ | | ✓ | ✓ | | +| Upstash | cloud | | | | ✓ | | | Weaviate | self-hosted / cloud | ✓ | ✓ | ✓ | ✓ | | For more details, see [Vector Store Integrations](/community/integrations/vector_stores.md). diff --git a/llama_index/vector_stores/__init__.py b/llama_index/vector_stores/__init__.py index 17f83b07ce5f26876768b9e074f0802444d7a7f5..a720026257f7a7eb205629335f3467869b77e843 100644 --- a/llama_index/vector_stores/__init__.py +++ b/llama_index/vector_stores/__init__.py @@ -53,6 +53,7 @@ from llama_index.vector_stores.types import ( VectorStoreQuery, VectorStoreQueryResult, ) +from llama_index.vector_stores.upstash import UpstashVectorStore from llama_index.vector_stores.weaviate import WeaviateVectorStore from llama_index.vector_stores.zep import ZepVectorStore @@ -103,4 +104,5 @@ __all__ = [ "AzureCosmosDBMongoDBVectorSearch", "LanternVectorStore", "MongoDBAtlasVectorSearch", + "UpstashVectorStore", ] diff --git a/llama_index/vector_stores/registry.py b/llama_index/vector_stores/registry.py index 1b55736a5a57ee29c863d28e29a574081e94389e..258ac643267a067e1c1e74a5a55a086c1ea83dd5 100644 --- a/llama_index/vector_stores/registry.py +++ b/llama_index/vector_stores/registry.py @@ -21,6 +21,7 @@ from llama_index.vector_stores.simple import SimpleVectorStore from llama_index.vector_stores.supabase import SupabaseVectorStore from llama_index.vector_stores.txtai import TxtaiVectorStore from llama_index.vector_stores.types import VectorStore +from llama_index.vector_stores.upstash import UpstashVectorStore from llama_index.vector_stores.weaviate import WeaviateVectorStore @@ -45,6 
class UpstashVectorStore(VectorStore):
    """
    Upstash Vector Store.

    Adapter around an ``upstash_vector.Index``. The index itself must already
    exist (it can be created using the Upstash console); this class only
    upserts vectors into it and queries it.
    """

    stores_text: bool = True
    flat_metadata: bool = False

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this vector store class."""
        return "UpstashVectorStore"

    @property
    def client(self) -> Any:
        """Return the Upstash client."""
        return self._index

    def __init__(
        self, url: str, token: str, batch_size: int = DEFAULT_BATCH_SIZE
    ) -> None:
        """
        Create a UpstashVectorStore. The index can be created using the Upstash console.

        Args:
            url (String): URL of the Upstash Vector instance, found in the Upstash console.
            token (String): Token for the Upstash Vector Index, found in the Upstash console.
            batch_size (Optional[int]): Number of nodes sent per upsert request
                when adding nodes to the vector store.

        Raises:
            ImportError: If the upstash-vector python package is not installed.
        """
        self.batch_size = batch_size

        # Imported lazily so that the package is only required when this
        # vector store is actually instantiated.
        try:
            from upstash_vector import Index
        except ImportError:
            raise ImportError(
                "Could not import upstash_vector.Index, Please install it with `pip install upstash-vector`"
            )

        self._index = Index(url=url, token=token)

    def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
        """
        Add nodes to the vector store.

        Nodes are upserted in chunks of ``batch_size``, one request per chunk.
        Nothing is sent when ``nodes`` is empty.

        Args:
            nodes: List of nodes to add to the vector store.
            add_kwargs: Additional arguments to pass to the add method
                (currently unused).

        Returns:
            List of ids of the added nodes, in input order.
        """
        ids: List[str] = []
        for node_batch in iter_batch(nodes, self.batch_size):
            # Each entry is an (id, embedding, metadata) tuple.
            vectors = [
                (node.node_id, node.embedding, node_to_metadata_dict(node))
                for node in node_batch
            ]
            # Upsert inside the loop: accumulating every batch and sending a
            # single request afterwards would make batch_size meaningless.
            self.client.upsert(vectors=vectors)
            ids.extend(vector[0] for vector in vectors)

        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete node from the vector store.

        Args:
            ref_doc_id: Reference doc id of the node to delete.
            delete_kwargs: Additional arguments to pass to the delete method.

        Raises:
            NotImplementedError: Always; deletion is not implemented yet.
        """
        raise NotImplementedError(
            "Delete is not currently supported, but will be in the future."
        )

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """
        Query the vector store.

        Args:
            query: Query to run against the vector store. Only the default
                query mode without metadata filters is supported.
            kwargs: Additional arguments to pass to the query method
                (currently unused).

        Returns:
            Query result holding the matched nodes, their scores and ids.

        Raises:
            ValueError: If the query mode is not ``DEFAULT``, if metadata
                filters are given, or if the query has no embedding.
        """
        if query.mode != VectorStoreQueryMode.DEFAULT:
            raise ValueError(f"Query mode {query.mode} not supported")

        if query.filters:
            raise ValueError("Metadata filtering not supported")

        # Fail early with a clear message instead of handing None to the client.
        if query.query_embedding is None:
            raise ValueError("Query embedding is required")

        res = self.client.query(
            vector=query.query_embedding,
            top_k=query.similarity_top_k,
            include_vectors=True,
            include_metadata=True,
        )

        top_k_nodes = []
        top_k_ids = []
        top_k_scores = []
        for match in res:
            # Rebuild the node from the stored metadata and re-attach the
            # embedding returned by the index.
            node = metadata_dict_to_node(match.metadata)
            node.embedding = match.vector
            top_k_nodes.append(node)
            top_k_ids.append(match.id)
            top_k_scores.append(match.score)

        return VectorStoreQueryResult(
            nodes=top_k_nodes, similarities=top_k_scores, ids=top_k_ids
        )
# NOTE(review): both tests talk to a real Upstash index through the
# `upstash_vector_store` fixture. The fixture nodes carry 256-dimensional
# embeddings, so the target index presumably has to be 256-dimensional too;
# confirm against the CI index configuration.
@pytest.mark.skipif(
    not upstash_installed,
    reason=(
        "upstash-vector not installed or "
        "UPSTASH_VECTOR_URL / UPSTASH_VECTOR_TOKEN not set"
    ),
)
def test_upstash_vector_add(
    upstash_vector_store: UpstashVectorStore, text_nodes: List[TextNode]
) -> None:
    """Adding nodes returns their ids in input order."""
    res = upstash_vector_store.add(nodes=text_nodes)
    assert res == ["test_node_1", "test_node_2"]


@pytest.mark.skipif(
    not upstash_installed,
    reason=(
        "upstash-vector not installed or "
        "UPSTASH_VECTOR_URL / UPSTASH_VECTOR_TOKEN not set"
    ),
)
def test_upstash_vector_query(
    upstash_vector_store: UpstashVectorStore, text_nodes: List[TextNode]
) -> None:
    """Querying after an add returns one of the inserted nodes first."""
    upstash_vector_store.add(nodes=text_nodes)
    res = upstash_vector_store.query(
        VectorStoreQuery(
            query_embedding=[0.25] * 256,
        )
    )

    assert res.nodes and res.nodes[0].id_ in ["test_node_1", "test_node_2"]