From cfc165d1c2f126a97b08c8028ba7710f7265896c Mon Sep 17 00:00:00 2001
From: Jael Gu <mengjia.gu@zilliz.com>
Date: Sat, 11 May 2024 01:42:57 +0800
Subject: [PATCH] Update ZCP Managed Index & Milvus Index demos (#13170)

---
 docs/docs/examples/managed/zcpDemo.ipynb      | 48 +++++++++----------
 .../vector_stores/MilvusIndexDemo.ipynb       | 32 ++++++-------
 .../llama_index/vector_stores/milvus/base.py  |  4 +-
 3 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/docs/docs/examples/managed/zcpDemo.ipynb b/docs/docs/examples/managed/zcpDemo.ipynb
index 68292d3ee2..0d691a3b00 100644
--- a/docs/docs/examples/managed/zcpDemo.ipynb
+++ b/docs/docs/examples/managed/zcpDemo.ipynb
@@ -19,7 +19,7 @@
     "\n",
     "## Setup\n",
     "\n",
-    "1. Install llama-index"
+    "1. Install llama-index dependencies"
    ]
   },
   {
@@ -39,7 +39,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ! pip install llama-index"
+    "%pip install llama-index"
    ]
   },
   {
@@ -58,9 +58,6 @@
    "outputs": [],
    "source": [
     "from getpass import getpass\n",
-    "import os\n",
-    "\n",
-    "os.environ[\"OPENAI_API_KEY\"] = getpass(\"Enter your OpenAI API Key:\")\n",
     "\n",
     "ZILLIZ_PROJECT_ID = getpass(\"Enter your Zilliz Project ID:\")\n",
     "ZILLIZ_CLUSTER_ID = getpass(\"Enter your Zilliz Cluster ID:\")\n",
@@ -95,18 +92,10 @@
    "id": "97d5c934",
    "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No available pipelines. Please create pipelines first.\n",
-      "Pipelines are automatically created.\n"
-     ]
-    },
     {
      "data": {
       "text/plain": [
-       "{'token_usage': 984, 'doc_name': 'milvus_doc_22.md', 'num_chunks': 7}"
+       "{'token_usage': 984, 'doc_name': 'milvus_doc_22.md', 'num_chunks': 3}"
       ]
      },
      "execution_count": null,
@@ -145,10 +134,6 @@
    "source": [
     "> It is optional to add metadata for each document. The metadata can be used to filter doc chunks during retrieval.\n",
     "\n",
-    "### From Local File\n",
-    "\n",
-    "Coming soon.\n",
-    "\n",
     "### From Raw Text\n",
     "\n",
     "Coming soon."
@@ -167,6 +152,18 @@
     "- **output_metadata**: What metadata fields to return with the retrieved text node. Optional, defaults to []."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9bc0343",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"OPENAI_API_KEY\"] = getpass(\"Enter your OpenAI API Key:\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -184,6 +181,7 @@
     "        ]  # version == \"2.3\"\n",
     "    ),\n",
     "    output_metadata=[\"version\"],\n",
+    "    llm=None,\n",
     ")"
    ]
   },
@@ -207,7 +205,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[NodeWithScore(node=TextNode(id_='447198459513870883', embedding=None, metadata={'version': '2.3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='# Delete Entities\\nThis topic describes how to delete entities in Milvus.  \\nMilvus supports deleting entities by primary key or complex boolean expressions. Deleting entities by primary key is much faster and lighter than deleting them by complex boolean expressions. This is because Milvus executes queries first when deleting data by complex boolean expressions.  \\nDeleted entities can still be retrieved immediately after the deletion if the consistency level is set lower than Strong.\\nEntities deleted beyond the pre-specified span of time for Time Travel cannot be retrieved again.\\nFrequent deletion operations will impact the system performance.  \\nBefore deleting entities by comlpex boolean expressions, make sure the collection has been loaded.\\nDeleting entities by complex boolean expressions is not an atomic operation. Therefore, if it fails halfway through, some data may still be deleted.\\nDeleting entities by complex boolean expressions is supported only when the consistency is set to Bounded. For details, see Consistency.', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.728226900100708), NodeWithScore(node=TextNode(id_='447198459513870886', embedding=None, metadata={'version': '2.3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='# Delete Entities\\n## Prepare boolean expression\\n### Complex boolean expression\\nTo filter entities that meet specific conditions, define complex boolean expressions.  
\\nFilter entities whose word_count is greater than or equal to 11000:  \\n```python\\nexpr = \"word_count >= 11000\"\\n```  \\nFilter entities whose book_name is not Unknown:  \\n```python\\nexpr = \"book_name != Unknown\"\\n```  \\nFilter entities whose primary key values are greater than 5 and word_count is smaller than or equal to 9999:  \\n```python\\nexpr = \"book_id > 5 && word_count <= 9999\"\\n```', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.687866747379303), NodeWithScore(node=TextNode(id_='447198459513870884', embedding=None, metadata={'version': '2.3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='# Delete Entities\\n## Prepare boolean expression\\nPrepare the boolean expression that filters the entities to delete.  \\nMilvus supports deleting entities by primary key or complex boolean expressions. For more information on expression rules and supported operators, see Boolean Expression Rules.', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.6814976334571838)]\n"
+      "[NodeWithScore(node=TextNode(id_='448986959334710210', embedding=None, metadata={'version': '2.3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='# Delete Entities\\nThis topic describes how to delete entities in Milvus.  \\nMilvus supports deleting entities by primary key or complex boolean expressions. Deleting entities by primary key is much faster and lighter than deleting them by complex boolean expressions. This is because Milvus executes queries first when deleting data by complex boolean expressions.  \\nDeleted entities can still be retrieved immediately after the deletion if the consistency level is set lower than Strong.\\nEntities deleted beyond the pre-specified span of time for Time Travel cannot be retrieved again.\\nFrequent deletion operations will impact the system performance.  \\nBefore deleting entities by comlpex boolean expressions, make sure the collection has been loaded.\\nDeleting entities by complex boolean expressions is not an atomic operation. Therefore, if it fails halfway through, some data may still be deleted.\\nDeleting entities by complex boolean expressions is supported only when the consistency is set to Bounded. For details, see Consistency.\\\\\\n\\\\\\n# Delete Entities\\n## Prepare boolean expression\\nPrepare the boolean expression that filters the entities to delete.  \\nMilvus supports deleting entities by primary key or complex boolean expressions. For more information on expression rules and supported operators, see Boolean Expression Rules.\\\\\\n\\\\\\n# Delete Entities\\n## Prepare boolean expression\\n### Simple boolean expression\\nUse a simple expression to filter data with primary key values of 0 and 1:  \\n```python\\nexpr = \"book_id in [0,1]\"\\n```\\\\\\n\\\\\\n# Delete Entities\\n## Prepare boolean expression\\n### Complex boolean expression\\nTo filter entities that meet specific conditions, define complex boolean expressions.  
\\nFilter entities whose word_count is greater than or equal to 11000:  \\n```python\\nexpr = \"word_count >= 11000\"\\n```  \\nFilter entities whose book_name is not Unknown:  \\n```python\\nexpr = \"book_name != Unknown\"\\n```  \\nFilter entities whose primary key values are greater than 5 and word_count is smaller than or equal to 9999:  \\n```python\\nexpr = \"book_id > 5 && word_count <= 9999\"\\n```', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.742070198059082), NodeWithScore(node=TextNode(id_='448986959334710211', embedding=None, metadata={'version': '2.3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='# Delete Entities\\n## Delete entities\\nDelete the entities with the boolean expression you created. Milvus returns the ID list of the deleted entities.\\n```python\\nfrom pymilvus import Collection\\ncollection = Collection(\"book\")      # Get an existing collection.\\ncollection.delete(expr)\\n```  \\nParameter\\tDescription\\nexpr\\tBoolean expression that specifies the entities to delete.\\npartition_name (optional)\\tName of the partition to delete entities from.\\\\\\n\\\\\\n# Upsert Entities\\nThis topic describes how to upsert entities in Milvus.  \\nUpserting is a combination of insert and delete operations. In the context of a Milvus vector database, an upsert is a data-level operation that will overwrite an existing entity if a specified field already exists in a collection, and insert a new entity if the specified value doesn’t already exist.  \\nThe following example upserts 3,000 rows of randomly generated data as the example data. When performing upsert operations, it\\'s important to note that the operation may compromise performance. 
This is because the operation involves deleting data during execution.\\\\\\n\\\\\\n# Upsert Entities\\n## Prepare data\\nFirst, prepare the data to upsert. The type of data to upsert must match the schema of the collection, otherwise Milvus will raise an exception.  \\nMilvus supports default values for scalar fields, excluding a primary key field. This indicates that some fields can be left empty during data inserts or upserts. For more information, refer to Create a Collection.  \\n```python\\n# Generate data to upsert\\n\\nimport random\\nnb = 3000\\ndim = 8\\nvectors = [[random.random() for _ in range(dim)] for _ in range(nb)]\\ndata = [\\n[i for i in range(nb)],\\n[str(i) for i in range(nb)],\\n[i for i in range(10000, 10000+nb)],\\nvectors,\\n[str(\"dy\"*i) for i in range(nb)]\\n]\\n```', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.6409814953804016), NodeWithScore(node=TextNode(id_='448986959334710212', embedding=None, metadata={'version': '2.3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='# Upsert Entities\\n## Upsert data\\nUpsert the data to the collection.  \\n```python\\nfrom pymilvus import Collection\\ncollection = Collection(\"book\") # Get an existing collection.\\nmr = collection.upsert(data)\\n```  \\nParameter\\tDescription\\ndata\\tData to upsert into Milvus.\\npartition_name (optional)\\tName of the partition to upsert data into.\\ntimeout (optional)\\tAn optional duration of time in seconds to allow for the RPC. If it is set to None, the client keeps waiting until the server responds or error occurs.\\nAfter upserting entities into a collection that has previously been indexed, you do not need to re-index the collection, as Milvus will automatically create an index for the newly upserted data. 
For more information, refer to Can indexes be created after inserting vectors?\\\\\\n\\\\\\n# Upsert Entities\\n## Flush data\\nWhen data is upserted into Milvus it is updated and inserted into segments. Segments have to reach a certain size to be sealed and indexed. Unsealed segments will be searched brute force. In order to avoid this with any remainder data, it is best to call flush(). The flush() call will seal any remaining segments and send them for indexing. It is important to only call this method at the end of an upsert session. Calling it too often will cause fragmented data that will need to be cleaned later on.\\\\\\n\\\\\\n# Upsert Entities\\n## Limits\\nUpdating primary key fields is not supported by upsert().\\nupsert() is not applicable and an error can occur if autoID is set to True for primary key fields.', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.5456743240356445)]\n"
      ]
     }
    ],
@@ -243,7 +241,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Yes, users can delete entities by filtering non-primary fields using complex boolean expressions in Milvus. The complex boolean expressions allow users to define specific conditions to filter entities based on non-primary fields, such as word_count or book_name. By specifying the desired conditions in the boolean expression, users can delete entities that meet those conditions. However, it is important to note that deleting entities by complex boolean expressions is not an atomic operation, and if it fails halfway through, some data may still be deleted.\n"
+      "Users can delete entities by filtering non-primary fields using complex boolean expressions in Milvus.\n"
      ]
     }
    ],
@@ -314,9 +312,9 @@
     {
      "data": {
       "text/plain": [
-       "{'INGESTION': 'pipe-220572b2597efba9a91ed5',\n",
-       " 'SEARCH': 'pipe-8de59599229631c72d4d2c',\n",
-       " 'DELETION': 'pipe-2813fbf9eb09b352e81efa'}"
+       "{'INGESTION': 'pipe-9b58a7a79b25ae31467fa4',\n",
+       " 'SEARCH': 'pipe-ea117c9922961a565929eb',\n",
+       " 'DELETION': 'pipe-26d76179b259b67e641b33'}"
       ]
      },
      "execution_count": null,
@@ -353,7 +351,7 @@
     {
      "data": {
       "text/plain": [
-       "{'token_usage': 1247, 'doc_name': 'milvus_doc.md', 'num_chunks': 10}"
+       "{'token_usage': 1247, 'doc_name': 'milvus_doc.md', 'num_chunks': 4}"
       ]
      },
      "execution_count": null,
@@ -412,7 +410,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Yes, you can delete entities by filtering non-primary fields. Milvus supports deleting entities by complex boolean expressions, which allows you to filter entities based on specific conditions on non-primary fields. You can define complex boolean expressions using operators such as greater than or equal to, not equal to, and logical operators like AND and OR. By using these expressions, you can filter entities based on the values of non-primary fields and delete them accordingly.\n"
+      "Yes, you can delete entities by filtering non-primary fields using complex boolean expressions in Milvus.\n"
      ]
     }
    ],
diff --git a/docs/docs/examples/vector_stores/MilvusIndexDemo.ipynb b/docs/docs/examples/vector_stores/MilvusIndexDemo.ipynb
index 3fa41f2a33..66d6c69b44 100644
--- a/docs/docs/examples/vector_stores/MilvusIndexDemo.ipynb
+++ b/docs/docs/examples/vector_stores/MilvusIndexDemo.ipynb
@@ -53,7 +53,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "! pip install llama-index"
+    "%pip install llama-index"
    ]
   },
   {
@@ -72,7 +72,6 @@
     "\n",
     "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document\n",
     "from llama_index.vector_stores.milvus import MilvusVectorStore\n",
-    "from IPython.display import Markdown, display\n",
     "import textwrap"
    ]
   },
@@ -138,7 +137,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Document ID: d33f0397-b51a-4455-9b0f-88a101254d95\n"
+      "Document ID: 4d28b248-a306-4a42-9ace-8fd88df7e484\n"
      ]
     }
    ],
@@ -158,7 +157,7 @@
     "### Create an index across the data\n",
     "Now that we have a document, we can can create an index and insert the document. For the index we will use a GPTMilvusIndex. GPTMilvusIndex takes in a few arguments:\n",
     "\n",
-    "- `uri (str, optional)`: The URI to connect to, comes in the form of \"http://address:port\". Defaults to \"http://localhost:19530\".\n",
+    "- `uri (str, optional)`: The URI to connect to, comes in the form of \"https://address:port\" if using Milvus or Zilliz Cloud service, or \"path/to/local/milvus.db\" if using a lite local Milvus. Defaults to \"http://localhost:19530\".\n",
     "- `token (str, optional)`: The token for log in. Empty if not using rbac, if using rbac it will most likely be \"username:password\". Defaults to \"\".\n",
     "- `collection_name (str, optional)`: The name of the collection where data will be stored. Defaults to \"llamalection\".\n",
     "- `dim (int, optional)`: The dimension of the embeddings. If it is not provided, collection creation will be done on first insert. Defaults to None.\n",
@@ -169,7 +168,7 @@
     "- `overwrite (bool, optional)`: Whether to overwrite existing collection with same name. Defaults to False.\n",
     "- `text_key (str, optional)`: What key text is stored in in the passed collection. Used when bringing your own collection. Defaults to None.\n",
     "- `index_config (dict, optional)`: The configuration used for building the Milvus index. Defaults to None.\n",
-    "- `search_config (dict, optional)`: The configuration used for searching the Milvus index. Note that this must be compatible with the index type specified by index_config. Defaults to None.\n"
+    "- `search_config (dict, optional)`: The configuration used for searching the Milvus index. Note that this must be compatible with the index type specified by index_config. Defaults to None."
    ]
   },
   {
@@ -183,7 +182,9 @@
     "from llama_index.core import StorageContext\n",
     "\n",
     "\n",
-    "vector_store = MilvusVectorStore(dim=1536, overwrite=True)\n",
+    "vector_store = MilvusVectorStore(\n",
+    "    uri=\"./milvus_demo.db\", dim=1536, overwrite=True\n",
+    ")\n",
     "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
     "index = VectorStoreIndex.from_documents(\n",
     "    documents, storage_context=storage_context\n",
@@ -210,14 +211,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The author learned several things during their time at Interleaf. They learned that it's better for\n",
-      "technology companies to be run by product people than sales people, that code edited by too many\n",
-      "people leads to bugs, that cheap office space is not worth it if it's depressing, that planned\n",
-      "meetings are inferior to corridor conversations, that big bureaucratic customers can be a dangerous\n",
-      "source of money, and that there's not much overlap between conventional office hours and the optimal\n",
-      "time for hacking. However, the most important thing the author learned is that the low end eats the\n",
-      "high end, meaning that it's advantageous to be the \"entry level\" option because if you're not,\n",
-      "someone else will be and will surpass you.\n"
+      "The author learned about programming on early computers like the IBM 1401, where programs were typed\n",
+      "on punch cards, loaded into memory, and run to produce output. Later, with the advent of\n",
+      "microcomputers like the TRS-80, the author was able to have a computer directly in front of them,\n",
+      "responding to keystrokes in real-time. This shift in technology allowed the author to write simple\n",
+      "games, predictive programs, and even a word processor. Additionally, the author explored philosophy\n",
+      "in college but found it lacking in ultimate truths, leading to a switch to studying AI, inspired by\n",
+      "works like Heinlein's \"The Moon is a Harsh Mistress\" and seeing SHRDLU in a PBS documentary.\n"
      ]
     }
    ],
@@ -267,7 +267,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Res: I'm sorry, but based on the given context information, there is no information provided about the author.\n"
+      "Res: The author is the one who wrote the text or created the content.\n"
      ]
     }
    ],
@@ -329,7 +329,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Res: The author of the given context is Paul Graham.\n"
+      "Res: Paul Graham\n"
      ]
     }
    ],
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-milvus/llama_index/vector_stores/milvus/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-milvus/llama_index/vector_stores/milvus/base.py
index 88ec933574..106b1e2bba 100644
--- a/llama-index-integrations/vector_stores/llama-index-vector-stores-milvus/llama_index/vector_stores/milvus/base.py
+++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-milvus/llama_index/vector_stores/milvus/base.py
@@ -72,7 +72,9 @@ class MilvusVectorStore(BasePydanticVectorStore):
 
     Args:
         uri (str, optional): The URI to connect to, comes in the form of
-            "http://address:port".
+            "https://address:port" for a Milvus or Zilliz Cloud service,
+            or "path/to/local/milvus.db" for local Milvus Lite. Defaults to
+            "http://localhost:19530".
         token (str, optional): The token for log in. Empty if not using rbac, if
             using rbac it will most likely be "username:password".
         collection_name (str, optional): The name of the collection where data will be
-- 
GitLab