From 9e73564c6d39fccc71c90d2e51e88c72fe49410e Mon Sep 17 00:00:00 2001 From: Jael Gu <mengjia.gu@zilliz.com> Date: Wed, 27 Dec 2023 00:57:32 +0800 Subject: [PATCH] Update zcp docs (#9698) --- .../community/integrations/managed_indices.md | 18 +++--- docs/examples/managed/zcpDemo.ipynb | 63 +++++++++++-------- 2 files changed, 47 insertions(+), 34 deletions(-) diff --git a/docs/community/integrations/managed_indices.md b/docs/community/integrations/managed_indices.md index f25f6ba3d4..2ec4d23ba4 100644 --- a/docs/community/integrations/managed_indices.md +++ b/docs/community/integrations/managed_indices.md @@ -122,24 +122,26 @@ zcp_index = ZillizCloudPipelineIndex.from_document_url( url="https://publicdataset.zillizcloud.com/milvus_doc.md", cluster_id=os.getenv("ZILLIZ_CLUSTER_ID"), token=os.getenv("ZILLIZ_TOKEN"), - metadata={"version": "2.3"}, # optional + metadata={"version": "2.3"}, ) -# Insert more docs into index, eg. a Milvus v2.0 document +# Insert more docs into index, e.g. a Milvus v2.2 document zcp_index.insert_doc_url( - url="https://milvus.io/docs/v2.0.x/delete_data.md", - metadata={"version": "2.0"}, + url="https://publicdataset.zillizcloud.com/milvus_doc_22.md", + metadata={"version": "2.2"}, ) # Query index from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters -query_engine_with_filters = zcp_index.as_query_engine( +query_engine_milvus23 = zcp_index.as_query_engine( search_top_k=3, filters=MetadataFilters( - filters=[ExactMatchFilter(key="version", value="2.3")] - ), # optional, here we will only retrieve info of Milvus 2.3 - output_metadata=["version"], # optional + filters=[ + ExactMatchFilter(key="version", value="2.3") + ] # version == "2.3" + ), + output_metadata=["version"], ) ``` diff --git a/docs/examples/managed/zcpDemo.ipynb b/docs/examples/managed/zcpDemo.ipynb index 1b8dc253cd..7ed859e084 100644 --- a/docs/examples/managed/zcpDemo.ipynb +++ b/docs/examples/managed/zcpDemo.ipynb @@ -81,13 +81,13 @@ " 
url=\"https://publicdataset.zillizcloud.com/milvus_doc.md\", # a public or pre-signed url of a file stored on s3 or gcs\n", " cluster_id=ZILLIZ_CLUSTER_ID,\n", " token=ZILLIZ_TOKEN,\n", - " metadata={\"version\": \"2.3\"}, # optional\n", + " metadata={\"version\": \"2.3\"},\n", ")\n", "\n", - "# Insert more docs, eg. a Milvus v2.0 document\n", + "# Insert more docs, e.g. a Milvus v2.2 document\n", "zcp_index.insert_doc_url(\n", - " url=\"https://milvus.io/docs/v2.0.x/delete_data.md\",\n", - " metadata={\"version\": \"2.0\"},\n", + " url=\"https://publicdataset.zillizcloud.com/milvus_doc_22.md\",\n", + " metadata={\"version\": \"2.2\"},\n", ")" ] }, @@ -96,8 +96,6 @@ "id": "d16a498e", "metadata": {}, "source": [ - "- It is optional to add metadata for each document.\n", - "\n", "### From Local File\n", "\n", "Coming soon.\n", @@ -114,11 +112,13 @@ "source": [ "## Working as Query Engine\n", "\n", - "A Zilliz Cloud Pipeline's Index can work as a Query Engine in Llama-Index.\n", + "A Zilliz Cloud Pipeline's Index can work as a Query Engine in LlamaIndex.\n", "It allows users to customize some parameters:\n", "- search_top_k: How many text nodes/chunks retrieved. Optional, defaults to `DEFAULT_SIMILARITY_TOP_K` (2).\n", "- filters: Metadata filters. Optional, defaults to None.\n", - "- output_metadata: What metadata fields included in each retrieved text node. Optional, defaults to []." + "- output_metadata: What metadata fields included in each retrieved text node. Optional, defaults to [].\n", + "\n", + "Filters are optional. For example, to restrict answers to Milvus 2.3 documents, set `version` to 2.3 in the filters." 
] }, { @@ -139,12 +139,14 @@ "\n", "from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters\n", "\n", - "query_engine_with_filters = zcp_index.as_query_engine(\n", + "query_engine_milvus23 = zcp_index.as_query_engine(\n", " search_top_k=3,\n", " filters=MetadataFilters(\n", - " filters=[ExactMatchFilter(key=\"version\", value=\"2.3\")]\n", - " ), # optional, here we will only retrieve info of Milvus 2.3\n", - " output_metadata=[\"version\"], # optional\n", + " filters=[\n", + " ExactMatchFilter(key=\"version\", value=\"2.3\")\n", + " ] # version == \"2.3\"\n", + " ),\n", + " output_metadata=[\"version\"],\n", ")" ] }, @@ -153,7 +155,7 @@ "id": "9803232e", "metadata": {}, "source": [ - "Then the query engine is ready for Semantic Search or Retrieval Augmented Generation:\n", + "Then the query engine is ready for Semantic Search or Retrieval Augmented Generation with Milvus 2.3 documents:\n", "\n", "- **Retrieve** (Semantic search powered by Zilliz Cloud Pipeline's Index):" ] @@ -163,18 +165,19 @@ "execution_count": null, "id": "8ab92af7", "metadata": {}, - "outputs": [], - "source": [ - "question = \"Can users delete entities by complex boolean expressions?\"\n", - "query_engine_with_filters.retrieve(question)" - ] - }, - { - "cell_type": "markdown", - "id": "a503d6e0", - "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NodeWithScore(node=TextNode(id_='446268394525283746', embedding=None, metadata={'version': '2.3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='c3254bc65319b52914d6e68fbce69161fcf0e2998e4619287a8560258a2fe53d', text='Delete Entities\\nThis topic describes how to delete entities in Milvus.\\nMilvus supports deleting entities by primary key or complex boolean expressions. Deleting entities by primary key is much faster and lighter than deleting them by complex boolean expressions. 
This is because Milvus executes queries first when deleting data by complex boolean expressions.\\nDeleted entities can still be retrieved immediately after the deletion if the consistency level is set lower than Strong.\\nEntities deleted beyond the pre-specified span of time for Time Travel cannot be retrieved again.\\nFrequent deletion operations will impact the system performance.\\nBefore deleting entities by comlpex boolean expressions, make sure the collection has been loaded.\\nDeleting entities by complex boolean expressions is not an atomic operation. Therefore, if it fails halfway through, some data may still be deleted.\\nDeleting entities by complex boolean expressions is supported only when the consistency is set to Bounded. For details, see Consistency.\\nPrepare boolean expression\\nPrepare the boolean expression that filters the entities to delete.\\nMilvus supports deleting entities by primary key or complex boolean expressions. For more information on expression rules and supported operators, see Boolean Expression Rules.\\nSimple boolean expression\\nUse a simple expression to filter data with primary key values of 0 and 1:\\npython\\nexpr = \"book_id in [0,1]\"\\nComplex boolean expression', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8668166995048523), NodeWithScore(node=TextNode(id_='446268394525283747', embedding=None, metadata={'version': '2.3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='3ec8b3a992fb72d081145b7859c70453dc9d71be714c0f5f99ad2b2c2cb1f7ea', text='To filter entities that meet specific conditions, define complex boolean expressions.\\nFilter entities whose word_count is greater than or equal to 11000:\\npython\\nexpr = \"word_count >= 11000\"\\nFilter entities whose book_name is not Unknown:\\npython\\nexpr = \"book_name != Unknown\"\\nFilter entities whose primary key 
values are greater than 5 and word_count is smaller than or equal to 9999:\\npython\\nexpr = \"book_id > 5 && word_count <= 9999\"\\nDelete entities\\nDelete the entities with the boolean expression you created. Milvus returns the ID list of the deleted entities.\\npython\\nfrom pymilvus import Collection\\ncollection = Collection(\"book\") # Get an existing collection.\\ncollection.delete(expr)\\nParameter Description\\nexpr Boolean expression that specifies the entities to delete.\\npartition_name (optional) Name of the partition to delete entities from.\\nUpsert Entities\\nThis topic describes how to upsert entities in Milvus.\\nUpserting is a combination of insert and delete operations. In the context of a Milvus vector database, an upsert is a data-level operation that will overwrite an existing entity if a specified field already exists in a collection, and insert a new entity if the specified value doesn’t already exist.\\nThe following example upserts 3,000 rows of randomly generated data as the example data. When performing upsert operations, it\\'s important to note that the operation may compromise performance. This is because the operation involves deleting data during execution.\\nPrepare data', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.841397762298584), NodeWithScore(node=TextNode(id_='446268394525283749', embedding=None, metadata={'version': '2.3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='42656e32ce6baa2897419b8bae612412db94f1d570ab1702f2ae6c5557f248a9', text='When data is upserted into Milvus it is updated and inserted into segments. Segments have to reach a certain size to be sealed and indexed. Unsealed segments will be searched brute force. In order to avoid this with any remainder data, it is best to call flush(). 
The flush() call will seal any remaining segments and send them for indexing. It is important to only call this method at the end of an upsert session. Calling it too often will cause fragmented data that will need to be cleaned later on.\\nLimits\\nUpdating primary key fields is not supported by upsert().\\nupsert() is not applicable and an error can occur if autoID is set to True for primary key fields.', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.793336033821106)]\n" + ] + } + ], "source": [ - "> The query engine with filters retrieves only text nodes with version 2.3." + "question = \"Can users delete entities by filtering non-primary fields?\"\n", + "retrieved_nodes = query_engine_milvus23.retrieve(question)\n", + "print(retrieved_nodes)" ] }, { @@ -190,9 +193,17 @@ "execution_count": null, "id": "fc7b01b7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Yes, users can delete entities by filtering non-primary fields. Milvus supports deleting entities by complex boolean expressions, which can include conditions based on non-primary fields. Users can define complex boolean expressions to filter entities based on specific conditions and then delete those entities using the expression.\n" + ] + } + ], "source": [ - "response = query_engine_with_filters.query(question)\n", + "response = query_engine_milvus23.query(question)\n", "print(response.response)" ] } -- GitLab