From 050cd53450ad7bafeff332a1a5c959a31c5edeb2 Mon Sep 17 00:00:00 2001
From: ANKIT VARSHNEY <132201033+AVtheking@users.noreply.github.com>
Date: Tue, 18 Mar 2025 21:33:42 +0530
Subject: [PATCH] fix: delete by id in pinecone vector store (#1758)

---
 .changeset/itchy-seas-enjoy.md                |  5 ++
 .../tests/node-parser/text-splitter.test.ts   |  1 +
 .../pinecone/src/PineconeVectorStore.ts       | 49 +++++++++++++++++--
 3 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 .changeset/itchy-seas-enjoy.md

diff --git a/.changeset/itchy-seas-enjoy.md b/.changeset/itchy-seas-enjoy.md
new file mode 100644
index 000000000..a97fec417
--- /dev/null
+++ b/.changeset/itchy-seas-enjoy.md
@@ -0,0 +1,5 @@
+---
+"@llamaindex/pinecone": minor
+---
+
+Fix deleting documents by ref doc id in PineconeVectorStore
diff --git a/packages/core/tests/node-parser/text-splitter.test.ts b/packages/core/tests/node-parser/text-splitter.test.ts
index 531896492..89e17fb57 100644
--- a/packages/core/tests/node-parser/text-splitter.test.ts
+++ b/packages/core/tests/node-parser/text-splitter.test.ts
@@ -126,6 +126,7 @@ describe("sentence splitter", () => {
       id_: docId,
       text: "This is a test sentence. This is another test sentence.",
     });
+
     const nodes = sentenceSplitter.getNodesFromDocuments([doc]);
     nodes.forEach((node) => {
       // test node id should match uuid regex
diff --git a/packages/providers/storage/pinecone/src/PineconeVectorStore.ts b/packages/providers/storage/pinecone/src/PineconeVectorStore.ts
index 83e335296..e32956838 100644
--- a/packages/providers/storage/pinecone/src/PineconeVectorStore.ts
+++ b/packages/providers/storage/pinecone/src/PineconeVectorStore.ts
@@ -117,7 +117,15 @@ export class PineconeVectorStore extends BaseVectorStore {
     }
 
     const idx: Index = await this.index();
-    const nodes = embeddingResults.map(this.nodeToRecord);
+    const nodes = embeddingResults.map((node) => {
+      const nodeRecord = this.nodeToRecord(node);
+
+      if (nodeRecord.metadata.ref_doc_id) {
+        // prefix the record id with the ref doc id so all chunks of a document can be listed by id prefix
+        nodeRecord.id = `${nodeRecord.metadata.ref_doc_id}_chunk_${nodeRecord.id}`;
+      }
+      return nodeRecord;
+    });
 
     for (let i = 0; i < nodes.length; i += this.chunkSize) {
       const chunk = nodes.slice(i, i + this.chunkSize);
@@ -148,8 +156,43 @@ export class PineconeVectorStore extends BaseVectorStore {
    * @returns Promise that resolves if the delete query did not throw an error.
    */
   async delete(refDocId: string, deleteKwargs?: object): Promise<void> {
-    const idx = await this.index();
-    return idx.deleteOne(refDocId);
+    const [idx, index] = await Promise.all([
+      this.index(),
+      // describe the index to learn whether it is pod-based or serverless
+      this.db?.describeIndex(this.indexName),
+    ]);
+
+    if (index?.spec?.pod) {
+      // pod-based index: delete the document's records by metadata
+      await idx.deleteMany({
+        metadata: {
+          ref_doc_id: refDocId,
+        },
+      });
+    } else if (index?.spec?.serverless) {
+      // filtering on metadata is not supported in serverless indexes, so the
+      // records are found by the `<ref_doc_id>_chunk_` id prefix written on add
+      // ref: https://docs.pinecone.io/guides/data/delete-data#delete-records-by-metadata
+      // (listing ids by prefix is not supported in non-serverless indexes)
+      // the trailing `_chunk_` keeps "doc1" from also matching "doc10_chunk_..."
+      const prefix = `${refDocId}_chunk_`;
+      let paginationToken: string | undefined;
+
+      // fetch a page, delete it, then follow the token; testing the token only
+      // after the delete ensures the last page is removed as well
+      do {
+        const list = await idx.listPaginated(
+          paginationToken ? { prefix, paginationToken } : { prefix },
+        );
+        const ids = list.vectors?.map((v) => v.id);
+
+        if (ids && ids.length > 0) {
+          await idx.deleteMany(ids);
+        }
+
+        paginationToken = list.pagination?.next;
+      } while (paginationToken);
+    }
   }
 
   /**
-- 
GitLab