From 83c38975395daea13cb75046f9ffd691966642e5 Mon Sep 17 00:00:00 2001
From: Alex Yang <himself65@outlook.com>
Date: Tue, 19 Nov 2024 17:51:45 -0800
Subject: [PATCH] fix: pinecone vector store (#1512)

---
 .changeset/sharp-plums-grab.md                |  5 ++
 e2e/.env.ci                                   |  3 +
 e2e/node/vector-store/pinecone.e2e.ts         | 66 +++++++++++++++++++
 .../src/vector-store/PineconeVectorStore.ts   | 34 +++++++---
 packages/llamaindex/src/vector-store/utils.ts | 14 +++-
 5 files changed, 111 insertions(+), 11 deletions(-)
 create mode 100644 .changeset/sharp-plums-grab.md
 create mode 100644 e2e/node/vector-store/pinecone.e2e.ts

diff --git a/.changeset/sharp-plums-grab.md b/.changeset/sharp-plums-grab.md
new file mode 100644
index 000000000..5c43df28c
--- /dev/null
+++ b/.changeset/sharp-plums-grab.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+fix: pinecone vector store search
diff --git a/e2e/.env.ci b/e2e/.env.ci
index 080df233e..25f35a7bd 100644
--- a/e2e/.env.ci
+++ b/e2e/.env.ci
@@ -1 +1,4 @@
 POSTGRES_USER=runner
+PINECONE_API_KEY=
+PINECONE_INDEX_NAME=
+PINECONE_NAMESPACE=
diff --git a/e2e/node/vector-store/pinecone.e2e.ts b/e2e/node/vector-store/pinecone.e2e.ts
new file mode 100644
index 000000000..e15b08e5e
--- /dev/null
+++ b/e2e/node/vector-store/pinecone.e2e.ts
@@ -0,0 +1,66 @@
+import { Document, MetadataMode } from "@llamaindex/core/schema";
+import { config } from "dotenv";
+import {
+  OpenAIEmbedding,
+  PineconeVectorStore,
+  VectorStoreIndex,
+} from "llamaindex";
+import assert from "node:assert";
+import { test } from "node:test";
+
+config({ path: [".env.local", ".env", ".env.ci"] });
+
+await test("pinecone", async (t) => {
+  if (
+    !process.env.PINECONE_API_KEY ||
+    !process.env.PINECONE_NAMESPACE ||
+    !process.env.PINECONE_INDEX_NAME
+  ) {
+    return t.skip(
+      "PINECONE_API_KEY, PINECONE_NAMESPACE, and PINECONE_INDEX_NAME must be set to run this test",
+    );
+  }
+  const openaiEmbedding = new OpenAIEmbedding({
+    model: "text-embedding-3-large",
+  });
+
+  const vectorStore = new PineconeVectorStore({
+    embeddingModel: openaiEmbedding,
+  });
+
+  t.after(async () => {
+    await vectorStore.clearIndex();
+  });
+
+  const index = await VectorStoreIndex.fromVectorStore(vectorStore);
+
+  const retriever = index.asRetriever({
+    similarityTopK: 3,
+  });
+  const text = "We are open from 9am to 5pm";
+
+  await vectorStore.add([
+    new Document({
+      text,
+      embedding: await openaiEmbedding.getTextEmbedding(text),
+    }),
+  ]);
+
+  const results = await retriever.retrieve({
+    query: "When are you open?",
+  });
+  // Use plain loops, not Array#every: assertion callbacks return undefined,
+  // so `every` would stop after checking only the first match/value.
+  for (const result of results) {
+    assert.ok(result.node.embedding instanceof Array);
+    for (const [idx, value] of result.node.embedding.entries()) {
+      const message = `Embedding at index ${idx} should be a number`;
+      assert.ok(typeof value === "number", message);
+    }
+    assert.ok(typeof result.score === "number", "Score should be a number");
+    assert.ok(
+      result.node.getContent(MetadataMode.NONE).length > 0,
+      "Content should not be empty",
+    );
+  }
+});
diff --git a/packages/llamaindex/src/vector-store/PineconeVectorStore.ts b/packages/llamaindex/src/vector-store/PineconeVectorStore.ts
index 3b6105bae..e2dca1494 100644
--- a/packages/llamaindex/src/vector-store/PineconeVectorStore.ts
+++ b/packages/llamaindex/src/vector-store/PineconeVectorStore.ts
@@ -26,6 +26,7 @@ type PineconeParams = {
   chunkSize?: number;
   namespace?: string;
   textKey?: string;
+  apiKey?: string;
 } & VectorStoreBaseParams;
 
 /**
@@ -48,6 +49,8 @@ export class PineconeVectorStore extends BaseVectorStore {
   chunkSize: number;
   textKey: string;
 
+  apiKey: string;
+
   constructor(params?: PineconeParams) {
     super(params);
     this.indexName =
@@ -57,12 +60,19 @@ export class PineconeVectorStore extends BaseVectorStore {
       params?.chunkSize ??
       Number.parseInt(getEnv("PINECONE_CHUNK_SIZE") ?? "100");
     this.textKey = params?.textKey ?? "text";
+    const apiKey = params?.apiKey ?? getEnv("PINECONE_API_KEY");
+    if (!apiKey) {
+      throw new Error("PINECONE_API_KEY is required");
+    }
+    this.apiKey = apiKey;
   }
 
   private async getDb(): Promise<Pinecone> {
     if (!this.db) {
       const { Pinecone } = await import("@pinecone-database/pinecone");
-      this.db = await new Pinecone();
+      this.db = new Pinecone({
+        apiKey: this.apiKey,
+      });
     }
 
     return Promise.resolve(this.db);
@@ -100,7 +110,7 @@ export class PineconeVectorStore extends BaseVectorStore {
    */
   async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
     if (embeddingResults.length == 0) {
-      return Promise.resolve([]);
+      return [];
     }
 
     const idx: Index = await this.index();
@@ -110,10 +120,10 @@ export class PineconeVectorStore extends BaseVectorStore {
       const chunk = nodes.slice(i, i + this.chunkSize);
       const result = await this.saveChunk(idx, chunk);
       if (!result) {
-        return Promise.reject(new Error("Failed to save chunk"));
+        throw new Error("Failed to save chunk");
       }
     }
-    return Promise.resolve([]);
+    return [];
   }
 
   protected async saveChunk(idx: Index, chunk: PineconeRecord[]) {
@@ -157,13 +167,19 @@ export class PineconeVectorStore extends BaseVectorStore {
       topK: query.similarityTopK,
       includeValues: true,
       includeMetadata: true,
-      filter: filter,
     };
 
+    if (filter) {
+      defaultOptions.filter = filter;
+    }
+
     const idx = await this.index();
     const results = await idx.query(defaultOptions);
 
     const idList = results.matches.map((row) => row.id);
+    if (idList.length == 0) {
+      return { nodes: [], similarities: [], ids: [] };
+    }
     const records: FetchResponse = await idx.fetch(idList);
     const rows = Object.values(records.records);
 
@@ -179,13 +195,11 @@ export class PineconeVectorStore extends BaseVectorStore {
       return node;
     });
 
-    const ret = {
+    return {
       nodes: nodes,
       similarities: results.matches.map((row) => row.score || 999),
       ids: results.matches.map((row) => row.id),
     };
-
-    return Promise.resolve(ret);
   }
 
   /**
@@ -197,8 +211,8 @@ export class PineconeVectorStore extends BaseVectorStore {
     return Promise.resolve();
   }
 
-  toPineconeFilter(stdFilters?: MetadataFilters): object {
-    if (!stdFilters) return {};
+  toPineconeFilter(stdFilters?: MetadataFilters): object | undefined {
+    if (stdFilters == null) return undefined;
 
     const transformCondition = (
       condition: `${FilterCondition}` = "and",
diff --git a/packages/llamaindex/src/vector-store/utils.ts b/packages/llamaindex/src/vector-store/utils.ts
index 34b03cb0f..79e02062f 100644
--- a/packages/llamaindex/src/vector-store/utils.ts
+++ b/packages/llamaindex/src/vector-store/utils.ts
@@ -65,7 +65,19 @@ export function metadataDictToNode(
     }
   } else {
     nodeObj = JSON.parse(nodeContent);
-    nodeObj.metadata = rest;
+    nodeObj = {
+      ...rest,
+      ...options?.fallback,
+      ...nodeObj,
+    };
+    nodeObj.metadata = {
+      ...(options?.fallback &&
+      "metadata" in options.fallback &&
+      typeof options.fallback.metadata === "object"
+        ? options?.fallback.metadata
+        : {}),
+      ...rest,
+    };
   }
 
   // Note: we're using the name of the class stored in `_node_type`
-- 
GitLab