diff --git a/.changeset/sharp-plums-grab.md b/.changeset/sharp-plums-grab.md new file mode 100644 index 0000000000000000000000000000000000000000..5c43df28c47f59f1b9d70be36e5ba73154d25ffd --- /dev/null +++ b/.changeset/sharp-plums-grab.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +fix: pinecone vector store search diff --git a/e2e/.env.ci b/e2e/.env.ci index 080df233eff9547bc9566a3803f3fabefd418c95..25f35a7bdbf360b1d0800c0a4006d34306890a63 100644 --- a/e2e/.env.ci +++ b/e2e/.env.ci @@ -1 +1,4 @@ POSTGRES_USER=runner +PINECONE_API_KEY= +PINECONE_INDEX_NAME= +PINECONE_NAMESPACE= diff --git a/e2e/node/vector-store/pinecone.e2e.ts b/e2e/node/vector-store/pinecone.e2e.ts new file mode 100644 index 0000000000000000000000000000000000000000..e15b08e5e1b10c569e5e33c0aaaf0d2830103a70 --- /dev/null +++ b/e2e/node/vector-store/pinecone.e2e.ts @@ -0,0 +1,66 @@ +import { Document, MetadataMode } from "@llamaindex/core/schema"; +import { config } from "dotenv"; +import { + OpenAIEmbedding, + PineconeVectorStore, + VectorStoreIndex, +} from "llamaindex"; +import assert from "node:assert"; +import { test } from "node:test"; + +config({ path: [".env.local", ".env", ".env.ci"] }); + +await test("pinecone", async (t) => { + if ( + !process.env.PINECONE_API_KEY || + !process.env.PINECONE_NAMESPACE || + !process.env.PINECONE_INDEX_NAME + ) { + return t.skip( + "PINECONE_API_KEY, PINECONE_NAMESPACE, and PINECONE_INDEX_NAME must be set to run this test", + ); + } + const openaiEmbedding = new OpenAIEmbedding({ + model: "text-embedding-3-large", + }); + + const vectorStore = new PineconeVectorStore({ + embeddingModel: openaiEmbedding, + }); + + t.after(async () => { + await vectorStore.clearIndex(); + }); + + const index = await VectorStoreIndex.fromVectorStore(vectorStore); + + const retriever = index.asRetriever({ + similarityTopK: 3, + }); + const text = "We are open from 9am to 5pm"; + + await vectorStore.add([ + new Document({ + text, + embedding: await openaiEmbedding.getTextEmbedding(text), + }), + ]); + + const results = await retriever.retrieve({ + query: "When are you open?", + }); + results.every((result) => { + assert.ok(result.node.embedding instanceof Array); + result.node.embedding.every((embedding, idx) => + assert.ok( + typeof embedding === "number", + `Embedding at index ${idx} should be a number`, + ), + ); + assert.ok(typeof result.score === "number", "Score should be a number"); + assert.ok( + result.node.getContent(MetadataMode.NONE).length > 0, + "Content should not be empty", + ); + }); +}); diff --git a/packages/llamaindex/src/vector-store/PineconeVectorStore.ts b/packages/llamaindex/src/vector-store/PineconeVectorStore.ts index 3b6105baef2a52f9a44ce29e8867805c0ed33c55..e2dca1494e9f195a7e5135822c10d94ceeeef63d 100644 --- a/packages/llamaindex/src/vector-store/PineconeVectorStore.ts +++ b/packages/llamaindex/src/vector-store/PineconeVectorStore.ts @@ -26,6 +26,7 @@ type PineconeParams = { chunkSize?: number; namespace?: string; textKey?: string; + apiKey?: string; } & VectorStoreBaseParams; /** @@ -48,6 +49,8 @@ export class PineconeVectorStore extends BaseVectorStore { chunkSize: number; textKey: string; + apiKey: string; + constructor(params?: PineconeParams) { super(params); this.indexName = @@ -57,12 +60,19 @@ export class PineconeVectorStore extends BaseVectorStore { params?.chunkSize ?? Number.parseInt(getEnv("PINECONE_CHUNK_SIZE") ?? "100"); this.textKey = params?.textKey ?? "text"; + const apiKey = params?.apiKey ?? getEnv("PINECONE_API_KEY"); + if (!apiKey) { + throw new Error("PINECONE_API_KEY is required"); + } + this.apiKey = apiKey; } private async getDb(): Promise<Pinecone> { if (!this.db) { const { Pinecone } = await import("@pinecone-database/pinecone"); - this.db = await new Pinecone(); + this.db = new Pinecone({ + apiKey: this.apiKey, + }); } return Promise.resolve(this.db); @@ -100,7 +110,7 @@ export class PineconeVectorStore extends BaseVectorStore { */ async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> { if (embeddingResults.length == 0) { - return Promise.resolve([]); + return []; } const idx: Index = await this.index(); @@ -110,10 +120,10 @@ export class PineconeVectorStore extends BaseVectorStore { const chunk = nodes.slice(i, i + this.chunkSize); const result = await this.saveChunk(idx, chunk); if (!result) { - return Promise.reject(new Error("Failed to save chunk")); + throw new Error("Failed to save chunk"); } } - return Promise.resolve([]); + return []; } protected async saveChunk(idx: Index, chunk: PineconeRecord[]) { @@ -157,13 +167,19 @@ export class PineconeVectorStore extends BaseVectorStore { topK: query.similarityTopK, includeValues: true, includeMetadata: true, - filter: filter, }; + if (filter) { + defaultOptions.filter = filter; + } + const idx = await this.index(); const results = await idx.query(defaultOptions); const idList = results.matches.map((row) => row.id); + if (idList.length == 0) { + return { nodes: [], similarities: [], ids: [] }; + } const records: FetchResponse = await idx.fetch(idList); const rows = Object.values(records.records); @@ -179,13 +195,11 @@ export class PineconeVectorStore extends BaseVectorStore { return node; }); - const ret = { + return { nodes: nodes, similarities: results.matches.map((row) => row.score || 999), ids: results.matches.map((row) => row.id), }; - - return Promise.resolve(ret); } /** @@ -197,8 +211,8 @@ export class PineconeVectorStore extends BaseVectorStore { return Promise.resolve(); } - toPineconeFilter(stdFilters?: MetadataFilters): object { - if (!stdFilters) return {}; + toPineconeFilter(stdFilters?: MetadataFilters): object | undefined { + if (stdFilters == null) return undefined; const transformCondition = ( condition: `${FilterCondition}` = "and", diff --git a/packages/llamaindex/src/vector-store/utils.ts b/packages/llamaindex/src/vector-store/utils.ts index 34b03cb0f9b6815c21ddb5626b48139605d2bc9e..79e02062f7d0fed92283e0eb0b28ce5c33869a25 100644 --- a/packages/llamaindex/src/vector-store/utils.ts +++ b/packages/llamaindex/src/vector-store/utils.ts @@ -65,7 +65,19 @@ export function metadataDictToNode( } } else { nodeObj = JSON.parse(nodeContent); - nodeObj.metadata = rest; + nodeObj = { + ...rest, + ...options?.fallback, + ...nodeObj, + }; + nodeObj.metadata = { + ...(options?.fallback && + "metadata" in options.fallback && + typeof options.fallback.metadata === "object" + ? options?.fallback.metadata + : {}), + ...rest, + }; } // Note: we're using the name of the class stored in `_node_type`