Skip to content
Snippets Groups Projects
Unverified Commit 83c38975 authored by Alex Yang's avatar Alex Yang Committed by GitHub
Browse files

fix: pinecone vector store (#1512)

parent e1cbce1f
No related branches found
No related tags found
No related merge requests found
---
"llamaindex": patch
---
fix: pinecone vector store search
POSTGRES_USER=runner
PINECONE_API_KEY=
PINECONE_INDEX_NAME=
PINECONE_NAMESPACE=
import { Document, MetadataMode } from "@llamaindex/core/schema";
import { config } from "dotenv";
import {
OpenAIEmbedding,
PineconeVectorStore,
VectorStoreIndex,
} from "llamaindex";
import assert from "node:assert";
import { test } from "node:test";
// Load environment variables with dotenv from the listed files.
config({ path: [".env.local", ".env", ".env.ci"] });

/**
 * Integration test for PineconeVectorStore retrieval.
 *
 * Adds a single embedded document to the store, queries it through a
 * VectorStoreIndex retriever, and validates the shape of every returned
 * result (numeric embedding values, numeric score, non-empty content).
 *
 * The test is skipped unless PINECONE_API_KEY, PINECONE_NAMESPACE and
 * PINECONE_INDEX_NAME are all set, since it talks to a real Pinecone index.
 */
await test("pinecone", async (t) => {
  if (
    !process.env.PINECONE_API_KEY ||
    !process.env.PINECONE_NAMESPACE ||
    !process.env.PINECONE_INDEX_NAME
  ) {
    return t.skip(
      "PINECONE_API_KEY, PINECONE_NAMESPACE, and PINECONE_INDEX_NAME must be set to run this test",
    );
  }

  const openaiEmbedding = new OpenAIEmbedding({
    model: "text-embedding-3-large",
  });
  const vectorStore = new PineconeVectorStore({
    embeddingModel: openaiEmbedding,
  });

  // Registered before any data is written so the index is cleared once the
  // test finishes, whether the assertions below pass or throw.
  t.after(async () => {
    await vectorStore.clearIndex();
  });

  const index = await VectorStoreIndex.fromVectorStore(vectorStore);
  const retriever = index.asRetriever({
    similarityTopK: 3,
  });

  const text = "We are open from 9am to 5pm";
  await vectorStore.add([
    new Document({
      text,
      embedding: await openaiEmbedding.getTextEmbedding(text),
    }),
  ]);

  const results = await retriever.retrieve({
    query: "When are you open?",
  });

  // Iterate with for...of / forEach rather than Array.prototype.every:
  // `every` stops at the first callback that returns a falsy value, and a
  // callback that only asserts returns undefined, so only the first result
  // (and the first embedding component) would actually be checked.
  for (const result of results) {
    const { embedding } = result.node;
    assert.ok(embedding instanceof Array);
    embedding.forEach((value, idx) => {
      assert.ok(
        typeof value === "number",
        `Embedding at index ${idx} should be a number`,
      );
    });
    assert.ok(typeof result.score === "number", "Score should be a number");
    assert.ok(
      result.node.getContent(MetadataMode.NONE).length > 0,
      "Content should not be empty",
    );
  }
});
......@@ -26,6 +26,7 @@ type PineconeParams = {
chunkSize?: number;
namespace?: string;
textKey?: string;
apiKey?: string;
} & VectorStoreBaseParams;
/**
......@@ -48,6 +49,8 @@ export class PineconeVectorStore extends BaseVectorStore {
chunkSize: number;
textKey: string;
apiKey: string;
constructor(params?: PineconeParams) {
super(params);
this.indexName =
......@@ -57,12 +60,19 @@ export class PineconeVectorStore extends BaseVectorStore {
params?.chunkSize ??
Number.parseInt(getEnv("PINECONE_CHUNK_SIZE") ?? "100");
this.textKey = params?.textKey ?? "text";
const apiKey = params?.apiKey ?? getEnv("PINECONE_API_KEY");
if (!apiKey) {
throw new Error("PINECONE_API_KEY is required");
}
this.apiKey = apiKey;
}
private async getDb(): Promise<Pinecone> {
if (!this.db) {
const { Pinecone } = await import("@pinecone-database/pinecone");
this.db = await new Pinecone();
this.db = new Pinecone({
apiKey: this.apiKey,
});
}
return Promise.resolve(this.db);
......@@ -100,7 +110,7 @@ export class PineconeVectorStore extends BaseVectorStore {
*/
async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
if (embeddingResults.length == 0) {
return Promise.resolve([]);
return [];
}
const idx: Index = await this.index();
......@@ -110,10 +120,10 @@ export class PineconeVectorStore extends BaseVectorStore {
const chunk = nodes.slice(i, i + this.chunkSize);
const result = await this.saveChunk(idx, chunk);
if (!result) {
return Promise.reject(new Error("Failed to save chunk"));
throw new Error("Failed to save chunk");
}
}
return Promise.resolve([]);
return [];
}
protected async saveChunk(idx: Index, chunk: PineconeRecord[]) {
......@@ -157,13 +167,19 @@ export class PineconeVectorStore extends BaseVectorStore {
topK: query.similarityTopK,
includeValues: true,
includeMetadata: true,
filter: filter,
};
if (filter) {
defaultOptions.filter = filter;
}
const idx = await this.index();
const results = await idx.query(defaultOptions);
const idList = results.matches.map((row) => row.id);
if (idList.length == 0) {
return { nodes: [], similarities: [], ids: [] };
}
const records: FetchResponse = await idx.fetch(idList);
const rows = Object.values(records.records);
......@@ -179,13 +195,11 @@ export class PineconeVectorStore extends BaseVectorStore {
return node;
});
const ret = {
return {
nodes: nodes,
similarities: results.matches.map((row) => row.score || 999),
ids: results.matches.map((row) => row.id),
};
return Promise.resolve(ret);
}
/**
......@@ -197,8 +211,8 @@ export class PineconeVectorStore extends BaseVectorStore {
return Promise.resolve();
}
toPineconeFilter(stdFilters?: MetadataFilters): object {
if (!stdFilters) return {};
toPineconeFilter(stdFilters?: MetadataFilters): object | undefined {
if (stdFilters == null) return undefined;
const transformCondition = (
condition: `${FilterCondition}` = "and",
......
......@@ -65,7 +65,19 @@ export function metadataDictToNode(
}
} else {
nodeObj = JSON.parse(nodeContent);
nodeObj.metadata = rest;
nodeObj = {
...rest,
...options?.fallback,
...nodeObj,
};
nodeObj.metadata = {
...(options?.fallback &&
"metadata" in options.fallback &&
typeof options.fallback.metadata === "object"
? options?.fallback.metadata
: {}),
...rest,
};
}
// Note: we're using the name of the class stored in `_node_type`
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment