Skip to content
Snippets Groups Projects
Commit b3fd87f3 authored by Michael Tutty's avatar Michael Tutty Committed by Marcus Schiesser
Browse files

Add PineconeVectorStore

parent a67f9aaa
Branches tolga/fix-function-call
Tags v0.0.43
No related merge requests found
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
"@datastax/astra-db-ts": "^0.1.2", "@datastax/astra-db-ts": "^0.1.2",
"@mistralai/mistralai": "^0.0.7", "@mistralai/mistralai": "^0.0.7",
"@notionhq/client": "^2.2.14", "@notionhq/client": "^2.2.14",
"@pinecone-database/pinecone": "^1.1.2",
"@xenova/transformers": "^2.10.0", "@xenova/transformers": "^2.10.0",
"assemblyai": "^4.0.0", "assemblyai": "^4.0.0",
"compromise": "^14.10.1", "compromise": "^14.10.1",
......
import { VectorStore, VectorStoreQuery, VectorStoreQueryResult } from "./types";
import { ExactMatchFilter, MetadataFilters } from "../../../dist";
import { BaseNode, Document, Metadata, MetadataMode } from "../../Node";
import { GenericFileSystem } from "../FileSystem";
import {
FetchResponse,
Index,
Pinecone,
ScoredPineconeRecord,
} from "@pinecone-database/pinecone";
/**
 * Optional constructor settings for PineconeVectorStore.
 * Any field left out falls back to the matching environment variable and
 * then to a built-in default (see the PineconeVectorStore constructor).
 */
type PineconeParams = {
  /** Number of records sent per upsert call. */
  chunkSize?: number;
  /** Name of the Pinecone index to read from and write to. */
  indexName?: string;
};
/**
* Provides support for writing and querying vector data in Pinecone.
*/
export class PineconeVectorStore implements VectorStore {
  /** Node text is written into each record's metadata under the `text` key. */
  storesText: boolean = true;

  /*
    Environment variables used by @pinecone-database/pinecone:
      PINECONE_API_KEY="your_api_key"
      PINECONE_ENVIRONMENT="your_environment"
    Environment variables used by this class:
      PINECONE_INDEX_NAME="llama"
      PINECONE_CHUNK_SIZE=100
  */

  /** Pinecone client, created on first use by getDb(). */
  db?: Pinecone;
  /** Name of the index every operation of this store targets. */
  indexName: string;
  /** Maximum number of records sent in a single upsert call. */
  chunkSize: number;

  /**
   * @param params Optional overrides. Each setting is resolved in the order
   *   explicit param, environment variable, built-in default ("llama" / 100).
   *   A chunk size that is not a positive integer falls back to the default,
   *   because the batching loop in add() cannot advance with it.
   */
  constructor(params?: PineconeParams) {
    const defaultChunkSize = 100;
    this.indexName =
      params?.indexName ?? process.env.PINECONE_INDEX_NAME ?? "llama";
    const requestedChunkSize =
      params?.chunkSize ??
      Number.parseInt(process.env.PINECONE_CHUNK_SIZE ?? "", 10);
    this.chunkSize =
      Number.isInteger(requestedChunkSize) && requestedChunkSize > 0
        ? requestedChunkSize
        : defaultChunkSize;
  }

  /**
   * Returns the cached Pinecone client, constructing it on the first call.
   */
  private async getDb(): Promise<Pinecone> {
    if (!this.db) {
      this.db = new Pinecone();
    }
    return this.db;
  }

  /**
   * Gives access to the underlying Pinecone client.
   * The client is constructed without arguments, so its configuration comes
   * from the environment. This method does not create or verify the index.
   * @returns Promise resolving to the Pinecone client.
   */
  client() {
    return this.getDb();
  }

  /**
   * @returns Handle for the index named by `indexName`.
   */
  async index(): Promise<Index> {
    const db = await this.getDb();
    return db.index(this.indexName);
  }

  /**
   * Delete all records for the current index.
   * NOTE: This operation is not supported by Pinecone for "Starter" (free) indexes.
   * @returns The result of the delete query.
   */
  async clearIndex() {
    const idx = await this.index();
    return await idx.deleteAll();
  }

  /**
   * Upserts the given nodes into the index, `chunkSize` records per request.
   * @TODO Does not create or insert sparse vectors.
   * @param embeddingResults The Nodes to be inserted, optionally including metadata tuples.
   * @returns The ids of the upserted records, in the same order as the input nodes.
   * @throws Error when any chunk fails to upsert. Chunks sent before the
   *   failing one are not rolled back.
   */
  async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
    if (embeddingResults.length === 0) {
      return [];
    }
    const idx: Index = await this.index();
    // Arrow wrapper keeps `this` bound in case nodeToRecord is overridden.
    const records = embeddingResults.map((node) => this.nodeToRecord(node));
    for (let start = 0; start < records.length; start += this.chunkSize) {
      const chunk = records.slice(start, start + this.chunkSize);
      const saved = await this.saveChunk(idx, chunk);
      if (!saved) {
        const end = start + chunk.length - 1;
        throw new Error(
          `Failed to upsert records ${start}-${end} into Pinecone index "${this.indexName}"`,
        );
      }
    }
    return records.map((record) => record.id);
  }

  /**
   * Upserts one chunk of records.
   * @param idx Index to write to.
   * @param chunk Records to upsert in a single request.
   * @returns true on success; false (after logging the error) on failure.
   */
  protected async saveChunk(idx: Index, chunk: any) {
    try {
      await idx.upsert(chunk);
      return true;
    } catch (err) {
      const msg = `${err}`;
      console.error(msg, err);
      return false;
    }
  }

  /**
   * Deletes a single record from the index by id.
   * NOTE(review): records are upserted under the node's own id (see
   * nodeToRecord), so this only removes a record whose id equals `refDocId` --
   * confirm callers pass record ids rather than source-document ids.
   * @param refDocId Unique identifier for the record to delete.
   * @param deleteKwargs Required by VectorStore interface. Currently ignored.
   * @returns Promise that resolves if the delete query did not throw an error.
   */
  async delete(refDocId: string, deleteKwargs?: any): Promise<void> {
    const idx = await this.index();
    return idx.deleteOne(refDocId);
  }

  /**
   * Query the vector store for the closest matching data to the query embeddings.
   * Matches are looked up with a follow-up fetch to obtain their values and
   * metadata; a match that the fetch does not return is dropped so that
   * `nodes`, `similarities` and `ids` always stay index-aligned.
   * @TODO QUERY TYPES
   * @param query The VectorStoreQuery to be used
   * @param options Required by VectorStore interface. Currently ignored.
   * @returns Zero or more Document instances with data from the vector store.
   */
  async query(
    query: VectorStoreQuery,
    options?: any,
  ): Promise<VectorStoreQueryResult> {
    const queryOptions: any = {
      vector: query.queryEmbedding,
      topK: query.similarityTopK,
    };
    // Only send a filter when there is something to filter on.
    const filter = this.toPineconeFilter(query.filters);
    if (filter && Object.keys(filter).length > 0) {
      queryOptions.filter = filter;
    }

    const idx = await this.index();
    const results = await idx.query(queryOptions);
    const matches = results.matches ?? [];
    if (matches.length === 0) {
      // Nothing matched: skip the fetch round-trip entirely.
      return { nodes: [], similarities: [], ids: [] };
    }

    const fetched: FetchResponse<any> = await idx.fetch(
      matches.map((match) => match.id),
    );
    const recordsById = fetched.records ?? {};

    // Walk the matches (not the fetched map) so result order follows the
    // ranking returned by the query.
    const nodes: Document[] = [];
    const similarities: number[] = [];
    const ids: string[] = [];
    for (const match of matches) {
      const row = recordsById[match.id];
      if (!row) {
        continue;
      }
      nodes.push(
        new Document({
          id_: row.id,
          text: this.textFromResultRow(row),
          metadata: this.metaWithoutText(row.metadata ?? {}),
          embedding: row.values,
        }),
      );
      // `??` rather than `||`: a score of 0 is a real value, not "missing".
      similarities.push(match.score ?? 999);
      ids.push(match.id);
    }
    return { nodes, similarities, ids };
  }

  /**
   * Required by VectorStore interface. Currently ignored.
   * @param persistPath
   * @param fs
   * @returns Resolved Promise.
   */
  persist(
    persistPath: string,
    fs?: GenericFileSystem | undefined,
  ): Promise<void> {
    return Promise.resolve();
  }

  /**
   * Flattens exact-match filters into a `{ key: value }` object.
   * @param stdFilters Filters to convert.
   * @returns The filter object, or undefined when no filter list was given.
   */
  toPineconeFilter(stdFilters?: MetadataFilters) {
    return stdFilters?.filters?.reduce((carry: any, item: ExactMatchFilter) => {
      carry[item.key] = item.value;
      return carry;
    }, {});
  }

  /**
   * @param row Record returned by Pinecone.
   * @returns The `text` entry of the record's metadata, or "" when absent.
   */
  textFromResultRow(row: ScoredPineconeRecord<Metadata>): string {
    return row.metadata?.text ?? "";
  }

  /**
   * @param meta Record metadata; a missing value is treated as empty.
   * @returns A shallow copy of `meta` without its `text` entry.
   */
  metaWithoutText(meta: Metadata): any {
    const source = meta ?? {};
    return Object.keys(source)
      .filter((key) => key !== "text")
      .reduce((acc: any, key: string) => {
        acc[key] = source[key];
        return acc;
      }, {});
  }

  /**
   * Converts a node into the record shape passed to upsert.
   * The node's metadata is copied rather than modified, so the added
   * `create_date` and `text` entries never appear on the caller's node.
   * @param node Node to convert.
   * @returns Record with `id`, `values` (the embedding) and `metadata`.
   */
  nodeToRecord(node: BaseNode<Metadata>) {
    const id: any = node.id_.length ? node.id_ : null;
    const meta: any = {
      ...(node.metadata ?? {}),
      // NOTE(review): stored as a Date object; presumably serialized to an
      // ISO string on the wire -- confirm against Pinecone's metadata types.
      create_date: new Date(),
      text: node.getContent(MetadataMode.EMBED),
    };
    return {
      id: id,
      values: node.getEmbedding(),
      metadata: meta,
    };
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment