Skip to content
Snippets Groups Projects
Commit b3fd87f3 authored by Michael Tutty's avatar Michael Tutty Committed by Marcus Schiesser
Browse files

Add PineconeVectorStore

parent a67f9aaa
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,7 @@
"@datastax/astra-db-ts": "^0.1.2",
"@mistralai/mistralai": "^0.0.7",
"@notionhq/client": "^2.2.14",
"@pinecone-database/pinecone": "^1.1.2",
"@xenova/transformers": "^2.10.0",
"assemblyai": "^4.0.0",
"compromise": "^14.10.1",
......
import { VectorStore, VectorStoreQuery, VectorStoreQueryResult } from "./types";
import { ExactMatchFilter, MetadataFilters } from "../../../dist";
import { BaseNode, Document, Metadata, MetadataMode } from "../../Node";
import { GenericFileSystem } from "../FileSystem";
import {
FetchResponse,
Index,
Pinecone,
ScoredPineconeRecord,
} from "@pinecone-database/pinecone";
/**
 * Optional settings accepted by the PineconeVectorStore constructor.
 * Any field left out falls back to an environment variable and then to a
 * built-in default (see the constructor).
 */
type PineconeParams = {
// Name of the Pinecone index that the store reads from and writes to.
indexName?: string;
// Maximum number of records sent to Pinecone in a single upsert call.
chunkSize?: number;
};
/**
 * Provides support for writing and querying vector data in Pinecone.
 *
 * Node text is stored alongside the embedding in the record metadata under
 * the `text` key (hence `storesText = true`) and is split back out when
 * query results are converted to Documents.
 */
export class PineconeVectorStore implements VectorStore {
  storesText: boolean = true;

  /*
    FROM @pinecone-database/pinecone:
      PINECONE_API_KEY="your_api_key"
      PINECONE_ENVIRONMENT="your_environment"
    Our addition:
      PINECONE_INDEX_NAME="llama"
      PINECONE_CHUNK_SIZE=100
  */
  db?: Pinecone;
  indexName: string;
  chunkSize: number;

  /**
   * @param params Optional overrides. `indexName` falls back to
   *   PINECONE_INDEX_NAME and then "llama"; `chunkSize` falls back to
   *   PINECONE_CHUNK_SIZE and then 100.
   */
  constructor(params?: PineconeParams) {
    this.indexName =
      params?.indexName ?? process.env.PINECONE_INDEX_NAME ?? "llama";

    const requestedChunkSize =
      params?.chunkSize ??
      Number.parseInt(process.env.PINECONE_CHUNK_SIZE ?? "100", 10);
    // A chunk size that is NaN, zero or negative would make add() loop
    // forever or send empty batches, so fall back to the default instead.
    this.chunkSize =
      Number.isFinite(requestedChunkSize) && requestedChunkSize >= 1
        ? Math.floor(requestedChunkSize)
        : 100;
  }

  /**
   * Lazily creates the Pinecone client on first use and caches it in `db`.
   * The client is constructed without arguments, so its configuration is
   * expected to come from the environment variables listed above.
   */
  private async getDb(): Promise<Pinecone> {
    if (!this.db) {
      this.db = new Pinecone();
    }
    return this.db;
  }

  /**
   * Returns the (lazily created) Pinecone client.
   * NOTE: this does not check for, or create, the named index.
   * @returns Promise resolving to the Pinecone client.
   */
  client(): Promise<Pinecone> {
    return this.getDb();
  }

  /**
   * Returns a handle to the index named by `indexName`.
   * @returns Promise resolving to the Pinecone index handle.
   */
  async index(): Promise<Index> {
    const db = await this.getDb();
    return db.index(this.indexName);
  }

  /**
   * Delete all records for the current index.
   * NOTE: This operation is not supported by Pinecone for "Starter" (free) indexes.
   * @returns The result of the delete query.
   */
  async clearIndex() {
    const db = await this.getDb();
    return await db.index(this.indexName).deleteAll();
  }

  /**
   * Adds vector record(s) to the index, `chunkSize` records per upsert call.
   * @TODO Does not create or insert sparse vectors.
   * @param embeddingResults The Nodes to be inserted, optionally including metadata tuples.
   * @returns The ids of the nodes that were upserted, in input order.
   * @throws Error if any chunk fails to upsert. Chunks sent before the
   *   failing one are not rolled back.
   */
  async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
    if (embeddingResults.length === 0) {
      return [];
    }

    const idx: Index = await this.index();
    // Arrow function rather than a bare method reference so `this` stays
    // bound if nodeToRecord is ever overridden to use instance state.
    const records = embeddingResults.map((node) => this.nodeToRecord(node));

    // chunkSize is a public mutable field; guard against a value that would
    // never advance the loop.
    const step = this.chunkSize >= 1 ? Math.floor(this.chunkSize) : 1;

    for (let start = 0; start < records.length; start += step) {
      const chunk = records.slice(start, start + step);
      const saved = await this.saveChunk(idx, chunk);
      if (!saved) {
        throw new Error(
          `Failed to upsert records ${start}-${start + chunk.length - 1} ` +
            `into Pinecone index "${this.indexName}"`,
        );
      }
    }

    return embeddingResults.map((node) => node.id_);
  }

  /**
   * Upserts one chunk of records, logging (not throwing) on failure.
   * @param idx Index handle to write to.
   * @param chunk Records as produced by nodeToRecord.
   * @returns true when the upsert succeeded, false when it threw.
   */
  protected async saveChunk(idx: Index, chunk: any) {
    try {
      await idx.upsert(chunk);
      return true;
    } catch (err) {
      const msg = `${err}`;
      console.log(msg, err);
      return false;
    }
  }

  /**
   * Deletes a single record from the index by id.
   * @param refDocId Unique identifier for the record to delete.
   * @param deleteKwargs Required by VectorStore interface. Currently ignored.
   * @returns Promise that resolves if the delete query did not throw an error.
   */
  async delete(refDocId: string, deleteKwargs?: any): Promise<void> {
    const idx = await this.index();
    return idx.deleteOne(refDocId);
  }

  /**
   * Query the vector store for the closest matching data to the query embeddings
   * @TODO QUERY TYPES
   * @param query The VectorStoreQuery to be used
   * @param options Required by VectorStore interface. Currently ignored.
   * @returns Zero or more Document instances with data from the vector store.
   *   `nodes`, `similarities` and `ids` are parallel arrays in match order.
   */
  async query(
    query: VectorStoreQuery,
    options?: any,
  ): Promise<VectorStoreQueryResult> {
    const queryOptions: any = {
      vector: query.queryEmbedding,
      topK: query.similarityTopK,
    };

    // Only send a filter when the caller supplied at least one condition.
    const filter = this.toPineconeFilter(query.filters);
    if (filter && Object.keys(filter).length > 0) {
      queryOptions.filter = filter;
    }

    const idx = await this.index();
    const results = await idx.query(queryOptions);
    const matches = results.matches ?? [];
    if (matches.length === 0) {
      // Nothing matched; skip the fetch round-trip entirely.
      return { nodes: [], similarities: [], ids: [] };
    }

    const fetched: FetchResponse<any> = await idx.fetch(
      matches.map((match) => match.id),
    );
    const recordsById = fetched.records ?? {};

    const nodes: Document[] = [];
    const similarities: number[] = [];
    const ids: string[] = [];

    // Walk the matches (not the fetched map) so every node lines up with its
    // own score and id; the key order of the fetched records is not
    // guaranteed to follow the match order.
    for (const match of matches) {
      const row = recordsById[match.id];
      if (!row) {
        // Record was not returned by fetch; drop it so the arrays stay parallel.
        continue;
      }
      nodes.push(
        new Document({
          id_: row.id,
          text: this.textFromResultRow(row),
          metadata: this.metaWithoutText(row.metadata),
          embedding: row.values,
        }),
      );
      // `??` keeps a legitimate score of 0; 999 is only used when the score
      // is missing altogether.
      similarities.push(match.score ?? 999);
      ids.push(match.id);
    }

    return { nodes, similarities, ids };
  }

  /**
   * Required by VectorStore interface. Currently ignored.
   * @param persistPath
   * @param fs
   * @returns Resolved Promise.
   */
  persist(
    persistPath: string,
    fs?: GenericFileSystem | undefined,
  ): Promise<void> {
    return Promise.resolve();
  }

  /**
   * Flattens exact-match filters into a `{ key: value }` object.
   * @param stdFilters Filters from the query, if any.
   * @returns The flattened object, `{}` for an empty filter list, or
   *   undefined when no filters were supplied.
   */
  toPineconeFilter(stdFilters?: MetadataFilters) {
    return stdFilters?.filters?.reduce((carry: any, item: ExactMatchFilter) => {
      carry[item.key] = item.value;
      return carry;
    }, {});
  }

  /**
   * Extracts the stored node text from a record's metadata.
   * @returns The `text` metadata value, or "" when absent.
   */
  textFromResultRow(row: ScoredPineconeRecord<Metadata>): string {
    return row.metadata?.text ?? "";
  }

  /**
   * Returns a copy of the metadata without the `text` key.
   * @param meta Record metadata; may be missing on records stored without any.
   * @returns A new object; `{}` when `meta` is null or undefined.
   */
  metaWithoutText(meta?: Metadata): any {
    const source: Record<string, any> = meta ?? {};
    return Object.keys(source)
      .filter((key) => key !== "text")
      .reduce((acc: any, key: string) => {
        acc[key] = source[key];
        return acc;
      }, {});
  }

  /**
   * Converts a node into the record shape passed to upsert.
   * The node's own metadata object is copied, not modified; the copy gains
   * `create_date` (current time) and `text` (the node's EMBED-mode content).
   * @param node Node to convert. An empty `id_` yields a null record id.
   * @returns `{ id, values, metadata }` for the node.
   */
  nodeToRecord(node: BaseNode<Metadata>) {
    const id: any = node.id_.length ? node.id_ : null;
    // Shallow copy so the caller's node.metadata is left untouched.
    const meta: any = { ...node.metadata };
    meta.create_date = new Date();
    meta.text = node.getContent(MetadataMode.EMBED);
    return {
      id: id,
      values: node.getEmbedding(),
      metadata: meta,
    };
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment