diff --git a/.changeset/angry-eagles-dress.md b/.changeset/angry-eagles-dress.md new file mode 100644 index 0000000000000000000000000000000000000000..cb24c3faa2bd917f6e43306761c8c13bb3c6a6f1 --- /dev/null +++ b/.changeset/angry-eagles-dress.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +feat: add numCandidates setting to MongoDBAtlasVectorStore for tuning queries diff --git a/packages/core/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/core/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index 3ba30595c24171be18cbd74fbac80c8a3f2215ed..88401cab42be32cb41d44a19817bd107fff335ed 100644 --- a/packages/core/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/core/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -24,7 +24,10 @@ function toMongoDBFilter( return filters; } -// MongoDB Atlas Vector Store class implementing VectorStore +/** + * Vector store that uses MongoDB Atlas for storage and vector search. + * This store uses the $vectorSearch aggregation stage to perform vector similarity search. + */ export class MongoDBAtlasVectorSearch extends VectorStoreBase implements VectorStoreNoEmbedModel @@ -32,13 +35,63 @@ export class MongoDBAtlasVectorSearch storesText: boolean = true; flatMetadata: boolean = true; + /** + * The used MongoClient. If not given, a new MongoClient is created based on the MONGODB_URI env variable. + */ mongodbClient: MongoClient; + + /** + * Name of the vector index. If invalid, Mongo will silently ignore this issue and return 0 results. + * + * Default: "default" + */ indexName: string; + + /** + * Name of the key containing the embedding vector. + * + * Default: "embedding" + */ embeddingKey: string; + + /** + * Name of the key containing the node id. + * + * Default: "id" + */ idKey: string; + + /** + * Name of the key containing the node text. + * + * Default: "text" + */ textKey: string; + + /** + * Name of the key containing the node metadata. 
+ * + * Default: "metadata" + */ metadataKey: string; + + /** + * Options to pass to the insertMany function when adding nodes. + */ insertOptions?: BulkWriteOptions; + + /** + * Function to determine the number of candidates to retrieve for a given query. + * In case your results are not good, you might tune this value. + * + * {@link https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/|Run Vector Search Queries} + * + * {@link https://arxiv.org/abs/1603.09320|Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs} + * + * + * Default: query.similarityTopK * 10 + */ + numCandidates: (query: VectorStoreQuery) => number; private collection: Collection; constructor( @@ -69,9 +122,17 @@ export class MongoDBAtlasVectorSearch this.idKey = init.idKey ?? "id"; this.textKey = init.textKey ?? "text"; this.metadataKey = init.metadataKey ?? "metadata"; + this.numCandidates = + init.numCandidates ?? ((query) => query.similarityTopK * 10); this.insertOptions = init.insertOptions; } + /** + * Add nodes to the vector store. + * + * @param nodes Nodes to add to the vector store + * @returns List of node ids that were added + */ async add(nodes: BaseNode[]): Promise<string[]> { if (!nodes || nodes.length === 0) { return []; @@ -101,6 +162,12 @@ export class MongoDBAtlasVectorSearch return nodes.map((node) => node.id_); } + /** + * Delete nodes from the vector store with the given refDocId. + * + * @param refDocId The refDocId of the nodes to delete + * @param deleteOptions Options to pass to the deleteMany function + */ async delete(refDocId: string, deleteOptions?: any): Promise<void> { await this.collection.deleteMany( { @@ -114,6 +181,12 @@ export class MongoDBAtlasVectorSearch return this.mongodbClient; } + /** + * Perform a vector similarity search query. 
+ * + * @param query The query to run + * @returns List of nodes and their similarities + */ async query( query: VectorStoreQuery, options?: any, @@ -121,7 +194,7 @@ export class MongoDBAtlasVectorSearch const params: any = { queryVector: query.queryEmbedding, path: this.embeddingKey, - numCandidates: query.similarityTopK * 10, + numCandidates: this.numCandidates(query), limit: query.similarityTopK, index: this.indexName, };