Skip to content
Snippets Groups Projects
Unverified Commit 11ae9267 authored by Philipp Serrer's avatar Philipp Serrer Committed by GitHub
Browse files

feat: add numCandidates setting to MongoDBAtlasVectorStore for tuning queries (#893)

parent 174cb3e6
No related branches found
No related tags found
No related merge requests found
---
"llamaindex": patch
---
feat: add numCandidates setting to MongoDBAtlasVectorStore for tuning queries
......@@ -24,7 +24,10 @@ function toMongoDBFilter(
return filters;
}
// MongoDB Atlas Vector Store class implementing VectorStore
/**
* Vector store that uses MongoDB Atlas for storage and vector search.
* This store uses the $vectorSearch aggregation stage to perform vector similarity search.
*/
export class MongoDBAtlasVectorSearch
extends VectorStoreBase
implements VectorStoreNoEmbedModel
......@@ -32,13 +35,63 @@ export class MongoDBAtlasVectorSearch
storesText: boolean = true;
flatMetadata: boolean = true;
/**
* The MongoClient to use. If not given, a new MongoClient is created based on the MONGODB_URI env variable.
*/
mongodbClient: MongoClient;
/**
* Name of the vector search index. If the name does not match an existing index, MongoDB does not raise an error; the query silently returns 0 results.
*
* Default: "default"
*/
indexName: string;
/**
* Name of the key containing the embedding vector.
*
* Default: "embedding"
*/
embeddingKey: string;
/**
* Name of the key containing the node id.
*
* Default: "id"
*/
idKey: string;
/**
* Name of the key containing the node text.
*
* Default: "text"
*/
textKey: string;
/**
* Name of the key containing the node metadata.
*
* Default: "metadata"
*/
metadataKey: string;
/**
* Options to pass to the insertMany function when adding nodes.
*/
insertOptions?: BulkWriteOptions;
/**
* Function to determine the number of candidates to retrieve for a given query.
* If the search results are poor, consider tuning this value: a larger number of candidates generally improves recall at the cost of query latency.
*
* {@link https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/|Run Vector Search Queries}
*
* {@link https://arxiv.org/abs/1603.09320|Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs}
*
*
* Default: query.similarityTopK * 10
*/
numCandidates: (query: VectorStoreQuery) => number;
private collection: Collection;
constructor(
......@@ -69,9 +122,17 @@ export class MongoDBAtlasVectorSearch
this.idKey = init.idKey ?? "id";
this.textKey = init.textKey ?? "text";
this.metadataKey = init.metadataKey ?? "metadata";
this.numCandidates =
init.numCandidates ?? ((query) => query.similarityTopK * 10);
this.insertOptions = init.insertOptions;
}
/**
* Add nodes to the vector store.
*
* @param nodes Nodes to add to the vector store
* @returns List of node ids that were added
*/
async add(nodes: BaseNode[]): Promise<string[]> {
if (!nodes || nodes.length === 0) {
return [];
......@@ -101,6 +162,12 @@ export class MongoDBAtlasVectorSearch
return nodes.map((node) => node.id_);
}
/**
* Delete nodes from the vector store with the given refDocId.
*
* @param refDocId The refDocId of the nodes to delete
* @param deleteOptions Options to pass to the deleteMany function
*/
async delete(refDocId: string, deleteOptions?: any): Promise<void> {
await this.collection.deleteMany(
{
......@@ -114,6 +181,12 @@ export class MongoDBAtlasVectorSearch
return this.mongodbClient;
}
/**
* Perform a vector similarity search query.
*
* @param query The query to run
* @returns List of nodes and their similarities
*/
async query(
query: VectorStoreQuery,
options?: any,
......@@ -121,7 +194,7 @@ export class MongoDBAtlasVectorSearch
const params: any = {
queryVector: query.queryEmbedding,
path: this.embeddingKey,
numCandidates: query.similarityTopK * 10,
numCandidates: this.numCandidates(query),
limit: query.similarityTopK,
index: this.indexName,
};
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment