From 11ae9267ae40921e083186d949a4b1c721796bce Mon Sep 17 00:00:00 2001
From: Philipp Serrer <72248290+pserrer1@users.noreply.github.com>
Date: Tue, 4 Jun 2024 11:11:54 +0200
Subject: [PATCH] feat: add numCandidates setting to MongoDBAtlasVectorStore
 for tuning queries (#893)

Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
---
 .changeset/angry-eagles-dress.md              |  5 ++
 .../vectorStore/MongoDBAtlasVectorStore.ts    | 77 ++++++++++++++++++-
 2 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100644 .changeset/angry-eagles-dress.md

diff --git a/.changeset/angry-eagles-dress.md b/.changeset/angry-eagles-dress.md
new file mode 100644
index 000000000..cb24c3faa
--- /dev/null
+++ b/.changeset/angry-eagles-dress.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+feat: add numCandidates setting to MongoDBAtlasVectorStore for tuning queries
diff --git a/packages/core/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/core/src/storage/vectorStore/MongoDBAtlasVectorStore.ts
index 3ba30595c..88401cab4 100644
--- a/packages/core/src/storage/vectorStore/MongoDBAtlasVectorStore.ts
+++ b/packages/core/src/storage/vectorStore/MongoDBAtlasVectorStore.ts
@@ -24,7 +24,10 @@ function toMongoDBFilter(
   return filters;
 }
 
-// MongoDB Atlas Vector Store class implementing VectorStore
+/**
+ * Vector store that uses MongoDB Atlas for storage and vector search.
+ * This store uses the $vectorSearch aggregation stage to perform vector similarity search.
+ */
 export class MongoDBAtlasVectorSearch
   extends VectorStoreBase
   implements VectorStoreNoEmbedModel
@@ -32,13 +35,63 @@ export class MongoDBAtlasVectorSearch
   storesText: boolean = true;
   flatMetadata: boolean = true;
 
+  /**
+   * The MongoClient to use. If not given, a new MongoClient is created based on the MONGODB_URI env variable.
+   */
   mongodbClient: MongoClient;
+
+  /**
+   * Name of the vector index. If invalid, Mongo will silently ignore this issue and return 0 results.
+   *
+   * Default: "default"
+   */
   indexName: string;
+
+  /**
+   * Name of the key containing the embedding vector.
+   *
+   * Default: "embedding"
+   */
   embeddingKey: string;
+
+  /**
+   * Name of the key containing the node id.
+   *
+   * Default: "id"
+   */
   idKey: string;
+
+  /**
+   * Name of the key containing the node text.
+   *
+   * Default: "text"
+   */
   textKey: string;
+
+  /**
+   * Name of the key containing the node metadata.
+   *
+   * Default: "metadata"
+   */
   metadataKey: string;
+
+  /**
+   * Options to pass to the insertMany function when adding nodes.
+   */
   insertOptions?: BulkWriteOptions;
+
+  /**
+   * Function to determine the number of candidates to retrieve for a given query.
+   * If the quality of your search results is poor, try tuning this value.
+   *
+   * {@link https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/|Run Vector Search Queries}
+   *
+   * {@link https://arxiv.org/abs/1603.09320|Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs}
+   *
+   *
+   * Default: query.similarityTopK * 10
+   */
+  numCandidates: (query: VectorStoreQuery) => number;
   private collection: Collection;
 
   constructor(
@@ -69,9 +122,17 @@ export class MongoDBAtlasVectorSearch
     this.idKey = init.idKey ?? "id";
     this.textKey = init.textKey ?? "text";
     this.metadataKey = init.metadataKey ?? "metadata";
+    this.numCandidates =
+      init.numCandidates ?? ((query) => query.similarityTopK * 10);
     this.insertOptions = init.insertOptions;
   }
 
+  /**
+   * Add nodes to the vector store.
+   *
+   * @param nodes Nodes to add to the vector store
+   * @returns List of node ids that were added
+   */
   async add(nodes: BaseNode[]): Promise<string[]> {
     if (!nodes || nodes.length === 0) {
       return [];
@@ -101,6 +162,12 @@ export class MongoDBAtlasVectorSearch
     return nodes.map((node) => node.id_);
   }
 
+  /**
+   * Delete nodes from the vector store with the given refDocId.
+   *
+   * @param refDocId The refDocId of the nodes to delete
+   * @param deleteOptions Options to pass to the deleteMany function
+   */
   async delete(refDocId: string, deleteOptions?: any): Promise<void> {
     await this.collection.deleteMany(
       {
@@ -114,6 +181,12 @@ export class MongoDBAtlasVectorSearch
     return this.mongodbClient;
   }
 
+  /**
+   * Perform a vector similarity search query.
+   *
+   * @param query The query to run
+   * @returns List of nodes and their similarities
+   */
   async query(
     query: VectorStoreQuery,
     options?: any,
@@ -121,7 +194,7 @@ export class MongoDBAtlasVectorSearch
     const params: any = {
       queryVector: query.queryEmbedding,
       path: this.embeddingKey,
-      numCandidates: query.similarityTopK * 10,
+      numCandidates: this.numCandidates(query),
       limit: query.similarityTopK,
       index: this.indexName,
     };
-- 
GitLab