From 38a8be8d124e25a4c52a31a8c8b5cf8d4bc2bc09 Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Thu, 12 Sep 2024 11:34:54 +0700
Subject: [PATCH] fix: filter in mongo vector store (#269)

---
 .changeset/five-grapes-switch.md                  |  5 +++++
 .../vectordbs/typescript/mongo/generate.ts        | 15 +++++++++------
 .../vectordbs/typescript/mongo/index.ts           | 13 ++++++++++---
 .../vectordbs/typescript/mongo/shared.ts          |  2 ++
 templates/types/streaming/express/package.json    |  2 +-
 templates/types/streaming/nextjs/package.json     |  2 +-
 6 files changed, 28 insertions(+), 11 deletions(-)
 create mode 100644 .changeset/five-grapes-switch.md

diff --git a/.changeset/five-grapes-switch.md b/.changeset/five-grapes-switch.md
new file mode 100644
index 00000000..9b049b9d
--- /dev/null
+++ b/.changeset/five-grapes-switch.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+fix: metadata filtering in the MongoDB vector store
diff --git a/templates/components/vectordbs/typescript/mongo/generate.ts b/templates/components/vectordbs/typescript/mongo/generate.ts
index 0f6f2258..73ff8592 100644
--- a/templates/components/vectordbs/typescript/mongo/generate.ts
+++ b/templates/components/vectordbs/typescript/mongo/generate.ts
@@ -1,14 +1,11 @@
 /* eslint-disable turbo/no-undeclared-env-vars */
 import * as dotenv from "dotenv";
-import {
-  MongoDBAtlasVectorSearch,
-  VectorStoreIndex,
-  storageContextFromDefaults,
-} from "llamaindex";
+import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
+import { MongoDBAtlasVectorSearch } from "llamaindex/storage/vectorStore/MongoDBAtlasVectorStore";
 import { MongoClient } from "mongodb";
 import { getDocuments } from "./loader";
 import { initSettings } from "./settings";
-import { checkRequiredEnvVars } from "./shared";
+import { checkRequiredEnvVars, POPULATED_METADATA_FIELDS } from "./shared";
 
 dotenv.config();
 
@@ -30,6 +27,12 @@ async function loadAndIndex() {
     dbName: databaseName,
     collectionName: vectorCollectionName, // this is where your embeddings will be stored
     indexName: indexName, // this is the name of the index you will need to create
+    indexedMetadataFields: POPULATED_METADATA_FIELDS,
+    embeddingDefinition: {
+      dimensions: process.env.EMBEDDING_DIM
+        ? parseInt(process.env.EMBEDDING_DIM)
+        : 1536,
+    },
   });
 
   // now create an index from all the Documents and store them in Atlas
diff --git a/templates/components/vectordbs/typescript/mongo/index.ts b/templates/components/vectordbs/typescript/mongo/index.ts
index 5aabb131..75c20fb6 100644
--- a/templates/components/vectordbs/typescript/mongo/index.ts
+++ b/templates/components/vectordbs/typescript/mongo/index.ts
@@ -1,16 +1,23 @@
 /* eslint-disable turbo/no-undeclared-env-vars */
-import { MongoDBAtlasVectorSearch, VectorStoreIndex } from "llamaindex";
+import { VectorStoreIndex } from "llamaindex";
+import { MongoDBAtlasVectorSearch } from "llamaindex/storage/vectorStore/MongoDBAtlasVectorStore";
 import { MongoClient } from "mongodb";
-import { checkRequiredEnvVars } from "./shared";
+import { checkRequiredEnvVars, POPULATED_METADATA_FIELDS } from "./shared";
 
 export async function getDataSource(params?: any) {
   checkRequiredEnvVars();
-  const client = new MongoClient(process.env.MONGO_URI!);
+  const client = new MongoClient(process.env.MONGODB_URI!);
   const store = new MongoDBAtlasVectorSearch({
     mongodbClient: client,
     dbName: process.env.MONGODB_DATABASE!,
     collectionName: process.env.MONGODB_VECTORS!,
     indexName: process.env.MONGODB_VECTOR_INDEX,
+    indexedMetadataFields: POPULATED_METADATA_FIELDS,
+    embeddingDefinition: {
+      dimensions: process.env.EMBEDDING_DIM
+        ? parseInt(process.env.EMBEDDING_DIM)
+        : 1536,
+    },
   });
 
   return await VectorStoreIndex.fromVectorStore(store);
diff --git a/templates/components/vectordbs/typescript/mongo/shared.ts b/templates/components/vectordbs/typescript/mongo/shared.ts
index c6b5f303..5b046f91 100644
--- a/templates/components/vectordbs/typescript/mongo/shared.ts
+++ b/templates/components/vectordbs/typescript/mongo/shared.ts
@@ -5,6 +5,8 @@ const REQUIRED_ENV_VARS = [
   "MONGODB_VECTOR_INDEX",
 ];
 
+export const POPULATED_METADATA_FIELDS = ["private", "doc_id"]; // for filtering in MongoDB VectorSearchIndex
+
 export function checkRequiredEnvVars() {
   const missingEnvVars = REQUIRED_ENV_VARS.filter((envVar) => {
     return !process.env[envVar];
diff --git a/templates/types/streaming/express/package.json b/templates/types/streaming/express/package.json
index 21e9edc9..569a6986 100644
--- a/templates/types/streaming/express/package.json
+++ b/templates/types/streaming/express/package.json
@@ -20,7 +20,7 @@
     "dotenv": "^16.3.1",
     "duck-duck-scrape": "^2.2.5",
     "express": "^4.18.2",
-    "llamaindex": "0.5.20",
+    "llamaindex": "0.5.24",
     "pdf2json": "3.0.5",
     "ajv": "^8.12.0",
     "@e2b/code-interpreter": "^0.0.5",
diff --git a/templates/types/streaming/nextjs/package.json b/templates/types/streaming/nextjs/package.json
index a5e1feed..8d81b32d 100644
--- a/templates/types/streaming/nextjs/package.json
+++ b/templates/types/streaming/nextjs/package.json
@@ -25,7 +25,7 @@
     "duck-duck-scrape": "^2.2.5",
     "formdata-node": "^6.0.3",
     "got": "^14.4.1",
-    "llamaindex": "0.5.20",
+    "llamaindex": "0.5.24",
     "lucide-react": "^0.294.0",
     "next": "^14.2.4",
     "react": "^18.2.0",
-- 
GitLab