diff --git a/.changeset/funny-ants-do.md b/.changeset/funny-ants-do.md new file mode 100644 index 0000000000000000000000000000000000000000..bd1977e7c7169941f7371148f7c877b0fe85d059 --- /dev/null +++ b/.changeset/funny-ants-do.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +Auto-create index for MongoDB vector store (if not exists) diff --git a/examples/mongodb/2_load_and_index.ts b/examples/mongodb/2_load_and_index.ts index cb71340811e5915be9f523c5c15f569d96125f78..d20d0279de214596b66ac7d5e28bc616281114e7 100644 --- a/examples/mongodb/2_load_and_index.ts +++ b/examples/mongodb/2_load_and_index.ts @@ -45,39 +45,6 @@ async function loadAndIndex() { await client.close(); } -/** - * This method is document in https://www.mongodb.com/docs/atlas/atlas-search/create-index/#create-an-fts-index-programmatically - * But, while testing a 'CommandNotFound' error occurred, so we're not using this here. - */ -async function createSearchIndex() { - const client = new MongoClient(mongoUri); - const database = client.db(databaseName); - const collection = database.collection(vectorCollectionName); - - // define your Atlas Search index - const index = { - name: indexName, - definition: { - /* search index definition fields */ - mappings: { - dynamic: true, - fields: [ - { - type: "vector", - path: "embedding", - numDimensions: 1536, - similarity: "cosine", - }, - ], - }, - }, - }; - // run the helper method - const result = await collection.createSearchIndex(index); - console.log("Successfully created search index:", result); - await client.close(); -} - loadAndIndex().catch(console.error); // you can't query your index yet because you need to create a vector search index in mongodb's UI now diff --git a/examples/mongodb/3_query.ts b/examples/mongodb/3_query.ts index d8158dc496796f98e35a06004d81b3421183a009..1064b0036a8d9f3a68592db639f76996b5a57288 100644 --- a/examples/mongodb/3_query.ts +++ b/examples/mongodb/3_query.ts @@ -21,7 +21,7 @@ async function query() { const 
retriever = index.asRetriever({ similarityTopK: 20 }); const queryEngine = index.asQueryEngine({ retriever }); const result = await queryEngine.query({ - query: "What does the author think of web frameworks?", + query: "What does author receive when he was 11 years old?", // Isaac Asimov's "Foundation" for Christmas }); console.log(result.response); await client.close(); diff --git a/examples/mongodb/README.md b/examples/mongodb/README.md index 1a478df99f8b765adbdd271458da4c0365eb1333..7498ee27525632a1a0c7593bb5f36f5143bc61a6 100644 --- a/examples/mongodb/README.md +++ b/examples/mongodb/README.md @@ -68,45 +68,6 @@ What you're doing here is creating a Reader which loads the data out of Mongo in Now you're creating a vector search client for Mongo. In addition to a MongoDB client object, you again tell it what database everything is in. This time you give it the name of the collection where you'll store the vector embeddings, and the name of the vector search index you'll create in the next step. -### Create a vector search index - -Now if all has gone well you should be able to log in to the Mongo Atlas UI and see two collections in your database: the original data in `tiny_tweets_collection`, and the vector embeddings in `tiny_tweets_vectors`. - - - -Now it's time to create the vector search index so that you can query the data. -It's not yet possible to programmatically create a vector search index using the [`createIndex`](https://www.mongodb.com/docs/manual/reference/method/db.collection.createIndex/) function, therefore we have to create one manually in the UI. -To do so, first, click the 'Atlas Search' tab, and then click "Create Search Index": - - - -We have to use the JSON editor, as the Visual Editor does not yet support to create a vector search index: - - - -Now under "database and collection" select `tiny_tweets_db` and within that select `tiny_tweets_vectors`. 
Then under "Index name" enter `tiny_tweets_vector_index` (or whatever value you put for MONGODB_VECTOR_INDEX in `.env`). Under that, you'll want to enter this JSON object: - -```json -{ - "fields": [ - { - "type": "vector", - "path": "embedding", - "numDimensions": 1536, - "similarity": "cosine" - } - ] -} -``` - -This tells Mongo that the `embedding` field in each document (in the `tiny_tweets_vectors` collection) is a vector of 1536 dimensions (this is the size of embeddings used by OpenAI), and that we want to use cosine similarity to compare vectors. You don't need to worry too much about these values unless you want to use a different LLM to OpenAI entirely. - -The UI will ask you to review and confirm your choices, then you need to wait a minute or two while it generates the index. If all goes well, you should see something like this screen: - - - -Now you're ready to query your data! - ### Run a test query You can do this by running diff --git a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts index 241e202f46efd63702c452d8f6e5546ae43c846f..06005660a463b91916843e6cd3adac970e78db27 100644 --- a/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MongoDBAtlasVectorStore.ts @@ -35,6 +35,10 @@ export class MongoDBAtlasVectorSearch storesText: boolean = true; flatMetadata: boolean = true; + dbName: string; + collectionName: string; + autoCreateIndex: boolean; + /** * The used MongoClient. If not given, a new MongoClient is created based on the MONGODB_URI env variable. */ @@ -92,13 +96,28 @@ export class MongoDBAtlasVectorSearch * Default: query.similarityTopK * 10 */ numCandidates: (query: VectorStoreQuery) => number; - private collection: Collection; + private collection?: Collection; + + // define your Atlas Search index. 
See detail https://www.mongodb.com/docs/atlas/atlas-search/field-types/knn-vector/
+  readonly SEARCH_INDEX_DEFINITION = {
+    mappings: {
+      dynamic: true,
+      fields: {
+        embedding: {
+          type: "knnVector",
+          dimensions: 1536,
+          similarity: "cosine",
+        },
+      },
+    },
+  };
 
   constructor(
     init: Partial<MongoDBAtlasVectorSearch> & {
       dbName: string;
       collectionName: string;
       embedModel?: BaseEmbedding;
+      autoCreateIndex?: boolean;
     },
   ) {
     super(init.embedModel);
@@ -114,9 +133,9 @@ export class MongoDBAtlasVectorSearch
       this.mongodbClient = new MongoClient(mongoUri);
     }
 
-    this.collection = this.mongodbClient
-      .db(init.dbName ?? "default_db")
-      .collection(init.collectionName ?? "default_collection");
+    this.dbName = init.dbName ?? "default_db";
+    this.collectionName = init.collectionName ?? "default_collection";
+    this.autoCreateIndex = init.autoCreateIndex ?? true;
     this.indexName = init.indexName ?? "default";
     this.embeddingKey = init.embeddingKey ?? "embedding";
     this.idKey = init.idKey ?? "id";
@@ -127,6 +146,55 @@ export class MongoDBAtlasVectorSearch
     this.insertOptions = init.insertOptions;
   }
 
+  // Set once the search index has been verified or created by this instance,
+  // so the check is not repeated on every add/delete/query call.
+  private searchIndexEnsured = false;
+
+  /**
+   * Lazily resolves the collection backing this store and, when
+   * `autoCreateIndex` is enabled, makes sure a search index named `indexName`
+   * exists on it (creating it from `SEARCH_INDEX_DEFINITION` if missing).
+   *
+   * The collection is created only when the database does not already list
+   * one with that name: calling `createCollection` unconditionally can be
+   * rejected with `NamespaceExists` on servers whose `create` command is not
+   * idempotent.
+   *
+   * @returns The collection used for all reads and writes of this store.
+   */
+  async ensureCollection(): Promise<Collection> {
+    if (!this.collection) {
+      const db = this.mongodbClient.db(this.dbName);
+      const existing = await db
+        .listCollections({ name: this.collectionName }, { nameOnly: true })
+        .toArray();
+      this.collection =
+        existing.length > 0
+          ? db.collection(this.collectionName)
+          : await db.createCollection(this.collectionName);
+    }
+    const collection = this.collection;
+
+    if (this.autoCreateIndex && !this.searchIndexEnsured) {
+      const searchIndexes = await collection.listSearchIndexes().toArray();
+      const indexExists = searchIndexes.some(
+        (index) => index.name === this.indexName,
+      );
+      if (!indexExists) {
+        // NOTE(review): the definition indexes the literal "embedding" path;
+        // confirm that is right for stores with a custom `embeddingKey`.
+        await collection.createSearchIndex({
+          name: this.indexName,
+          definition: this.SEARCH_INDEX_DEFINITION,
+        });
+        console.log("Created search index: ", this.indexName);
+      }
+      this.searchIndexEnsured = true;
+    }
+
+    return collection;
+  }
+
   /**
    * Add nodes to the vector store.
* @@ -154,7 +199,8 @@ export class MongoDBAtlasVectorSearch }); console.debug("Inserting data into MongoDB: ", dataToInsert); - const insertResult = await this.collection.insertMany( + const collection = await this.ensureCollection(); + const insertResult = await collection.insertMany( dataToInsert, this.insertOptions, ); @@ -169,7 +215,8 @@ export class MongoDBAtlasVectorSearch * @param deleteOptions Options to pass to the deleteOne function */ async delete(refDocId: string, deleteOptions?: any): Promise<void> { - await this.collection.deleteMany( + const collection = await this.ensureCollection(); + await collection.deleteMany( { [`${this.metadataKey}.ref_doc_id`]: refDocId, }, @@ -215,7 +262,8 @@ export class MongoDBAtlasVectorSearch ]; console.debug("Running query pipeline: ", pipeline); - const cursor = await this.collection.aggregate(pipeline); + const collection = await this.ensureCollection(); + const cursor = await collection.aggregate(pipeline); const nodes: BaseNode[] = []; const ids: string[] = [];