Skip to content
Snippets Groups Projects
Unverified Commit 2411c9fb authored by Thuc Pham's avatar Thuc Pham Committed by GitHub
Browse files

feat: Auto-create index for MongoDB vector store (if not exists) (#1139)

parent be3e280f
No related branches found
No related tags found
No related merge requests found
---
"llamaindex": patch
---
Auto-create index for MongoDB vector store (if not exists)
......@@ -45,39 +45,6 @@ async function loadAndIndex() {
await client.close();
}
/**
 * Creates the Atlas Search index named `indexName` on the vector collection.
 *
 * This approach is documented at https://www.mongodb.com/docs/atlas/atlas-search/create-index/#create-an-fts-index-programmatically
 * However, a 'CommandNotFound' error occurred while testing, so this function is not used here.
 *
 * The client opened by this function is always closed, even when index
 * creation fails, so a rejected call does not leak the connection.
 */
async function createSearchIndex() {
  const client = new MongoClient(mongoUri);
  try {
    const collection = client.db(databaseName).collection(vectorCollectionName);

    // define your Atlas Search index
    const index = {
      name: indexName,
      definition: {
        /* search index definition fields */
        mappings: {
          dynamic: true,
          fields: [
            {
              type: "vector",
              path: "embedding",
              numDimensions: 1536,
              similarity: "cosine",
            },
          ],
        },
      },
    };

    // run the helper method
    const result = await collection.createSearchIndex(index);
    console.log("Successfully created search index:", result);
  } finally {
    // release the connection whether or not the index was created
    await client.close();
  }
}
loadAndIndex().catch(console.error);
// you can't query your index yet because you need to create a vector search index in mongodb's UI now
......@@ -21,7 +21,7 @@ async function query() {
const retriever = index.asRetriever({ similarityTopK: 20 });
const queryEngine = index.asQueryEngine({ retriever });
const result = await queryEngine.query({
query: "What does the author think of web frameworks?",
query: "What does author receive when he was 11 years old?", // Isaac Asimov's "Foundation" for Christmas
});
console.log(result.response);
await client.close();
......
......@@ -68,45 +68,6 @@ What you're doing here is creating a Reader which loads the data out of Mongo in
Now you're creating a vector search client for Mongo. In addition to a MongoDB client object, you again tell it what database everything is in. This time you give it the name of the collection where you'll store the vector embeddings, and the name of the vector search index you'll create in the next step.
### Create a vector search index
Now if all has gone well you should be able to log in to the Mongo Atlas UI and see two collections in your database: the original data in `tiny_tweets_collection`, and the vector embeddings in `tiny_tweets_vectors`.
![MongoDB Atlas collections](./docs/3_vectors_in_db.png)
Now it's time to create the vector search index so that you can query the data.
It's not yet possible to programmatically create a vector search index using the [`createIndex`](https://www.mongodb.com/docs/manual/reference/method/db.collection.createIndex/) function, therefore we have to create one manually in the UI.
To do so, first, click the 'Atlas Search' tab, and then click "Create Search Index":
![MongoDB Atlas create search index](./docs/4_search_tab.png)
We have to use the JSON editor, as the Visual Editor does not yet support creating a vector search index:
![MongoDB Atlas JSON editor](./docs/5_json_editor.png)
Now under "database and collection" select `tiny_tweets_db` and within that select `tiny_tweets_vectors`. Then under "Index name" enter `tiny_tweets_vector_index` (or whatever value you put for MONGODB_VECTOR_INDEX in `.env`). Under that, you'll want to enter this JSON object:
```json
{
"fields": [
{
"type": "vector",
"path": "embedding",
"numDimensions": 1536,
"similarity": "cosine"
}
]
}
```
This tells Mongo that the `embedding` field in each document (in the `tiny_tweets_vectors` collection) is a vector of 1536 dimensions (this is the size of embeddings used by OpenAI), and that we want to use cosine similarity to compare vectors. You don't need to worry too much about these values unless you want to use an embedding model other than OpenAI's.
The UI will ask you to review and confirm your choices, then you need to wait a minute or two while it generates the index. If all goes well, you should see something like this screen:
![MongoDB Atlas index created](./docs/7_index_created.png)
Now you're ready to query your data!
### Run a test query
You can do this by running
......
......@@ -35,6 +35,10 @@ export class MongoDBAtlasVectorSearch
storesText: boolean = true;
flatMetadata: boolean = true;
dbName: string;
collectionName: string;
autoCreateIndex: boolean;
/**
* The used MongoClient. If not given, a new MongoClient is created based on the MONGODB_URI env variable.
*/
......@@ -92,13 +96,28 @@ export class MongoDBAtlasVectorSearch
* Default: query.similarityTopK * 10
*/
numCandidates: (query: VectorStoreQuery) => number;
private collection: Collection;
private collection?: Collection;
/**
 * Atlas Search index definition passed to `createSearchIndex` by
 * `ensureCollection` when the index is created automatically.
 *
 * It maps the `embedding` field as a `knnVector` with 1536 dimensions and
 * cosine similarity, and leaves dynamic mapping enabled for other fields.
 * NOTE(review): the field name and dimension count are fixed here; they are
 * not derived from `embeddingKey` or the configured embedding model, so
 * confirm they match your setup.
 *
 * See https://www.mongodb.com/docs/atlas/atlas-search/field-types/knn-vector/
 */
readonly SEARCH_INDEX_DEFINITION = {
mappings: {
dynamic: true,
fields: {
embedding: {
type: "knnVector",
dimensions: 1536,
similarity: "cosine",
},
},
},
};
constructor(
init: Partial<MongoDBAtlasVectorSearch> & {
dbName: string;
collectionName: string;
embedModel?: BaseEmbedding;
autoCreateIndex?: boolean;
},
) {
super(init.embedModel);
......@@ -114,9 +133,9 @@ export class MongoDBAtlasVectorSearch
this.mongodbClient = new MongoClient(mongoUri);
}
this.collection = this.mongodbClient
.db(init.dbName ?? "default_db")
.collection(init.collectionName ?? "default_collection");
this.dbName = init.dbName ?? "default_db";
this.collectionName = init.collectionName ?? "default_collection";
this.autoCreateIndex = init.autoCreateIndex ?? true;
this.indexName = init.indexName ?? "default";
this.embeddingKey = init.embeddingKey ?? "embedding";
this.idKey = init.idKey ?? "id";
......@@ -127,6 +146,32 @@ export class MongoDBAtlasVectorSearch
this.insertOptions = init.insertOptions;
}
/**
 * Resolves the collection backing this vector store and, when
 * `autoCreateIndex` is enabled, makes sure the search index exists.
 *
 * On first use the collection is looked up by name; it is created only when
 * it does not exist yet. Calling `createCollection` on an existing collection
 * is rejected by the server, which would break every run after the first one.
 *
 * When `autoCreateIndex` is true, the existing search indexes are listed and
 * an index named `indexName` is created from `SEARCH_INDEX_DEFINITION` if no
 * index with that name is present.
 *
 * @returns The collection to use for inserts, deletes and queries.
 */
async ensureCollection(): Promise<Collection> {
  if (!this.collection) {
    const db = this.mongodbClient.db(this.dbName);
    // Only the name is needed to decide between reuse and creation.
    const existing = await db
      .listCollections({ name: this.collectionName }, { nameOnly: true })
      .toArray();
    this.collection =
      existing.length > 0
        ? db.collection(this.collectionName)
        : await db.createCollection(this.collectionName);
  }
  const collection = this.collection;

  if (this.autoCreateIndex) {
    const searchIndexes = await collection.listSearchIndexes().toArray();
    const indexExists = searchIndexes.some(
      (index) => index.name === this.indexName,
    );
    if (!indexExists) {
      await collection.createSearchIndex({
        name: this.indexName,
        definition: this.SEARCH_INDEX_DEFINITION,
      });
      console.log("Created search index: ", this.indexName);
    }
  }

  return collection;
}
/**
* Add nodes to the vector store.
*
......@@ -154,7 +199,8 @@ export class MongoDBAtlasVectorSearch
});
console.debug("Inserting data into MongoDB: ", dataToInsert);
const insertResult = await this.collection.insertMany(
const collection = await this.ensureCollection();
const insertResult = await collection.insertMany(
dataToInsert,
this.insertOptions,
);
......@@ -169,7 +215,8 @@ export class MongoDBAtlasVectorSearch
* @param deleteOptions Options to pass to the deleteMany function
*/
async delete(refDocId: string, deleteOptions?: any): Promise<void> {
await this.collection.deleteMany(
const collection = await this.ensureCollection();
await collection.deleteMany(
{
[`${this.metadataKey}.ref_doc_id`]: refDocId,
},
......@@ -215,7 +262,8 @@ export class MongoDBAtlasVectorSearch
];
console.debug("Running query pipeline: ", pipeline);
const cursor = await this.collection.aggregate(pipeline);
const collection = await this.ensureCollection();
const cursor = await collection.aggregate(pipeline);
const nodes: BaseNode[] = [];
const ids: string[] = [];
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment