diff --git a/.changeset/chatty-parents-smell.md b/.changeset/chatty-parents-smell.md
new file mode 100644
index 0000000000000000000000000000000000000000..0bb55c31e7ebee07862d0e3e0335f385c7095842
--- /dev/null
+++ b/.changeset/chatty-parents-smell.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+feat(qdrant): Add Qdrant Vector DB
diff --git a/packages/core/package.json b/packages/core/package.json
index 76c7da7850799b938a84d3133729fe1f4dd06e52..64e10b30bac8dfc98b5a68ed8731e450b4eea41f 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -8,6 +8,7 @@
     "@mistralai/mistralai": "^0.0.7",
     "@notionhq/client": "^2.2.14",
     "@pinecone-database/pinecone": "^1.1.2",
+    "@qdrant/js-client-rest": "^1.7.0",
     "@xenova/transformers": "^2.10.0",
     "assemblyai": "^4.0.0",
     "chromadb": "^1.7.3",
diff --git a/packages/core/src/storage/index.ts b/packages/core/src/storage/index.ts
index 796f8fb9fd3e4f3032228a6335fd8366d855aaa1..ae372ead52586db82e40cc44bd6b5681e81ec72f 100644
--- a/packages/core/src/storage/index.ts
+++ b/packages/core/src/storage/index.ts
@@ -12,5 +12,6 @@ export { ChromaVectorStore } from "./vectorStore/ChromaVectorStore";
 export { MongoDBAtlasVectorSearch } from "./vectorStore/MongoDBAtlasVectorStore";
 export { PGVectorStore } from "./vectorStore/PGVectorStore";
 export { PineconeVectorStore } from "./vectorStore/PineconeVectorStore";
+export { QdrantVectorStore } from "./vectorStore/QdrantVectorStore";
 export { SimpleVectorStore } from "./vectorStore/SimpleVectorStore";
 export * from "./vectorStore/types";
diff --git a/packages/core/src/storage/vectorStore/QdrantVectorStore.ts b/packages/core/src/storage/vectorStore/QdrantVectorStore.ts
new file mode 100644
index 0000000000000000000000000000000000000000..d5764c5625ed242c64fc18c6a40c55d6469e0093
--- /dev/null
+++ b/packages/core/src/storage/vectorStore/QdrantVectorStore.ts
@@ -0,0 +1,318 @@
+import { BaseNode } from "../../Node";
+import { VectorStore, VectorStoreQuery, VectorStoreQueryResult } from "./types";
+
+import { QdrantClient } from "@qdrant/js-client-rest";
+import { metadataDictToNode, nodeToMetadata } from "./utils";
+
+type PointStruct = {
+  id: string;
+  payload: Record<string, string>;
+  vector: number[];
+};
+
+type QdrantParams = {
+  collectionName?: string;
+  client?: QdrantClient;
+  url?: string;
+  apiKey?: string;
+  batchSize?: number;
+};
+
+type QuerySearchResult = {
+  id: string;
+  score: number;
+  payload: Record<string, unknown>;
+  vector: number[] | null;
+  version: number;
+};
+
+/**
+ * Qdrant vector store.
+ */
+export class QdrantVectorStore implements VectorStore {
+  storesText: boolean = true;
+
+  db: QdrantClient;
+
+  collectionName: string;
+  batchSize: number;
+
+  private _collectionInitialized: boolean = false;
+
+  /**
+   * Creates a new QdrantVectorStore.
+   * @param collectionName Qdrant collection name (defaults to "default")
+   * @param client Existing Qdrant client; when given, url and apiKey are not used
+   * @param url Qdrant URL (required together with apiKey when no client is given)
+   * @param apiKey Qdrant API key
+   * @param batchSize Number of vectors to upload in a single batch (defaults to 100)
+   */
+  constructor({
+    collectionName,
+    client,
+    url,
+    apiKey,
+    batchSize,
+  }: QdrantParams) {
+    if (!client && (!url || !apiKey)) {
+      throw new Error(
+        "QdrantVectorStore requires either a client or both url and apiKey",
+      );
+    }
+
+    if (client) {
+      this.db = client;
+    } else {
+      this.db = new QdrantClient({
+        url: url,
+        apiKey: apiKey,
+      });
+    }
+
+    this.collectionName = collectionName ?? "default";
+    this.batchSize = batchSize ?? 100;
+    if (!Number.isInteger(this.batchSize) || this.batchSize < 1) {
+      throw new Error("QdrantVectorStore batchSize must be a positive integer");
+    }
+  }
+
+  /**
+   * Returns the Qdrant client.
+   * @returns Qdrant client
+   */
+  client() {
+    return this.db;
+  }
+
+  /**
+   * Creates a collection in Qdrant using cosine distance.
+   * @param collectionName Qdrant collection name
+   * @param vectorSize Dimensionality of the vectors
+   */
+  async createCollection(collectionName: string, vectorSize: number) {
+    await this.db.createCollection(collectionName, {
+      vectors: {
+        size: vectorSize,
+        distance: "Cosine",
+      },
+    });
+  }
+
+  /**
+   * Checks if the collection exists in Qdrant.
+   * @param collectionName Qdrant collection name
+   * @returns true if getCollection succeeds, false if it throws for any reason
+   */
+  async collectionExists(collectionName: string): Promise<boolean> {
+    try {
+      await this.db.getCollection(collectionName);
+      return true;
+    } catch (e) {
+      return false;
+    }
+  }
+
+  /**
+   * Creates this store's collection if it does not exist yet and marks it initialized.
+   * @param vectorSize Dimensionality of the vectors
+   */
+  async initializeCollection(vectorSize: number) {
+    const exists = await this.collectionExists(this.collectionName);
+    if (!exists) {
+      await this.createCollection(this.collectionName, vectorSize);
+    }
+    this._collectionInitialized = true;
+  }
+
+  /**
+   * Builds one Qdrant point per node, preserving input order.
+   * @param nodes Nodes to convert; getEmbedding() is called on each one
+   * @returns The points to upsert together with their ids
+   */
+  async buildPoints(nodes: BaseNode[]): Promise<{
+    points: PointStruct[];
+    ids: string[];
+  }> {
+    // One point per node, in input order. Batching for upload is handled by
+    // add(), so no node may be skipped here.
+    const points: PointStruct[] = nodes.map((node) => {
+      const vector = node.getEmbedding();
+      const payload = nodeToMetadata(node);
+      return { id: node.id_, payload, vector };
+    });
+
+    return {
+      points,
+      ids: points.map((point) => point.id),
+    };
+  }
+
+  /**
+   * Adds the given nodes to the vector store, creating the collection first if needed.
+   * @param embeddingResults List of nodes
+   * @returns List of node IDs
+   */
+  async add(embeddingResults: BaseNode[]): Promise<string[]> {
+    if (embeddingResults.length > 0 && !this._collectionInitialized) {
+      await this.initializeCollection(
+        embeddingResults[0].getEmbedding().length,
+      );
+    }
+
+    const { points, ids } = await this.buildPoints(embeddingResults);
+
+    const batchUpsert = async (points: PointStruct[]) => {
+      await this.db.upsert(this.collectionName, {
+        points: points,
+      });
+    };
+
+    for (let i = 0; i < points.length; i += this.batchSize) {
+      const chunk = points.slice(i, i + this.batchSize);
+      await batchUpsert(chunk);
+    }
+
+    return ids;
+  }
+
+  /**
+   * Deletes every point whose payload "doc_id" equals the given reference document ID.
+   * @param refDocId Reference document ID
+   */
+  async delete(refDocId: string): Promise<void> {
+    const mustFilter = [
+      {
+        key: "doc_id",
+        match: {
+          value: refDocId,
+        },
+      },
+    ];
+
+    await this.db.delete(this.collectionName, {
+      filter: {
+        must: mustFilter,
+      },
+    });
+  }
+
+  /**
+   * Converts the result of a query to a VectorStoreQueryResult.
+   * @param response Query response
+   * @returns VectorStoreQueryResult
+   */
+  private parseToQueryResult(
+    response: Array<QuerySearchResult>,
+  ): VectorStoreQueryResult {
+    const nodes = [];
+    const similarities = [];
+    const ids = [];
+
+    for (let i = 0; i < response.length; i++) {
+      const item = response[i];
+      const payload = item.payload;
+
+      const node = metadataDictToNode(payload);
+
+      ids.push(item.id);
+      nodes.push(node);
+      similarities.push(item.score);
+    }
+
+    return {
+      nodes: nodes,
+      similarities: similarities,
+      ids: ids,
+    };
+  }
+
+  /**
+   * Queries the vector store for the closest matching data to the query embeddings.
+   * @param query The VectorStoreQuery to be used; queryEmbedding is required
+   * @param options Optional. options.qdrant_filters, when set, replaces the filter built from query.
+   * @returns The matching nodes with their similarity scores and ids.
+   */
+  async query(
+    query: VectorStoreQuery,
+    options?: any,
+  ): Promise<VectorStoreQueryResult> {
+    const qdrantFilters = options?.qdrant_filters;
+
+    let queryFilters;
+
+    if (!query.queryEmbedding) {
+      throw new Error("No query embedding provided");
+    }
+
+    if (qdrantFilters) {
+      queryFilters = qdrantFilters;
+    } else {
+      queryFilters = await this.buildQueryFilter(query);
+    }
+
+    const result = (await this.db.search(this.collectionName, {
+      vector: query.queryEmbedding,
+      limit: query.similarityTopK,
+      ...(queryFilters && { filter: queryFilters }),
+    })) as Array<QuerySearchResult>;
+
+    return this.parseToQueryResult(result);
+  }
+
+  /**
+   * Builds a Qdrant filter from the query's docIds and metadata filters.
+   * @param query The VectorStoreQuery to be used
+   * @returns A filter with "must" conditions, or null when there is nothing to filter on
+   */
+  private async buildQueryFilter(query: VectorStoreQuery) {
+    if (!query.docIds && !query.filters) {
+      return null;
+    }
+
+    const mustConditions = [];
+
+    if (query.docIds) {
+      mustConditions.push({
+        key: "doc_id",
+        match: {
+          any: query.docIds,
+        },
+      });
+    }
+
+    if (!query.filters) {
+      return {
+        must: mustConditions,
+      };
+    }
+
+    const metadataFilters = query.filters.filters;
+
+    for (let i = 0; i < metadataFilters.length; i++) {
+      const filter = metadataFilters[i];
+
+      // Numbers are matched with an inclusive range so that integer and
+      // float payload values are both handled.
+      if (typeof filter.value === "number") {
+        mustConditions.push({
+          key: filter.key,
+          range: {
+            gte: filter.value,
+            lte: filter.value,
+          },
+        });
+      } else {
+        mustConditions.push({
+          key: filter.key,
+          match: {
+            value: filter.value,
+          },
+        });
+      }
+    }
+
+    return {
+      must: mustConditions,
+    };
+  }
+}
diff --git a/packages/core/src/tests/mocks/TestableQdrantVectorStore.ts b/packages/core/src/tests/mocks/TestableQdrantVectorStore.ts
new file mode 100644
index 0000000000000000000000000000000000000000..c88ff3d5e331b8729e9643d82b8fb17864f4210c
--- /dev/null
+++ b/packages/core/src/tests/mocks/TestableQdrantVectorStore.ts
@@ -0,0 +1,20 @@
+import { BaseNode } from "../../Node";
+import { QdrantVectorStore } from "../../storage";
+
+export class TestableQdrantVectorStore extends QdrantVectorStore {
+  public nodes: BaseNode[] = [];
+
+  public add(nodes: BaseNode[]): Promise<string[]> {
+    this.nodes.push(...nodes);
+    return super.add(nodes);
+  }
+
+  public delete(refDocId: string): Promise<void> {
+    this.nodes = this.nodes.filter((node) => node.id_ !== refDocId);
+    return super.delete(refDocId);
+  }
+
+  public getNodes(): BaseNode[] {
+    return this.nodes;
+  }
+}
diff --git a/packages/core/src/tests/vectorStores/QdrantVectorStore.test.ts b/packages/core/src/tests/vectorStores/QdrantVectorStore.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..01833e18380de93caf8eb96583c2ce8f32ecf868
--- /dev/null
+++ b/packages/core/src/tests/vectorStores/QdrantVectorStore.test.ts
@@ -0,0 +1,135 @@
+import { BaseNode, TextNode } from "../../Node";
+
+import { QdrantClient } from "@qdrant/js-client-rest";
+import { VectorStoreQueryMode } from "../../storage";
+import { TestableQdrantVectorStore } from "../mocks/TestableQdrantVectorStore";
+
+jest.mock("@qdrant/js-client-rest");
+
+describe("QdrantVectorStore", () => {
+  let store: TestableQdrantVectorStore;
+  let mockQdrantClient: jest.Mocked<QdrantClient>;
+
+  beforeEach(() => {
+    mockQdrantClient = new QdrantClient() as jest.Mocked<QdrantClient>;
+    store = new TestableQdrantVectorStore({
+      client: mockQdrantClient,
+      collectionName: "testCollection",
+      url: "http://example.com",
+      apiKey: "testApiKey",
+      batchSize: 100,
+    });
+  });
+
+  describe("[QdrantVectorStore] createCollection", () => {
+    it("should create a new collection", async () => {
+      mockQdrantClient.createCollection.mockResolvedValue(true);
+
+      await store.createCollection("testCollection", 128);
+
+      expect(mockQdrantClient.createCollection).toHaveBeenCalledWith(
+        "testCollection",
+        {
+          vectors: {
+            size: 128,
+            distance: "Cosine",
+          },
+        },
+      );
+    });
+
+    describe("[QdrantVectorStore] add", () => {
+      it("should add nodes to the vector store", async () => {
+        // Mocking the dependent methods and Qdrant client responses
+        const mockInitializeCollection = jest
+          .spyOn(store, "initializeCollection")
+          .mockResolvedValue();
+
+        const mockBuildPoints = jest
+          .spyOn(store, "buildPoints")
+          .mockResolvedValue({
+            points: [{ id: "1", payload: {}, vector: [0.1, 0.2] }],
+            ids: ["1"],
+          });
+
+        mockQdrantClient.upsert.mockResolvedValue({
+          operation_id: 1,
+          status: "completed",
+        });
+
+        const nodes: BaseNode[] = [
+          new TextNode({
+            embedding: [0.1, 0.2],
+            metadata: { meta1: "Some metadata" },
+          }),
+        ];
+
+        const ids = await store.add(nodes);
+
+        expect(mockInitializeCollection).toHaveBeenCalledWith(
+          nodes[0].getEmbedding().length,
+        );
+        expect(mockBuildPoints).toHaveBeenCalledWith(nodes);
+        expect(mockQdrantClient.upsert).toHaveBeenCalled();
+
+        expect(ids).toEqual(["1"]);
+      });
+    });
+
+    describe("[QdrantVectorStore] delete", () => {
+      it("should delete from the vector store", async () => {
+        jest.spyOn(store, "initializeCollection").mockResolvedValue();
+
+        jest.spyOn(store, "buildPoints").mockResolvedValue({
+          points: [{ id: "1", payload: {}, vector: [0.1, 0.2] }],
+          ids: ["1"],
+        });
+
+        mockQdrantClient.upsert.mockResolvedValue({
+          operation_id: 1,
+          status: "completed",
+        });
+
+        const nodes: BaseNode[] = [
+          new TextNode({
+            id_: "1",
+            embedding: [0.1, 0.2],
+            metadata: { meta1: "Some metadata" },
+          }),
+        ];
+
+        await store.add(nodes);
+
+        expect(store.getNodes()).toContain(nodes[0]);
+
+        await store.delete("1");
+
+        expect(store.getNodes()).not.toContain(nodes[0]);
+        expect(mockQdrantClient.delete).toHaveBeenCalled();
+      });
+    });
+
+    describe("[QdrantVectorStore] search", () => {
+      it("should search in the vector store", async () => {
+        mockQdrantClient.search.mockResolvedValue([
+          {
+            id: "1",
+            score: 0.1,
+            version: 1,
+            payload: { _node_content: JSON.stringify({ text: "hello world" }) },
+          },
+        ]);
+
+        const searchResult = await store.query({
+          queryEmbedding: [0.1, 0.2],
+          similarityTopK: 1,
+          mode: VectorStoreQueryMode.DEFAULT,
+        });
+
+        expect(mockQdrantClient.search).toHaveBeenCalled();
+        expect(searchResult.ids).toEqual(["1"]);
+        expect(searchResult.similarities).toEqual([0.1]);
+      });
+    });
+  });
+});
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 699849d9fa4ea9beea0750c17e3eb3f21879d34e..51576f527b8c807474508655b373a86819fa979d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -164,6 +164,9 @@ importers:
       '@pinecone-database/pinecone':
         specifier: ^1.1.2
         version: 1.1.2
+      '@qdrant/js-client-rest':
+        specifier: ^1.7.0
+        version: 1.7.0(typescript@5.3.3)
       '@xenova/transformers':
         specifier: ^2.10.0
         version: 2.10.0
@@ -3164,6 +3167,11 @@ packages:
     resolution: {integrity: sha512-gMsVel9D7f2HLkBma9VbtzZRehRogVRfbr++f06nL2vnCGCNlzOD+/MUov/F4p8myyAHspEhVobgjpX64q5m6A==}
     engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
 
+  /@fastify/busboy@2.1.0:
+    resolution: {integrity: sha512-+KpH+QxZU7O4675t3mnkQKcZZg56u+K/Ct2K+N2AZYNVK8kyeo/bI18tI8aPm3tvNNRyTWfj6s5tnGNlcbQRsA==}
+    engines: {node: '>=14'}
+    dev: false
+
   /@fastify/deepmerge@1.3.0:
     resolution: {integrity: sha512-J8TOSBq3SoZbDhM9+R/u77hP93gz/rajSA+K2kGyijPpORPWUXHUpTaleoj+92As0S9uPRP7Oi8IqMf0u+ro6A==}
     dev: true
@@ -3769,6 +3777,23 @@ packages:
     resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==}
     dev: false
 
+  /@qdrant/js-client-rest@1.7.0(typescript@5.3.3):
+    resolution: {integrity: sha512-16O0EQfrrybcPVipodxykr6dMUlBzKW7a63cSDUFVgc5a1AWESwERykwjuvW5KqvKdkPcxZ2NssrvgUO1W3MgA==}
+    engines: {node: '>=18.0.0', pnpm: '>=8'}
+    peerDependencies:
+      typescript: '>=4.1'
+    dependencies:
+      '@qdrant/openapi-typescript-fetch': 1.2.1
+      '@sevinf/maybe': 0.5.0
+      typescript: 5.3.3
+      undici: 5.28.2
+    dev: false
+
+  /@qdrant/openapi-typescript-fetch@1.2.1:
+    resolution: {integrity: sha512-oiBJRN1ME7orFZocgE25jrM3knIF/OKJfMsZPBbtMMKfgNVYfps0MokGvSJkBmecj6bf8QoLXWIGlIoaTM4Zmw==}
+    engines: {node: '>=12.0.0', pnpm: '>=8'}
+    dev: false
+
   /@rollup/plugin-commonjs@25.0.7(rollup@4.9.5):
     resolution: {integrity: sha512-nEvcR+LRjEjsaSsc4x3XZfCCvZIaSMenZu/OiwOKGN2UhQpAYI7ru7czFvyWbErlpoGjnSX3D5Ch5FcMA3kRWQ==}
     engines: {node: '>=14.0.0'}
@@ -3981,6 +4006,10 @@ packages:
     resolution: {integrity: sha512-sXo/qW2/pAcmT43VoRKOJbDOfV3cYpq3szSVfIThQXNt+E4DfKj361vaAt3c88U5tPUxzEswam7GW48PJqtKAg==}
     dev: false
 
+  /@sevinf/maybe@0.5.0:
+    resolution: {integrity: sha512-ARhyoYDnY1LES3vYI0fiG6e9esWfTNcXcO6+MPJJXcnyMV3bim4lnFt45VXouV7y82F4x3YH8nOQ6VztuvUiWg==}
+    dev: false
+
   /@sideway/address@4.1.4:
     resolution: {integrity: sha512-7vwq+rOHVWjyXxVlR76Agnvhy8I9rpzjosTESvmhNeXOXdZZB15Fl+TI9x1SiHZH5Jv2wTGduSxFDIaq0m3DUw==}
     dependencies:
@@ -15859,6 +15888,13 @@ packages:
   /undici-types@5.26.5:
     resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
 
+  /undici@5.28.2:
+    resolution: {integrity: sha512-wh1pHJHnUeQV5Xa8/kyQhO7WFa8M34l026L5P/+2TYiakvGy5Rdc8jWZVyG7ieht/0WgJLEd3kcU5gKx+6GC8w==}
+    engines: {node: '>=14.0'}
+    dependencies:
+      '@fastify/busboy': 2.1.0
+    dev: false
+
   /unherit@1.1.3:
     resolution: {integrity: sha512-Ft16BJcnapDKp0+J/rqFC3Rrk6Y/Ng4nzsC028k2jdDII/rdZ7Wd3pPT/6+vIIxRagwRc9K0IUX0Ra4fKvw+WQ==}
     dependencies: