diff --git a/.changeset/yellow-birds-protect.md b/.changeset/yellow-birds-protect.md new file mode 100644 index 0000000000000000000000000000000000000000..0406205a0b04103ebc55bde65b8f200da072dd2f --- /dev/null +++ b/.changeset/yellow-birds-protect.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +feat: add metadata filters for Qdrant vector store diff --git a/examples/qdrantdb/preFilters.ts b/examples/qdrantdb/preFilters.ts index 2047ea517d170d44d3f52a15e116c70cfdecbed6..bfd59da2eac987403f29346b2d32697113ffc010 100644 --- a/examples/qdrantdb/preFilters.ts +++ b/examples/qdrantdb/preFilters.ts @@ -39,6 +39,12 @@ async function main() { dogId: "2", }, }), + new Document({ + text: "The dog is black", + metadata: { + dogId: "3", + }, + }), ]; console.log("Creating QdrantDB vector store"); const qdrantVs = new QdrantVectorStore({ url: qdrantUrl, collectionName }); @@ -73,6 +79,42 @@ async function main() { query: "What is the color of the dog?", }); console.log("Filter with dogId 2 response:", response.toString()); + + console.log("Querying index with dogId !=2: Expected output: Not red"); + const queryEngineNotDogId2 = index.asQueryEngine({ + preFilters: { + filters: [ + { + key: "dogId", + value: "2", + operator: "!=", + }, + ], + }, + }); + const responseNotDogId2 = await queryEngineNotDogId2.query({ + query: "What is the color of the dog?", + }); + console.log(responseNotDogId2.toString()); + + console.log( + "Querying index with dogId 2 or 3: Expected output: Red, Black", + ); + const queryEngineIn = index.asQueryEngine({ + preFilters: { + filters: [ + { + key: "dogId", + value: ["2", "3"], + operator: "in", + }, + ], + }, + }); + const responseIn = await queryEngineIn.query({ + query: "List all dogs", + }); + console.log(responseIn.toString()); } catch (e) { console.error(e); } diff --git a/packages/llamaindex/src/vector-store/QdrantVectorStore.ts b/packages/llamaindex/src/vector-store/QdrantVectorStore.ts index 
20612481b2b5123b0452e87ecf6aa858ee206033..4204ae6197d2cf79a6b4ff6190ccc9a3d7158a39 100644 --- a/packages/llamaindex/src/vector-store/QdrantVectorStore.ts +++ b/packages/llamaindex/src/vector-store/QdrantVectorStore.ts @@ -1,16 +1,22 @@ import type { BaseNode } from "@llamaindex/core/schema"; import { + FilterCondition, + FilterOperator, VectorStoreBase, type IEmbedModel, + type MetadataFilters, type VectorStoreNoEmbedModel, type VectorStoreQuery, type VectorStoreQueryResult, } from "./types.js"; -import type { QdrantClientParams } from "@qdrant/js-client-rest"; +import type { QdrantClientParams, Schemas } from "@qdrant/js-client-rest"; import { QdrantClient } from "@qdrant/js-client-rest"; import { metadataDictToNode, nodeToMetadata } from "./utils.js"; +type QdrantFilter = Schemas["Filter"]; +type QdrantMustConditions = QdrantFilter["must"]; + type PointStruct = { id: string; payload: Record<string, string>; @@ -272,7 +278,7 @@ export class QdrantVectorStore ): Promise<VectorStoreQueryResult> { const qdrantFilters = options?.qdrant_filters; - let queryFilters; + let queryFilters: QdrantFilter | undefined; if (!query.queryEmbedding) { throw new Error("No query embedding provided"); @@ -281,7 +287,7 @@ export class QdrantVectorStore if (qdrantFilters) { queryFilters = qdrantFilters; } else { - queryFilters = await this.buildQueryFilter(query); + queryFilters = buildQueryFilter(query); } const result = (await this.db.search(this.collectionName, { @@ -292,58 +298,118 @@ export class QdrantVectorStore return this.parseToQueryResult(result); } +} - /** - * Qdrant filter builder - * @param query The VectorStoreQuery to be used - */ - private async buildQueryFilter(query: VectorStoreQuery) { - if (!query.docIds && !query.queryStr && !query.filters) { - return null; - } - - const mustConditions = []; - - if (query.docIds) { - mustConditions.push({ - key: "doc_id", - match: { - any: query.docIds, - }, - }); - } - - if (!query.filters) { - return { - must: mustConditions, - 
/**
 * Builds the Qdrant filter for a vector store query.
 *
 * A restriction on `query.docIds` and the converted `query.filters` are both
 * placed under one `must` clause, so a point has to satisfy every one of them.
 *
 * @param query The VectorStoreQuery to be used
 * @returns A Qdrant filter, or `undefined` when the query has no doc ids, no
 * query string and no metadata filters
 * @throws Error if a metadata filter uses an operator that cannot be translated
 */
function buildQueryFilter(query: VectorStoreQuery): QdrantFilter | undefined {
  if (!query.docIds && !query.queryStr && !query.filters) return undefined;

  const mustConditions: QdrantMustConditions = [];
  if (query.docIds) {
    mustConditions.push({
      key: "doc_id",
      match: { any: query.docIds },
    });
  }

  // The converted metadata filters are nested as ONE condition so that their
  // own must/should grouping is kept intact next to the doc id restriction.
  const metadataFilters = toQdrantMetadataFilters(query.filters);
  if (metadataFilters) {
    mustConditions.push(metadataFilters);
  }

  return { must: mustConditions };
}

/**
 * Normalizes the value of an IN / NIN filter into a list of strings.
 *
 * Array elements are stringified one by one; any other value is stringified
 * and then split on commas (so `"2,3"` becomes `["2", "3"]`).
 *
 * @param value The raw filter value
 * @returns The values to hand to Qdrant's `any` / `except` matchers
 */
function toQdrantKeywordList(value: unknown): string[] {
  return Array.isArray(value) ? value.map(String) : String(value).split(",");
}

/**
 * Converts metadata filters to Qdrant-compatible filters.
 *
 * Each sub filter becomes one Qdrant condition. The conditions are grouped
 * under `should` when `subFilters.condition` is `FilterCondition.OR` and under
 * `must` in every other case.
 *
 * @param subFilters The metadata filters to be converted
 * @returns A QdrantFilter object, or `undefined` if no filters are provided
 * @throws Error if a sub filter uses an operator that has no translation here;
 * dropping it silently would make the query return more points than requested
 */
function toQdrantMetadataFilters(
  subFilters?: MetadataFilters,
): QdrantFilter | undefined {
  if (!subFilters?.filters.length) return undefined;

  const conditions: QdrantMustConditions = [];

  for (const subfilter of subFilters.filters) {
    const { key, value } = subfilter;

    switch (subfilter.operator) {
      case FilterOperator.EQ:
        if (typeof value === "number") {
          // Numeric equality is expressed as the closed range [value, value].
          // NOTE(review): presumably so non-integer numbers can be matched too
          // — confirm against the Qdrant `match` documentation.
          conditions.push({ key, range: { gte: value, lte: value } });
        } else {
          conditions.push({ key, match: { value } });
        }
        break;
      case FilterOperator.LT:
        conditions.push({ key, range: { lt: value } });
        break;
      case FilterOperator.GT:
        conditions.push({ key, range: { gt: value } });
        break;
      case FilterOperator.GTE:
        conditions.push({ key, range: { gte: value } });
        break;
      case FilterOperator.LTE:
        conditions.push({ key, range: { lte: value } });
        break;
      case FilterOperator.TEXT_MATCH:
        conditions.push({ key, match: { text: value } });
        break;
      case FilterOperator.NE:
        // `except` takes a flat list: wrap a single value, but pass an array
        // value through as-is instead of nesting it inside another array.
        conditions.push({
          key,
          match: { except: Array.isArray(value) ? value : [value] },
        });
        break;
      case FilterOperator.IN:
        conditions.push({ key, match: { any: toQdrantKeywordList(value) } });
        break;
      case FilterOperator.NIN:
        conditions.push({
          key,
          match: { except: toQdrantKeywordList(value) },
        });
        break;
      case FilterOperator.IS_EMPTY:
        conditions.push({ is_empty: { key } });
        break;
      default:
        throw new Error(
          `Operator ${subfilter.operator} is not supported by QdrantVectorStore`,
        );
    }
  }

  const filter: QdrantFilter = {};
  if (subFilters.condition === FilterCondition.OR) {
    filter.should = conditions;
  } else {
    filter.must = conditions;
  }

  return filter;
}