diff --git a/.changeset/small-oranges-develop.md b/.changeset/small-oranges-develop.md new file mode 100644 index 0000000000000000000000000000000000000000..95b2398a038b79e6dcbda072aeb5caecc75a949a --- /dev/null +++ b/.changeset/small-oranges-develop.md @@ -0,0 +1,6 @@ +--- +"llamaindex": patch +"@llamaindex/llamaindex-test": patch +--- + +Add is_empty operator for filtering vector store diff --git a/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts index 468f5966d068803b021ba95bbc7fd7b1334d7c28..7b56fcfba613772ad0d4ef1d638902bca918bc7e 100644 --- a/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts @@ -273,7 +273,8 @@ export class PGVectorStore const paramIndex = params.length + 1; whereClauses.push(`metadata->>'${filter.key}' = $${paramIndex}`); // TODO: support filter with other operators - if (!Array.isArray(filter.value)) { + // bind any defined scalar: 0 and "" are valid values and must match the placeholder above + if (!Array.isArray(filter.value) && filter.value !== undefined) { params.push(filter.value); } }); diff --git a/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts index 7daac5b276c6b0f1bf229c17fde1889a61ba2d25..342a52966b465afff43334ad95139a8cf81db7b0 100644 --- a/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts @@ -36,7 +36,7 @@ type MetadataValue = Record<string, any>; // Mapping of filter operators to metadata filter functions const OPERATOR_TO_FILTER: { - [key in FilterOperator]: ( + [key in FilterOperator]?: ( { key, value }: MetadataFilter, metadata: MetadataValue, ) => boolean; @@ -94,7 +94,20 @@ const buildFilterFn = ( const queryCondition = condition || "and"; // default to and const itemFilterFn = (filter: MetadataFilter): boolean => { - if (metadata[filter.key] === undefined) return false; // always return false if the metadata 
key is not present + if (filter.operator === FilterOperator.IS_EMPTY) { + // for `is_empty` operator, return true if the metadata key is not present or the value is empty + const value = metadata[filter.key]; + return ( + value === undefined || + value === null || + value === "" || + (Array.isArray(value) && value.length === 0) + ); + } + if (metadata[filter.key] === undefined) { + // for other operators, always return false if the metadata key is not present + return false; + } const metadataLookupFn = OPERATOR_TO_FILTER[filter.operator]; if (!metadataLookupFn) throw new Error(`Unsupported operator: ${filter.operator}`); diff --git a/packages/llamaindex/src/storage/vectorStore/types.ts b/packages/llamaindex/src/storage/vectorStore/types.ts index c13378687a02d368751a6efe69e7e5e5cb8b484b..475ab67d077b31c3b1162989bccdd8664a4049f3 100644 --- a/packages/llamaindex/src/storage/vectorStore/types.ts +++ b/packages/llamaindex/src/storage/vectorStore/types.ts @@ -33,6 +33,7 @@ export enum FilterOperator { ALL = "all", // Contains all (array of strings) TEXT_MATCH = "text_match", // full text match (allows you to search for a specific substring, token or phrase within the text field) CONTAINS = "contains", // metadata array contains value (string or number) + IS_EMPTY = "is_empty", // the field does not exist or is empty (null, empty string, or empty array) } export enum FilterCondition { @@ -44,7 +45,7 @@ export type MetadataFilterValue = string | number | string[] | number[]; export interface MetadataFilter { key: string; - value: MetadataFilterValue; + value?: MetadataFilterValue; operator: `${FilterOperator}`; // ==, any, all,... 
} diff --git a/packages/llamaindex/src/storage/vectorStore/utils.ts b/packages/llamaindex/src/storage/vectorStore/utils.ts index 5b27f2c5644d2aec10fa175d030741f91944c2a2..0e85d4bbde276db012381f16560e034d09b90fd8 100644 --- a/packages/llamaindex/src/storage/vectorStore/utils.ts +++ b/packages/llamaindex/src/storage/vectorStore/utils.ts @@ -80,7 +80,7 @@ export function metadataDictToNode( } export const parsePrimitiveValue = ( - value: MetadataFilterValue, + value?: MetadataFilterValue, ): string | number => { if (typeof value !== "number" && typeof value !== "string") { throw new Error("Value must be a string or number"); @@ -89,7 +89,7 @@ export const parsePrimitiveValue = ( }; export const parseArrayValue = ( - value: MetadataFilterValue, + value?: MetadataFilterValue, ): string[] | number[] => { const isPrimitiveArray = Array.isArray(value) && diff --git a/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts b/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts index c27cef602764fe877df4656a299d1b54c1d908b3..ce6890750ecc694fb44e532cea1c510823ba9390 100644 --- a/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts +++ b/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts @@ -256,6 +256,18 @@ describe("SimpleVectorStore", () => { }, expected: 1, }, + { + title: "Filter IS_EMPTY", + filters: { + filters: [ + { + key: "not-exist-key", + operator: "is_empty", + }, + ], + }, + expected: 3, + }, { title: "Filter OR", filters: {