From 01c184c6089fd37f8c8230c9730eaf5c43833b7b Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Fri, 9 Aug 2024 14:50:57 +0700 Subject: [PATCH] feat: add is_empty operator for filtering vector store (#1107) --- .changeset/small-oranges-develop.md | 6 ++++++ .../src/storage/vectorStore/PGVectorStore.ts | 2 +- .../storage/vectorStore/SimpleVectorStore.ts | 17 +++++++++++++++-- .../llamaindex/src/storage/vectorStore/types.ts | 3 ++- .../llamaindex/src/storage/vectorStore/utils.ts | 4 ++-- .../vectorStores/SimpleVectorStore.test.ts | 12 ++++++++++++ 6 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 .changeset/small-oranges-develop.md diff --git a/.changeset/small-oranges-develop.md b/.changeset/small-oranges-develop.md new file mode 100644 index 000000000..95b2398a0 --- /dev/null +++ b/.changeset/small-oranges-develop.md @@ -0,0 +1,6 @@ +--- +"llamaindex": patch +"@llamaindex/llamaindex-test": patch +--- + +Add is_empty operator for filtering vector store diff --git a/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts index 468f5966d..7b56fcfba 100644 --- a/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts @@ -273,7 +273,7 @@ export class PGVectorStore const paramIndex = params.length + 1; whereClauses.push(`metadata->>'${filter.key}' = $${paramIndex}`); // TODO: support filter with other operators - if (!Array.isArray(filter.value)) { + if (!Array.isArray(filter.value) && filter.value) { params.push(filter.value); } }); diff --git a/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts index 7daac5b27..342a52966 100644 --- a/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts @@ -36,7 +36,7 @@ type MetadataValue = Record<string, any>; // Mapping of filter operators to metadata filter functions const OPERATOR_TO_FILTER: { - [key in FilterOperator]: ( + [key in FilterOperator]?: ( { key, value }: MetadataFilter, metadata: MetadataValue, ) => boolean; @@ -94,7 +94,20 @@ const buildFilterFn = ( const queryCondition = condition || "and"; // default to and const itemFilterFn = (filter: MetadataFilter): boolean => { - if (metadata[filter.key] === undefined) return false; // always return false if the metadata key is not present + if (filter.operator === FilterOperator.IS_EMPTY) { + // for `is_empty` operator, return true if the metadata key is not present or the value is empty + const value = metadata[filter.key]; + return ( + value === undefined || + value === null || + value === "" || + (Array.isArray(value) && value.length === 0) + ); + } + if (metadata[filter.key] === undefined) { + // for other operators, always return false if the metadata key is not present + return false; + } const metadataLookupFn = OPERATOR_TO_FILTER[filter.operator]; if (!metadataLookupFn) throw new Error(`Unsupported operator: ${filter.operator}`); diff --git a/packages/llamaindex/src/storage/vectorStore/types.ts b/packages/llamaindex/src/storage/vectorStore/types.ts index c13378687..475ab67d0 100644 --- a/packages/llamaindex/src/storage/vectorStore/types.ts +++ b/packages/llamaindex/src/storage/vectorStore/types.ts @@ -33,6 +33,7 @@ export enum FilterOperator { ALL = "all", // Contains all (array of strings) TEXT_MATCH = "text_match", // full text match (allows you to search for a specific substring, token or phrase within the text field) CONTAINS = "contains", // metadata array contains value (string or number) + IS_EMPTY = "is_empty", // the field is not exist or empty (null or empty array) } export enum FilterCondition { @@ -44,7 +45,7 @@ export type MetadataFilterValue = string | number | string[] | number[]; export interface MetadataFilter { key: string; - value: MetadataFilterValue; + value?: MetadataFilterValue; operator: `${FilterOperator}`; // ==, any, all,... } diff --git a/packages/llamaindex/src/storage/vectorStore/utils.ts b/packages/llamaindex/src/storage/vectorStore/utils.ts index 5b27f2c56..0e85d4bbd 100644 --- a/packages/llamaindex/src/storage/vectorStore/utils.ts +++ b/packages/llamaindex/src/storage/vectorStore/utils.ts @@ -80,7 +80,7 @@ export function metadataDictToNode( } export const parsePrimitiveValue = ( - value: MetadataFilterValue, + value?: MetadataFilterValue, ): string | number => { if (typeof value !== "number" && typeof value !== "string") { throw new Error("Value must be a string or number"); @@ -89,7 +89,7 @@ export const parsePrimitiveValue = ( }; export const parseArrayValue = ( - value: MetadataFilterValue, + value?: MetadataFilterValue, ): string[] | number[] => { const isPrimitiveArray = Array.isArray(value) && diff --git a/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts b/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts index c27cef602..ce6890750 100644 --- a/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts +++ b/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts @@ -256,6 +256,18 @@ describe("SimpleVectorStore", () => { }, expected: 1, }, + { + title: "Filter IS_EMPTY", + filters: { + filters: [ + { + key: "not-exist-key", + operator: "is_empty", + }, + ], + }, + expected: 3, + }, { title: "Filter OR", filters: { -- GitLab