From 01c184c6089fd37f8c8230c9730eaf5c43833b7b Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Fri, 9 Aug 2024 14:50:57 +0700
Subject: [PATCH] feat: add is_empty operator for filtering vector store
 (#1107)

---
 .changeset/small-oranges-develop.md             |  6 ++++++
 .../src/storage/vectorStore/PGVectorStore.ts    |  2 +-
 .../storage/vectorStore/SimpleVectorStore.ts    | 17 +++++++++++++++--
 .../llamaindex/src/storage/vectorStore/types.ts |  3 ++-
 .../llamaindex/src/storage/vectorStore/utils.ts |  4 ++--
 .../vectorStores/SimpleVectorStore.test.ts      | 12 ++++++++++++
 6 files changed, 38 insertions(+), 6 deletions(-)
 create mode 100644 .changeset/small-oranges-develop.md

diff --git a/.changeset/small-oranges-develop.md b/.changeset/small-oranges-develop.md
new file mode 100644
index 000000000..95b2398a0
--- /dev/null
+++ b/.changeset/small-oranges-develop.md
@@ -0,0 +1,6 @@
+---
+"llamaindex": patch
+"@llamaindex/llamaindex-test": patch
+---
+
+Add is_empty operator for filtering vector store
diff --git a/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts
index 468f5966d..7b56fcfba 100644
--- a/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts
+++ b/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts
@@ -273,7 +273,7 @@ export class PGVectorStore
       const paramIndex = params.length + 1;
       whereClauses.push(`metadata->>'${filter.key}' = $${paramIndex}`);
       // TODO: support filter with other operators
-      if (!Array.isArray(filter.value)) {
+      if (!Array.isArray(filter.value) && filter.value) {
         params.push(filter.value);
       }
     });
diff --git a/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts
index 7daac5b27..342a52966 100644
--- a/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts
+++ b/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts
@@ -36,7 +36,7 @@ type MetadataValue = Record<string, any>;
 
 // Mapping of filter operators to metadata filter functions
 const OPERATOR_TO_FILTER: {
-  [key in FilterOperator]: (
+  [key in FilterOperator]?: (
     { key, value }: MetadataFilter,
     metadata: MetadataValue,
   ) => boolean;
@@ -94,7 +94,20 @@ const buildFilterFn = (
   const queryCondition = condition || "and"; // default to and
 
   const itemFilterFn = (filter: MetadataFilter): boolean => {
-    if (metadata[filter.key] === undefined) return false; // always return false if the metadata key is not present
+    if (filter.operator === FilterOperator.IS_EMPTY) {
+      // for `is_empty` operator, return true if the metadata key is not present or the value is empty
+      const value = metadata[filter.key];
+      return (
+        value === undefined ||
+        value === null ||
+        value === "" ||
+        (Array.isArray(value) && value.length === 0)
+      );
+    }
+    if (metadata[filter.key] === undefined) {
+      // for other operators, always return false if the metadata key is not present
+      return false;
+    }
     const metadataLookupFn = OPERATOR_TO_FILTER[filter.operator];
     if (!metadataLookupFn)
       throw new Error(`Unsupported operator: ${filter.operator}`);
diff --git a/packages/llamaindex/src/storage/vectorStore/types.ts b/packages/llamaindex/src/storage/vectorStore/types.ts
index c13378687..475ab67d0 100644
--- a/packages/llamaindex/src/storage/vectorStore/types.ts
+++ b/packages/llamaindex/src/storage/vectorStore/types.ts
@@ -33,6 +33,7 @@ export enum FilterOperator {
   ALL = "all", // Contains all (array of strings)
   TEXT_MATCH = "text_match", // full text match (allows you to search for a specific substring, token or phrase within the text field)
   CONTAINS = "contains", // metadata array contains value (string or number)
+  IS_EMPTY = "is_empty", // the field does not exist or is empty (null or empty array)
 }
 
 export enum FilterCondition {
@@ -44,7 +45,7 @@ export type MetadataFilterValue = string | number | string[] | number[];
 
 export interface MetadataFilter {
   key: string;
-  value: MetadataFilterValue;
+  value?: MetadataFilterValue;
   operator: `${FilterOperator}`; // ==, any, all,...
 }
 
diff --git a/packages/llamaindex/src/storage/vectorStore/utils.ts b/packages/llamaindex/src/storage/vectorStore/utils.ts
index 5b27f2c56..0e85d4bbd 100644
--- a/packages/llamaindex/src/storage/vectorStore/utils.ts
+++ b/packages/llamaindex/src/storage/vectorStore/utils.ts
@@ -80,7 +80,7 @@ export function metadataDictToNode(
 }
 
 export const parsePrimitiveValue = (
-  value: MetadataFilterValue,
+  value?: MetadataFilterValue,
 ): string | number => {
   if (typeof value !== "number" && typeof value !== "string") {
     throw new Error("Value must be a string or number");
@@ -89,7 +89,7 @@ export const parsePrimitiveValue = (
 };
 
 export const parseArrayValue = (
-  value: MetadataFilterValue,
+  value?: MetadataFilterValue,
 ): string[] | number[] => {
   const isPrimitiveArray =
     Array.isArray(value) &&
diff --git a/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts b/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts
index c27cef602..ce6890750 100644
--- a/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts
+++ b/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts
@@ -256,6 +256,18 @@ describe("SimpleVectorStore", () => {
         },
         expected: 1,
       },
+      {
+        title: "Filter IS_EMPTY",
+        filters: {
+          filters: [
+            {
+              key: "not-exist-key",
+              operator: "is_empty",
+            },
+          ],
+        },
+        expected: 3,
+      },
       {
         title: "Filter OR",
         filters: {
-- 
GitLab