From a2691ee163addbe0216a2722c22d3afdcc19b1ed Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 18 Jul 2024 22:52:00 +0700 Subject: [PATCH] fix: always return false when key not exist in metadata (#1056) --- .../storage/vectorStore/MilvusVectorStore.ts | 3 +- .../storage/vectorStore/SimpleVectorStore.ts | 37 +++++++++---------- .../src/storage/vectorStore/utils.ts | 17 ++++----- .../vectorStores/SimpleVectorStore.test.ts | 13 +++++++ 4 files changed, 39 insertions(+), 31 deletions(-) diff --git a/packages/llamaindex/src/storage/vectorStore/MilvusVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/MilvusVectorStore.ts index 15c3fe6e9..46f9cd51d 100644 --- a/packages/llamaindex/src/storage/vectorStore/MilvusVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/MilvusVectorStore.ts @@ -20,7 +20,6 @@ import { metadataDictToNode, nodeToMetadata, parseArrayValue, - parseNumberValue, parsePrimitiveValue, } from "./utils.js"; @@ -60,7 +59,7 @@ function parseScalarFilters(scalarFilters: MetadataFilters): string { case ">": case ">=": { filters.push( - `metadata["${filter.key}"] ${filter.operator} ${parseNumberValue(filter.value)}`, + `metadata["${filter.key}"] ${filter.operator} ${parsePrimitiveValue(filter.value)}`, ); break; } diff --git a/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts index 6d0847f3e..7daac5b27 100644 --- a/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts +++ b/packages/llamaindex/src/storage/vectorStore/SimpleVectorStore.ts @@ -21,7 +21,6 @@ import { import { nodeToMetadata, parseArrayValue, - parseNumberValue, parsePrimitiveValue, } from "./utils.js"; @@ -43,46 +42,43 @@ const OPERATOR_TO_FILTER: { ) => boolean; } = { [FilterOperator.EQ]: ({ key, value }, metadata) => { - return parsePrimitiveValue(metadata[key]) === parsePrimitiveValue(value); + return metadata[key] === parsePrimitiveValue(value); }, [FilterOperator.NE]: ({ key, value }, metadata) => { - return parsePrimitiveValue(metadata[key]) !== parsePrimitiveValue(value); + return metadata[key] !== parsePrimitiveValue(value); }, [FilterOperator.IN]: ({ key, value }, metadata) => { - return parseArrayValue(value).includes(parsePrimitiveValue(metadata[key])); + return !!parseArrayValue(value).find((v) => metadata[key] === v); }, [FilterOperator.NIN]: ({ key, value }, metadata) => { - return !parseArrayValue(value).includes(parsePrimitiveValue(metadata[key])); + return !parseArrayValue(value).find((v) => metadata[key] === v); }, [FilterOperator.ANY]: ({ key, value }, metadata) => { - return parseArrayValue(value).some((v) => - parseArrayValue(metadata[key]).includes(v), - ); + if (!Array.isArray(metadata[key])) return false; + return parseArrayValue(value).some((v) => metadata[key].includes(v)); }, [FilterOperator.ALL]: ({ key, value }, metadata) => { - return parseArrayValue(value).every((v) => - parseArrayValue(metadata[key]).includes(v), - ); + if (!Array.isArray(metadata[key])) return false; + return parseArrayValue(value).every((v) => metadata[key].includes(v)); }, [FilterOperator.TEXT_MATCH]: ({ key, value }, metadata) => { - return parsePrimitiveValue(metadata[key]).includes( - parsePrimitiveValue(value), - ); + return metadata[key].includes(parsePrimitiveValue(value)); }, [FilterOperator.CONTAINS]: ({ key, value }, metadata) => { - return parseArrayValue(metadata[key]).includes(parsePrimitiveValue(value)); + if (!Array.isArray(metadata[key])) return false; + return !!parseArrayValue(metadata[key]).find((v) => v === value); }, [FilterOperator.GT]: ({ key, value }, metadata) => { - return parseNumberValue(metadata[key]) > parseNumberValue(value); + return metadata[key] > parsePrimitiveValue(value); }, [FilterOperator.LT]: ({ key, value }, metadata) => { - return parseNumberValue(metadata[key]) < parseNumberValue(value); + return metadata[key] < parsePrimitiveValue(value); }, [FilterOperator.GTE]: ({ key, value }, metadata) => { - return parseNumberValue(metadata[key]) >= parseNumberValue(value); + return metadata[key] >= parsePrimitiveValue(value); }, [FilterOperator.LTE]: ({ key, value }, metadata) => { - return parseNumberValue(metadata[key]) <= parseNumberValue(value); + return metadata[key] <= parsePrimitiveValue(value); }, }; @@ -97,7 +93,8 @@ const buildFilterFn = ( const { filters, condition } = preFilters; const queryCondition = condition || "and"; // default to and - const itemFilterFn = (filter: MetadataFilter) => { + const itemFilterFn = (filter: MetadataFilter): boolean => { + if (metadata[filter.key] === undefined) return false; // always return false if the metadata key is not present const metadataLookupFn = OPERATOR_TO_FILTER[filter.operator]; if (!metadataLookupFn) throw new Error(`Unsupported operator: ${filter.operator}`); diff --git a/packages/llamaindex/src/storage/vectorStore/utils.ts b/packages/llamaindex/src/storage/vectorStore/utils.ts index 8bdc39474..5b27f2c56 100644 --- a/packages/llamaindex/src/storage/vectorStore/utils.ts +++ b/packages/llamaindex/src/storage/vectorStore/utils.ts @@ -79,24 +79,23 @@ export function metadataDictToNode( } } -export const parseNumberValue = (value: MetadataFilterValue): number => { - if (typeof value !== "number") throw new Error("Value must be a number"); - return value; -}; - -export const parsePrimitiveValue = (value: MetadataFilterValue): string => { +export const parsePrimitiveValue = ( + value: MetadataFilterValue, +): string | number => { if (typeof value !== "number" && typeof value !== "string") { throw new Error("Value must be a string or number"); } - return value.toString(); + return value; }; -export const parseArrayValue = (value: MetadataFilterValue): string[] => { +export const parseArrayValue = ( + value: MetadataFilterValue, +): string[] | number[] => { const isPrimitiveArray = Array.isArray(value) && value.every((v) => typeof v === "string" || typeof v === "number"); if (!isPrimitiveArray) { throw new Error("Value must be an array of strings or numbers"); } - return value.map(String); + return value; }; diff --git a/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts b/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts index 6e4fece1a..c27cef602 100644 --- a/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts +++ b/packages/llamaindex/tests/vectorStores/SimpleVectorStore.test.ts @@ -87,6 +87,19 @@ describe("SimpleVectorStore", () => { title: "No filter", expected: 3, }, + { + title: "Filter with non-exist key", + filters: { + filters: [ + { + key: "non-exist-key", + value: "cat", + operator: "==", + }, + ], + }, + expected: 0, + }, { title: "Filter EQ", filters: { -- GitLab