From d9d6c56ed5443f1a5b7dca678591fd64c74333cb Mon Sep 17 00:00:00 2001
From: Sebastian van Gerwen <40266360+sebvg@users.noreply.github.com>
Date: Fri, 23 Aug 2024 17:40:39 +1000
Subject: [PATCH] pgvectorstore support new conditions and operations (#1131)

Co-authored-by: Sebastian van Gerwen <svangerwen@invertigro.com>
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
---
 .changeset/gentle-sloths-walk.md              |   5 +
 .../src/storage/vectorStore/PGVectorStore.ts  | 146 +++++++++++++++++-
 .../src/storage/vectorStore/utils.ts          |  10 +
 3 files changed, 156 insertions(+), 5 deletions(-)
 create mode 100644 .changeset/gentle-sloths-walk.md

diff --git a/.changeset/gentle-sloths-walk.md b/.changeset/gentle-sloths-walk.md
new file mode 100644
index 000000000..ebc314a45
--- /dev/null
+++ b/.changeset/gentle-sloths-walk.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+Add support for MetadataFilters for PostgreSQL
diff --git a/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts b/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts
index 7b56fcfba..df32b3ddc 100644
--- a/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts
+++ b/packages/llamaindex/src/storage/vectorStore/PGVectorStore.ts
@@ -1,13 +1,19 @@
 import type pg from "pg";
 
 import {
+  FilterCondition,
+  FilterOperator,
   VectorStoreBase,
   type IEmbedModel,
+  type MetadataFilter,
+  type MetadataFilterValue,
   type VectorStoreNoEmbedModel,
   type VectorStoreQuery,
   type VectorStoreQueryResult,
 } from "./types.js";
 
+import { escapeLikeString } from "./utils.js";
+
 import type { BaseNode, Metadata } from "@llamaindex/core/schema";
 import { Document, MetadataMode } from "@llamaindex/core/schema";
 
@@ -246,6 +252,127 @@ export class PGVectorStore
     return Promise.resolve();
   }
 
+  /**
+   * Maps a filter condition to the SQL keyword that joins the filter clauses.
+   * @param condition The condition declared on the metadata filters
+   * @returns "OR" for FilterCondition.OR, otherwise "AND"
+   */
+  private toPostgresCondition(condition: `${FilterCondition}`) {
+    // anything other than OR falls back to AND
+    return condition === FilterCondition.OR ? "OR" : "AND";
+  }
+
+  /**
+   * Maps a filter operator to the PostgreSQL operator used in the clause.
+   * @param operator The operator declared on the metadata filter
+   * @returns The SQL operator; operators without a mapping fall back to "="
+   */
+  private toPostgresOperator(operator: `${FilterOperator}`) {
+    switch (operator) {
+      case FilterOperator.EQ:
+        return "=";
+      case FilterOperator.GT:
+        return ">";
+      case FilterOperator.LT:
+        return "<";
+      case FilterOperator.NE:
+        return "!=";
+      case FilterOperator.GTE:
+        return ">=";
+      case FilterOperator.LTE:
+        return "<=";
+      case FilterOperator.IN:
+        return "= ANY";
+      case FilterOperator.NIN:
+        // "!= ANY" holds as soon as one element differs, which is not
+        // "not in"; the value has to differ from every element instead
+        return "!= ALL";
+      case FilterOperator.CONTAINS:
+        return "@>";
+      case FilterOperator.ANY:
+        return "?|";
+      case FilterOperator.ALL:
+        return "?&";
+      default:
+        // fallback to "="
+        return "=";
+    }
+  }
+
+  /**
+   * Builds the SQL fragment and the bind value for a single metadata filter.
+   * @param filter The metadata filter to translate
+   * @param paramIndex Index of the $n placeholder the clause may reference
+   * @returns The SQL clause and the value to bind to the placeholder; param
+   * is undefined for IS_EMPTY, whose clause references no placeholder
+   */
+  private buildFilterClause(
+    filter: MetadataFilter,
+    paramIndex: number,
+  ): {
+    clause: string;
+    param: string | string[] | number | number[] | undefined;
+  } {
+    // the key is embedded in a SQL string literal, so double its single quotes
+    const key = filter.key.replace(/'/g, "''");
+    const operator = this.toPostgresOperator(filter.operator);
+
+    if (
+      filter.operator === FilterOperator.IN ||
+      filter.operator === FilterOperator.NIN
+    ) {
+      return {
+        clause: `metadata->>'${key}' ${operator}($${paramIndex})`,
+        param: filter.value,
+      };
+    }
+
+    if (
+      filter.operator === FilterOperator.ALL ||
+      filter.operator === FilterOperator.ANY
+    ) {
+      return {
+        clause: `metadata->'${key}' ${operator} $${paramIndex}::text[]`,
+        param: filter.value,
+      };
+    }
+
+    if (filter.operator === FilterOperator.CONTAINS) {
+      return {
+        clause: `metadata->'${key}' ${operator} $${paramIndex}::jsonb`,
+        param: JSON.stringify([filter.value]),
+      };
+    }
+
+    if (filter.operator === FilterOperator.IS_EMPTY) {
+      return {
+        clause: `(NOT (metadata ? '${key}') OR metadata->>'${key}' IS NULL OR metadata->>'${key}' = '' OR metadata->'${key}' = '[]'::jsonb)`,
+        param: undefined,
+      };
+    }
+
+    if (filter.operator === FilterOperator.TEXT_MATCH) {
+      const escapedValue = escapeLikeString(filter.value as string);
+      return {
+        clause: `metadata->>'${key}' LIKE $${paramIndex}`,
+        param: `%${escapedValue}%`,
+      };
+    }
+
+    // if value is number, coerce metadata value to float
+    if (typeof filter.value === "number") {
+      return {
+        clause: `(metadata->>'${key}')::float ${operator} $${paramIndex}`,
+        param: filter.value,
+      };
+    }
+
+    return {
+      clause: `metadata->>'${key}' ${operator} $${paramIndex}`,
+      param: filter.value,
+    };
+  }
+
   /**
    * Query the vector store for the closest matching data to the query embeddings
    * @param query The VectorStoreQuery to be used
@@ -265,19 +392,28 @@ export class PGVectorStore
     const max = query.similarityTopK ?? 2;
     const whereClauses = this.collection.length ? ["collection = $2"] : [];
 
-    const params: Array<string | number> = this.collection.length
+    const params: Array<MetadataFilterValue> = this.collection.length
       ? [embedding, this.collection]
       : [embedding];
 
+    const filterClauses: string[] = [];
     query.filters?.filters.forEach((filter, index) => {
       const paramIndex = params.length + 1;
-      whereClauses.push(`metadata->>'${filter.key}' = $${paramIndex}`);
-      // TODO: support filter with other operators
-      if (!Array.isArray(filter.value) && filter.value) {
-        params.push(filter.value);
+      const { clause, param } = this.buildFilterClause(filter, paramIndex);
+      filterClauses.push(clause);
+      // compare with undefined so that falsy values (0, "") are still bound
+      if (param !== undefined) {
+        params.push(param);
       }
     });
 
+    if (filterClauses.length > 0) {
+      const condition = this.toPostgresCondition(
+        query.filters?.condition ?? FilterCondition.AND,
+      );
+      whereClauses.push(`(${filterClauses.join(` ${condition} `)})`);
+    }
+
     const where =
       whereClauses.length > 0 ? `WHERE ${whereClauses.join(" AND ")}` : "";
 
diff --git a/packages/llamaindex/src/storage/vectorStore/utils.ts b/packages/llamaindex/src/storage/vectorStore/utils.ts
index d664c4f3d..da40eeb9b 100644
--- a/packages/llamaindex/src/storage/vectorStore/utils.ts
+++ b/packages/llamaindex/src/storage/vectorStore/utils.ts
@@ -104,3 +104,13 @@ export const parseNumberValue = (value?: MetadataFilterValue): number => {
   if (typeof value !== "number") throw new Error("Value must be a number");
   return value;
 };
+
+/**
+ * Escapes the LIKE wildcards (% and _) and the backslash escape character so
+ * that the value is matched literally inside a LIKE pattern.
+ * @param value The raw string to escape
+ * @returns The value with every %, _ and \ prefixed by a backslash
+ */
+export const escapeLikeString = (value: string) => {
+  return value.replace(/[%_\\]/g, "\\$&");
+};
-- 
GitLab