diff --git a/.changeset/tame-squids-clean.md b/.changeset/tame-squids-clean.md new file mode 100644 index 0000000000000000000000000000000000000000..b3263d6a7b056c888fb2789a7b31a3b32c295234 --- /dev/null +++ b/.changeset/tame-squids-clean.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +fix: preFilters does not work with asQueryEngine diff --git a/examples/metadata-filter/preFilters.ts b/examples/metadata-filter/preFilters.ts new file mode 100644 index 0000000000000000000000000000000000000000..9ccf4bc8c83478229e79604f4e248632ad585087 --- /dev/null +++ b/examples/metadata-filter/preFilters.ts @@ -0,0 +1,51 @@ +import { + Document, + MetadataFilters, + Settings, + SimpleDocumentStore, + VectorStoreIndex, + storageContextFromDefaults, +} from "llamaindex"; + +async function getDataSource() { + const docs = [ + new Document({ text: "The dog is brown", metadata: { dogId: "1" } }), + new Document({ text: "The dog is yellow", metadata: { dogId: "2" } }), + ]; + const storageContext = await storageContextFromDefaults({ + persistDir: "./cache", + }); + const numberOfDocs = Object.keys( + (storageContext.docStore as SimpleDocumentStore).toDict(), + ).length; + if (numberOfDocs === 0) { + return await VectorStoreIndex.fromDocuments(docs, { storageContext }); + } + return await VectorStoreIndex.init({ + storageContext, + }); +} + +Settings.callbackManager.on("retrieve-end", (event) => { + const { nodes, query } = event.detail; + console.log(`${query.query} - Number of retrieved nodes:`, nodes.length); +}); + +async function main() { + const index = await getDataSource(); + const filters: MetadataFilters = { + filters: [{ key: "dogId", value: "2", operator: "==" }], + }; + + const retriever = index.asRetriever({ similarityTopK: 3, filters }); + const queryEngine = index.asQueryEngine({ + similarityTopK: 3, + preFilters: filters, + }); + + console.log("Retriever and query engine should only retrieve 1 node:"); + await retriever.retrieve({ query: "Retriever: get dog" }); + await queryEngine.query({ query: "QueryEngine: get dog" }); +} + +void main(); diff --git a/examples/vectorIndexCustomize.ts b/examples/vectorIndexCustomize.ts index cf29d97efc0eb833e6b680dff8cd1fb1c23d5873..9f04c704864bbc61344830c8a7f7fdbfe9823502 100644 --- a/examples/vectorIndexCustomize.ts +++ b/examples/vectorIndexCustomize.ts @@ -25,12 +25,9 @@ async function main() { similarityCutoff: 0.7, }); // TODO: cannot pass responseSynthesizer into retriever query engine - const queryEngine = new RetrieverQueryEngine( - retriever, - undefined, - undefined, - [nodePostprocessor], - ); + const queryEngine = new RetrieverQueryEngine(retriever, undefined, [ + nodePostprocessor, + ]); const response = await queryEngine.query({ query: "What did the author do growing up?", diff --git a/examples/vectorIndexFromVectorStore.ts b/examples/vectorIndexFromVectorStore.ts index 042dab8551b7f1f636b88fdc96e72d78dcb05b5f..02da91cfaa9c47598e5748c0f5c5f4ac2c0c7f00 100644 --- a/examples/vectorIndexFromVectorStore.ts +++ b/examples/vectorIndexFromVectorStore.ts @@ -165,9 +165,7 @@ async function main() { }); const responseSynthesizer = getResponseSynthesizer("tree_summarize"); - return new RetrieverQueryEngine(retriever, responseSynthesizer, { - filter, - }); + return new RetrieverQueryEngine(retriever, responseSynthesizer); }; // whatever is a key from your metadata diff --git a/packages/llamaindex/src/cloud/LlamaCloudIndex.ts b/packages/llamaindex/src/cloud/LlamaCloudIndex.ts index e3509b2968c26bdbc2a99a2c18c2401933a9c0ee..b71cf07dcb67701a26f0a05228858caa0d693c0f 100644 --- a/packages/llamaindex/src/cloud/LlamaCloudIndex.ts +++ b/packages/llamaindex/src/cloud/LlamaCloudIndex.ts @@ -308,7 +308,6 @@ export class LlamaCloudIndex { return new RetrieverQueryEngine( retriever, params?.responseSynthesizer, - params?.preFilters, params?.nodePostprocessors, ); } diff --git a/packages/llamaindex/src/engines/query/RetrieverQueryEngine.ts b/packages/llamaindex/src/engines/query/RetrieverQueryEngine.ts index ab1906e0751e1834c7e026cbfe335db1637671fa..2a3d105f2a3472bbd508506a02cce507735e046b 100644 --- a/packages/llamaindex/src/engines/query/RetrieverQueryEngine.ts +++ b/packages/llamaindex/src/engines/query/RetrieverQueryEngine.ts @@ -14,12 +14,10 @@ export class RetrieverQueryEngine extends BaseQueryEngine { retriever: BaseRetriever; responseSynthesizer: BaseSynthesizer; nodePostprocessors: BaseNodePostprocessor[]; - preFilters?: unknown; constructor( retriever: BaseRetriever, responseSynthesizer?: BaseSynthesizer, - preFilters?: unknown, nodePostprocessors?: BaseNodePostprocessor[], ) { super(async (strOrQueryBundle, stream) => { @@ -52,7 +50,6 @@ export class RetrieverQueryEngine extends BaseQueryEngine { this.retriever = retriever; this.responseSynthesizer = responseSynthesizer || getResponseSynthesizer("compact"); - this.preFilters = preFilters; this.nodePostprocessors = nodePostprocessors || []; } diff --git a/packages/llamaindex/src/indices/keyword/index.ts b/packages/llamaindex/src/indices/keyword/index.ts index 911850616040b7bbdaf65814007606a4dee73a8c..369b52ed5e16fb10e4c9d42eae5edb26e5e3046c 100644 --- a/packages/llamaindex/src/indices/keyword/index.ts +++ b/packages/llamaindex/src/indices/keyword/index.ts @@ -246,7 +246,6 @@ export class KeywordTableIndex extends BaseIndex<KeywordTable> { return new RetrieverQueryEngine( retriever ?? this.asRetriever(), responseSynthesizer, - options?.preFilters, options?.nodePostprocessors, ); } diff --git a/packages/llamaindex/src/indices/summary/index.ts b/packages/llamaindex/src/indices/summary/index.ts index c449a129790835132175dfd92708b58a60049e63..a36b31978c068b9735de5c21c4180733385507d1 100644 --- a/packages/llamaindex/src/indices/summary/index.ts +++ b/packages/llamaindex/src/indices/summary/index.ts @@ -189,7 +189,6 @@ export class SummaryIndex extends BaseIndex<IndexList> { return new RetrieverQueryEngine( retriever, responseSynthesizer, - options?.preFilters, options?.nodePostprocessors, ); } diff --git a/packages/llamaindex/src/indices/vectorStore/index.ts b/packages/llamaindex/src/indices/vectorStore/index.ts index d0bb647ef17867d435c8171c479d85e50bf2b8c0..e6a232b962fde2f008880f6bf55bb6bba2fbb25f 100644 --- a/packages/llamaindex/src/indices/vectorStore/index.ts +++ b/packages/llamaindex/src/indices/vectorStore/index.ts @@ -298,9 +298,8 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> { similarityTopK, } = options ?? {}; return new RetrieverQueryEngine( - retriever ?? this.asRetriever({ similarityTopK }), + retriever ?? this.asRetriever({ similarityTopK, filters: preFilters }), responseSynthesizer, - preFilters, nodePostprocessors, ); } @@ -387,7 +386,7 @@ export type VectorIndexRetrieverOptions = { index: VectorStoreIndex; similarityTopK?: number | undefined; topK?: TopKMap | undefined; - filters?: MetadataFilters; + filters?: MetadataFilters | undefined; }; export class VectorIndexRetriever extends BaseRetriever {