From da5cfc42e590249199f47487fd5f28a131c7b98c Mon Sep 17 00:00:00 2001 From: Marcus Schiesser <mail@marcusschiesser.de> Date: Tue, 30 Jul 2024 17:19:32 +0200 Subject: [PATCH] fix: integrate with `create-llama` (#1088) Co-authored-by: Alex Yang <himself65@outlook.com> --- .changeset/soft-kiwis-protect.md | 5 +++++ .changeset/spotty-shoes-poke.md | 5 +++++ .../llamaindex/src/cloud/LlamaCloudRetriever.ts | 8 +++++--- .../src/engines/chat/ContextChatEngine.ts | 2 +- .../llamaindex/src/indices/vectorStore/index.ts | 16 ++++++++++++---- packages/llamaindex/src/readers/PDFReader.ts | 4 ++++ packages/llamaindex/src/readers/type.ts | 3 +-- 7 files changed, 33 insertions(+), 10 deletions(-) create mode 100644 .changeset/soft-kiwis-protect.md create mode 100644 .changeset/spotty-shoes-poke.md diff --git a/.changeset/soft-kiwis-protect.md b/.changeset/soft-kiwis-protect.md new file mode 100644 index 000000000..3b6715807 --- /dev/null +++ b/.changeset/soft-kiwis-protect.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +Add metadatafilter options to retriever constructors diff --git a/.changeset/spotty-shoes-poke.md b/.changeset/spotty-shoes-poke.md new file mode 100644 index 000000000..77fd17019 --- /dev/null +++ b/.changeset/spotty-shoes-poke.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +Fix system prompt not used in ContextChatEngine diff --git a/packages/llamaindex/src/cloud/LlamaCloudRetriever.ts b/packages/llamaindex/src/cloud/LlamaCloudRetriever.ts index 74f704139..0f63fdf56 100644 --- a/packages/llamaindex/src/cloud/LlamaCloudRetriever.ts +++ b/packages/llamaindex/src/cloud/LlamaCloudRetriever.ts @@ -15,8 +15,8 @@ import { initService } from "./utils.js"; export type CloudRetrieveParams = Omit< RetrievalParams, - "query" | "searchFilters" | "className" | "denseSimilarityTopK" -> & { similarityTopK?: number }; + "query" | "search_filters" | "dense_similarity_top_k" +> & { similarityTopK?: number; filters?: MetadataFilters }; export class LlamaCloudRetriever implements BaseRetriever { clientParams: ClientParams; @@ -84,7 +84,9 @@ export class LlamaCloudRetriever implements BaseRetriever { requestBody: { ...this.retrieveParams, query: extractText(query), - search_filters: preFilters as MetadataFilters, + search_filters: + this.retrieveParams.filters ?? (preFilters as MetadataFilters), + dense_similarity_top_k: this.retrieveParams.similarityTopK, }, }); diff --git a/packages/llamaindex/src/engines/chat/ContextChatEngine.ts b/packages/llamaindex/src/engines/chat/ContextChatEngine.ts index 5230981ea..792c31f45 100644 --- a/packages/llamaindex/src/engines/chat/ContextChatEngine.ts +++ b/packages/llamaindex/src/engines/chat/ContextChatEngine.ts @@ -126,7 +126,7 @@ export class ContextChatEngine extends PromptMixin implements ChatEngine { if (!this.systemPrompt) return message; return { ...message, - content: this.systemPrompt.trim() + "\n" + message.content, + content: this.systemPrompt.trim() + "\n" + extractText(message.content), }; } } diff --git a/packages/llamaindex/src/indices/vectorStore/index.ts b/packages/llamaindex/src/indices/vectorStore/index.ts index c4902da99..3959a4678 100644 --- a/packages/llamaindex/src/indices/vectorStore/index.ts +++ b/packages/llamaindex/src/indices/vectorStore/index.ts @@ -386,6 +386,7 @@ export type VectorIndexRetrieverOptions = { index: VectorStoreIndex; similarityTopK?: number; topK?: TopKMap; + filters?: MetadataFilters; }; export class VectorIndexRetriever implements BaseRetriever { @@ -393,14 +394,21 @@ export class VectorIndexRetriever implements BaseRetriever { topK: TopKMap; serviceContext?: ServiceContext; - - constructor({ index, similarityTopK, topK }: VectorIndexRetrieverOptions) { + filters?: MetadataFilters; + + constructor({ + index, + similarityTopK, + topK, + filters, + }: VectorIndexRetrieverOptions) { this.index = index; this.serviceContext = this.index.serviceContext; this.topK = topK ?? { [ModalityType.TEXT]: similarityTopK ?? DEFAULT_SIMILARITY_TOP_K, [ModalityType.IMAGE]: DEFAULT_SIMILARITY_TOP_K, }; + this.filters = filters; } /** @@ -443,7 +451,7 @@ export class VectorIndexRetriever implements BaseRetriever { query: MessageContent, type: ModalityType, vectorStore: VectorStore, - preFilters?: MetadataFilters, + filters?: MetadataFilters, ): Promise<NodeWithScore[]> { // convert string message to multi-modal format if (typeof query === "string") { @@ -460,7 +468,7 @@ export class VectorIndexRetriever implements BaseRetriever { queryEmbedding, mode: VectorStoreQueryMode.DEFAULT, similarityTopK: this.topK[type], - filters: preFilters ?? undefined, + filters: this.filters ?? filters ?? undefined, }); nodes = nodes.concat(this.buildNodeListFromQueryResult(result)); } diff --git a/packages/llamaindex/src/readers/PDFReader.ts b/packages/llamaindex/src/readers/PDFReader.ts index 12f11cc31..3e5e3e448 100644 --- a/packages/llamaindex/src/readers/PDFReader.ts +++ b/packages/llamaindex/src/readers/PDFReader.ts @@ -6,6 +6,10 @@ import { FileReader } from "./type.js"; */ export class PDFReader extends FileReader { async loadDataAsContent(content: Uint8Array): Promise<Document[]> { + // XXX: create a new Uint8Array to prevent "Please provide binary data as `Uint8Array`, rather than `Buffer`." error if a Buffer passed + if (content instanceof Buffer) { + content = new Uint8Array(content); + } const { totalPages, text } = await readPDF(content); return text.map((text, page) => { const metadata = { diff --git a/packages/llamaindex/src/readers/type.ts b/packages/llamaindex/src/readers/type.ts index bbaa6f3dd..80fe584e9 100644 --- a/packages/llamaindex/src/readers/type.ts +++ b/packages/llamaindex/src/readers/type.ts @@ -18,8 +18,7 @@ export abstract class FileReader implements BaseReader { ): Promise<Document[]>; async loadData(filePath: string): Promise<Document[]> { - // XXX: create a new Uint8Array to prevent "Please provide binary data as `Uint8Array`, rather than `Buffer`." error in PDFReader - const fileContent = new Uint8Array(await fs.readFile(filePath)); + const fileContent = await fs.readFile(filePath); const fileName = path.basename(filePath); const docs = await this.loadDataAsContent(fileContent, fileName); docs.forEach(FileReader.addMetaData(filePath)); -- GitLab