From cba54061a26662d63c09c050e5d25b4d64d66dc4 Mon Sep 17 00:00:00 2001 From: Fabian Wimmer <github@insightby.ai> Date: Fri, 21 Jun 2024 01:32:20 +0200 Subject: [PATCH] fix: every Llama Parse job being called "blob" (#946) Co-authored-by: Alex Yang <himself65@outlook.com> --- .changeset/small-goats-serve.md | 5 +++++ .../llamaindex/src/readers/LlamaParseReader.ts | 15 ++++++++++----- packages/llamaindex/src/readers/type.ts | 8 ++++++-- 3 files changed, 21 insertions(+), 7 deletions(-) create mode 100644 .changeset/small-goats-serve.md diff --git a/.changeset/small-goats-serve.md b/.changeset/small-goats-serve.md new file mode 100644 index 000000000..e082e2ccc --- /dev/null +++ b/.changeset/small-goats-serve.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +fix: every Llama Parse job being called "blob" diff --git a/packages/llamaindex/src/readers/LlamaParseReader.ts b/packages/llamaindex/src/readers/LlamaParseReader.ts index 4ccd9e92c..7aa27a6fa 100644 --- a/packages/llamaindex/src/readers/LlamaParseReader.ts +++ b/packages/llamaindex/src/readers/LlamaParseReader.ts @@ -160,16 +160,17 @@ export class LlamaParseReader extends FileReader { } // Create a job for the LlamaParse API - private async createJob(data: Buffer): Promise<string> { + private async createJob(data: Buffer, fileName?: string): Promise<string> { // Load data, set the mime type const { mimeType, extension } = await this.getMimeType(data); if (this.verbose) { - console.log(`Starting load for ${extension} file`); + const name = fileName ? fileName : extension; + console.log(`Starting load for ${name} file`); } const body = new FormData(); - body.set("file", new Blob([data], { type: mimeType })); + body.set("file", new Blob([data], { type: mimeType }), fileName); const LlamaParseBodyParams = { language: this.language, @@ -272,11 +273,15 @@ export class LlamaParseReader extends FileReader { * To be used with resultType = "text" and "markdown" * * @param {Buffer} fileContent - The content of the file to be loaded. + * @param {string} [fileName] - The optional name of the file to be loaded. * @return {Promise<Document[]>} A Promise object that resolves to an array of Document objects. */ - async loadDataAsContent(fileContent: Buffer): Promise<Document[]> { + async loadDataAsContent( + fileContent: Buffer, + fileName?: string, + ): Promise<Document[]> { // Creates a job for the file - const jobId = await this.createJob(fileContent); + const jobId = await this.createJob(fileContent, fileName); if (this.verbose) { console.log(`Started parsing the file under job id ${jobId}`); } diff --git a/packages/llamaindex/src/readers/type.ts b/packages/llamaindex/src/readers/type.ts index b6e3e9912..90549f3a7 100644 --- a/packages/llamaindex/src/readers/type.ts +++ b/packages/llamaindex/src/readers/type.ts @@ -12,11 +12,15 @@ export interface BaseReader { * A FileReader takes file paths and imports data into Document objects. */ export abstract class FileReader implements BaseReader { - abstract loadDataAsContent(fileContent: Buffer): Promise<Document[]>; + abstract loadDataAsContent( + fileContent: Buffer, + fileName?: string, + ): Promise<Document[]>; async loadData(filePath: string): Promise<Document[]> { const fileContent = await fs.readFile(filePath); - const docs = await this.loadDataAsContent(fileContent); + const fileName = path.basename(filePath); + const docs = await this.loadDataAsContent(fileContent, fileName); docs.forEach(FileReader.addMetaData(filePath)); return docs; } -- GitLab