From cba54061a26662d63c09c050e5d25b4d64d66dc4 Mon Sep 17 00:00:00 2001
From: Fabian Wimmer <github@insightby.ai>
Date: Fri, 21 Jun 2024 01:32:20 +0200
Subject: [PATCH] fix: every Llama Parse job being called "blob" (#946)

Co-authored-by: Alex Yang <himself65@outlook.com>
---
 .changeset/small-goats-serve.md                   |  5 +++++
 .../llamaindex/src/readers/LlamaParseReader.ts    | 15 ++++++++++-----
 packages/llamaindex/src/readers/type.ts           |  8 ++++++--
 3 files changed, 21 insertions(+), 7 deletions(-)
 create mode 100644 .changeset/small-goats-serve.md

diff --git a/.changeset/small-goats-serve.md b/.changeset/small-goats-serve.md
new file mode 100644
index 000000000..e082e2ccc
--- /dev/null
+++ b/.changeset/small-goats-serve.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+fix: pass the file name to Llama Parse so that jobs are no longer all named "blob"
diff --git a/packages/llamaindex/src/readers/LlamaParseReader.ts b/packages/llamaindex/src/readers/LlamaParseReader.ts
index 4ccd9e92c..7aa27a6fa 100644
--- a/packages/llamaindex/src/readers/LlamaParseReader.ts
+++ b/packages/llamaindex/src/readers/LlamaParseReader.ts
@@ -160,16 +160,17 @@ export class LlamaParseReader extends FileReader {
   }
 
   // Create a job for the LlamaParse API
-  private async createJob(data: Buffer): Promise<string> {
+  private async createJob(data: Buffer, fileName?: string): Promise<string> {
     // Load data, set the mime type
     const { mimeType, extension } = await this.getMimeType(data);
 
     if (this.verbose) {
-      console.log(`Starting load for ${extension} file`);
+      const name = fileName ? fileName : extension;
+      console.log(`Starting load for ${name} file`);
     }
 
     const body = new FormData();
-    body.set("file", new Blob([data], { type: mimeType }));
+    body.set("file", new Blob([data], { type: mimeType }), fileName);
 
     const LlamaParseBodyParams = {
       language: this.language,
@@ -272,11 +273,15 @@ export class LlamaParseReader extends FileReader {
    * To be used with resultType = "text" and "markdown"
    *
    * @param {Buffer} fileContent - The content of the file to be loaded.
+   * @param {string} [fileName] - Optional file name sent with the upload; when omitted, the job is named "blob".
    * @return {Promise<Document[]>} A Promise object that resolves to an array of Document objects.
    */
-  async loadDataAsContent(fileContent: Buffer): Promise<Document[]> {
+  async loadDataAsContent(
+    fileContent: Buffer,
+    fileName?: string,
+  ): Promise<Document[]> {
     // Creates a job for the file
-    const jobId = await this.createJob(fileContent);
+    const jobId = await this.createJob(fileContent, fileName);
     if (this.verbose) {
       console.log(`Started parsing the file under job id ${jobId}`);
     }
diff --git a/packages/llamaindex/src/readers/type.ts b/packages/llamaindex/src/readers/type.ts
index b6e3e9912..90549f3a7 100644
--- a/packages/llamaindex/src/readers/type.ts
+++ b/packages/llamaindex/src/readers/type.ts
@@ -12,11 +12,15 @@ export interface BaseReader {
  * A FileReader takes file paths and imports data into Document objects.
  */
 export abstract class FileReader implements BaseReader {
-  abstract loadDataAsContent(fileContent: Buffer): Promise<Document[]>;
+  abstract loadDataAsContent(
+    fileContent: Buffer,
+    fileName?: string,
+  ): Promise<Document[]>;
 
   async loadData(filePath: string): Promise<Document[]> {
     const fileContent = await fs.readFile(filePath);
-    const docs = await this.loadDataAsContent(fileContent);
+    const fileName = path.basename(filePath);
+    const docs = await this.loadDataAsContent(fileContent, fileName);
     docs.forEach(FileReader.addMetaData(filePath));
     return docs;
   }
-- 
GitLab