From fcbf18344c0155903f2254ea1170a3f5ee7a392d Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Mon, 19 Aug 2024 14:01:41 +0700
Subject: [PATCH] feat: implement llamacloud file service (#1125)

---
 .changeset/kind-news-argue.md                 |  5 +
 .../src/cloud/LLamaCloudFileService.ts        | 99 +++++++++++++++++++
 packages/llamaindex/src/cloud/index.ts        |  1 +
 3 files changed, 105 insertions(+)
 create mode 100644 .changeset/kind-news-argue.md
 create mode 100644 packages/llamaindex/src/cloud/LLamaCloudFileService.ts

diff --git a/.changeset/kind-news-argue.md b/.changeset/kind-news-argue.md
new file mode 100644
index 000000000..a634056ac
--- /dev/null
+++ b/.changeset/kind-news-argue.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+implement llamacloud file service
diff --git a/packages/llamaindex/src/cloud/LLamaCloudFileService.ts b/packages/llamaindex/src/cloud/LLamaCloudFileService.ts
new file mode 100644
index 000000000..e44712737
--- /dev/null
+++ b/packages/llamaindex/src/cloud/LLamaCloudFileService.ts
@@ -0,0 +1,99 @@
+import {
+  FilesService,
+  PipelinesService,
+  ProjectsService,
+} from "@llamaindex/cloud/api";
+import { initService } from "./utils.js";
+
+export class LLamaCloudFileService {
+  /**
+   * Get list of projects, each project contains a list of pipelines.
+   * Both listings are requested concurrently; if a request fails, the error
+   * is logged and an empty list is returned instead of throwing.
+   */
+  public static async getAllProjectsWithPipelines() {
+    initService();
+    try {
+      const [projects, pipelines] = await Promise.all([
+        ProjectsService.listProjectsApiV1ProjectsGet(),
+        PipelinesService.searchPipelinesApiV1PipelinesGet(),
+      ]);
+      return projects.map((project) => ({
+        ...project,
+        pipelines: pipelines.filter((p) => p.project_id === project.id),
+      }));
+    } catch (error) {
+      console.error("Error listing projects and pipelines:", error);
+      return [];
+    }
+  }
+
+  /**
+   * Upload a file to LlamaCloud, add it to a pipeline and wait for ingestion.
+   * The ingestion status is polled up to 20 times, 100ms apart.
+   * @param customMetadata merged over the default `{ file_id }` metadata
+   * @returns the id of the uploaded file once its status is "SUCCESS"
+   * @throws Error if the status is "ERROR" or polling runs out of attempts
+   */
+  public static async addFileToPipeline(
+    projectId: string,
+    pipelineId: string,
+    uploadFile: File | Blob,
+    customMetadata: Record<string, any> = {},
+  ) {
+    initService();
+    const file = await FilesService.uploadFileApiV1FilesPost({
+      projectId,
+      formData: { upload_file: uploadFile },
+    });
+    await PipelinesService.addFilesToPipelineApiV1PipelinesPipelineIdFilesPut({
+      pipelineId,
+      requestBody: [
+        {
+          file_id: file.id,
+          custom_metadata: { file_id: file.id, ...customMetadata },
+        },
+      ],
+    });
+
+    const maxAttempts = 20;
+    const pollIntervalMs = 100;
+    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+      const result =
+        await PipelinesService.getPipelineFileStatusApiV1PipelinesPipelineIdFilesFileIdStatusGet(
+          { pipelineId, fileId: file.id },
+        );
+      if (result.status === "ERROR") {
+        throw new Error(`File processing failed: ${JSON.stringify(result)}`);
+      }
+      if (result.status === "SUCCESS") return file.id;
+      // Pause only between attempts: no pointless delay before giving up.
+      if (attempt < maxAttempts) {
+        await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
+      }
+    }
+    throw new Error(
+      `File processing did not complete after ${maxAttempts} attempts.`,
+    );
+  }
+
+  /**
+   * Get download URL for a file in LlamaCloud.
+   * @returns the URL of the first pipeline file named `filename`, or `null`
+   *   when no file has that name or the matched file lacks a file id
+   */
+  public static async getFileUrl(pipelineId: string, filename: string) {
+    initService();
+    const allPipelineFiles =
+      await PipelinesService.listPipelineFilesApiV1PipelinesPipelineIdFilesGet({
+        pipelineId,
+      });
+    const file = allPipelineFiles.find((f) => f.name === filename);
+    if (!file?.file_id) return null;
+    const { url } = await FilesService.readFileContentApiV1FilesIdContentGet({
+      id: file.file_id,
+      projectId: file.project_id,
+    });
+    return url;
+  }
+}
diff --git a/packages/llamaindex/src/cloud/index.ts b/packages/llamaindex/src/cloud/index.ts
index f332433f2..d1c16b0f0 100644
--- a/packages/llamaindex/src/cloud/index.ts
+++ b/packages/llamaindex/src/cloud/index.ts
@@ -1,4 +1,5 @@
 export type { CloudConstructorParams } from "./constants.js";
+export { LLamaCloudFileService } from "./LLamaCloudFileService.js";
 export { LlamaCloudIndex } from "./LlamaCloudIndex.js";
 export {
   LlamaCloudRetriever,
-- 
GitLab