From fcbf18344c0155903f2254ea1170a3f5ee7a392d Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Mon, 19 Aug 2024 14:01:41 +0700
Subject: [PATCH] feat: implement llamacloud file service (#1125)

---
 .changeset/kind-news-argue.md                 |   5 +
 .../src/cloud/LLamaCloudFileService.ts        | 127 +++++++++++++++++++
 packages/llamaindex/src/cloud/index.ts        |   1 +
 3 files changed, 133 insertions(+)
 create mode 100644 .changeset/kind-news-argue.md
 create mode 100644 packages/llamaindex/src/cloud/LLamaCloudFileService.ts

diff --git a/.changeset/kind-news-argue.md b/.changeset/kind-news-argue.md
new file mode 100644
index 000000000..a634056ac
--- /dev/null
+++ b/.changeset/kind-news-argue.md
@@ -0,0 +1,5 @@
+---
+"llamaindex": patch
+---
+
+implement llamacloud file service
diff --git a/packages/llamaindex/src/cloud/LLamaCloudFileService.ts b/packages/llamaindex/src/cloud/LLamaCloudFileService.ts
new file mode 100644
index 000000000..e44712737
--- /dev/null
+++ b/packages/llamaindex/src/cloud/LLamaCloudFileService.ts
@@ -0,0 +1,127 @@
+import {
+  FilesService,
+  PipelinesService,
+  ProjectsService,
+} from "@llamaindex/cloud/api";
+import { initService } from "./utils.js";
+
+/**
+ * Static helpers for working with files, projects and pipelines in LlamaCloud.
+ */
+export class LLamaCloudFileService {
+  /**
+   * Get list of projects, each project contains a list of pipelines.
+   *
+   * The two listings are independent, so they are requested concurrently.
+   *
+   * @returns Every project extended with a `pipelines` array holding the
+   * pipelines whose `project_id` matches it. Best-effort: if either request
+   * fails, the error is logged and an empty array is returned.
+   */
+  public static async getAllProjectsWithPipelines() {
+    initService();
+    try {
+      const [projects, pipelines] = await Promise.all([
+        ProjectsService.listProjectsApiV1ProjectsGet(),
+        PipelinesService.searchPipelinesApiV1PipelinesGet(),
+      ]);
+      return projects.map((project) => ({
+        ...project,
+        pipelines: pipelines.filter((p) => p.project_id === project.id),
+      }));
+    } catch (error) {
+      console.error("Error listing projects and pipelines:", error);
+      return [];
+    }
+  }
+
+  /**
+   * Upload a file to a pipeline in LlamaCloud and wait for its ingestion.
+   *
+   * @param projectId - Project the file is uploaded to.
+   * @param pipelineId - Pipeline the uploaded file is added to.
+   * @param uploadFile - Content to upload.
+   * @param customMetadata - Extra metadata stored with the pipeline file. It is
+   * spread over the default `file_id` entry, so a `file_id` key supplied here
+   * replaces the default.
+   * @returns The id of the uploaded file once its status is `SUCCESS`.
+   * @throws Error if the status becomes `ERROR`, or if it is still not
+   * `SUCCESS` after all polling attempts.
+   */
+  public static async addFileToPipeline(
+    projectId: string,
+    pipelineId: string,
+    uploadFile: File | Blob,
+    customMetadata: Record<string, any> = {},
+  ) {
+    initService();
+    const file = await FilesService.uploadFileApiV1FilesPost({
+      projectId,
+      formData: {
+        upload_file: uploadFile,
+      },
+    });
+    const files = [
+      {
+        file_id: file.id,
+        custom_metadata: { file_id: file.id, ...customMetadata },
+      },
+    ];
+    await PipelinesService.addFilesToPipelineApiV1PipelinesPipelineIdFilesPut({
+      pipelineId,
+      requestBody: files,
+    });
+
+    // Poll the processing status: up to 20 checks, 100ms apart (about 2s of
+    // waiting on top of the request time).
+    const maxAttempts = 20;
+    const pollIntervalMs = 100;
+    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+      const result =
+        await PipelinesService.getPipelineFileStatusApiV1PipelinesPipelineIdFilesFileIdStatusGet(
+          {
+            pipelineId,
+            fileId: file.id,
+          },
+        );
+      if (result.status === "ERROR") {
+        throw new Error(`File processing failed: ${JSON.stringify(result)}`);
+      }
+      if (result.status === "SUCCESS") {
+        // File is ingested - return the file id
+        return file.id;
+      }
+      // No point sleeping after the final check; fail right away instead.
+      if (attempt < maxAttempts) {
+        await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
+      }
+    }
+    throw new Error(
+      `File processing did not complete after ${maxAttempts} attempts.`,
+    );
+  }
+
+  /**
+   * Get download URL for a file in LlamaCloud.
+   *
+   * @param pipelineId - Pipeline whose files are searched.
+   * @param filename - Name to look up (exact match on the file's `name`).
+   * @returns The `url` of the file content, or `null` when no file in the
+   * pipeline has that name or the first match has no `file_id`.
+   */
+  public static async getFileUrl(pipelineId: string, filename: string) {
+    initService();
+    const allPipelineFiles =
+      await PipelinesService.listPipelineFilesApiV1PipelinesPipelineIdFilesGet({
+        pipelineId,
+      });
+    const file = allPipelineFiles.find((f) => f.name === filename);
+    if (!file?.file_id) return null;
+    const fileContent =
+      await FilesService.readFileContentApiV1FilesIdContentGet({
+        id: file.file_id,
+        projectId: file.project_id,
+      });
+    return fileContent.url;
+  }
+}
diff --git a/packages/llamaindex/src/cloud/index.ts b/packages/llamaindex/src/cloud/index.ts
index f332433f2..d1c16b0f0 100644
--- a/packages/llamaindex/src/cloud/index.ts
+++ b/packages/llamaindex/src/cloud/index.ts
@@ -1,4 +1,5 @@
 export type { CloudConstructorParams } from "./constants.js";
+export { LLamaCloudFileService } from "./LLamaCloudFileService.js";
 export { LlamaCloudIndex } from "./LlamaCloudIndex.js";
 export {
   LlamaCloudRetriever,
-- 
GitLab