From 75e1f6104ca30a270e8258d6d7bbc731a0657940 Mon Sep 17 00:00:00 2001
From: Huu Le <39040748+leehuwuj@users.noreply.github.com>
Date: Thu, 10 Oct 2024 17:58:12 +0700
Subject: [PATCH] fix: TypeScript templates do not create a new LlamaCloud
 index or upload a file to an existing index. (#356)

---
 .changeset/lucky-queens-smile.md              |  5 ++++
 .changeset/thirty-tips-drum.md                |  5 ++++
 e2e/shared/streaming_template.spec.ts         |  7 +++++
 .../llamaindex/typescript/documents/upload.ts | 28 +++++++++++++------
 .../llamaindex/typescript/streaming/events.ts |  2 +-
 .../vectordbs/python/llamacloud/generate.py   | 22 +++++----------
 .../python/llamacloud/query_filter.py         |  2 +-
 .../typescript/llamacloud/generate.ts         | 23 ++++++++++++---
 .../typescript/llamacloud/queryFilter.ts      |  2 +-
 templates/types/streaming/express/package.json |  2 +-
 .../fastapi/app/api/routers/chat_config.py    |  7 +++--
 .../ui/chat/widgets/LlamaCloudSelector.tsx    | 11 +++++++-
 templates/types/streaming/nextjs/package.json |  2 +-
 13 files changed, 83 insertions(+), 35 deletions(-)
 create mode 100644 .changeset/lucky-queens-smile.md
 create mode 100644 .changeset/thirty-tips-drum.md

diff --git a/.changeset/lucky-queens-smile.md b/.changeset/lucky-queens-smile.md
new file mode 100644
index 00000000..6795f9fb
--- /dev/null
+++ b/.changeset/lucky-queens-smile.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Fix: cannot query public documents from LlamaCloud
diff --git a/.changeset/thirty-tips-drum.md b/.changeset/thirty-tips-drum.md
new file mode 100644
index 00000000..4a45f8c7
--- /dev/null
+++ b/.changeset/thirty-tips-drum.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Fix: TypeScript templates cannot upload files to LlamaCloud
diff --git a/e2e/shared/streaming_template.spec.ts b/e2e/shared/streaming_template.spec.ts
index 91183a91..b34d4fed 100644
--- a/e2e/shared/streaming_template.spec.ts
+++ b/e2e/shared/streaming_template.spec.ts
@@ -27,6 +27,13 @@ const userMessage =
   dataSource !== "--no-files" ? "Physical standard for letters" : "Hello";
 
 test.describe(`Test streaming template ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
+  const isNode18 = process.version.startsWith("v18");
+  const isLlamaCloud = dataSource === "--llamacloud";
+  // LlamaCloud uses the File API, which is not supported on Node 18
+  if (isNode18 && isLlamaCloud) {
+    test.skip(true, "Skipping tests for Node 18 and LlamaCloud data source");
+  }
+
   let port: number;
   let externalPort: number;
   let cwd: string;
diff --git a/templates/components/llamaindex/typescript/documents/upload.ts b/templates/components/llamaindex/typescript/documents/upload.ts
index 4f205a60..a5a817e7 100644
--- a/templates/components/llamaindex/typescript/documents/upload.ts
+++ b/templates/components/llamaindex/typescript/documents/upload.ts
@@ -16,14 +16,26 @@ export async function uploadDocument(
     // trigger LlamaCloudIndex API to upload the file and run the pipeline
     const projectId = await index.getProjectId();
     const pipelineId = await index.getPipelineId();
-    return [
-      await LLamaCloudFileService.addFileToPipeline(
-        projectId,
-        pipelineId,
-        new File([fileBuffer], filename, { type: mimeType }),
-        { private: "true" },
-      ),
-    ];
+    try {
+      return [
+        await LLamaCloudFileService.addFileToPipeline(
+          projectId,
+          pipelineId,
+          new File([fileBuffer], filename, { type: mimeType }),
+          { private: "true" },
+        ),
+      ];
+    } catch (error) {
+      if (
+        error instanceof ReferenceError &&
+        error.message.includes("File is not defined")
+      ) {
+        throw new Error(
+          "File class is not supported in the current Node.js version. Please use Node.js 20 or higher.",
+        );
+      }
+      throw error;
+    }
   }
 
   // run the pipeline for other vector store indexes
diff --git a/templates/components/llamaindex/typescript/streaming/events.ts b/templates/components/llamaindex/typescript/streaming/events.ts
index c14af55d..538e0014 100644
--- a/templates/components/llamaindex/typescript/streaming/events.ts
+++ b/templates/components/llamaindex/typescript/streaming/events.ts
@@ -75,7 +75,7 @@ export function createCallbackManager(stream: StreamData) {
   callbackManager.on("retrieve-end", (data) => {
     const { nodes, query } = data.detail;
     appendSourceData(stream, nodes);
-    appendEventData(stream, `Retrieving context for query: '${query}'`);
+    appendEventData(stream, `Retrieving context for query: '${query.query}'`);
     appendEventData(
       stream,
       `Retrieved ${nodes.length} sources to use as context for the query`,
diff --git a/templates/components/vectordbs/python/llamacloud/generate.py b/templates/components/vectordbs/python/llamacloud/generate.py
index 2efec0ee..6be271bd 100644
--- a/templates/components/vectordbs/python/llamacloud/generate.py
+++ b/templates/components/vectordbs/python/llamacloud/generate.py
@@ -1,20 +1,18 @@
 # flake8: noqa: E402
 import os
+
 from dotenv import load_dotenv
 
 load_dotenv()
 
-from llama_cloud import PipelineType
-
-from app.settings import init_settings
-from llama_index.core.settings import Settings
-
+import logging
 from app.engine.index import get_client, get_index
-
-import logging
-from llama_index.core.readers import SimpleDirectoryReader
 from app.engine.service import LLamaCloudFileService
+from app.settings import init_settings
+from llama_cloud import PipelineType
+from llama_index.core.readers import SimpleDirectoryReader
+from llama_index.core.settings import Settings
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger()
 
@@ -80,13 +78,7 @@ def generate_datasource():
             f"Adding file {input_file} to pipeline {index.name} in project {index.project_name}"
         )
         LLamaCloudFileService.add_file_to_pipeline(
-            project_id,
-            pipeline_id,
-            f,
-            custom_metadata={
-                # Set private=false to mark the document as public (required for filtering)
-                "private": "false",
-            },
+            project_id, pipeline_id, f, custom_metadata={}
         )
 
     logger.info("Finished generating the index")
diff --git a/templates/components/vectordbs/python/llamacloud/query_filter.py b/templates/components/vectordbs/python/llamacloud/query_filter.py
index fdaabd6a..d90581c6 100644
--- a/templates/components/vectordbs/python/llamacloud/query_filter.py
+++ b/templates/components/vectordbs/python/llamacloud/query_filter.py
@@ -5,7 +5,7 @@ def generate_filters(doc_ids):
     """
     Generate public/private document filters based on the doc_ids and the vector store.
     """
-    # Using "is_empty" filter to include the documents don't have the "private" key because they're uploaded in LlamaCloud UI
+    # public documents (ingested by "poetry run generate" or in the LlamaCloud UI) don't have the "private" field
     public_doc_filter = MetadataFilter(
         key="private",
         value=None,
diff --git a/templates/components/vectordbs/typescript/llamacloud/generate.ts b/templates/components/vectordbs/typescript/llamacloud/generate.ts
index 7f3bd26a..9668f7b1 100644
--- a/templates/components/vectordbs/typescript/llamacloud/generate.ts
+++ b/templates/components/vectordbs/typescript/llamacloud/generate.ts
@@ -25,6 +25,8 @@ async function* walk(dir: string): AsyncGenerator<string> {
 
 async function loadAndIndex() {
   const index = await getDataSource();
+  // ensure the index is available or create a new one
+  await index.ensureIndex();
   const projectId = await index.getProjectId();
   const pipelineId = await index.getPipelineId();
 
@@ -32,10 +34,23 @@ async function loadAndIndex() {
   for await (const filePath of walk(DATA_DIR)) {
     const buffer = await fs.readFile(filePath);
     const filename = path.basename(filePath);
-    const file = new File([buffer], filename);
-    await LLamaCloudFileService.addFileToPipeline(projectId, pipelineId, file, {
-      private: "false",
-    });
+    try {
+      await LLamaCloudFileService.addFileToPipeline(
+        projectId,
+        pipelineId,
+        new File([buffer], filename),
+      );
+    } catch (error) {
+      if (
+        error instanceof ReferenceError &&
+        error.message.includes("File is not defined")
+      ) {
+        throw new Error(
+          "File class is not supported in the current Node.js version. Please use Node.js 20 or higher.",
+        );
+      }
+      throw error;
+    }
   }
 
   console.log(`Successfully uploaded documents to LlamaCloud!`);
diff --git a/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts b/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts
index 5f3da346..79298330 100644
--- a/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts
+++ b/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts
@@ -1,7 +1,7 @@
 import { CloudRetrieveParams, MetadataFilter } from "llamaindex";
 
 export function generateFilters(documentIds: string[]) {
-  // public documents don't have the "private" field or it's set to "false"
+  // public documents (ingested by "npm run generate" or in the LlamaCloud UI) don't have the "private" field
   const publicDocumentsFilter: MetadataFilter = {
     key: "private",
     operator: "is_empty",
diff --git a/templates/types/streaming/express/package.json b/templates/types/streaming/express/package.json
index 543bfc2e..f7cea6f0 100644
--- a/templates/types/streaming/express/package.json
+++ b/templates/types/streaming/express/package.json
@@ -21,7 +21,7 @@
     "dotenv": "^16.3.1",
     "duck-duck-scrape": "^2.2.5",
     "express": "^4.18.2",
-    "llamaindex": "0.6.18",
+    "llamaindex": "0.6.19",
     "pdf2json": "3.0.5",
     "ajv": "^8.12.0",
     "@e2b/code-interpreter": "0.0.9-beta.3",
diff --git a/templates/types/streaming/fastapi/app/api/routers/chat_config.py b/templates/types/streaming/fastapi/app/api/routers/chat_config.py
index ae88ca9a..228664d3 100644
--- a/templates/types/streaming/fastapi/app/api/routers/chat_config.py
+++ b/templates/types/streaming/fastapi/app/api/routers/chat_config.py
@@ -1,11 +1,10 @@
 import logging
 import os
 
-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
 
 from app.api.routers.models import ChatConfig
 
-
 config_router = r = APIRouter()
 
 logger = logging.getLogger("uvicorn")
@@ -27,6 +26,10 @@ try:
 
     @r.get("/llamacloud")
     async def chat_llama_cloud_config():
+        if not os.getenv("LLAMA_CLOUD_API_KEY"):
+            raise HTTPException(
+                status_code=500, detail="LlamaCloud API key is not configured"
+            )
         projects = LLamaCloudFileService.get_all_projects_with_pipelines()
         pipeline = os.getenv("LLAMA_CLOUD_INDEX_NAME")
         project = os.getenv("LLAMA_CLOUD_PROJECT_NAME")
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx
index c9ed7ecb..6d67081c 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx
@@ -66,7 +66,16 @@ export function LlamaCloudSelector({
   useEffect(() => {
     if (process.env.NEXT_PUBLIC_USE_LLAMACLOUD === "true" && !config) {
       fetch(`${backend}/api/chat/config/llamacloud`)
-        .then((response) => response.json())
+        .then((response) => {
+          if (!response.ok) {
+            return response.json().then((errorData) => {
+              window.alert(
+                `Error: ${JSON.stringify(errorData) || "Unknown error occurred"}`,
+              );
+            });
+          }
+          return response.json();
+        })
        .then((data) => {
           const pipeline = defaultPipeline ?? data.pipeline; // defaultPipeline overrides the pipeline set in .env
           setConfig({ ...data, pipeline });
diff --git a/templates/types/streaming/nextjs/package.json b/templates/types/streaming/nextjs/package.json
index 472f1636..c8f694ed 100644
--- a/templates/types/streaming/nextjs/package.json
+++ b/templates/types/streaming/nextjs/package.json
@@ -27,7 +27,7 @@
     "duck-duck-scrape": "^2.2.5",
     "formdata-node": "^6.0.3",
     "got": "^14.4.1",
-    "llamaindex": "0.6.18",
+    "llamaindex": "0.6.19",
     "lucide-react": "^0.294.0",
     "next": "^14.2.4",
     "react": "^18.2.0",
-- 
GitLab
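
A note on the two try/catch blocks above (upload.ts and generate.ts): both
guard the same failure mode. The global File class only ships unflagged with
Node.js 20 and later, so new File(...) on Node 18 throws "ReferenceError:
File is not defined". Below is a minimal sketch of the same guard written as
an upfront capability check instead of catch-and-rethrow; the toFile helper
name is illustrative, not part of the patch:

    // Illustrative helper (not part of the patch): fail fast with a clear
    // message when the runtime lacks the global File class, which is
    // available without flags since Node.js 20.
    function toFile(buffer: Buffer, filename: string, mimeType?: string): File {
      if (typeof File === "undefined") {
        // On Node 18, new File(...) throws "ReferenceError: File is not defined"
        throw new Error(
          "File class is not supported in the current Node.js version. Please use Node.js 20 or higher.",
        );
      }
      return new File([buffer], filename, { type: mimeType });
    }

The catch-and-rethrow in the patch reaches the same result; an upfront check
simply avoids matching on the exact ReferenceError message, which is an
engine detail.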
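
The comment rewrites in query_filter.py and queryFilter.ts document the
convention the privacy filters rely on: files ingested by the generate script
or through the LlamaCloud UI carry no "private" metadata field, while files
uploaded from the chat are tagged private: "true". Below is a sketch of how
the two filters combine at retrieval time, assuming the key/value/operator
filter shape and the "is_empty"/"in" operators shown in the diff; the
value: null line mirrors the Python template's value=None and is an
assumption here, as is the MetadataFilters export:

    import { MetadataFilter, MetadataFilters } from "llamaindex";

    // Retrieve from public documents (no "private" field) and, if the user
    // selected documents in the chat, also from those private documents.
    function buildRetrievalFilters(selectedIds: string[]): MetadataFilters {
      const publicDocs: MetadataFilter = {
        key: "private",
        value: null, // assumed to be ignored by the "is_empty" operator
        operator: "is_empty",
      };
      if (selectedIds.length === 0) {
        return { filters: [publicDocs] };
      }
      const selectedDocs: MetadataFilter = {
        key: "doc_id",
        value: selectedIds,
        operator: "in",
      };
      // "or": a node is retrievable if it is public or explicitly selected
      return { filters: [publicDocs, selectedDocs], condition: "or" };
    }

This is also why generate.py now uploads with custom_metadata={} instead of
private: "false": leaving the field off entirely is what lets the same
"is_empty" filter match documents added in the LlamaCloud UI, which never had
the field to begin with.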