diff --git a/.changeset/lucky-queens-smile.md b/.changeset/lucky-queens-smile.md new file mode 100644 index 0000000000000000000000000000000000000000..6795f9fb0b92bb4376d04f8c46fafcf79a4de0e6 --- /dev/null +++ b/.changeset/lucky-queens-smile.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Fix inability to query public documents from LlamaCloud diff --git a/.changeset/thirty-tips-drum.md b/.changeset/thirty-tips-drum.md new file mode 100644 index 0000000000000000000000000000000000000000..4a45f8c76cade490f9571f868ee2bdbafa78e019 --- /dev/null +++ b/.changeset/thirty-tips-drum.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Fix TypeScript templates failing to upload files to LlamaCloud diff --git a/e2e/shared/streaming_template.spec.ts b/e2e/shared/streaming_template.spec.ts index 91183a9162a630efa449223e99b2b78fe4f3d5d7..b34d4fedeeff7c3c328c83d1e012a9196953cae1 100644 --- a/e2e/shared/streaming_template.spec.ts +++ b/e2e/shared/streaming_template.spec.ts @@ -27,6 +27,13 @@ const userMessage = dataSource !== "--no-files" ?
"Physical standard for letters" : "Hello"; test.describe(`Test streaming template ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => { + const isNode18 = process.version.startsWith("v18"); + const isLlamaCloud = dataSource === "--llamacloud"; + // llamacloud is using File API which is not supported on node 18 + if (isNode18 && isLlamaCloud) { + test.skip(true, "Skipping tests for Node 18 and LlamaCloud data source"); + } + let port: number; let externalPort: number; let cwd: string; diff --git a/templates/components/llamaindex/typescript/documents/upload.ts b/templates/components/llamaindex/typescript/documents/upload.ts index 4f205a60107ca9d166758520ae8e18edb0a311a4..a5a817e772a44ae3c90e46fd4ecae1216b2be831 100644 --- a/templates/components/llamaindex/typescript/documents/upload.ts +++ b/templates/components/llamaindex/typescript/documents/upload.ts @@ -16,14 +16,26 @@ export async function uploadDocument( // trigger LlamaCloudIndex API to upload the file and run the pipeline const projectId = await index.getProjectId(); const pipelineId = await index.getPipelineId(); - return [ - await LLamaCloudFileService.addFileToPipeline( - projectId, - pipelineId, - new File([fileBuffer], filename, { type: mimeType }), - { private: "true" }, - ), - ]; + try { + return [ + await LLamaCloudFileService.addFileToPipeline( + projectId, + pipelineId, + new File([fileBuffer], filename, { type: mimeType }), + { private: "true" }, + ), + ]; + } catch (error) { + if ( + error instanceof ReferenceError && + error.message.includes("File is not defined") + ) { + throw new Error( + "File class is not supported in the current Node.js version. 
Please use Node.js 20 or higher.", + ); + } + throw error; + } } // run the pipeline for other vector store indexes diff --git a/templates/components/llamaindex/typescript/streaming/events.ts b/templates/components/llamaindex/typescript/streaming/events.ts index c14af55dc9f798dbf5dcfd6b9f834b91c9716009..538e001458e1292ba209ab9fb13bdea6dfb16815 100644 --- a/templates/components/llamaindex/typescript/streaming/events.ts +++ b/templates/components/llamaindex/typescript/streaming/events.ts @@ -75,7 +75,7 @@ export function createCallbackManager(stream: StreamData) { callbackManager.on("retrieve-end", (data) => { const { nodes, query } = data.detail; appendSourceData(stream, nodes); - appendEventData(stream, `Retrieving context for query: '${query}'`); + appendEventData(stream, `Retrieving context for query: '${query.query}'`); appendEventData( stream, `Retrieved ${nodes.length} sources to use as context for the query`, diff --git a/templates/components/vectordbs/python/llamacloud/generate.py b/templates/components/vectordbs/python/llamacloud/generate.py index 2efec0ee15971c5da08bce08e9c2ff98a3e35319..6be271bd5c3cff9056541cd2767e6809f0935e5f 100644 --- a/templates/components/vectordbs/python/llamacloud/generate.py +++ b/templates/components/vectordbs/python/llamacloud/generate.py @@ -1,20 +1,18 @@ # flake8: noqa: E402 import os + from dotenv import load_dotenv load_dotenv() -from llama_cloud import PipelineType - -from app.settings import init_settings -from llama_index.core.settings import Settings - +import logging from app.engine.index import get_client, get_index - -import logging -from llama_index.core.readers import SimpleDirectoryReader from app.engine.service import LLamaCloudFileService +from app.settings import init_settings +from llama_cloud import PipelineType +from llama_index.core.readers import SimpleDirectoryReader +from llama_index.core.settings import Settings logging.basicConfig(level=logging.INFO) logger = logging.getLogger() @@ -80,13 +78,7 @@ def 
generate_datasource(): f"Adding file {input_file} to pipeline {index.name} in project {index.project_name}" ) LLamaCloudFileService.add_file_to_pipeline( - project_id, - pipeline_id, - f, - custom_metadata={ - # Set private=false to mark the document as public (required for filtering) - "private": "false", - }, + project_id, pipeline_id, f, custom_metadata={} ) logger.info("Finished generating the index") diff --git a/templates/components/vectordbs/python/llamacloud/query_filter.py b/templates/components/vectordbs/python/llamacloud/query_filter.py index fdaabd6a2f5b8aff585d0708bef36b23238fee65..d90581c65ef9704515b44d11c0e1d26a29d3d1e3 100644 --- a/templates/components/vectordbs/python/llamacloud/query_filter.py +++ b/templates/components/vectordbs/python/llamacloud/query_filter.py @@ -5,7 +5,7 @@ def generate_filters(doc_ids): """ Generate public/private document filters based on the doc_ids and the vector store. """ - # Using "is_empty" filter to include the documents don't have the "private" key because they're uploaded in LlamaCloud UI + # public documents (ingested by "poetry run generate" or in the LlamaCloud UI) don't have the "private" field public_doc_filter = MetadataFilter( key="private", value=None, diff --git a/templates/components/vectordbs/typescript/llamacloud/generate.ts b/templates/components/vectordbs/typescript/llamacloud/generate.ts index 7f3bd26a3bca4b48201f3633f30d7ed2f6abaa9e..9668f7b1b03e1e5df5086b07c09ba7ae8200bb82 100644 --- a/templates/components/vectordbs/typescript/llamacloud/generate.ts +++ b/templates/components/vectordbs/typescript/llamacloud/generate.ts @@ -25,6 +25,8 @@ async function* walk(dir: string): AsyncGenerator<string> { async function loadAndIndex() { const index = await getDataSource(); + // ensure the index is available or create a new one + await index.ensureIndex(); const projectId = await index.getProjectId(); const pipelineId = await index.getPipelineId(); @@ -32,10 +34,23 @@ async function loadAndIndex() { for await 
(const filePath of walk(DATA_DIR)) { const buffer = await fs.readFile(filePath); const filename = path.basename(filePath); - const file = new File([buffer], filename); - await LLamaCloudFileService.addFileToPipeline(projectId, pipelineId, file, { - private: "false", - }); + try { + await LLamaCloudFileService.addFileToPipeline( + projectId, + pipelineId, + new File([buffer], filename), + ); + } catch (error) { + if ( + error instanceof ReferenceError && + error.message.includes("File is not defined") + ) { + throw new Error( + "File class is not supported in the current Node.js version. Please use Node.js 20 or higher.", + ); + } + throw error; + } } console.log(`Successfully uploaded documents to LlamaCloud!`); diff --git a/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts b/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts index 5f3da346f63d67286b7f2055530de9e0b9bc7736..79298330480af4d870732395674fb1ec8e3c83ef 100644 --- a/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts +++ b/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts @@ -1,7 +1,7 @@ import { CloudRetrieveParams, MetadataFilter } from "llamaindex"; export function generateFilters(documentIds: string[]) { - // public documents don't have the "private" field or it's set to "false" + // public documents (ingested by "npm run generate" or in the LlamaCloud UI) don't have the "private" field const publicDocumentsFilter: MetadataFilter = { key: "private", operator: "is_empty", diff --git a/templates/types/streaming/express/package.json b/templates/types/streaming/express/package.json index 543bfc2e7a87f8335d38c06bbe90e73eccd7b993..f7cea6f0afa82c3b78c9f9d25fbd11cc82522cc6 100644 --- a/templates/types/streaming/express/package.json +++ b/templates/types/streaming/express/package.json @@ -21,7 +21,7 @@ "dotenv": "^16.3.1", "duck-duck-scrape": "^2.2.5", "express": "^4.18.2", - "llamaindex": "0.6.18", + "llamaindex": "0.6.19", "pdf2json": 
"3.0.5", "ajv": "^8.12.0", "@e2b/code-interpreter": "0.0.9-beta.3", diff --git a/templates/types/streaming/fastapi/app/api/routers/chat_config.py b/templates/types/streaming/fastapi/app/api/routers/chat_config.py index ae88ca9a59d72c3520ce0eb9ab5f99ef5bd4dc05..228664d3db541d5ec06463c1cd9f87a2421c9813 100644 --- a/templates/types/streaming/fastapi/app/api/routers/chat_config.py +++ b/templates/types/streaming/fastapi/app/api/routers/chat_config.py @@ -1,11 +1,10 @@ import logging import os -from fastapi import APIRouter +from fastapi import APIRouter, HTTPException from app.api.routers.models import ChatConfig - config_router = r = APIRouter() logger = logging.getLogger("uvicorn") @@ -27,6 +26,10 @@ try: @r.get("/llamacloud") async def chat_llama_cloud_config(): + if not os.getenv("LLAMA_CLOUD_API_KEY"): + raise HTTPException( + status_code=500, detail="LlamaCloud API KEY is not configured" + ) projects = LLamaCloudFileService.get_all_projects_with_pipelines() pipeline = os.getenv("LLAMA_CLOUD_INDEX_NAME") project = os.getenv("LLAMA_CLOUD_PROJECT_NAME") diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx index c9ed7ecb5058e6dea90ad6ed86ea3af51f807ee7..6d67081c98a2a8b17b749fa827669fe5847d1c76 100644 --- a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx +++ b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx @@ -66,7 +66,16 @@ export function LlamaCloudSelector({ useEffect(() => { if (process.env.NEXT_PUBLIC_USE_LLAMACLOUD === "true" && !config) { fetch(`${backend}/api/chat/config/llamacloud`) - .then((response) => response.json()) + .then((response) => { + if (!response.ok) { + return response.json().then((errorData) => { + window.alert( + `Error: ${JSON.stringify(errorData) || "Unknown error occurred"}`, + ); + }); + } + return response.json(); + }) 
.then((data) => { const pipeline = defaultPipeline ?? data.pipeline; // defaultPipeline will override pipeline in .env setConfig({ ...data, pipeline }); diff --git a/templates/types/streaming/nextjs/package.json b/templates/types/streaming/nextjs/package.json index 472f16367e902cc30d674e574a502532d7eff89f..c8f694edb83071989f567096912e1184f2f3dd20 100644 --- a/templates/types/streaming/nextjs/package.json +++ b/templates/types/streaming/nextjs/package.json @@ -27,7 +27,7 @@ "duck-duck-scrape": "^2.2.5", "formdata-node": "^6.0.3", "got": "^14.4.1", - "llamaindex": "0.6.18", + "llamaindex": "0.6.19", "lucide-react": "^0.294.0", "next": "^14.2.4", "react": "^18.2.0",