From 75e1f6104ca30a270e8258d6d7bbc731a0657940 Mon Sep 17 00:00:00 2001
From: Huu Le <39040748+leehuwuj@users.noreply.github.com>
Date: Thu, 10 Oct 2024 17:58:12 +0700
Subject: [PATCH] fix: TypeScript templates do not create a new LlamaCloud
 index or upload a file to an existing index. (#356)

---
 .changeset/lucky-queens-smile.md              |  5 ++++
 .changeset/thirty-tips-drum.md                |  5 ++++
 e2e/shared/streaming_template.spec.ts         |  7 +++++
 .../llamaindex/typescript/documents/upload.ts | 28 +++++++++++++------
 .../llamaindex/typescript/streaming/events.ts |  2 +-
 .../vectordbs/python/llamacloud/generate.py   | 22 +++++----------
 .../python/llamacloud/query_filter.py         |  2 +-
 .../typescript/llamacloud/generate.ts         | 23 ++++++++++++---
 .../typescript/llamacloud/queryFilter.ts      |  2 +-
 .../types/streaming/express/package.json      |  2 +-
 .../fastapi/app/api/routers/chat_config.py    |  7 +++--
 .../ui/chat/widgets/LlamaCloudSelector.tsx    | 11 +++++++-
 templates/types/streaming/nextjs/package.json |  2 +-
 13 files changed, 83 insertions(+), 35 deletions(-)
 create mode 100644 .changeset/lucky-queens-smile.md
 create mode 100644 .changeset/thirty-tips-drum.md

diff --git a/.changeset/lucky-queens-smile.md b/.changeset/lucky-queens-smile.md
new file mode 100644
index 00000000..6795f9fb
--- /dev/null
+++ b/.changeset/lucky-queens-smile.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Fix: cannot query public documents from LlamaCloud
diff --git a/.changeset/thirty-tips-drum.md b/.changeset/thirty-tips-drum.md
new file mode 100644
index 00000000..4a45f8c7
--- /dev/null
+++ b/.changeset/thirty-tips-drum.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Fix: TypeScript templates cannot upload files to LlamaCloud
diff --git a/e2e/shared/streaming_template.spec.ts b/e2e/shared/streaming_template.spec.ts
index 91183a91..b34d4fed 100644
--- a/e2e/shared/streaming_template.spec.ts
+++ b/e2e/shared/streaming_template.spec.ts
@@ -27,6 +27,13 @@ const userMessage =
   dataSource !== "--no-files" ? "Physical standard for letters" : "Hello";
 
 test.describe(`Test streaming template ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
+  const isNode18 = process.version.startsWith("v18");
+  const isLlamaCloud = dataSource === "--llamacloud";
+  // LlamaCloud uses the File API, which is not supported on Node 18
+  if (isNode18 && isLlamaCloud) {
+    test.skip(true, "Skipping tests for Node 18 and LlamaCloud data source");
+  }
+
   let port: number;
   let externalPort: number;
   let cwd: string;
diff --git a/templates/components/llamaindex/typescript/documents/upload.ts b/templates/components/llamaindex/typescript/documents/upload.ts
index 4f205a60..a5a817e7 100644
--- a/templates/components/llamaindex/typescript/documents/upload.ts
+++ b/templates/components/llamaindex/typescript/documents/upload.ts
@@ -16,14 +16,26 @@ export async function uploadDocument(
     // trigger LlamaCloudIndex API to upload the file and run the pipeline
     const projectId = await index.getProjectId();
     const pipelineId = await index.getPipelineId();
-    return [
-      await LLamaCloudFileService.addFileToPipeline(
-        projectId,
-        pipelineId,
-        new File([fileBuffer], filename, { type: mimeType }),
-        { private: "true" },
-      ),
-    ];
+    try {
+      return [
+        await LLamaCloudFileService.addFileToPipeline(
+          projectId,
+          pipelineId,
+          new File([fileBuffer], filename, { type: mimeType }),
+          { private: "true" },
+        ),
+      ];
+    } catch (error) {
+      if (
+        error instanceof ReferenceError &&
+        error.message.includes("File is not defined")
+      ) {
+        throw new Error(
+          "File class is not supported in the current Node.js version. Please use Node.js 20 or higher.",
+        );
+      }
+      throw error;
+    }
   }
 
   // run the pipeline for other vector store indexes
diff --git a/templates/components/llamaindex/typescript/streaming/events.ts b/templates/components/llamaindex/typescript/streaming/events.ts
index c14af55d..538e0014 100644
--- a/templates/components/llamaindex/typescript/streaming/events.ts
+++ b/templates/components/llamaindex/typescript/streaming/events.ts
@@ -75,7 +75,7 @@ export function createCallbackManager(stream: StreamData) {
   callbackManager.on("retrieve-end", (data) => {
     const { nodes, query } = data.detail;
     appendSourceData(stream, nodes);
-    appendEventData(stream, `Retrieving context for query: '${query}'`);
+    appendEventData(stream, `Retrieving context for query: '${query.query}'`);
     appendEventData(
       stream,
       `Retrieved ${nodes.length} sources to use as context for the query`,
diff --git a/templates/components/vectordbs/python/llamacloud/generate.py b/templates/components/vectordbs/python/llamacloud/generate.py
index 2efec0ee..6be271bd 100644
--- a/templates/components/vectordbs/python/llamacloud/generate.py
+++ b/templates/components/vectordbs/python/llamacloud/generate.py
@@ -1,20 +1,18 @@
 # flake8: noqa: E402
 import os
+
 from dotenv import load_dotenv
 
 load_dotenv()
 
-from llama_cloud import PipelineType
-
-from app.settings import init_settings
-from llama_index.core.settings import Settings
-
+import logging
 
 from app.engine.index import get_client, get_index
-
-import logging
-from llama_index.core.readers import SimpleDirectoryReader
 from app.engine.service import LLamaCloudFileService
+from app.settings import init_settings
+from llama_cloud import PipelineType
+from llama_index.core.readers import SimpleDirectoryReader
+from llama_index.core.settings import Settings
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger()
@@ -80,13 +78,7 @@ def generate_datasource():
                 f"Adding file {input_file} to pipeline {index.name} in project {index.project_name}"
             )
             LLamaCloudFileService.add_file_to_pipeline(
-                project_id,
-                pipeline_id,
-                f,
-                custom_metadata={
-                    # Set private=false to mark the document as public (required for filtering)
-                    "private": "false",
-                },
+                project_id, pipeline_id, f, custom_metadata={}
             )
 
     logger.info("Finished generating the index")
diff --git a/templates/components/vectordbs/python/llamacloud/query_filter.py b/templates/components/vectordbs/python/llamacloud/query_filter.py
index fdaabd6a..d90581c6 100644
--- a/templates/components/vectordbs/python/llamacloud/query_filter.py
+++ b/templates/components/vectordbs/python/llamacloud/query_filter.py
@@ -5,7 +5,7 @@ def generate_filters(doc_ids):
     """
     Generate public/private document filters based on the doc_ids and the vector store.
     """
-    # Using "is_empty" filter to include the documents don't have the "private" key because they're uploaded in LlamaCloud UI
+    # public documents (ingested by "poetry run generate" or in the LlamaCloud UI) don't have the "private" field
     public_doc_filter = MetadataFilter(
         key="private",
         value=None,
diff --git a/templates/components/vectordbs/typescript/llamacloud/generate.ts b/templates/components/vectordbs/typescript/llamacloud/generate.ts
index 7f3bd26a..9668f7b1 100644
--- a/templates/components/vectordbs/typescript/llamacloud/generate.ts
+++ b/templates/components/vectordbs/typescript/llamacloud/generate.ts
@@ -25,6 +25,8 @@ async function* walk(dir: string): AsyncGenerator<string> {
 
 async function loadAndIndex() {
   const index = await getDataSource();
+  // ensure the index exists; create a new one if it doesn't
+  await index.ensureIndex();
   const projectId = await index.getProjectId();
   const pipelineId = await index.getPipelineId();
 
@@ -32,10 +34,23 @@ async function loadAndIndex() {
   for await (const filePath of walk(DATA_DIR)) {
     const buffer = await fs.readFile(filePath);
     const filename = path.basename(filePath);
-    const file = new File([buffer], filename);
-    await LLamaCloudFileService.addFileToPipeline(projectId, pipelineId, file, {
-      private: "false",
-    });
+    try {
+      await LLamaCloudFileService.addFileToPipeline(
+        projectId,
+        pipelineId,
+        new File([buffer], filename),
+      );
+    } catch (error) {
+      if (
+        error instanceof ReferenceError &&
+        error.message.includes("File is not defined")
+      ) {
+        throw new Error(
+          "File class is not supported in the current Node.js version. Please use Node.js 20 or higher.",
+        );
+      }
+      throw error;
+    }
   }
 
   console.log(`Successfully uploaded documents to LlamaCloud!`);
diff --git a/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts b/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts
index 5f3da346..79298330 100644
--- a/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts
+++ b/templates/components/vectordbs/typescript/llamacloud/queryFilter.ts
@@ -1,7 +1,7 @@
 import { CloudRetrieveParams, MetadataFilter } from "llamaindex";
 
 export function generateFilters(documentIds: string[]) {
-  // public documents don't have the "private" field or it's set to "false"
+  // public documents (ingested by "npm run generate" or in the LlamaCloud UI) don't have the "private" field
   const publicDocumentsFilter: MetadataFilter = {
     key: "private",
     operator: "is_empty",
diff --git a/templates/types/streaming/express/package.json b/templates/types/streaming/express/package.json
index 543bfc2e..f7cea6f0 100644
--- a/templates/types/streaming/express/package.json
+++ b/templates/types/streaming/express/package.json
@@ -21,7 +21,7 @@
     "dotenv": "^16.3.1",
     "duck-duck-scrape": "^2.2.5",
     "express": "^4.18.2",
-    "llamaindex": "0.6.18",
+    "llamaindex": "0.6.19",
     "pdf2json": "3.0.5",
     "ajv": "^8.12.0",
     "@e2b/code-interpreter": "0.0.9-beta.3",
diff --git a/templates/types/streaming/fastapi/app/api/routers/chat_config.py b/templates/types/streaming/fastapi/app/api/routers/chat_config.py
index ae88ca9a..228664d3 100644
--- a/templates/types/streaming/fastapi/app/api/routers/chat_config.py
+++ b/templates/types/streaming/fastapi/app/api/routers/chat_config.py
@@ -1,11 +1,10 @@
 import logging
 import os
 
-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
 
 from app.api.routers.models import ChatConfig
 
-
 config_router = r = APIRouter()
 
 logger = logging.getLogger("uvicorn")
@@ -27,6 +26,10 @@ try:
 
     @r.get("/llamacloud")
     async def chat_llama_cloud_config():
+        if not os.getenv("LLAMA_CLOUD_API_KEY"):
+            raise HTTPException(
+                status_code=500, detail="LlamaCloud API KEY is not configured"
+            )
         projects = LLamaCloudFileService.get_all_projects_with_pipelines()
         pipeline = os.getenv("LLAMA_CLOUD_INDEX_NAME")
         project = os.getenv("LLAMA_CLOUD_PROJECT_NAME")
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx
index c9ed7ecb..6d67081c 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/LlamaCloudSelector.tsx
@@ -66,7 +66,16 @@ export function LlamaCloudSelector({
   useEffect(() => {
     if (process.env.NEXT_PUBLIC_USE_LLAMACLOUD === "true" && !config) {
       fetch(`${backend}/api/chat/config/llamacloud`)
-        .then((response) => response.json())
+        .then((response) => {
+          if (!response.ok) {
+            return response.json().then((errorData) => {
+              window.alert(
+                `Error: ${JSON.stringify(errorData) || "Unknown error occurred"}`,
+              );
+            });
+          }
+          return response.json();
+        })
         .then((data) => {
           const pipeline = defaultPipeline ?? data.pipeline; // defaultPipeline will override pipeline in .env
           setConfig({ ...data, pipeline });
diff --git a/templates/types/streaming/nextjs/package.json b/templates/types/streaming/nextjs/package.json
index 472f1636..c8f694ed 100644
--- a/templates/types/streaming/nextjs/package.json
+++ b/templates/types/streaming/nextjs/package.json
@@ -27,7 +27,7 @@
     "duck-duck-scrape": "^2.2.5",
     "formdata-node": "^6.0.3",
     "got": "^14.4.1",
-    "llamaindex": "0.6.18",
+    "llamaindex": "0.6.19",
     "lucide-react": "^0.294.0",
     "next": "^14.2.4",
     "react": "^18.2.0",
-- 
GitLab