From bd5e39a3901d9f71e6c07fbe1237e83404087042 Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Fri, 16 Aug 2024 10:57:44 +0700
Subject: [PATCH] fix: files in sub folders of 'data' are not displayed (#241)

---
 .changeset/good-oranges-cover.md                |  5 +++++
 .../llamaindex/typescript/streaming/events.ts   | 14 ++++++++++++--
 templates/components/loaders/python/file.py     | 13 ++++---------
 .../streaming/fastapi/app/api/routers/models.py | 17 +++++++++++++----
 templates/types/streaming/fastapi/app/config.py |  1 +
 templates/types/streaming/fastapi/main.py       | 15 +++++++++------
 .../nextjs/app/api/files/[...slug]/route.ts     |  7 ++++++-
 7 files changed, 50 insertions(+), 22 deletions(-)
 create mode 100644 .changeset/good-oranges-cover.md
 create mode 100644 templates/types/streaming/fastapi/app/config.py

diff --git a/.changeset/good-oranges-cover.md b/.changeset/good-oranges-cover.md
new file mode 100644
index 00000000..0211d78d
--- /dev/null
+++ b/.changeset/good-oranges-cover.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Fix an error where files in sub folders of 'data' were not displayed
diff --git a/templates/components/llamaindex/typescript/streaming/events.ts b/templates/components/llamaindex/typescript/streaming/events.ts
index 36d6ba7a..7f094c1c 100644
--- a/templates/components/llamaindex/typescript/streaming/events.ts
+++ b/templates/components/llamaindex/typescript/streaming/events.ts
@@ -7,6 +7,8 @@ import {
   ToolCall,
   ToolOutput,
 } from "llamaindex";
+import path from "node:path";
+import { DATA_DIR } from "../../engine/loader";
 import { LLamaCloudFileService } from "./service";
 
 export function appendSourceData(
@@ -122,8 +124,16 @@ function getNodeUrl(metadata: Metadata) {
       return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${name}`;
     }
     const isPrivate = metadata["private"] === "true";
-    const folder = isPrivate ? "output/uploaded" : "data";
-    return `${process.env.FILESERVER_URL_PREFIX}/${folder}/${fileName}`;
+    if (isPrivate) {
+      return `${process.env.FILESERVER_URL_PREFIX}/output/uploaded/${fileName}`;
+    }
+    const filePath = metadata["file_path"];
+    const dataDir = path.resolve(DATA_DIR);
+
+    if (filePath && dataDir) {
+      const relativePath = path.relative(dataDir, filePath);
+      return `${process.env.FILESERVER_URL_PREFIX}/data/${relativePath}`;
+    }
   }
   // fallback to URL in metadata (e.g. for websites)
   return metadata["URL"];
diff --git a/templates/components/loaders/python/file.py b/templates/components/loaders/python/file.py
index 4dea4f83..856d451c 100644
--- a/templates/components/loaders/python/file.py
+++ b/templates/components/loaders/python/file.py
@@ -2,21 +2,16 @@ import os
 import logging
 from typing import Dict
 from llama_parse import LlamaParse
-from pydantic import BaseModel, validator
+from pydantic import BaseModel
+
+from app.config import DATA_DIR
 
 logger = logging.getLogger(__name__)
 
 
 class FileLoaderConfig(BaseModel):
-    data_dir: str = "data"
     use_llama_parse: bool = False
 
-    @validator("data_dir")
-    def data_dir_must_exist(cls, v):
-        if not os.path.isdir(v):
-            raise ValueError(f"Directory '{v}' does not exist")
-        return v
-
 
 def llama_parse_parser():
     if os.getenv("LLAMA_CLOUD_API_KEY") is None:
@@ -54,7 +49,7 @@ def get_file_documents(config: FileLoaderConfig):
 
             file_extractor = llama_parse_extractor()
         reader = SimpleDirectoryReader(
-            config.data_dir,
+            DATA_DIR,
             recursive=True,
             filename_as_id=True,
             raise_on_error=True,
diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index 15f69bc3..29648608 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -7,6 +7,8 @@ from llama_index.core.schema import NodeWithScore
 from pydantic import BaseModel, Field, validator
 from pydantic.alias_generators import to_camel
 
+from app.config import DATA_DIR
+
 logger = logging.getLogger("uvicorn")
 
 
@@ -175,6 +177,7 @@ class SourceNodes(BaseModel):
                 "Warning: FILESERVER_URL_PREFIX not set in environment variables. Can't use file server"
             )
         file_name = metadata.get("file_name")
+
         if file_name and url_prefix:
             # file_name exists and file server is configured
             pipeline_id = metadata.get("pipeline_id")
@@ -184,11 +187,17 @@ class SourceNodes(BaseModel):
                 return f"{url_prefix}/output/llamacloud/{file_name}"
             is_private = metadata.get("private", "false") == "true"
             if is_private:
+                # file is a private upload
                 return f"{url_prefix}/output/uploaded/{file_name}"
-            return f"{url_prefix}/data/{file_name}"
-        else:
-            # fallback to URL in metadata (e.g. for websites)
-            return metadata.get("URL")
+            # file was ingested by the 'generate' script
+            # Use file_path relative to data_dir so sub folders are kept in the URL
+            file_path = metadata.get("file_path")
+            data_dir = os.path.abspath(DATA_DIR)
+            if file_path and data_dir:
+                relative_path = os.path.relpath(file_path, data_dir)
+                return f"{url_prefix}/data/{relative_path}"
+        # fallback to URL in metadata (e.g. for websites)
+        return metadata.get("URL")
 
     @classmethod
     def from_source_nodes(cls, source_nodes: List[NodeWithScore]):
diff --git a/templates/types/streaming/fastapi/app/config.py b/templates/types/streaming/fastapi/app/config.py
new file mode 100644
index 00000000..29fa8d9a
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/config.py
@@ -0,0 +1 @@
+DATA_DIR = "data"
diff --git a/templates/types/streaming/fastapi/main.py b/templates/types/streaming/fastapi/main.py
index b0be152a..12a54872 100644
--- a/templates/types/streaming/fastapi/main.py
+++ b/templates/types/streaming/fastapi/main.py
@@ -1,6 +1,8 @@
 # flake8: noqa: E402
 from dotenv import load_dotenv
 
+from app.config import DATA_DIR
+
 load_dotenv()
 
 import logging
@@ -43,15 +45,16 @@ if environment == "dev":
 
 def mount_static_files(directory, path):
     if os.path.exists(directory):
-        for dir, _, _ in os.walk(directory):
-            relative_path = os.path.relpath(dir, directory)
-            mount_path = path if relative_path == "." else f"{path}/{relative_path}"
-            logger.info(f"Mounting static files '{dir}' at {mount_path}")
-            app.mount(mount_path, StaticFiles(directory=dir), name=f"{dir}-static")
+        logger.info(f"Mounting static files '{directory}' at '{path}'")
+        app.mount(
+            path,
+            StaticFiles(directory=directory, check_dir=False),
+            name=f"{directory}-static",
+        )
 
 
 # Mount the data files to serve the file viewer
-mount_static_files("data", "/api/files/data")
+mount_static_files(DATA_DIR, "/api/files/data")
 # Mount the output files from tools
 mount_static_files("output", "/api/files/output")
 
diff --git a/templates/types/streaming/nextjs/app/api/files/[...slug]/route.ts b/templates/types/streaming/nextjs/app/api/files/[...slug]/route.ts
index d811996a..5bb2e06e 100644
--- a/templates/types/streaming/nextjs/app/api/files/[...slug]/route.ts
+++ b/templates/types/streaming/nextjs/app/api/files/[...slug]/route.ts
@@ -1,6 +1,7 @@
 import { readFile } from "fs/promises";
 import { NextRequest, NextResponse } from "next/server";
 import path from "path";
+import { DATA_DIR } from "../../chat/engine/loader";
 
 /**
  * This API is to get file data from allowed folders
@@ -28,7 +29,11 @@ export async function GET(
   }
 
   try {
-    const filePath = path.join(process.cwd(), folder, path.join(...pathTofile));
+    const filePath = path.join(
+      process.cwd(),
+      folder === "data" ? DATA_DIR : folder,
+      path.join(...pathTofile),
+    );
     const blob = await readFile(filePath);
 
     return new NextResponse(blob, {
-- 
GitLab