diff --git a/.changeset/good-oranges-cover.md b/.changeset/good-oranges-cover.md new file mode 100644 index 0000000000000000000000000000000000000000..0211d78d4c16b02b55de773259489ee0c2acd934 --- /dev/null +++ b/.changeset/good-oranges-cover.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Fix error that files in sub folders of 'data' are not displayed diff --git a/templates/components/llamaindex/typescript/streaming/events.ts b/templates/components/llamaindex/typescript/streaming/events.ts index 36d6ba7a5bbb9ae45033b3ba943b57b1bd002aa8..7f094c1c684b033e90eabfb487c62b65dda329fa 100644 --- a/templates/components/llamaindex/typescript/streaming/events.ts +++ b/templates/components/llamaindex/typescript/streaming/events.ts @@ -7,6 +7,8 @@ import { ToolCall, ToolOutput, } from "llamaindex"; +import path from "node:path"; +import { DATA_DIR } from "../../engine/loader"; import { LLamaCloudFileService } from "./service"; export function appendSourceData( @@ -122,8 +124,16 @@ function getNodeUrl(metadata: Metadata) { return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${name}`; } const isPrivate = metadata["private"] === "true"; - const folder = isPrivate ? "output/uploaded" : "data"; - return `${process.env.FILESERVER_URL_PREFIX}/${folder}/${fileName}`; + if (isPrivate) { + return `${process.env.FILESERVER_URL_PREFIX}/output/uploaded/${fileName}`; + } + const filePath = metadata["file_path"]; + const dataDir = path.resolve(DATA_DIR); + + if (filePath && dataDir) { + const relativePath = path.relative(dataDir, filePath); + return `${process.env.FILESERVER_URL_PREFIX}/data/${relativePath}`; + } } // fallback to URL in metadata (e.g. for websites) return metadata["URL"]; diff --git a/templates/components/loaders/python/file.py b/templates/components/loaders/python/file.py index 4dea4f833bd90d037bcfe7a8a65a9e8361ec9821..856d451c7cd36708f0e6b26bdd1561904120a46f 100644 --- a/templates/components/loaders/python/file.py +++ b/templates/components/loaders/python/file.py @@ -2,21 +2,16 @@ import os import logging from typing import Dict from llama_parse import LlamaParse -from pydantic import BaseModel, validator +from pydantic import BaseModel + +from app.config import DATA_DIR logger = logging.getLogger(__name__) class FileLoaderConfig(BaseModel): - data_dir: str = "data" use_llama_parse: bool = False - @validator("data_dir") - def data_dir_must_exist(cls, v): - if not os.path.isdir(v): - raise ValueError(f"Directory '{v}' does not exist") - return v - def llama_parse_parser(): if os.getenv("LLAMA_CLOUD_API_KEY") is None: @@ -54,7 +49,7 @@ def get_file_documents(config: FileLoaderConfig): file_extractor = llama_parse_extractor() reader = SimpleDirectoryReader( - config.data_dir, + DATA_DIR, recursive=True, filename_as_id=True, raise_on_error=True, diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py index 15f69bc34dd493fcbbda0c3ca4b722c2818b77db..29648608fec20fff2679857df7bd8e4927858d49 100644 --- a/templates/types/streaming/fastapi/app/api/routers/models.py +++ b/templates/types/streaming/fastapi/app/api/routers/models.py @@ -7,6 +7,8 @@ from llama_index.core.schema import NodeWithScore from pydantic import BaseModel, Field, validator from pydantic.alias_generators import to_camel +from app.config import DATA_DIR + logger = logging.getLogger("uvicorn") @@ -175,6 +177,7 @@ class SourceNodes(BaseModel): "Warning: FILESERVER_URL_PREFIX not set in environment variables. Can't use file server" ) file_name = metadata.get("file_name") + if file_name and url_prefix: # file_name exists and file server is configured pipeline_id = metadata.get("pipeline_id") @@ -184,11 +187,17 @@ class SourceNodes(BaseModel): return f"{url_prefix}/output/llamacloud/{file_name}" is_private = metadata.get("private", "false") == "true" if is_private: + # file is a private upload return f"{url_prefix}/output/uploaded/{file_name}" - return f"{url_prefix}/data/{file_name}" - else: - # fallback to URL in metadata (e.g. for websites) - return metadata.get("URL") + # file is from calling the 'generate' script + # Get the relative path of file_path to data_dir + file_path = metadata.get("file_path") + data_dir = os.path.abspath(DATA_DIR) + if file_path and data_dir: + relative_path = os.path.relpath(file_path, data_dir) + return f"{url_prefix}/data/{relative_path}" + # fallback to URL in metadata (e.g. for websites) + return metadata.get("URL") @classmethod def from_source_nodes(cls, source_nodes: List[NodeWithScore]): diff --git a/templates/types/streaming/fastapi/app/config.py b/templates/types/streaming/fastapi/app/config.py new file mode 100644 index 0000000000000000000000000000000000000000..29fa8d9a28fa2fc5ae9502639c1452cf8ae15e4b --- /dev/null +++ b/templates/types/streaming/fastapi/app/config.py @@ -0,0 +1 @@ +DATA_DIR = "data" diff --git a/templates/types/streaming/fastapi/main.py b/templates/types/streaming/fastapi/main.py index b0be152a3f8a3f3fe5422d1f88cdec7218324e7a..12a548728372e4c636cbc2dbb5445612a6320afe 100644 --- a/templates/types/streaming/fastapi/main.py +++ b/templates/types/streaming/fastapi/main.py @@ -1,6 +1,8 @@ # flake8: noqa: E402 from dotenv import load_dotenv +from app.config import DATA_DIR + load_dotenv() import logging @@ -43,15 +45,16 @@ if environment == "dev": def mount_static_files(directory, path): if os.path.exists(directory): - for dir, _, _ in os.walk(directory): - relative_path = os.path.relpath(dir, directory) - mount_path = path if relative_path == "." else f"{path}/{relative_path}" - logger.info(f"Mounting static files '{dir}' at {mount_path}") - app.mount(mount_path, StaticFiles(directory=dir), name=f"{dir}-static") + logger.info(f"Mounting static files '{directory}' at '{path}'") + app.mount( + path, + StaticFiles(directory=directory, check_dir=False), + name=f"{directory}-static", + ) # Mount the data files to serve the file viewer -mount_static_files("data", "/api/files/data") +mount_static_files(DATA_DIR, "/api/files/data") # Mount the output files from tools mount_static_files("output", "/api/files/output") diff --git a/templates/types/streaming/nextjs/app/api/files/[...slug]/route.ts b/templates/types/streaming/nextjs/app/api/files/[...slug]/route.ts index d811996aa7bea235cb077dcf33ab33d74c53bffe..5bb2e06e49b78eb43e2321311835c1c2d2062c90 100644 --- a/templates/types/streaming/nextjs/app/api/files/[...slug]/route.ts +++ b/templates/types/streaming/nextjs/app/api/files/[...slug]/route.ts @@ -1,6 +1,7 @@ import { readFile } from "fs/promises"; import { NextRequest, NextResponse } from "next/server"; import path from "path"; +import { DATA_DIR } from "../../chat/engine/loader"; /** * This API is to get file data from allowed folders @@ -28,7 +29,11 @@ export async function GET( } try { - const filePath = path.join(process.cwd(), folder, path.join(...pathTofile)); + const filePath = path.join( + process.cwd(), + folder === "data" ? DATA_DIR : folder, + path.join(...pathTofile), + ); const blob = await readFile(filePath); return new NextResponse(blob, {