Skip to content
Snippets Groups Projects
Unverified Commit bd5e39a3 authored by Marcus Schiesser, committed by GitHub
Browse files

fix: files in sub folders of 'data' are not displayed (#241)

parent de2c7523
No related branches found
No related tags found
No related merge requests found
---
"create-llama": patch
---
Fix an error where files in subfolders of 'data' are not displayed
...@@ -7,6 +7,8 @@ import { ...@@ -7,6 +7,8 @@ import {
ToolCall, ToolCall,
ToolOutput, ToolOutput,
} from "llamaindex"; } from "llamaindex";
import path from "node:path";
import { DATA_DIR } from "../../engine/loader";
import { LLamaCloudFileService } from "./service"; import { LLamaCloudFileService } from "./service";
export function appendSourceData( export function appendSourceData(
...@@ -122,8 +124,16 @@ function getNodeUrl(metadata: Metadata) { ...@@ -122,8 +124,16 @@ function getNodeUrl(metadata: Metadata) {
return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${name}`; return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${name}`;
} }
const isPrivate = metadata["private"] === "true"; const isPrivate = metadata["private"] === "true";
const folder = isPrivate ? "output/uploaded" : "data"; if (isPrivate) {
return `${process.env.FILESERVER_URL_PREFIX}/${folder}/${fileName}`; return `${process.env.FILESERVER_URL_PREFIX}/output/uploaded/${fileName}`;
}
const filePath = metadata["file_path"];
const dataDir = path.resolve(DATA_DIR);
if (filePath && dataDir) {
const relativePath = path.relative(dataDir, filePath);
return `${process.env.FILESERVER_URL_PREFIX}/data/${relativePath}`;
}
} }
// fallback to URL in metadata (e.g. for websites) // fallback to URL in metadata (e.g. for websites)
return metadata["URL"]; return metadata["URL"];
......
...@@ -2,21 +2,16 @@ import os ...@@ -2,21 +2,16 @@ import os
import logging import logging
from typing import Dict from typing import Dict
from llama_parse import LlamaParse from llama_parse import LlamaParse
from pydantic import BaseModel, validator from pydantic import BaseModel
from app.config import DATA_DIR
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class FileLoaderConfig(BaseModel): class FileLoaderConfig(BaseModel):
data_dir: str = "data"
use_llama_parse: bool = False use_llama_parse: bool = False
@validator("data_dir")
def data_dir_must_exist(cls, v):
if not os.path.isdir(v):
raise ValueError(f"Directory '{v}' does not exist")
return v
def llama_parse_parser(): def llama_parse_parser():
if os.getenv("LLAMA_CLOUD_API_KEY") is None: if os.getenv("LLAMA_CLOUD_API_KEY") is None:
...@@ -54,7 +49,7 @@ def get_file_documents(config: FileLoaderConfig): ...@@ -54,7 +49,7 @@ def get_file_documents(config: FileLoaderConfig):
file_extractor = llama_parse_extractor() file_extractor = llama_parse_extractor()
reader = SimpleDirectoryReader( reader = SimpleDirectoryReader(
config.data_dir, DATA_DIR,
recursive=True, recursive=True,
filename_as_id=True, filename_as_id=True,
raise_on_error=True, raise_on_error=True,
......
...@@ -7,6 +7,8 @@ from llama_index.core.schema import NodeWithScore ...@@ -7,6 +7,8 @@ from llama_index.core.schema import NodeWithScore
from pydantic import BaseModel, Field, validator from pydantic import BaseModel, Field, validator
from pydantic.alias_generators import to_camel from pydantic.alias_generators import to_camel
from app.config import DATA_DIR
logger = logging.getLogger("uvicorn") logger = logging.getLogger("uvicorn")
...@@ -175,6 +177,7 @@ class SourceNodes(BaseModel): ...@@ -175,6 +177,7 @@ class SourceNodes(BaseModel):
"Warning: FILESERVER_URL_PREFIX not set in environment variables. Can't use file server" "Warning: FILESERVER_URL_PREFIX not set in environment variables. Can't use file server"
) )
file_name = metadata.get("file_name") file_name = metadata.get("file_name")
if file_name and url_prefix: if file_name and url_prefix:
# file_name exists and file server is configured # file_name exists and file server is configured
pipeline_id = metadata.get("pipeline_id") pipeline_id = metadata.get("pipeline_id")
...@@ -184,11 +187,17 @@ class SourceNodes(BaseModel): ...@@ -184,11 +187,17 @@ class SourceNodes(BaseModel):
return f"{url_prefix}/output/llamacloud/{file_name}" return f"{url_prefix}/output/llamacloud/{file_name}"
is_private = metadata.get("private", "false") == "true" is_private = metadata.get("private", "false") == "true"
if is_private: if is_private:
# file is a private upload
return f"{url_prefix}/output/uploaded/{file_name}" return f"{url_prefix}/output/uploaded/{file_name}"
return f"{url_prefix}/data/{file_name}" # file is from calling the 'generate' script
else: # Get the relative path of file_path to data_dir
# fallback to URL in metadata (e.g. for websites) file_path = metadata.get("file_path")
return metadata.get("URL") data_dir = os.path.abspath(DATA_DIR)
if file_path and data_dir:
relative_path = os.path.relpath(file_path, data_dir)
return f"{url_prefix}/data/{relative_path}"
# fallback to URL in metadata (e.g. for websites)
return metadata.get("URL")
@classmethod @classmethod
def from_source_nodes(cls, source_nodes: List[NodeWithScore]): def from_source_nodes(cls, source_nodes: List[NodeWithScore]):
......
DATA_DIR = "data"
# flake8: noqa: E402 # flake8: noqa: E402
from dotenv import load_dotenv from dotenv import load_dotenv
from app.config import DATA_DIR
load_dotenv() load_dotenv()
import logging import logging
...@@ -43,15 +45,16 @@ if environment == "dev": ...@@ -43,15 +45,16 @@ if environment == "dev":
def mount_static_files(directory, path): def mount_static_files(directory, path):
if os.path.exists(directory): if os.path.exists(directory):
for dir, _, _ in os.walk(directory): logger.info(f"Mounting static files '{directory}' at '{path}'")
relative_path = os.path.relpath(dir, directory) app.mount(
mount_path = path if relative_path == "." else f"{path}/{relative_path}" path,
logger.info(f"Mounting static files '{dir}' at {mount_path}") StaticFiles(directory=directory, check_dir=False),
app.mount(mount_path, StaticFiles(directory=dir), name=f"{dir}-static") name=f"{directory}-static",
)
# Mount the data files to serve the file viewer # Mount the data files to serve the file viewer
mount_static_files("data", "/api/files/data") mount_static_files(DATA_DIR, "/api/files/data")
# Mount the output files from tools # Mount the output files from tools
mount_static_files("output", "/api/files/output") mount_static_files("output", "/api/files/output")
......
import { readFile } from "fs/promises"; import { readFile } from "fs/promises";
import { NextRequest, NextResponse } from "next/server"; import { NextRequest, NextResponse } from "next/server";
import path from "path"; import path from "path";
import { DATA_DIR } from "../../chat/engine/loader";
/** /**
* This API is to get file data from allowed folders * This API is to get file data from allowed folders
...@@ -28,7 +29,11 @@ export async function GET( ...@@ -28,7 +29,11 @@ export async function GET(
} }
try { try {
const filePath = path.join(process.cwd(), folder, path.join(...pathTofile)); const filePath = path.join(
process.cwd(),
folder === "data" ? DATA_DIR : folder,
path.join(...pathTofile),
);
const blob = await readFile(filePath); const blob = await readFile(filePath);
return new NextResponse(blob, { return new NextResponse(blob, {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment