Skip to content
Snippets Groups Projects
Unverified Commit 75e1f610 authored by Huu Le's avatar Huu Le Committed by GitHub
Browse files

fix: TypeScript templates do not create a new LlamaCloud index or upload a...

fix: TypeScript templates do not create a new LlamaCloud index or upload a file to an existing index. (#356)
parent 88220f1d
No related branches found
No related tags found
No related merge requests found
Showing
with 83 additions and 35 deletions
---
"create-llama": patch
---
Fix: cannot query public documents from LlamaCloud
---
"create-llama": patch
---
Fix: TypeScript templates cannot upload files to LlamaCloud
......@@ -27,6 +27,13 @@ const userMessage =
dataSource !== "--no-files" ? "Physical standard for letters" : "Hello";
test.describe(`Test streaming template ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
const isNode18 = process.version.startsWith("v18");
const isLlamaCloud = dataSource === "--llamacloud";
  // LlamaCloud uses the File API, which is not supported on Node 18
if (isNode18 && isLlamaCloud) {
test.skip(true, "Skipping tests for Node 18 and LlamaCloud data source");
}
let port: number;
let externalPort: number;
let cwd: string;
......
......@@ -16,14 +16,26 @@ export async function uploadDocument(
// trigger LlamaCloudIndex API to upload the file and run the pipeline
const projectId = await index.getProjectId();
const pipelineId = await index.getPipelineId();
return [
await LLamaCloudFileService.addFileToPipeline(
projectId,
pipelineId,
new File([fileBuffer], filename, { type: mimeType }),
{ private: "true" },
),
];
try {
return [
await LLamaCloudFileService.addFileToPipeline(
projectId,
pipelineId,
new File([fileBuffer], filename, { type: mimeType }),
{ private: "true" },
),
];
} catch (error) {
if (
error instanceof ReferenceError &&
error.message.includes("File is not defined")
) {
throw new Error(
"File class is not supported in the current Node.js version. Please use Node.js 20 or higher.",
);
}
throw error;
}
}
// run the pipeline for other vector store indexes
......
......@@ -75,7 +75,7 @@ export function createCallbackManager(stream: StreamData) {
callbackManager.on("retrieve-end", (data) => {
const { nodes, query } = data.detail;
appendSourceData(stream, nodes);
appendEventData(stream, `Retrieving context for query: '${query}'`);
appendEventData(stream, `Retrieving context for query: '${query.query}'`);
appendEventData(
stream,
`Retrieved ${nodes.length} sources to use as context for the query`,
......
# flake8: noqa: E402
import os
from dotenv import load_dotenv
load_dotenv()
from llama_cloud import PipelineType
from app.settings import init_settings
from llama_index.core.settings import Settings
import logging
from app.engine.index import get_client, get_index
import logging
from llama_index.core.readers import SimpleDirectoryReader
from app.engine.service import LLamaCloudFileService
from app.settings import init_settings
from llama_cloud import PipelineType
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.settings import Settings
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
......@@ -80,13 +78,7 @@ def generate_datasource():
f"Adding file {input_file} to pipeline {index.name} in project {index.project_name}"
)
LLamaCloudFileService.add_file_to_pipeline(
project_id,
pipeline_id,
f,
custom_metadata={
# Set private=false to mark the document as public (required for filtering)
"private": "false",
},
project_id, pipeline_id, f, custom_metadata={}
)
logger.info("Finished generating the index")
......
......@@ -5,7 +5,7 @@ def generate_filters(doc_ids):
"""
Generate public/private document filters based on the doc_ids and the vector store.
"""
    # Using "is_empty" filter to include the documents that don't have the "private" key because they're uploaded in the LlamaCloud UI
# public documents (ingested by "poetry run generate" or in the LlamaCloud UI) don't have the "private" field
public_doc_filter = MetadataFilter(
key="private",
value=None,
......
......@@ -25,6 +25,8 @@ async function* walk(dir: string): AsyncGenerator<string> {
async function loadAndIndex() {
const index = await getDataSource();
// ensure the index is available or create a new one
await index.ensureIndex();
const projectId = await index.getProjectId();
const pipelineId = await index.getPipelineId();
......@@ -32,10 +34,23 @@ async function loadAndIndex() {
for await (const filePath of walk(DATA_DIR)) {
const buffer = await fs.readFile(filePath);
const filename = path.basename(filePath);
const file = new File([buffer], filename);
await LLamaCloudFileService.addFileToPipeline(projectId, pipelineId, file, {
private: "false",
});
try {
await LLamaCloudFileService.addFileToPipeline(
projectId,
pipelineId,
new File([buffer], filename),
);
} catch (error) {
if (
error instanceof ReferenceError &&
error.message.includes("File is not defined")
) {
throw new Error(
"File class is not supported in the current Node.js version. Please use Node.js 20 or higher.",
);
}
throw error;
}
}
console.log(`Successfully uploaded documents to LlamaCloud!`);
......
import { CloudRetrieveParams, MetadataFilter } from "llamaindex";
export function generateFilters(documentIds: string[]) {
// public documents don't have the "private" field or it's set to "false"
// public documents (ingested by "npm run generate" or in the LlamaCloud UI) don't have the "private" field
const publicDocumentsFilter: MetadataFilter = {
key: "private",
operator: "is_empty",
......
......@@ -21,7 +21,7 @@
"dotenv": "^16.3.1",
"duck-duck-scrape": "^2.2.5",
"express": "^4.18.2",
"llamaindex": "0.6.18",
"llamaindex": "0.6.19",
"pdf2json": "3.0.5",
"ajv": "^8.12.0",
"@e2b/code-interpreter": "0.0.9-beta.3",
......
import logging
import os
from fastapi import APIRouter
from fastapi import APIRouter, HTTPException
from app.api.routers.models import ChatConfig
config_router = r = APIRouter()
logger = logging.getLogger("uvicorn")
......@@ -27,6 +26,10 @@ try:
@r.get("/llamacloud")
async def chat_llama_cloud_config():
if not os.getenv("LLAMA_CLOUD_API_KEY"):
raise HTTPException(
status_code=500, detail="LlamaCloud API KEY is not configured"
)
projects = LLamaCloudFileService.get_all_projects_with_pipelines()
pipeline = os.getenv("LLAMA_CLOUD_INDEX_NAME")
project = os.getenv("LLAMA_CLOUD_PROJECT_NAME")
......
......@@ -66,7 +66,16 @@ export function LlamaCloudSelector({
useEffect(() => {
if (process.env.NEXT_PUBLIC_USE_LLAMACLOUD === "true" && !config) {
fetch(`${backend}/api/chat/config/llamacloud`)
.then((response) => response.json())
.then((response) => {
if (!response.ok) {
return response.json().then((errorData) => {
window.alert(
`Error: ${JSON.stringify(errorData) || "Unknown error occurred"}`,
);
});
}
return response.json();
})
.then((data) => {
const pipeline = defaultPipeline ?? data.pipeline; // defaultPipeline will override pipeline in .env
setConfig({ ...data, pipeline });
......
......@@ -27,7 +27,7 @@
"duck-duck-scrape": "^2.2.5",
"formdata-node": "^6.0.3",
"got": "^14.4.1",
"llamaindex": "0.6.18",
"llamaindex": "0.6.19",
"lucide-react": "^0.294.0",
"next": "^14.2.4",
"react": "^18.2.0",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment