diff --git a/.changeset/big-turtles-own.md b/.changeset/big-turtles-own.md new file mode 100644 index 0000000000000000000000000000000000000000..ed9c194b41593200237377ffb05e32f08f227df7 --- /dev/null +++ b/.changeset/big-turtles-own.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Ensure that the index and document store are created when uploading a file with no available index. diff --git a/helpers/env-variables.ts b/helpers/env-variables.ts index 07ae88e02a08f6edc0dc9aca1c2d0fb76511ef12..4a554ff047c8a7dd7dbac9e4dac35a75dea57771 100644 --- a/helpers/env-variables.ts +++ b/helpers/env-variables.ts @@ -217,7 +217,13 @@ Otherwise, use CHROMA_HOST and CHROMA_PORT config above`, }, ]; default: - return []; + return [ + { + name: "STORAGE_CACHE_DIR", + description: "The directory to store the local storage cache.", + value: ".cache", + }, + ]; } }; diff --git a/templates/components/llamaindex/typescript/documents/pipeline.ts b/templates/components/llamaindex/typescript/documents/pipeline.ts index 01b52fd5d732d98a6569bcf418d624bd3cbe40f1..cd4d6d092ba4c65151a57be6d86cf264b4c77e34 100644 --- a/templates/components/llamaindex/typescript/documents/pipeline.ts +++ b/templates/components/llamaindex/typescript/documents/pipeline.ts @@ -3,6 +3,7 @@ import { IngestionPipeline, Settings, SimpleNodeParser, + storageContextFromDefaults, VectorStoreIndex, } from "llamaindex"; @@ -28,11 +29,20 @@ export async function runPipeline( return documents.map((document) => document.id_); } else { // Initialize a new index with the documents - const newIndex = await VectorStoreIndex.fromDocuments(documents); - newIndex.storageContext.docStore.persist(); console.log( "Got empty index, created new index with the uploaded documents", ); + const persistDir = process.env.STORAGE_CACHE_DIR; + if (!persistDir) { + throw new Error("STORAGE_CACHE_DIR environment variable is required!"); + } + const storageContext = await storageContextFromDefaults({ + persistDir, + }); + const newIndex = await VectorStoreIndex.fromDocuments(documents, { + storageContext, + }); + await newIndex.storageContext.docStore.persist(); return documents.map((document) => document.id_); } } diff --git a/templates/components/vectordbs/typescript/none/generate.ts b/templates/components/vectordbs/typescript/none/generate.ts index 595b27df136e5157b725755922f805e8ee516994..4647361a3a576b810948c6de172c4dbffb5860a0 100644 --- a/templates/components/vectordbs/typescript/none/generate.ts +++ b/templates/components/vectordbs/typescript/none/generate.ts @@ -5,7 +5,6 @@ import * as dotenv from "dotenv"; import { getDocuments } from "./loader"; import { initSettings } from "./settings"; -import { STORAGE_CACHE_DIR } from "./shared"; // Load environment variables from local .env file dotenv.config(); @@ -20,9 +19,13 @@ async function getRuntime(func: any) { async function generateDatasource() { console.log(`Generating storage context...`); // Split documents, create embeddings and store them in the storage context + const persistDir = process.env.STORAGE_CACHE_DIR; + if (!persistDir) { + throw new Error("STORAGE_CACHE_DIR environment variable is required!"); + } const ms = await getRuntime(async () => { const storageContext = await storageContextFromDefaults({ - persistDir: STORAGE_CACHE_DIR, + persistDir, }); const documents = await getDocuments(); diff --git a/templates/components/vectordbs/typescript/none/index.ts b/templates/components/vectordbs/typescript/none/index.ts index fecc76f45637cd22514b0a791f4fdd6be07200c6..d38ea6001ad451961e37ebd7c5c39a6447f07ac6 100644 --- a/templates/components/vectordbs/typescript/none/index.ts +++ b/templates/components/vectordbs/typescript/none/index.ts @@ -1,10 +1,13 @@ import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex"; import { storageContextFromDefaults } from "llamaindex/storage/StorageContext"; -import { STORAGE_CACHE_DIR } from "./shared"; export async function getDataSource(params?: any) { + const persistDir = process.env.STORAGE_CACHE_DIR; + if (!persistDir) { + throw new Error("STORAGE_CACHE_DIR environment variable is required!"); + } const storageContext = await storageContextFromDefaults({ - persistDir: `${STORAGE_CACHE_DIR}`, + persistDir, }); const numberOfDocs = Object.keys( diff --git a/templates/components/vectordbs/typescript/none/shared.ts b/templates/components/vectordbs/typescript/none/shared.ts deleted file mode 100644 index e7736e5b3f830112d7619a5fba199ac9479ea92d..0000000000000000000000000000000000000000 --- a/templates/components/vectordbs/typescript/none/shared.ts +++ /dev/null @@ -1 +0,0 @@ -export const STORAGE_CACHE_DIR = "./cache";