From 282eaa07fccf25870b2c5b357cb134ab60428bab Mon Sep 17 00:00:00 2001
From: Huu Le <39040748+leehuwuj@users.noreply.github.com>
Date: Wed, 13 Nov 2024 18:47:28 +0700
Subject: [PATCH] Fix: ts upload file does not create index and document store
 (#422)

---
 .changeset/big-turtles-own.md                      |  5 +++++
 helpers/env-variables.ts                           |  8 +++++++-
 .../llamaindex/typescript/documents/pipeline.ts    | 14 ++++++++++++--
 .../vectordbs/typescript/none/generate.ts          |  7 +++++--
 .../components/vectordbs/typescript/none/index.ts  |  7 +++++--
 .../components/vectordbs/typescript/none/shared.ts |  1 -
 6 files changed, 34 insertions(+), 8 deletions(-)
 create mode 100644 .changeset/big-turtles-own.md
 delete mode 100644 templates/components/vectordbs/typescript/none/shared.ts

diff --git a/.changeset/big-turtles-own.md b/.changeset/big-turtles-own.md
new file mode 100644
index 00000000..ed9c194b
--- /dev/null
+++ b/.changeset/big-turtles-own.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Ensure that the index and document store are created and persisted to `STORAGE_CACHE_DIR` when a file is uploaded and no index exists yet (TypeScript templates).
diff --git a/helpers/env-variables.ts b/helpers/env-variables.ts
index 07ae88e0..4a554ff0 100644
--- a/helpers/env-variables.ts
+++ b/helpers/env-variables.ts
@@ -217,7 +217,13 @@ Otherwise, use CHROMA_HOST and CHROMA_PORT config above`,
         },
       ];
     default:
-      return [];
+      return [
+        {
+          name: "STORAGE_CACHE_DIR",
+          description: "The directory to store the local storage cache.",
+          value: ".cache",
+        },
+      ];
   }
 };
 
diff --git a/templates/components/llamaindex/typescript/documents/pipeline.ts b/templates/components/llamaindex/typescript/documents/pipeline.ts
index 01b52fd5..cd4d6d09 100644
--- a/templates/components/llamaindex/typescript/documents/pipeline.ts
+++ b/templates/components/llamaindex/typescript/documents/pipeline.ts
@@ -3,6 +3,7 @@ import {
   IngestionPipeline,
   Settings,
   SimpleNodeParser,
+  storageContextFromDefaults,
   VectorStoreIndex,
 } from "llamaindex";
 
@@ -28,11 +29,20 @@ export async function runPipeline(
     return documents.map((document) => document.id_);
   } else {
     // Initialize a new index with the documents
-    const newIndex = await VectorStoreIndex.fromDocuments(documents);
-    newIndex.storageContext.docStore.persist();
     console.log(
       "Got empty index, created new index with the uploaded documents",
     );
+    const persistDir = process.env.STORAGE_CACHE_DIR;
+    if (!persistDir) {
+      throw new Error("STORAGE_CACHE_DIR environment variable is required!");
+    }
+    const storageContext = await storageContextFromDefaults({
+      persistDir,
+    });
+    const newIndex = await VectorStoreIndex.fromDocuments(documents, {
+      storageContext,
+    });
+    await newIndex.storageContext.docStore.persist();
     return documents.map((document) => document.id_);
   }
 }
diff --git a/templates/components/vectordbs/typescript/none/generate.ts b/templates/components/vectordbs/typescript/none/generate.ts
index 595b27df..4647361a 100644
--- a/templates/components/vectordbs/typescript/none/generate.ts
+++ b/templates/components/vectordbs/typescript/none/generate.ts
@@ -5,7 +5,6 @@ import * as dotenv from "dotenv";
 
 import { getDocuments } from "./loader";
 import { initSettings } from "./settings";
-import { STORAGE_CACHE_DIR } from "./shared";
 
 // Load environment variables from local .env file
 dotenv.config();
@@ -20,9 +19,13 @@ async function getRuntime(func: any) {
 async function generateDatasource() {
   console.log(`Generating storage context...`);
   // Split documents, create embeddings and store them in the storage context
+  const persistDir = process.env.STORAGE_CACHE_DIR;
+  if (!persistDir) {
+    throw new Error("STORAGE_CACHE_DIR environment variable is required!");
+  }
   const ms = await getRuntime(async () => {
     const storageContext = await storageContextFromDefaults({
-      persistDir: STORAGE_CACHE_DIR,
+      persistDir,
     });
     const documents = await getDocuments();
 
diff --git a/templates/components/vectordbs/typescript/none/index.ts b/templates/components/vectordbs/typescript/none/index.ts
index fecc76f4..d38ea600 100644
--- a/templates/components/vectordbs/typescript/none/index.ts
+++ b/templates/components/vectordbs/typescript/none/index.ts
@@ -1,10 +1,13 @@
 import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex";
 import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";
-import { STORAGE_CACHE_DIR } from "./shared";
 
 export async function getDataSource(params?: any) {
+  const persistDir = process.env.STORAGE_CACHE_DIR;
+  if (!persistDir) {
+    throw new Error("STORAGE_CACHE_DIR environment variable is required!");
+  }
   const storageContext = await storageContextFromDefaults({
-    persistDir: `${STORAGE_CACHE_DIR}`,
+    persistDir,
   });
 
   const numberOfDocs = Object.keys(
diff --git a/templates/components/vectordbs/typescript/none/shared.ts b/templates/components/vectordbs/typescript/none/shared.ts
deleted file mode 100644
index e7736e5b..00000000
--- a/templates/components/vectordbs/typescript/none/shared.ts
+++ /dev/null
@@ -1 +0,0 @@
-export const STORAGE_CACHE_DIR = "./cache";
-- 
GitLab