From 459824dbec07469f3c50574edbca6c6ac5ce31d4 Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Wed, 27 Mar 2024 15:06:14 +0800
Subject: [PATCH] fix: remove redundant template engine variable and fix
 llamaparse flag (#22)

---
 create-app.ts          |  2 --
 e2e/basic.spec.ts      |  9 +++--
 e2e/utils.ts           | 13 +++----
 helpers/datasources.ts | 27 +++++++++++++++
 helpers/index.ts       |  2 +-
 helpers/python.ts      | 78 ++++++++++++++++++++----------------------
 helpers/types.ts       |  2 --
 helpers/typescript.ts  | 70 +++++++++++++++++--------------------
 index.ts               | 25 ++++++++------
 package.json           |  1 +
 questions.ts           | 67 ++++++------------------------------
 scripts/build.sh       |  2 +-
 12 files changed, 134 insertions(+), 164 deletions(-)
 create mode 100644 helpers/datasources.ts

diff --git a/create-app.ts b/create-app.ts
index 8683ee9a..35ecafe9 100644
--- a/create-app.ts
+++ b/create-app.ts
@@ -26,7 +26,6 @@ export type InstallAppArgs = Omit<
 export async function createApp({
   template,
   framework,
-  engine,
   ui,
   appPath,
   packageManager,
@@ -76,7 +75,6 @@ export async function createApp({
     root,
     template,
     framework,
-    engine,
     ui,
     packageManager,
     isOnline,
diff --git a/e2e/basic.spec.ts b/e2e/basic.spec.ts
index f2d0d6ac..454e552a 100644
--- a/e2e/basic.spec.ts
+++ b/e2e/basic.spec.ts
@@ -4,7 +4,6 @@ import { ChildProcess } from "child_process";
 import fs from "fs";
 import path from "path";
 import type {
-  TemplateEngine,
   TemplateFramework,
   TemplatePostInstallAction,
   TemplateType,
@@ -18,7 +17,7 @@ const templateFrameworks: TemplateFramework[] = [
   "express",
   "fastapi",
 ];
-const templateEngines: TemplateEngine[] = ["simple", "context"];
+const dataSources: string[] = ["--no-files", "--example-file"];
 const templateUIs: TemplateUI[] = ["shadcn", "html"];
 const templatePostInstallActions: TemplatePostInstallAction[] = [
   "none",
@@ -27,12 +26,12 @@ const templatePostInstallActions: TemplatePostInstallAction[] = [
 
 for (const templateType of templateTypes) {
   for (const templateFramework of templateFrameworks) {
-    for (const templateEngine of templateEngines) {
+    for (const dataSource of dataSources) {
       for (const templateUI of templateUIs) {
         for (const templatePostInstallAction of templatePostInstallActions) {
           const appType: AppType =
             templateFramework === "nextjs" ? "" : "--frontend";
-          test.describe(`try create-llama ${templateType} ${templateFramework} ${templateEngine} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
+          test.describe(`try create-llama ${templateType} ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
             let port: number;
             let externalPort: number;
             let cwd: string;
@@ -49,7 +48,7 @@ for (const templateType of templateTypes) {
                 cwd,
                 templateType,
                 templateFramework,
-                templateEngine,
+                dataSource,
                 templateUI,
                 vectorDb,
                 appType,
diff --git a/e2e/utils.ts b/e2e/utils.ts
index 54514d26..e4f18497 100644
--- a/e2e/utils.ts
+++ b/e2e/utils.ts
@@ -4,7 +4,6 @@ import { mkdir } from "node:fs/promises";
 import * as path from "path";
 import waitPort from "wait-port";
 import {
-  TemplateEngine,
   TemplateFramework,
   TemplatePostInstallAction,
   TemplateType,
@@ -67,7 +66,7 @@ export async function runCreateLlama(
   cwd: string,
   templateType: TemplateType,
   templateFramework: TemplateFramework,
-  templateEngine: TemplateEngine,
+  dataSource: string,
   templateUI: TemplateUI,
   vectorDb: TemplateVectorDB,
   appType: AppType,
@@ -75,10 +74,13 @@ export async function runCreateLlama(
   externalPort: number,
   postInstallAction: TemplatePostInstallAction,
 ): Promise<CreateLlamaResult> {
+  if (!process.env.OPENAI_API_KEY) {
+    throw new Error("Setting OPENAI_API_KEY is mandatory to run tests");
+  }
   const name = [
     templateType,
     templateFramework,
-    templateEngine,
+    dataSource,
     templateUI,
     appType,
   ].join("-");
@@ -89,8 +91,7 @@ export async function runCreateLlama(
     templateType,
     "--framework",
     templateFramework,
-    "--engine",
-    templateEngine,
+    dataSource,
     "--ui",
     templateUI,
     "--vector-db",
@@ -100,7 +101,7 @@ export async function runCreateLlama(
     "--embedding-model",
     EMBEDDING_MODEL,
     "--open-ai-key",
-    process.env.OPENAI_API_KEY || "testKey",
+    process.env.OPENAI_API_KEY,
     appType,
     "--eslint",
     "--use-pnpm",
diff --git a/helpers/datasources.ts b/helpers/datasources.ts
new file mode 100644
index 00000000..2735eb8d
--- /dev/null
+++ b/helpers/datasources.ts
@@ -0,0 +1,27 @@
+import { TemplateDataSource } from "./types";
+
+// The example file is represented by a "file" data source with an empty config (no `path`)
+export const EXAMPLE_FILE: TemplateDataSource = {
+  type: "file",
+  config: {},
+};
+
+export function getDataSources(
+  files?: string,
+  exampleFile?: boolean,
+): TemplateDataSource[] | undefined {
+  let dataSources: TemplateDataSource[] | undefined = undefined;
+  if (files) {
+    // Turn the comma-separated files option into one "file" data source per path (paths are used as given, not trimmed)
+    dataSources = files.split(",").map((filePath) => ({
+      type: "file",
+      config: {
+        path: filePath,
+      },
+    }));
+  }
+  if (exampleFile) {
+    dataSources = [...(dataSources ? dataSources : []), EXAMPLE_FILE];
+  }
+  return dataSources;
+}
diff --git a/helpers/index.ts b/helpers/index.ts
index de28a18f..b3ba62b7 100644
--- a/helpers/index.ts
+++ b/helpers/index.ts
@@ -137,7 +137,7 @@ export const installTemplate = async (
       port: props.externalPort,
     });
 
-    if (props.engine === "context") {
+    if (props.dataSources.length > 0) {
       console.log("\nGenerating context data...\n");
       await copyContextData(props.root, props.dataSources);
       if (
diff --git a/helpers/python.ts b/helpers/python.ts
index b4427c24..90c5c140 100644
--- a/helpers/python.ts
+++ b/helpers/python.ts
@@ -177,7 +177,6 @@ export const installPythonTemplate = async ({
   root,
   template,
   framework,
-  engine,
   vectorDb,
   dataSources,
   tools,
@@ -188,7 +187,6 @@ export const installPythonTemplate = async ({
   | "root"
   | "framework"
   | "template"
-  | "engine"
   | "vectorDb"
   | "dataSources"
   | "tools"
@@ -217,7 +215,7 @@ export const installPythonTemplate = async ({
     },
   });
 
-  if (engine === "context") {
+  if (dataSources.length > 0) {
     const enginePath = path.join(root, "app", "engine");
     const compPath = path.join(templatesDir, "components");
 
@@ -257,46 +255,44 @@ export const installPythonTemplate = async ({
       });
     }
 
-    if (dataSources.length > 0) {
-      const loaderConfigs: Record<string, any> = {};
-      const loaderPath = path.join(enginePath, "loaders");
+    const loaderConfigs: Record<string, any> = {};
+    const loaderPath = path.join(enginePath, "loaders");
 
-      // Copy loaders to enginePath
-      await copy("**", loaderPath, {
-        parents: true,
-        cwd: path.join(compPath, "loaders", "python"),
-      });
+    // Copy loaders to enginePath
+    await copy("**", loaderPath, {
+      parents: true,
+      cwd: path.join(compPath, "loaders", "python"),
+    });
 
-      // Generate loaders config
-      // Web loader config
-      if (dataSources.some((ds) => ds.type === "web")) {
-        const webLoaderConfig = dataSources
-          .filter((ds) => ds.type === "web")
-          .map((ds) => {
-            const dsConfig = ds.config as WebSourceConfig;
-            return {
-              base_url: dsConfig.baseUrl,
-              prefix: dsConfig.prefix,
-              depth: dsConfig.depth,
-            };
-          });
-        loaderConfigs["web"] = webLoaderConfig;
-      }
-      // File loader config
-      if (dataSources.some((ds) => ds.type === "file")) {
-        loaderConfigs["file"] = {
-          use_llama_parse: useLlamaParse,
-        };
-      }
-      // Write loaders config
-      if (Object.keys(loaderConfigs).length > 0) {
-        const loaderConfigPath = path.join(root, "config/loaders.json");
-        await fs.mkdir(path.join(root, "config"), { recursive: true });
-        await fs.writeFile(
-          loaderConfigPath,
-          JSON.stringify(loaderConfigs, null, 2),
-        );
-      }
+    // Generate loaders config
+    // Web loader config
+    if (dataSources.some((ds) => ds.type === "web")) {
+      const webLoaderConfig = dataSources
+        .filter((ds) => ds.type === "web")
+        .map((ds) => {
+          const dsConfig = ds.config as WebSourceConfig;
+          return {
+            base_url: dsConfig.baseUrl,
+            prefix: dsConfig.prefix,
+            depth: dsConfig.depth,
+          };
+        });
+      loaderConfigs["web"] = webLoaderConfig;
+    }
+    // File loader config
+    if (dataSources.some((ds) => ds.type === "file")) {
+      loaderConfigs["file"] = {
+        use_llama_parse: useLlamaParse,
+      };
+    }
+    // Write loaders config
+    if (Object.keys(loaderConfigs).length > 0) {
+      const loaderConfigPath = path.join(root, "config/loaders.json");
+      await fs.mkdir(path.join(root, "config"), { recursive: true });
+      await fs.writeFile(
+        loaderConfigPath,
+        JSON.stringify(loaderConfigs, null, 2),
+      );
     }
   }
 
diff --git a/helpers/types.ts b/helpers/types.ts
index 0bed9a6d..c1276602 100644
--- a/helpers/types.ts
+++ b/helpers/types.ts
@@ -3,7 +3,6 @@ import { Tool } from "./tools";
 
 export type TemplateType = "streaming" | "community" | "llamapack";
 export type TemplateFramework = "nextjs" | "express" | "fastapi";
-export type TemplateEngine = "simple" | "context";
 export type TemplateUI = "html" | "shadcn";
 export type TemplateVectorDB = "none" | "mongo" | "pg" | "pinecone" | "milvus";
 export type TemplatePostInstallAction =
@@ -43,7 +42,6 @@ export interface InstallTemplateArgs {
   isOnline: boolean;
   template: TemplateType;
   framework: TemplateFramework;
-  engine: TemplateEngine;
   ui: TemplateUI;
   dataSources: TemplateDataSource[];
   eslint: boolean;
diff --git a/helpers/typescript.ts b/helpers/typescript.ts
index 4aefb531..f81aaed5 100644
--- a/helpers/typescript.ts
+++ b/helpers/typescript.ts
@@ -56,7 +56,6 @@ export const installTSTemplate = async ({
   isOnline,
   template,
   framework,
-  engine,
   ui,
   eslint,
   customApiPath,
@@ -142,50 +141,41 @@ export const installTSTemplate = async ({
   /**
    * Copy the selected chat engine files to the target directory and reference it.
    */
-  let relativeEngineDestPath;
   const compPath = path.join(templatesDir, "components");
-  if (engine && (framework === "express" || framework === "nextjs")) {
-    console.log("\nUsing chat engine:", engine, "\n");
+  const relativeEngineDestPath =
+    framework === "nextjs"
+      ? path.join("app", "api", "chat")
+      : path.join("src", "controllers");
+  const enginePath = path.join(root, relativeEngineDestPath, "engine");
 
-    let vectorDBFolder: string = engine;
-
-    if (engine !== "simple" && vectorDb) {
+  if (dataSources.length === 0) {
+    // use simple chat engine if the user did not select any data source
+    console.log("\nUsing simple chat engine\n");
+  } else {
+    if (vectorDb) {
+      // copy vector db component
       console.log("\nUsing vector DB:", vectorDb, "\n");
-      vectorDBFolder = vectorDb;
-    }
-
-    relativeEngineDestPath =
-      framework === "nextjs"
-        ? path.join("app", "api", "chat")
-        : path.join("src", "controllers");
-
-    const enginePath = path.join(root, relativeEngineDestPath, "engine");
-
-    // copy vector db component
-    const vectorDBPath = path.join(
-      compPath,
-      "vectordbs",
-      "typescript",
-      vectorDBFolder,
-    );
-    await copy("**", enginePath, {
-      parents: true,
-      cwd: vectorDBPath,
-    });
-
-    // copy loader component
-    const dataSourceType = dataSources[0]?.type;
-    if (dataSourceType) {
-      let loaderFolder: string;
-      loaderFolder = useLlamaParse ? "llama_parse" : dataSourceType;
+      const vectorDBPath = path.join(
+        compPath,
+        "vectordbs",
+        "typescript",
+        vectorDb,
+      );
       await copy("**", enginePath, {
         parents: true,
-        cwd: path.join(compPath, "loaders", "typescript", loaderFolder),
+        cwd: vectorDBPath,
       });
     }
-
-    // copy tools component
+    // copy loader component (TS only supports llama_parse and file for now)
+    let loaderFolder: string;
+    loaderFolder = useLlamaParse ? "llama_parse" : "file";
+    await copy("**", enginePath, {
+      parents: true,
+      cwd: path.join(compPath, "loaders", "typescript", loaderFolder),
+    });
     if (tools?.length) {
+      // use agent chat engine if user selects tools
+      console.log("\nUsing agent chat engine\n");
       await copy("**", enginePath, {
         parents: true,
         cwd: path.join(compPath, "engines", "typescript", "agent"),
@@ -201,7 +191,9 @@ export const installTSTemplate = async ({
         configFilePath,
         JSON.stringify(configContent, null, 2),
       );
-    } else if (engine !== "simple") {
+    } else {
+      // use context chat engine if user does not select tools
+      console.log("\nUsing context chat engine\n");
       await copy("**", enginePath, {
         parents: true,
         cwd: path.join(compPath, "engines", "typescript", "chat"),
@@ -248,7 +240,7 @@ export const installTSTemplate = async ({
     // modify the dev script to use the custom api path
   }
 
-  if (engine === "context" && relativeEngineDestPath) {
+  if (dataSources.length > 0 && relativeEngineDestPath) {
     // add generate script if using context engine
     packageJson.scripts = {
       ...packageJson.scripts,
diff --git a/index.ts b/index.ts
index 2bd8cf3c..b6c11c71 100644
--- a/index.ts
+++ b/index.ts
@@ -9,6 +9,7 @@ import prompts from "prompts";
 import terminalLink from "terminal-link";
 import checkForUpdate from "update-check";
 import { createApp } from "./create-app";
+import { getDataSources } from "./helpers/datasources";
 import { getPkgManager } from "./helpers/get-pkg-manager";
 import { isFolderEmpty } from "./helpers/is-folder-empty";
 import { runApp } from "./helpers/run-app";
@@ -71,13 +72,6 @@ const program = new Commander.Command(packageJson.name)
     `
 
   Select a template to bootstrap the application with.
-`,
-  )
-  .option(
-    "--engine <engine>",
-    `
-
-  Select a chat engine to bootstrap the application with.
 `,
   )
   .option(
@@ -92,6 +86,13 @@ const program = new Commander.Command(packageJson.name)
     `
   
     Specify the path to a local file or folder for chatting.
+`,
+  )
+  .option(
+    "--example-file",
+    `
+
+  Select to use an example PDF as data source.
 `,
   )
   .option(
@@ -164,7 +165,7 @@ const program = new Commander.Command(packageJson.name)
 `,
   )
   .option(
-    "--llama-parse",
+    "--use-llama-parse",
     `
     Enable LlamaParse.
 `,
@@ -199,7 +200,12 @@ if (process.argv.includes("--tools")) {
   }
 }
 if (process.argv.includes("--no-llama-parse")) {
-  program.llamaParse = false;
+  program.useLlamaParse = false;
+}
+if (process.argv.includes("--no-files")) {
+  program.dataSources = [];
+} else {
+  program.dataSources = getDataSources(program.files, program.exampleFile);
 }
 
 const packageManager = !!program.useNpm
@@ -287,7 +293,6 @@ async function run(): Promise<void> {
   await createApp({
     template: program.template,
     framework: program.framework,
-    engine: program.engine,
     ui: program.ui,
     appPath: resolvedProjectPath,
     packageManager,
diff --git a/package.json b/package.json
index 92bc5bb4..e4a93ffc 100644
--- a/package.json
+++ b/package.json
@@ -25,6 +25,7 @@
     "format:write": "prettier --ignore-unknown --write .",
     "dev": "ncc build ./index.ts -w -o dist/",
     "build": "bash ./scripts/build.sh",
+    "build:ncc": "pnpm run clean && ncc build ./index.ts -o ./dist/ --minify --no-cache --no-source-map-register",
     "lint": "eslint . --ignore-pattern dist --ignore-pattern e2e/cache",
     "e2e": "playwright test",
     "prepare": "husky",
diff --git a/questions.ts b/questions.ts
index 6cec0a6e..73b92a45 100644
--- a/questions.ts
+++ b/questions.ts
@@ -13,6 +13,7 @@ import {
   TemplateFramework,
 } from "./helpers";
 import { COMMUNITY_OWNER, COMMUNITY_REPO } from "./helpers/constant";
+import { EXAMPLE_FILE } from "./helpers/datasources";
 import { templatesDir } from "./helpers/dir";
 import { getAvailableLlamapackOptions } from "./helpers/llama-pack";
 import { getProjectOptions } from "./helpers/repo";
@@ -24,7 +25,6 @@ export type QuestionArgs = Omit<
   InstallAppArgs,
   "appPath" | "packageManager"
 > & {
-  files?: string;
   listServerModels?: boolean;
 };
 const supportedContextFileTypes = [
@@ -70,7 +70,6 @@ if ($dialogResult -eq [System.Windows.Forms.DialogResult]::OK)
 const defaults: QuestionArgs = {
   template: "streaming",
   framework: "nextjs",
-  engine: "simple",
   ui: "html",
   eslint: true,
   frontend: false,
@@ -621,25 +620,12 @@ export const askQuestions = async (
     }
   }
 
-  if (program.files) {
-    // If user specified files option, then the program should use context engine
-    program.engine = "context";
-    program.files.split(",").forEach((filePath) => {
-      program.dataSources.push({
-        type: "file",
-        config: {
-          path: filePath,
-        },
-      });
-    });
-  }
-
-  if (!program.engine) {
+  if (!program.dataSources) {
     if (ciInfo.isCI) {
-      program.engine = getPrefOrDefault("engine");
       program.dataSources = getPrefOrDefault("dataSources");
     } else {
       program.dataSources = [];
+      // continue asking user for data sources if none are initially provided
       while (true) {
         const { selectedSource } = await prompts(
           {
@@ -658,22 +644,12 @@ export const askQuestions = async (
           handlers,
         );
 
-        if (selectedSource === "no") {
+        if (selectedSource === "no" || selectedSource === "none") {
+          // user doesn't want another data source or any data source
           break;
         }
-
-        if (selectedSource === "none") {
-          // Selected simple chat
-          program.dataSources = [];
-          // Stop asking for another data source
-          break;
-        }
-
         if (selectedSource === "exampleFile") {
-          program.dataSources.push({
-            type: "file",
-            config: {},
-          });
+          program.dataSources.push(EXAMPLE_FILE);
         } else if (selectedSource === "file" || selectedSource === "folder") {
           // Select local data source
           const selectedPaths = await selectLocalContextData(selectedSource);
@@ -720,31 +696,13 @@ export const askQuestions = async (
           });
         }
       }
-
-      if (program.dataSources.length === 0) {
-        program.engine = "simple";
-      } else {
-        program.engine = "context";
-      }
-    }
-  } else if (!program.dataSources) {
-    // Handle a case when engine is specified but dataSource is not
-    if (program.engine === "context") {
-      program.dataSources = [
-        {
-          type: "file",
-          config: {},
-        },
-      ];
-    } else if (program.engine === "simple") {
-      program.dataSources = [];
     }
   }
 
   // Asking for LlamaParse if user selected file or folder data source
   if (
     program.dataSources.some((ds) => ds.type === "file") &&
-    !program.useLlamaParse
+    program.useLlamaParse === undefined
   ) {
     if (ciInfo.isCI) {
       program.useLlamaParse = getPrefOrDefault("useLlamaParse");
@@ -780,7 +738,7 @@ export const askQuestions = async (
     }
   }
 
-  if (program.engine !== "simple" && !program.vectorDb) {
+  if (program.dataSources.length > 0 && !program.vectorDb) {
     if (ciInfo.isCI) {
       program.vectorDb = getPrefOrDefault("vectorDb");
     } else {
@@ -799,7 +757,8 @@ export const askQuestions = async (
     }
   }
 
-  if (!program.tools && program.engine === "context") {
+  // TODO: allow tools also without datasources
+  if (!program.tools && program.dataSources.length > 0) {
     if (ciInfo.isCI) {
       program.tools = getPrefOrDefault("tools");
     } else {
@@ -845,10 +804,4 @@ export const askQuestions = async (
   }
 
   await askPostInstallAction();
-
-  // TODO: consider using zod to validate the input (doesn't work like this as not every option is required)
-  // templateUISchema.parse(program.ui);
-  // templateEngineSchema.parse(program.engine);
-  // templateFrameworkSchema.parse(program.framework);
-  // templateTypeSchema.parse(program.template);``
 };
diff --git a/scripts/build.sh b/scripts/build.sh
index 9b8e5f49..498fe508 100644
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
 # build dist/index.js file
-npm run clean && ncc build ./index.ts -o ./dist/ --minify --no-cache --no-source-map-register
+pnpm run build:ncc
 
 # add shebang to the top of dist/index.js
 # XXX: Windows needs a space after `node` to work correctly
-- 
GitLab