Commit 6001b2f8 authored by Huu Le (Lee), committed by GitHub
feat: Add more env variables to configure host, port, LLM and embedding (#630)

parent 44fd8cb3
import fs from "fs/promises";
import path from "path";
import {
FileSourceConfig,
TemplateDataSource,
TemplateFramework,
TemplateVectorDB,
} from "./types";
type EnvVar = {
name?: string;
description?: string;
value?: string;
};
const renderEnvVar = (envVars: EnvVar[]): string => {
return envVars.reduce(
(prev, env) =>
prev +
(env.description
? `# ${env.description.replaceAll("\n", "\n# ")}\n`
: "") +
(env.name && env.value
? `${env.name}=${env.value}\n\n`
: env.name
? `# ${env.name}=\n\n`
: ""),
"",
);
};
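// Illustrative sketch (not part of the commit): how renderEnvVar formats one set
// and one unset variable. The values below are hypothetical examples.
const exampleEnvContent = renderEnvVar([
  {
    name: "MODEL",
    description: "The name of the LLM model to use.",
    value: "gpt-3.5-turbo",
  },
  { name: "EMBEDDING_DIM", description: "Dimension of the embedding model to use." },
]);
// exampleEnvContent now contains:
// # The name of the LLM model to use.
// MODEL=gpt-3.5-turbo
//
// # Dimension of the embedding model to use.
// # EMBEDDING_DIM=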
const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
switch (vectorDb) {
case "mongo":
return [
{
name: "MONGO_URI",
description:
"For generating a connection URI, see https://docs.timescale.com/use-timescale/latest/services/create-a-service\nThe MongoDB connection URI.",
},
{
name: "MONGODB_DATABASE",
},
{
name: "MONGODB_VECTORS",
},
{
name: "MONGODB_VECTOR_INDEX",
},
];
case "pg":
return [
{
name: "PG_CONNECTION_STRING",
description:
"For generating a connection URI, see https://docs.timescale.com/use-timescale/latest/services/create-a-service\nThe PostgreSQL connection string.",
},
];
case "pinecone":
return [
{
name: "PINECONE_API_KEY",
description:
"Configuration for Pinecone vector store\nThe Pinecone API key.",
},
{
name: "PINECONE_ENVIRONMENT",
},
{
name: "PINECONE_INDEX_NAME",
},
];
default:
return [];
}
};
const getDataSourceEnvs = (dataSource: TemplateDataSource) => {
switch (dataSource.type) {
case "web":
return [
{
name: "BASE_URL",
description: "The base URL to start web scraping.",
},
{
name: "URL_PREFIX",
description: "The prefix of the URL to start web scraping.",
},
{
name: "MAX_DEPTH",
description: "The maximum depth to scrape.",
},
];
default:
return [];
}
};
export const createBackendEnvFile = async (
root: string,
opts: {
openAiKey?: string;
llamaCloudKey?: string;
vectorDb?: TemplateVectorDB;
model?: string;
embeddingModel?: string;
framework?: TemplateFramework;
dataSource?: TemplateDataSource;
port?: number;
},
) => {
// Init env values
const envFileName = ".env";
const defaultEnvs = [
{
render: true,
name: "MODEL",
description: "The name of LLM model to use.",
value: opts.model || "gpt-3.5-turbo",
},
{
render: true,
name: "OPENAI_API_KEY",
description: "The OpenAI API key to use.",
value: opts.openAiKey,
},
// Add vector database environment variables
...(opts.vectorDb ? getVectorDBEnvs(opts.vectorDb) : []),
// Add data source environment variables
...(opts.dataSource ? getDataSourceEnvs(opts.dataSource) : []),
];
let envVars: EnvVar[] = [];
if (opts.framework === "fastapi") {
envVars = [
...defaultEnvs,
...[
{
name: "APP_HOST",
description: "The address to start the backend app.",
value: "0.0.0.0",
},
{
name: "APP_PORT",
description: "The port to start the backend app.",
value: opts.port?.toString() || "8000",
},
{
name: "EMBEDDING_MODEL",
description: "Name of the embedding model to use.",
value: opts.embeddingModel,
},
{
name: "EMBEDDING_DIM",
description: "Dimension of the embedding model to use.",
},
{
name: "LLM_TEMPERATURE",
description: "Temperature for sampling from the model.",
},
{
name: "LLM_MAX_TOKENS",
description: "Maximum number of tokens to generate.",
},
{
name: "TOP_K",
description:
"The number of similar embeddings to return when retrieving documents.",
value: "3",
},
{
name: "SYSTEM_PROMPT",
description: `Custom system prompt.
Example:
SYSTEM_PROMPT="
We have provided context information below.
---------------------
{context_str}
---------------------
Given this information, please answer the question: {query_str}
"`,
},
(opts?.dataSource?.config as FileSourceConfig)?.useLlamaParse
? {
name: "LLAMA_CLOUD_API_KEY",
description: `The Llama Cloud API key.`,
value: opts.llamaCloudKey,
}
: {},
],
];
} else {
envVars = [
...defaultEnvs,
...[
opts.framework === "nextjs"
? {
name: "NEXT_PUBLIC_MODEL",
description:
"The LLM model to use (hardcode to front-end artifact).",
}
: {},
],
];
}
// Render and write env file
const content = renderEnvVar(envVars);
await fs.writeFile(path.join(root, envFileName), content);
console.log(`Created '${envFileName}' file. Please check the settings.`);
};
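// Illustrative call (not part of the commit; path and values are hypothetical):
// generate a FastAPI backend .env that uses the new host/port and embedding
// variables together with a Postgres vector store. The rendered file contains
// MODEL, OPENAI_API_KEY, PG_CONNECTION_STRING, APP_HOST, APP_PORT,
// EMBEDDING_MODEL, EMBEDDING_DIM, LLM_TEMPERATURE, LLM_MAX_TOKENS, TOP_K and
// SYSTEM_PROMPT entries.
const exampleBackendEnv = () =>
  createBackendEnvFile("./my-app/backend", {
    framework: "fastapi",
    model: "gpt-4",
    embeddingModel: "text-embedding-ada-002",
    vectorDb: "pg",
    port: 8001,
  });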
export const createFrontendEnvFile = async (
root: string,
opts: {
customApiPath?: string;
model?: string;
},
) => {
const defaultFrontendEnvs = [
{
name: "MODEL",
description: "The OpenAI model to use.",
value: opts.model,
},
{
name: "NEXT_PUBLIC_MODEL",
description: "The OpenAI model to use (hardcode to front-end artifact).",
value: opts.model,
},
{
name: "NEXT_PUBLIC_CHAT_API",
description: "The backend API for chat endpoint.",
value: opts.customApiPath
? opts.customApiPath
: "http://localhost:8000/api/chat",
},
];
const content = renderEnvVar(defaultFrontendEnvs);
await fs.writeFile(path.join(root, ".env"), content);
};
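// Illustrative call (not part of the commit; path and API URL are hypothetical):
// the frontend .env points NEXT_PUBLIC_CHAT_API at a backend running on a
// non-default port, alongside MODEL and NEXT_PUBLIC_MODEL.
const exampleFrontendEnv = () =>
  createFrontendEnvFile("./my-app/frontend", {
    model: "gpt-4",
    customApiPath: "http://localhost:8001/api/chat",
  });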
@@ -7,6 +7,7 @@ import { cyan } from "picocolors";
import { COMMUNITY_OWNER, COMMUNITY_REPO } from "./constant";
import { templatesDir } from "./dir";
import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables";
import { PackageManager } from "./get-pkg-manager";
import { installLlamapackProject } from "./llama-pack";
import { isHavingPoetryLockFile, tryPoetryRun } from "./poetry";
@@ -18,89 +19,9 @@ import {
TemplateDataSource,
TemplateFramework,
TemplateVectorDB,
WebSourceConfig,
} from "./types";
import { installTSTemplate } from "./typescript";
const createEnvLocalFile = async (
root: string,
opts?: {
openAiKey?: string;
llamaCloudKey?: string;
vectorDb?: TemplateVectorDB;
model?: string;
embeddingModel?: string;
framework?: TemplateFramework;
dataSource?: TemplateDataSource;
},
) => {
const envFileName = ".env";
let content = "";
const model = opts?.model || "gpt-3.5-turbo";
content += `MODEL=${model}\n`;
if (opts?.framework === "nextjs") {
content += `NEXT_PUBLIC_MODEL=${model}\n`;
}
console.log("\nUsing OpenAI model: ", model, "\n");
if (opts?.openAiKey) {
content += `OPENAI_API_KEY=${opts?.openAiKey}\n`;
}
if (opts?.embeddingModel) {
content += `EMBEDDING_MODEL=${opts?.embeddingModel}\n`;
}
if ((opts?.dataSource?.config as FileSourceConfig).useLlamaParse) {
if (opts?.llamaCloudKey) {
content += `LLAMA_CLOUD_API_KEY=${opts?.llamaCloudKey}\n`;
} else {
content += `# Please obtain the Llama Cloud API key from https://cloud.llamaindex.ai/api-key
# and set it to the LLAMA_CLOUD_API_KEY variable below.
# LLAMA_CLOUD_API_KEY=`;
}
}
switch (opts?.vectorDb) {
case "mongo": {
content += `# For generating a connection URI, see https://www.mongodb.com/docs/guides/atlas/connection-string\n`;
content += `MONGO_URI=\n`;
content += `MONGODB_DATABASE=\n`;
content += `MONGODB_VECTORS=\n`;
content += `MONGODB_VECTOR_INDEX=\n`;
break;
}
case "pg": {
content += `# For generating a connection URI, see https://docs.timescale.com/use-timescale/latest/services/create-a-service\n`;
content += `PG_CONNECTION_STRING=\n`;
break;
}
case "pinecone": {
content += `PINECONE_API_KEY=\n`;
content += `PINECONE_ENVIRONMENT=\n`;
content += `PINECONE_INDEX_NAME=\n`;
break;
}
}
switch (opts?.dataSource?.type) {
case "web": {
const webConfig = opts?.dataSource.config as WebSourceConfig;
content += `# web loader config\n`;
content += `BASE_URL=${webConfig.baseUrl}\n`;
content += `URL_PREFIX=${webConfig.baseUrl}\n`;
content += `MAX_DEPTH=${webConfig.depth}\n`;
break;
}
}
if (content) {
await fs.writeFile(path.join(root, envFileName), content);
console.log(`Created '${envFileName}' file. Please check the settings.`);
}
};
// eslint-disable-next-line max-params
async function generateContextData(
framework: TemplateFramework,
@@ -232,7 +153,7 @@ export const installTemplate = async (
// This is a backend, so we need to copy the test data and create the env file.
// Copy the environment file to the target directory.
await createEnvLocalFile(props.root, {
await createBackendEnvFile(props.root, {
openAiKey: props.openAiKey,
llamaCloudKey: props.llamaCloudKey,
vectorDb: props.vectorDb,
@@ -240,6 +161,7 @@ export const installTemplate = async (
embeddingModel: props.embeddingModel,
framework: props.framework,
dataSource: props.dataSource,
port: props.externalPort,
});
if (props.engine === "context") {
@@ -260,8 +182,10 @@ export const installTemplate = async (
}
} else {
// this is a frontend for a full-stack app, create .env file with model information
const content = `MODEL=${props.model}\nNEXT_PUBLIC_MODEL=${props.model}\n`;
await fs.writeFile(path.join(props.root, ".env"), content);
await createFrontendEnvFile(props.root, {
model: props.model,
customApiPath: props.customApiPath,
});
}
};
...
@@ -171,10 +171,6 @@ export const installTSTemplate = async ({
const apiPath = path.join(root, "app", "api");
await fs.rm(apiPath, { recursive: true });
// modify the dev script to use the custom api path
packageJson.scripts = {
...packageJson.scripts,
dev: `cross-env NEXT_PUBLIC_CHAT_API=${customApiPath} next dev`,
};
}
if (engine === "context" && relativeEngineDestPath) {
...
import os
from llama_index.core.settings import Settings
from llama_index.core.agent import AgentRunner
from llama_index.core.tools.query_engine import QueryEngineTool
@@ -6,11 +7,13 @@ from app.engine.index import get_index
def get_chat_engine():
system_prompt = os.getenv("SYSTEM_PROMPT")
top_k = os.getenv("TOP_K", "3")
tools = []
# Add query tool
index = get_index()
query_engine = index.as_query_engine(similarity_top_k=3)
query_engine = index.as_query_engine(similarity_top_k=int(top_k))
query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine)
tools.append(query_engine_tool)
@@ -20,5 +23,6 @@ def get_chat_engine():
return AgentRunner.from_llm(
llm=Settings.llm,
tools=tools,
system_prompt=system_prompt,
verbose=True,
)
import os
from app.engine.index import get_index
def get_chat_engine():
system_prompt = os.getenv("SYSTEM_PROMPT")
top_k = os.getenv("TOP_K", 3)
return get_index().as_chat_engine(
similarity_top_k=3, chat_mode="condense_plus_context"
similarity_top_k=int(top_k),
system_prompt=system_prompt,
chat_mode="condense_plus_context",
)
import os
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader
@@ -5,6 +6,11 @@ DATA_DIR = "data" # directory containing the documents
def get_documents():
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
raise ValueError(
"LLAMA_CLOUD_API_KEY environment variable is not set. "
"Please set it in .env file or in your shell environment then run again!"
)
parser = LlamaParse(result_type="markdown", verbose=True, language="en")
reader = SimpleDirectoryReader(DATA_DIR, file_extractor={".pdf": parser})
...
import os
from typing import Dict
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.settings import Settings
def llm_config_from_env() -> Dict:
from llama_index.core.constants import DEFAULT_TEMPERATURE
model = os.getenv("MODEL")
temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
max_tokens = os.getenv("LLM_MAX_TOKENS")
config = {
"model": model,
"temperature": float(temperature),
"max_tokens": int(max_tokens) if max_tokens is not None else None,
}
return config
def embedding_config_from_env() -> Dict:
model = os.getenv("EMBEDDING_MODEL")
dimension = os.getenv("EMBEDDING_DIM")
config = {
"model": model,
"dimension": int(dimension) if dimension is not None else None,
}
return config
def init_settings():
llm_model = os.getenv("MODEL", "gpt-3.5-turbo")
embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
llm_configs = llm_config_from_env()
embedding_configs = embedding_config_from_env()
Settings.llm = OpenAI(model=llm_model)
Settings.embed_model = OpenAIEmbedding(model=embedding_model)
Settings.chunk_size = 1024
Settings.chunk_overlap = 20
Settings.llm = OpenAI(**llm_configs)
Settings.embed_model = OpenAIEmbedding(**embedding_configs)
Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
@@ -32,4 +32,7 @@ app.include_router(chat_router, prefix="/api/chat")
if __name__ == "__main__":
uvicorn.run(app="main:app", host="0.0.0.0", reload=True)
app_host = os.getenv("APP_HOST", "0.0.0.0")
app_port = int(os.getenv("APP_PORT", "8000"))
uvicorn.run(app="main:app", host=app_host, port=app_port, reload=True)
import os
from typing import Dict
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.settings import Settings
def llm_config_from_env() -> Dict:
from llama_index.core.constants import DEFAULT_TEMPERATURE
model = os.getenv("MODEL")
temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
max_tokens = os.getenv("LLM_MAX_TOKENS")
config = {
"model": model,
"temperature": float(temperature),
"max_tokens": int(max_tokens) if max_tokens is not None else None,
}
return config
def embedding_config_from_env() -> Dict:
model = os.getenv("EMBEDDING_MODEL")
dimension = os.getenv("EMBEDDING_DIM")
config = {
"model": model,
"dimension": int(dimension) if dimension is not None else None,
}
return config
def init_settings():
llm_model = os.getenv("MODEL", "gpt-3.5-turbo")
embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
llm_configs = llm_config_from_env()
embedding_configs = embedding_config_from_env()
Settings.llm = OpenAI(model=llm_model)
Settings.embed_model = OpenAIEmbedding(model=embedding_model)
Settings.chunk_size = 1024
Settings.chunk_overlap = 20
Settings.llm = OpenAI(**llm_configs)
Settings.embed_model = OpenAIEmbedding(**embedding_configs)
Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
@@ -33,4 +33,7 @@ app.include_router(chat_router, prefix="/api/chat")
if __name__ == "__main__":
uvicorn.run(app="main:app", host="0.0.0.0", reload=True)
app_host = os.getenv("APP_HOST", "0.0.0.0")
app_port = int(os.getenv("APP_PORT", "8000"))
uvicorn.run(app="main:app", host=app_host, port=app_port, reload=True)