From f1c3e8df69dba68d6c9df048b4594b6f1e27e21d Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Wed, 24 Apr 2024 11:13:45 +0800
Subject: [PATCH] Adding support for Llama 3 and Phi3 (via Ollama) (#53)

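Adds a model provider selection step to create-llama: when run with
--ask-models, users can now pick between OpenAI and Ollama (e.g.
Llama 3 or Phi3) and choose an LLM and embedding model for the
selected provider. The chosen configuration is written to the
generated .env and picked up by the Express, Next.js and FastAPI
templates via the new MODEL_PROVIDER variable.

As an illustration (exact values depend on the selected models; shown
here with the Ollama defaults), the generated backend .env now
contains entries like:

    MODEL_PROVIDER=ollama
    MODEL=llama3:8b
    EMBEDDING_MODEL=nomic-embed-text
    EMBEDDING_DIM=768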
---
 .changeset/thirty-beds-sniff.md               |   5 +
 create-app.ts                                 |   8 +-
 helpers/env-variables.ts                      | 183 +++++++++---------
 helpers/index.ts                              |  17 +-
 helpers/providers/index.ts                    |  66 +++++++
 helpers/providers/ollama.ts                   |  92 +++++++++
 helpers/providers/openai.ts                   | 146 ++++++++++++++
 helpers/python.ts                             |  27 ++-
 helpers/types.ts                              |  12 +-
 index.ts                                      |  28 +--
 package.json                                  |  45 +++--
 pnpm-lock.yaml                                |  84 ++++----
 questions.ts                                  | 169 +++-------------
 .../vectordbs/typescript/pg/shared.ts         |   2 +-
 .../types/streaming/express/package.json      |   2 +-
 .../src/controllers/engine/settings.ts        |  41 +++-
 .../streaming/fastapi/README-template.md      |   8 +-
 .../types/streaming/fastapi/app/settings.py   |  53 ++---
 .../types/streaming/fastapi/pyproject.toml    |   1 -
 .../nextjs/app/api/chat/engine/settings.ts    |  41 +++-
 .../nextjs/app/components/chat-section.tsx    |   2 +-
 templates/types/streaming/nextjs/package.json |   2 +-
 22 files changed, 662 insertions(+), 372 deletions(-)
 create mode 100644 .changeset/thirty-beds-sniff.md
 create mode 100644 helpers/providers/index.ts
 create mode 100644 helpers/providers/ollama.ts
 create mode 100644 helpers/providers/openai.ts

diff --git a/.changeset/thirty-beds-sniff.md b/.changeset/thirty-beds-sniff.md
new file mode 100644
index 00000000..41525e4e
--- /dev/null
+++ b/.changeset/thirty-beds-sniff.md
@@ -0,0 +1,5 @@
+---
+"create-llama": minor
+---
+
+Add Llama3 and Phi3 support using Ollama
diff --git a/create-app.ts b/create-app.ts
index 5f6fd136..d8c5ae3a 100644
--- a/create-app.ts
+++ b/create-app.ts
@@ -30,10 +30,8 @@ export async function createApp({
   appPath,
   packageManager,
   frontend,
-  openAiKey,
+  modelConfig,
   llamaCloudKey,
-  model,
-  embeddingModel,
   communityProjectConfig,
   llamapack,
   vectorDb,
@@ -77,10 +75,8 @@ export async function createApp({
     ui,
     packageManager,
     isOnline,
-    openAiKey,
+    modelConfig,
     llamaCloudKey,
-    model,
-    embeddingModel,
     communityProjectConfig,
     llamapack,
     vectorDb,
diff --git a/helpers/env-variables.ts b/helpers/env-variables.ts
index fec6e801..9cd3f5ad 100644
--- a/helpers/env-variables.ts
+++ b/helpers/env-variables.ts
@@ -1,6 +1,7 @@
 import fs from "fs/promises";
 import path from "path";
 import {
+  ModelConfig,
   TemplateDataSource,
   TemplateFramework,
   TemplateVectorDB,
@@ -28,7 +29,10 @@ const renderEnvVar = (envVars: EnvVar[]): string => {
   );
 };
 
-const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
+const getVectorDBEnvs = (vectorDb?: TemplateVectorDB): EnvVar[] => {
+  if (!vectorDb) {
+    return [];
+  }
   switch (vectorDb) {
     case "mongo":
       return [
@@ -130,84 +134,75 @@ const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
   }
 };
 
-export const createBackendEnvFile = async (
-  root: string,
-  opts: {
-    openAiKey?: string;
-    llamaCloudKey?: string;
-    vectorDb?: TemplateVectorDB;
-    model?: string;
-    embeddingModel?: string;
-    framework?: TemplateFramework;
-    dataSources?: TemplateDataSource[];
-    port?: number;
-  },
-) => {
-  // Init env values
-  const envFileName = ".env";
-  const defaultEnvs = [
-    {
-      render: true,
-      name: "MODEL",
-      description: "The name of LLM model to use.",
-      value: opts.model,
-    },
+const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
+  return [
     {
-      render: true,
-      name: "OPENAI_API_KEY",
-      description: "The OpenAI API key to use.",
-      value: opts.openAiKey,
+      name: "MODEL_PROVIDER",
+      description: "The provider for the AI models to use.",
+      value: modelConfig.provider,
     },
     {
-      name: "LLAMA_CLOUD_API_KEY",
-      description: `The Llama Cloud API key.`,
-      value: opts.llamaCloudKey,
+      name: "MODEL",
+      description: "The name of LLM model to use.",
+      value: modelConfig.model,
     },
     {
       name: "EMBEDDING_MODEL",
       description: "Name of the embedding model to use.",
-      value: opts.embeddingModel,
+      value: modelConfig.embeddingModel,
     },
     {
       name: "EMBEDDING_DIM",
       description: "Dimension of the embedding model to use.",
-      value: 1536,
+      value: modelConfig.dimensions.toString(),
     },
-    // Add vector database environment variables
-    ...(opts.vectorDb ? getVectorDBEnvs(opts.vectorDb) : []),
+    ...(modelConfig.provider === "openai"
+      ? [
+          {
+            name: "OPENAI_API_KEY",
+            description: "The OpenAI API key to use.",
+            value: modelConfig.apiKey,
+          },
+        ]
+      : []),
   ];
-  let envVars: EnvVar[] = [];
-  if (opts.framework === "fastapi") {
-    envVars = [
-      ...defaultEnvs,
-      ...[
-        {
-          name: "APP_HOST",
-          description: "The address to start the backend app.",
-          value: "0.0.0.0",
-        },
-        {
-          name: "APP_PORT",
-          description: "The port to start the backend app.",
-          value: opts.port?.toString() || "8000",
-        },
-        {
-          name: "LLM_TEMPERATURE",
-          description: "Temperature for sampling from the model.",
-        },
-        {
-          name: "LLM_MAX_TOKENS",
-          description: "Maximum number of tokens to generate.",
-        },
-        {
-          name: "TOP_K",
-          description:
-            "The number of similar embeddings to return when retrieving documents.",
-          value: "3",
-        },
-        {
-          name: "SYSTEM_PROMPT",
-          description: `Custom system prompt.
+};
+
+const getFrameworkEnvs = (
+  framework?: TemplateFramework,
+  port?: number,
+): EnvVar[] => {
+  if (framework !== "fastapi") {
+    return [];
+  }
+  return [
+    {
+      name: "APP_HOST",
+      description: "The address to start the backend app.",
+      value: "0.0.0.0",
+    },
+    {
+      name: "APP_PORT",
+      description: "The port to start the backend app.",
+      value: port?.toString() || "8000",
+    },
+    {
+      name: "LLM_TEMPERATURE",
+      description: "Temperature for sampling from the model.",
+    },
+    {
+      name: "LLM_MAX_TOKENS",
+      description: "Maximum number of tokens to generate.",
+    },
+    {
+      name: "TOP_K",
+      description:
+        "The number of similar embeddings to return when retrieving documents.",
+      value: "3",
+    },
+    {
+      name: "SYSTEM_PROMPT",
+      description: `Custom system prompt.
 Example:
 SYSTEM_PROMPT="
 We have provided context information below.
@@ -216,22 +211,35 @@ We have provided context information below.
 ---------------------
 Given this information, please answer the question: {query_str}
 "`,
-        },
-      ],
-    ];
-  } else {
-    const nextJsEnvs = [
-      {
-        name: "NEXT_PUBLIC_MODEL",
-        description: "The LLM model to use (hardcode to front-end artifact).",
-        value: opts.model,
-      },
-    ];
-    envVars = [
-      ...defaultEnvs,
-      ...(opts.framework === "nextjs" ? nextJsEnvs : []),
-    ];
-  }
+    },
+  ];
+};
+
+export const createBackendEnvFile = async (
+  root: string,
+  opts: {
+    llamaCloudKey?: string;
+    vectorDb?: TemplateVectorDB;
+    modelConfig: ModelConfig;
+    framework?: TemplateFramework;
+    dataSources?: TemplateDataSource[];
+    port?: number;
+  },
+) => {
+  // Init env values
+  const envFileName = ".env";
+  const envVars: EnvVar[] = [
+    {
+      name: "LLAMA_CLOUD_API_KEY",
+      description: `The Llama Cloud API key.`,
+      value: opts.llamaCloudKey,
+    },
+    // Add model environment variables
+    ...getModelEnvs(opts.modelConfig),
+    // Add vector database environment variables
+    ...getVectorDBEnvs(opts.vectorDb),
+    ...getFrameworkEnvs(opts.framework, opts.port),
+  ];
   // Render and write env file
   const content = renderEnvVar(envVars);
   await fs.writeFile(path.join(root, envFileName), content);
@@ -242,20 +250,9 @@ export const createFrontendEnvFile = async (
   root: string,
   opts: {
     customApiPath?: string;
-    model?: string;
   },
 ) => {
   const defaultFrontendEnvs = [
-    {
-      name: "MODEL",
-      description: "The OpenAI model to use.",
-      value: opts.model,
-    },
-    {
-      name: "NEXT_PUBLIC_MODEL",
-      description: "The OpenAI model to use (hardcode to front-end artifact).",
-      value: opts.model,
-    },
     {
       name: "NEXT_PUBLIC_CHAT_API",
       description: "The backend API for chat endpoint.",
diff --git a/helpers/index.ts b/helpers/index.ts
index 8c990fac..b7b3991d 100644
--- a/helpers/index.ts
+++ b/helpers/index.ts
@@ -9,12 +9,14 @@ import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables";
 import { PackageManager } from "./get-pkg-manager";
 import { installLlamapackProject } from "./llama-pack";
 import { isHavingPoetryLockFile, tryPoetryRun } from "./poetry";
+import { isModelConfigured } from "./providers";
 import { installPythonTemplate } from "./python";
 import { downloadAndExtractRepo } from "./repo";
 import { ConfigFileType, writeToolsConfig } from "./tools";
 import {
   FileSourceConfig,
   InstallTemplateArgs,
+  ModelConfig,
   TemplateDataSource,
   TemplateFramework,
   TemplateVectorDB,
@@ -24,8 +26,8 @@ import { installTSTemplate } from "./typescript";
 // eslint-disable-next-line max-params
 async function generateContextData(
   framework: TemplateFramework,
+  modelConfig: ModelConfig,
   packageManager?: PackageManager,
-  openAiKey?: string,
   vectorDb?: TemplateVectorDB,
   llamaCloudKey?: string,
   useLlamaParse?: boolean,
@@ -36,12 +38,12 @@ async function generateContextData(
         ? "poetry run generate"
         : `${packageManager} run generate`,
     )}`;
-    const openAiKeyConfigured = openAiKey || process.env["OPENAI_API_KEY"];
+    const modelConfigured = isModelConfigured(modelConfig);
     const llamaCloudKeyConfigured = useLlamaParse
       ? llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
       : true;
     const hasVectorDb = vectorDb && vectorDb !== "none";
-    if (openAiKeyConfigured && llamaCloudKeyConfigured && !hasVectorDb) {
+    if (modelConfigured && llamaCloudKeyConfigured && !hasVectorDb) {
       // If all the required environment variables are set, run the generate script
       if (framework === "fastapi") {
         if (isHavingPoetryLockFile()) {
@@ -63,7 +65,7 @@ async function generateContextData(
 
     // generate the message of what to do to run the generate script manually
     const settings = [];
-    if (!openAiKeyConfigured) settings.push("your OpenAI key");
+    if (!modelConfigured) settings.push("your model provider API key");
     if (!llamaCloudKeyConfigured) settings.push("your Llama Cloud key");
     if (hasVectorDb) settings.push("your Vector DB environment variables");
     const settingsMessage =
@@ -141,11 +143,9 @@ export const installTemplate = async (
 
     // Copy the environment file to the target directory.
     await createBackendEnvFile(props.root, {
-      openAiKey: props.openAiKey,
+      modelConfig: props.modelConfig,
       llamaCloudKey: props.llamaCloudKey,
       vectorDb: props.vectorDb,
-      model: props.model,
-      embeddingModel: props.embeddingModel,
       framework: props.framework,
       dataSources: props.dataSources,
       port: props.externalPort,
@@ -163,8 +163,8 @@ export const installTemplate = async (
       ) {
         await generateContextData(
           props.framework,
+          props.modelConfig,
           props.packageManager,
-          props.openAiKey,
           props.vectorDb,
           props.llamaCloudKey,
           props.useLlamaParse,
@@ -174,7 +174,6 @@ export const installTemplate = async (
   } else {
     // this is a frontend for a full-stack app, create .env file with model information
     await createFrontendEnvFile(props.root, {
-      model: props.model,
       customApiPath: props.customApiPath,
     });
   }
diff --git a/helpers/providers/index.ts b/helpers/providers/index.ts
new file mode 100644
index 00000000..bb793c09
--- /dev/null
+++ b/helpers/providers/index.ts
@@ -0,0 +1,66 @@
+import ciInfo from "ci-info";
+import prompts from "prompts";
+import { questionHandlers } from "../../questions";
+import { ModelConfig, ModelProvider } from "../types";
+import { askOllamaQuestions } from "./ollama";
+import { askOpenAIQuestions, isOpenAIConfigured } from "./openai";
+
+const DEFAULT_MODEL_PROVIDER = "openai";
+
+export type ModelConfigQuestionsParams = {
+  openAiKey?: string;
+  askModels: boolean;
+};
+
+export type ModelConfigParams = Omit<ModelConfig, "provider">;
+
+export async function askModelConfig({
+  askModels,
+  openAiKey,
+}: ModelConfigQuestionsParams): Promise<ModelConfig> {
+  let modelProvider: ModelProvider = DEFAULT_MODEL_PROVIDER;
+  if (askModels && !ciInfo.isCI) {
+    const { provider } = await prompts(
+      {
+        type: "select",
+        name: "provider",
+        message: "Which model provider would you like to use",
+        choices: [
+          {
+            title: "OpenAI",
+            value: "openai",
+          },
+          { title: "Ollama", value: "ollama" },
+        ],
+        initial: 0,
+      },
+      questionHandlers,
+    );
+    modelProvider = provider;
+  }
+
+  let modelConfig: ModelConfigParams;
+  switch (modelProvider) {
+    case "ollama":
+      modelConfig = await askOllamaQuestions({ askModels });
+      break;
+    default:
+      modelConfig = await askOpenAIQuestions({
+        openAiKey,
+        askModels,
+      });
+  }
+  return {
+    ...modelConfig,
+    provider: modelProvider,
+  };
+}
+
+export function isModelConfigured(modelConfig: ModelConfig): boolean {
+  switch (modelConfig.provider) {
+    case "openai":
+      return isOpenAIConfigured(modelConfig);
+    default:
+      return true;
+  }
+}
diff --git a/helpers/providers/ollama.ts b/helpers/providers/ollama.ts
new file mode 100644
index 00000000..a32d9d30
--- /dev/null
+++ b/helpers/providers/ollama.ts
@@ -0,0 +1,92 @@
+import ciInfo from "ci-info";
+import ollama, { type ModelResponse } from "ollama";
+import { red } from "picocolors";
+import prompts from "prompts";
+import { ModelConfigParams } from ".";
+import { questionHandlers, toChoice } from "../../questions";
+
+type ModelData = {
+  dimensions: number;
+};
+const MODELS = ["llama3:8b", "wizardlm2:7b", "gemma:7b", "phi3"];
+const DEFAULT_MODEL = MODELS[0];
+// TODO: get embedding vector dimensions from the ollama sdk (currently not supported)
+const EMBEDDING_MODELS: Record<string, ModelData> = {
+  "nomic-embed-text": { dimensions: 768 },
+  "mxbai-embed-large": { dimensions: 1024 },
+  "all-minilm": { dimensions: 384 },
+};
+const DEFAULT_EMBEDDING_MODEL: string = Object.keys(EMBEDDING_MODELS)[0];
+
+type OllamaQuestionsParams = {
+  askModels: boolean;
+};
+
+export async function askOllamaQuestions({
+  askModels,
+}: OllamaQuestionsParams): Promise<ModelConfigParams> {
+  const config: ModelConfigParams = {
+    model: DEFAULT_MODEL,
+    embeddingModel: DEFAULT_EMBEDDING_MODEL,
+    dimensions: EMBEDDING_MODELS[DEFAULT_EMBEDDING_MODEL].dimensions,
+  };
+
+  // use default model values in CI or if user should not be asked
+  const useDefaults = ciInfo.isCI || !askModels;
+  if (!useDefaults) {
+    const { model } = await prompts(
+      {
+        type: "select",
+        name: "model",
+        message: "Which LLM model would you like to use?",
+        choices: MODELS.map(toChoice),
+        initial: 0,
+      },
+      questionHandlers,
+    );
+    await ensureModel(model);
+    config.model = model;
+
+    const { embeddingModel } = await prompts(
+      {
+        type: "select",
+        name: "embeddingModel",
+        message: "Which embedding model would you like to use?",
+        choices: Object.keys(EMBEDDING_MODELS).map(toChoice),
+        initial: 0,
+      },
+      questionHandlers,
+    );
+    await ensureModel(embeddingModel);
+    config.embeddingModel = embeddingModel;
+    config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions;
+  }
+
+  return config;
+}
+
+async function ensureModel(modelName: string) {
+  try {
+    if (modelName.split(":").length === 1) {
+      // model doesn't have a version suffix, use latest
+      modelName = modelName + ":latest";
+    }
+    const { models } = await ollama.list();
+    const found =
+      models.find((model: ModelResponse) => model.name === modelName) !==
+      undefined;
+    if (!found) {
+      console.log(
+        red(
+          `Model ${modelName} has not been pulled yet. Run 'ollama pull ${modelName}' and try again.`,
+        ),
+      );
+      process.exit(1);
+    }
+  } catch (error) {
+    console.log(
+      red("Listing Ollama models failed. Is 'ollama' running? " + error),
+    );
+    process.exit(1);
+  }
+}
diff --git a/helpers/providers/openai.ts b/helpers/providers/openai.ts
new file mode 100644
index 00000000..2e13d99f
--- /dev/null
+++ b/helpers/providers/openai.ts
@@ -0,0 +1,146 @@
+import ciInfo from "ci-info";
+import got from "got";
+import ora from "ora";
+import { red } from "picocolors";
+import prompts from "prompts";
+import { ModelConfigParams, ModelConfigQuestionsParams } from ".";
+import { questionHandlers } from "../../questions";
+
+const OPENAI_API_URL = "https://api.openai.com/v1";
+
+const DEFAULT_MODEL = "gpt-4-turbo";
+const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large";
+
+export async function askOpenAIQuestions({
+  openAiKey,
+  askModels,
+}: ModelConfigQuestionsParams): Promise<ModelConfigParams> {
+  const config: ModelConfigParams = {
+    apiKey: openAiKey,
+    model: DEFAULT_MODEL,
+    embeddingModel: DEFAULT_EMBEDDING_MODEL,
+    dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL),
+  };
+
+  if (!config.apiKey) {
+    const { key } = await prompts(
+      {
+        type: "text",
+        name: "key",
+        message: askModels
+          ? "Please provide your OpenAI API key (or leave blank to use OPENAI_API_KEY env variable):"
+          : "Please provide your OpenAI API key (leave blank to skip):",
+        validate: (value: string) => {
+          if (askModels && !value) {
+            if (process.env.OPENAI_API_KEY) {
+              return true;
+            }
+            return "OPENAI_API_KEY env variable is not set - key is required";
+          }
+          return true;
+        },
+      },
+      questionHandlers,
+    );
+    config.apiKey = key || process.env.OPENAI_API_KEY;
+  }
+
+  // use default model values in CI or if user should not be asked
+  const useDefaults = ciInfo.isCI || !askModels;
+  if (!useDefaults) {
+    const { model } = await prompts(
+      {
+        type: "select",
+        name: "model",
+        message: "Which LLM model would you like to use?",
+        choices: await getAvailableModelChoices(false, config.apiKey),
+        initial: 0,
+      },
+      questionHandlers,
+    );
+    config.model = model;
+
+    const { embeddingModel } = await prompts(
+      {
+        type: "select",
+        name: "embeddingModel",
+        message: "Which embedding model would you like to use?",
+        choices: await getAvailableModelChoices(true, config.apiKey),
+        initial: 0,
+      },
+      questionHandlers,
+    );
+    config.embeddingModel = embeddingModel;
+    config.dimensions = getDimensions(embeddingModel);
+  }
+
+  return config;
+}
+
+export function isOpenAIConfigured(params: ModelConfigParams): boolean {
+  if (params.apiKey) {
+    return true;
+  }
+  if (process.env["OPENAI_API_KEY"]) {
+    return true;
+  }
+  return false;
+}
+
+async function getAvailableModelChoices(
+  selectEmbedding: boolean,
+  apiKey?: string,
+) {
+  if (!apiKey) {
+    throw new Error("need OpenAI key to retrieve model choices");
+  }
+  const isLLMModel = (modelId: string) => {
+    return modelId.startsWith("gpt");
+  };
+
+  const isEmbeddingModel = (modelId: string) => {
+    return modelId.includes("embedding");
+  };
+
+  const spinner = ora("Fetching available models").start();
+  try {
+    const response = await got(`${OPENAI_API_URL}/models`, {
+      headers: {
+        Authorization: "Bearer " + apiKey,
+      },
+      timeout: 5000,
+      responseType: "json",
+    });
+    const data: any = await response.body;
+    spinner.stop();
+    return data.data
+      .filter((model: any) =>
+        selectEmbedding ? isEmbeddingModel(model.id) : isLLMModel(model.id),
+      )
+      .map((el: any) => {
+        return {
+          title: el.id,
+          value: el.id,
+        };
+      });
+  } catch (error) {
+    spinner.stop();
+    if ((error as any).response?.statusCode === 401) {
+      console.log(
+        red(
+          "Invalid OpenAI API key provided! Please provide a valid key and try again!",
+        ),
+      );
+    } else {
+      console.log(red("Request failed: " + error));
+    }
+    process.exit(1);
+  }
+}
+
+function getDimensions(modelName: string) {
+  // As of 2024-04-24, all OpenAI embedding models support 1536 dimensions except
+  // "text-embedding-3-large", see https://openai.com/blog/new-embedding-models-and-api-updates
+  return modelName === "text-embedding-3-large" ? 1024 : 1536;
+}
diff --git a/helpers/python.ts b/helpers/python.ts
index af83dd2c..316fe52d 100644
--- a/helpers/python.ts
+++ b/helpers/python.ts
@@ -10,6 +10,7 @@ import { isPoetryAvailable, tryPoetryInstall } from "./poetry";
 import { Tool } from "./tools";
 import {
   InstallTemplateArgs,
+  ModelConfig,
   TemplateDataSource,
   TemplateVectorDB,
 } from "./types";
@@ -21,6 +22,7 @@ interface Dependency {
 }
 
 const getAdditionalDependencies = (
+  modelConfig: ModelConfig,
   vectorDb?: TemplateVectorDB,
   dataSource?: TemplateDataSource,
   tools?: Tool[],
@@ -108,6 +110,25 @@ const getAdditionalDependencies = (
     });
   });
 
+  switch (modelConfig.provider) {
+    case "ollama":
+      dependencies.push({
+        name: "llama-index-llms-ollama",
+        version: "0.1.2",
+      });
+      dependencies.push({
+        name: "llama-index-embeddings-ollama",
+        version: "0.1.2",
+      });
+      break;
+    case "openai":
+      dependencies.push({
+        name: "llama-index-agent-openai",
+        version: "0.2.2",
+      });
+      break;
+  }
+
   return dependencies;
 };
 
@@ -205,8 +226,8 @@ export const installPythonTemplate = async ({
   dataSources,
   tools,
   postInstallAction,
-  useLlamaParse,
   observability,
+  modelConfig,
 }: Pick<
   InstallTemplateArgs,
   | "root"
@@ -215,9 +236,9 @@ export const installPythonTemplate = async ({
   | "vectorDb"
   | "dataSources"
   | "tools"
-  | "useLlamaParse"
   | "postInstallAction"
   | "observability"
+  | "modelConfig"
 >) => {
   console.log("\nInitializing Python project with template:", template, "\n");
   const templatePath = path.join(templatesDir, "types", template, framework);
@@ -258,7 +279,7 @@ export const installPythonTemplate = async ({
   });
 
   const addOnDependencies = dataSources
-    .map((ds) => getAdditionalDependencies(vectorDb, ds, tools))
+    .map((ds) => getAdditionalDependencies(modelConfig, vectorDb, ds, tools))
     .flat();
 
   if (observability === "opentelemetry") {
diff --git a/helpers/types.ts b/helpers/types.ts
index 0db557ed..42b571e7 100644
--- a/helpers/types.ts
+++ b/helpers/types.ts
@@ -1,6 +1,14 @@
 import { PackageManager } from "../helpers/get-pkg-manager";
 import { Tool } from "./tools";
 
+export type ModelProvider = "openai" | "ollama";
+export type ModelConfig = {
+  provider: ModelProvider;
+  apiKey?: string;
+  model: string;
+  embeddingModel: string;
+  dimensions: number;
+};
 export type TemplateType = "streaming" | "community" | "llamapack";
 export type TemplateFramework = "nextjs" | "express" | "fastapi";
 export type TemplateUI = "html" | "shadcn";
@@ -59,11 +67,9 @@ export interface InstallTemplateArgs {
   ui: TemplateUI;
   dataSources: TemplateDataSource[];
   customApiPath?: string;
-  openAiKey?: string;
+  modelConfig: ModelConfig;
   llamaCloudKey?: string;
   useLlamaParse?: boolean;
-  model: string;
-  embeddingModel: string;
   communityProjectConfig?: CommunityProjectConfig;
   llamapack?: string;
   vectorDb?: TemplateVectorDB;
diff --git a/index.ts b/index.ts
index 45b79b38..11f3d83d 100644
--- a/index.ts
+++ b/index.ts
@@ -107,20 +107,6 @@ const program = new Commander.Command(packageJson.name)
     `
 
   Whether to generate a frontend for your backend.
-`,
-  )
-  .option(
-    "--model <model>",
-    `
-
-  Select OpenAI model to use. E.g. gpt-3.5-turbo.
-`,
-  )
-  .option(
-    "--embedding-model <embeddingModel>",
-    `
-
-  Select OpenAI embedding model to use. E.g. text-embedding-ada-002.
 `,
   )
   .option(
@@ -201,9 +187,7 @@ if (process.argv.includes("--tools")) {
 if (process.argv.includes("--no-llama-parse")) {
   program.useLlamaParse = false;
 }
-if (process.argv.includes("--ask-models")) {
-  program.askModels = true;
-}
+program.askModels = process.argv.includes("--ask-models");
 if (process.argv.includes("--no-files")) {
   program.dataSources = [];
 } else {
@@ -290,7 +274,11 @@ async function run(): Promise<void> {
   }
 
   const preferences = (conf.get("preferences") || {}) as QuestionArgs;
-  await askQuestions(program as unknown as QuestionArgs, preferences);
+  await askQuestions(
+    program as unknown as QuestionArgs,
+    preferences,
+    program.openAiKey,
+  );
 
   await createApp({
     template: program.template,
@@ -299,10 +287,8 @@ async function run(): Promise<void> {
     appPath: resolvedProjectPath,
     packageManager,
     frontend: program.frontend,
-    openAiKey: program.openAiKey,
+    modelConfig: program.modelConfig,
     llamaCloudKey: program.llamaCloudKey,
-    model: program.model,
-    embeddingModel: program.embeddingModel,
     communityProjectConfig: program.communityProjectConfig,
     llamapack: program.llamapack,
     vectorDb: program.vectorDb,
diff --git a/package.json b/package.json
index 6cb3bd72..867f4f4b 100644
--- a/package.json
+++ b/package.json
@@ -1,12 +1,12 @@
 {
   "name": "create-llama",
   "version": "0.0.32",
+  "description": "Create LlamaIndex-powered apps with one command",
   "keywords": [
     "rag",
     "llamaindex",
     "next.js"
   ],
-  "description": "Create LlamaIndex-powered apps with one command",
   "repository": {
     "type": "git",
     "url": "https://github.com/run-llama/LlamaIndexTS",
@@ -20,32 +20,30 @@
     "dist"
   ],
   "scripts": {
+    "build": "bash ./scripts/build.sh",
+    "build:ncc": "pnpm run clean && ncc build ./index.ts -o ./dist/ --minify --no-cache --no-source-map-register",
     "clean": "rimraf --glob ./dist ./templates/**/__pycache__ ./templates/**/node_modules ./templates/**/poetry.lock",
+    "dev": "ncc build ./index.ts -w -o dist/",
+    "e2e": "playwright test",
     "format": "prettier --ignore-unknown --cache --check .",
     "format:write": "prettier --ignore-unknown --write .",
-    "dev": "ncc build ./index.ts -w -o dist/",
-    "build": "bash ./scripts/build.sh",
-    "build:ncc": "pnpm run clean && ncc build ./index.ts -o ./dist/ --minify --no-cache --no-source-map-register",
     "lint": "eslint . --ignore-pattern dist --ignore-pattern e2e/cache",
-    "e2e": "playwright test",
+    "new-snapshot": "pnpm run build && changeset version --snapshot",
+    "new-version": "pnpm run build && changeset version",
+    "pack-install": "bash ./scripts/pack.sh",
     "prepare": "husky",
     "release": "pnpm run build && changeset publish",
-    "new-version": "pnpm run build && changeset version",
-    "release-snapshot": "pnpm run build && changeset publish --tag snapshot",
-    "new-snapshot": "pnpm run build && changeset version --snapshot",
-    "pack-install": "bash ./scripts/pack.sh"
+    "release-snapshot": "pnpm run build && changeset publish --tag snapshot"
   },
-  "devDependencies": {
-    "@playwright/test": "^1.41.1",
+  "dependencies": {
     "@types/async-retry": "1.4.2",
     "@types/ci-info": "2.0.0",
     "@types/cross-spawn": "6.0.0",
+    "@types/fs-extra": "11.0.4",
     "@types/node": "^20.11.7",
     "@types/prompts": "2.0.1",
     "@types/tar": "6.1.5",
     "@types/validate-npm-package-name": "3.0.0",
-    "@types/fs-extra": "11.0.4",
-    "@vercel/ncc": "0.38.1",
     "async-retry": "1.3.1",
     "async-sema": "3.0.1",
     "ci-info": "github:watson/ci-info#f43f6a1cefff47fb361c88cf4b943fdbcaafe540",
@@ -53,29 +51,34 @@
     "conf": "10.2.0",
     "cross-spawn": "7.0.3",
     "fast-glob": "3.3.1",
+    "fs-extra": "11.2.0",
     "got": "10.7.0",
+    "ollama": "^0.5.0",
+    "ora": "^8.0.1",
     "picocolors": "1.0.0",
     "prompts": "2.1.0",
-    "rimraf": "^5.0.5",
     "smol-toml": "^1.1.4",
     "tar": "6.1.15",
     "terminal-link": "^3.0.0",
     "update-check": "1.5.4",
     "validate-npm-package-name": "3.0.0",
-    "wait-port": "^1.1.0",
+    "yaml": "2.4.1"
+  },
+  "devDependencies": {
     "@changesets/cli": "^2.27.1",
+    "@playwright/test": "^1.41.1",
+    "@vercel/ncc": "0.38.1",
     "eslint": "^8.56.0",
+    "eslint-config-prettier": "^8.10.0",
     "husky": "^9.0.10",
     "prettier": "^3.2.5",
     "prettier-plugin-organize-imports": "^3.2.4",
+    "rimraf": "^5.0.5",
     "typescript": "^5.3.3",
-    "eslint-config-prettier": "^8.10.0",
-    "ora": "^8.0.1",
-    "fs-extra": "11.2.0",
-    "yaml": "2.4.1"
+    "wait-port": "^1.1.0"
   },
+  "packageManager": "pnpm@9.0.5",
   "engines": {
     "node": ">=16.14.0"
-  },
-  "packageManager": "pnpm@9.0.5"
+  }
 }
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 06852a76..3d3e9b2d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -7,13 +7,7 @@ settings:
 importers:
 
   .:
-    devDependencies:
-      '@changesets/cli':
-        specifier: ^2.27.1
-        version: 2.27.1
-      '@playwright/test':
-        specifier: ^1.41.1
-        version: 1.42.1
+    dependencies:
       '@types/async-retry':
         specifier: 1.4.2
         version: 1.4.2
@@ -38,9 +32,6 @@ importers:
       '@types/validate-npm-package-name':
         specifier: 3.0.0
         version: 3.0.0
-      '@vercel/ncc':
-        specifier: 0.38.1
-        version: 0.38.1
       async-retry:
         specifier: 1.3.1
         version: 1.3.1
@@ -59,12 +50,6 @@ importers:
       cross-spawn:
         specifier: 7.0.3
         version: 7.0.3
-      eslint:
-        specifier: ^8.56.0
-        version: 8.57.0
-      eslint-config-prettier:
-        specifier: ^8.10.0
-        version: 8.10.0(eslint@8.57.0)
       fast-glob:
         specifier: 3.3.1
         version: 3.3.1
@@ -74,27 +59,18 @@ importers:
       got:
         specifier: 10.7.0
         version: 10.7.0
-      husky:
-        specifier: ^9.0.10
-        version: 9.0.11
+      ollama:
+        specifier: ^0.5.0
+        version: 0.5.0
       ora:
         specifier: ^8.0.1
         version: 8.0.1
       picocolors:
         specifier: 1.0.0
         version: 1.0.0
-      prettier:
-        specifier: ^3.2.5
-        version: 3.2.5
-      prettier-plugin-organize-imports:
-        specifier: ^3.2.4
-        version: 3.2.4(prettier@3.2.5)(typescript@5.4.2)
       prompts:
         specifier: 2.1.0
         version: 2.1.0
-      rimraf:
-        specifier: ^5.0.5
-        version: 5.0.5
       smol-toml:
         specifier: ^1.1.4
         version: 1.1.4
@@ -104,21 +80,49 @@ importers:
       terminal-link:
         specifier: ^3.0.0
         version: 3.0.0
-      typescript:
-        specifier: ^5.3.3
-        version: 5.4.2
       update-check:
         specifier: 1.5.4
         version: 1.5.4
       validate-npm-package-name:
         specifier: 3.0.0
         version: 3.0.0
-      wait-port:
-        specifier: ^1.1.0
-        version: 1.1.0
       yaml:
         specifier: 2.4.1
         version: 2.4.1
+    devDependencies:
+      '@changesets/cli':
+        specifier: ^2.27.1
+        version: 2.27.1
+      '@playwright/test':
+        specifier: ^1.41.1
+        version: 1.42.1
+      '@vercel/ncc':
+        specifier: 0.38.1
+        version: 0.38.1
+      eslint:
+        specifier: ^8.56.0
+        version: 8.57.0
+      eslint-config-prettier:
+        specifier: ^8.10.0
+        version: 8.10.0(eslint@8.57.0)
+      husky:
+        specifier: ^9.0.10
+        version: 9.0.11
+      prettier:
+        specifier: ^3.2.5
+        version: 3.2.5
+      prettier-plugin-organize-imports:
+        specifier: ^3.2.4
+        version: 3.2.4(prettier@3.2.5)(typescript@5.4.2)
+      rimraf:
+        specifier: ^5.0.5
+        version: 5.0.5
+      typescript:
+        specifier: ^5.3.3
+        version: 5.4.2
+      wait-port:
+        specifier: ^1.1.0
+        version: 1.1.0
 
 packages:
 
@@ -1287,6 +1291,9 @@ packages:
     resolution: {integrity: sha512-byy+U7gp+FVwmyzKPYhW2h5l3crpmGsxl7X2s8y43IgxvG4g3QZ6CffDtsNQy1WsmZpQbO+ybo0AlW7TY6DcBQ==}
     engines: {node: '>= 0.4'}
 
+  ollama@0.5.0:
+    resolution: {integrity: sha512-CRtRzsho210EGdK52GrUMohA2pU+7NbgEaBG3DcYeRmvQthDO7E2LHOkLlUUeaYUlNmEd8icbjC02ug9meSYnw==}
+
   once@1.4.0:
     resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==}
 
@@ -1844,6 +1851,9 @@ packages:
   wcwidth@1.0.1:
     resolution: {integrity: sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg==}
 
+  whatwg-fetch@3.6.20:
+    resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==}
+
   which-boxed-primitive@1.0.2:
     resolution: {integrity: sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg==}
 
@@ -3256,6 +3266,10 @@ snapshots:
       has-symbols: 1.0.3
       object-keys: 1.1.1
 
+  ollama@0.5.0:
+    dependencies:
+      whatwg-fetch: 3.6.20
+
   once@1.4.0:
     dependencies:
       wrappy: 1.0.2
@@ -3816,6 +3830,8 @@ snapshots:
     dependencies:
       defaults: 1.0.4
 
+  whatwg-fetch@3.6.20: {}
+
   which-boxed-primitive@1.0.2:
     dependencies:
       is-bigint: 1.0.4
diff --git a/questions.ts b/questions.ts
index 375ad807..31eed224 100644
--- a/questions.ts
+++ b/questions.ts
@@ -1,8 +1,6 @@
 import { execSync } from "child_process";
 import ciInfo from "ci-info";
 import fs from "fs";
-import got from "got";
-import ora from "ora";
 import path from "path";
 import { blue, green, red } from "picocolors";
 import prompts from "prompts";
@@ -16,11 +14,10 @@ import { COMMUNITY_OWNER, COMMUNITY_REPO } from "./helpers/constant";
 import { EXAMPLE_FILE } from "./helpers/datasources";
 import { templatesDir } from "./helpers/dir";
 import { getAvailableLlamapackOptions } from "./helpers/llama-pack";
+import { askModelConfig, isModelConfigured } from "./helpers/providers";
 import { getProjectOptions } from "./helpers/repo";
 import { supportedTools, toolsRequireConfig } from "./helpers/tools";
 
-const OPENAI_API_URL = "https://api.openai.com/v1";
-
 export type QuestionArgs = Omit<
   InstallAppArgs,
   "appPath" | "packageManager"
@@ -67,16 +64,13 @@ if ($dialogResult -eq [System.Windows.Forms.DialogResult]::OK)
 }
 `;
 
-const defaults: QuestionArgs = {
+const defaults: Omit<QuestionArgs, "modelConfig"> = {
   template: "streaming",
   framework: "nextjs",
   ui: "shadcn",
   frontend: false,
-  openAiKey: "",
   llamaCloudKey: "",
   useLlamaParse: false,
-  model: "gpt-4-turbo",
-  embeddingModel: "text-embedding-3-large",
   communityProjectConfig: undefined,
   llamapack: "",
   postInstallAction: "dependencies",
@@ -84,7 +78,7 @@ const defaults: QuestionArgs = {
   tools: [],
 };
 
-const handlers = {
+export const questionHandlers = {
   onCancel: () => {
     console.error("Exiting.");
     process.exit(1);
@@ -232,63 +226,15 @@ export const onPromptState = (state: any) => {
   }
 };
 
-const getAvailableModelChoices = async (
-  selectEmbedding: boolean,
-  apiKey?: string,
-) => {
-  const isLLMModel = (modelId: string) => {
-    return modelId.startsWith("gpt");
-  };
-
-  const isEmbeddingModel = (modelId: string) => {
-    return modelId.includes("embedding");
-  };
-
-  if (apiKey) {
-    const spinner = ora("Fetching available models").start();
-    try {
-      const response = await got(`${OPENAI_API_URL}/models`, {
-        headers: {
-          Authorization: "Bearer " + apiKey,
-        },
-        timeout: 5000,
-        responseType: "json",
-      });
-      const data: any = await response.body;
-      spinner.stop();
-      return data.data
-        .filter((model: any) =>
-          selectEmbedding ? isEmbeddingModel(model.id) : isLLMModel(model.id),
-        )
-        .map((el: any) => {
-          return {
-            title: el.id,
-            value: el.id,
-          };
-        });
-    } catch (error) {
-      spinner.stop();
-      if ((error as any).response?.statusCode === 401) {
-        console.log(
-          red(
-            "Invalid OpenAI API key provided! Please provide a valid key and try again!",
-          ),
-        );
-      } else {
-        console.log(red("Request failed: " + error));
-      }
-      process.exit(1);
-    }
-  }
-};
-
 export const askQuestions = async (
   program: QuestionArgs,
   preferences: QuestionArgs,
+  openAiKey?: string,
 ) => {
-  const getPrefOrDefault = <K extends keyof QuestionArgs>(
+  const getPrefOrDefault = <K extends keyof Omit<QuestionArgs, "modelConfig">>(
     field: K,
-  ): QuestionArgs[K] => preferences[field] ?? defaults[field];
+  ): Omit<QuestionArgs, "modelConfig">[K] =>
+    preferences[field] ?? defaults[field];
 
   // Ask for next action after installation
   async function askPostInstallAction() {
@@ -311,8 +257,7 @@ export const askQuestions = async (
           },
         ];
 
-        const openAiKeyConfigured =
-          program.openAiKey || process.env["OPENAI_API_KEY"];
+        const modelConfigured = isModelConfigured(program.modelConfig);
         // If using LlamaParse, require LlamaCloud API key
         const llamaCloudKeyConfigured = program.useLlamaParse
           ? program.llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
@@ -321,7 +266,7 @@ export const askQuestions = async (
         // Can run the app if all tools do not require configuration
         if (
           !hasVectorDb &&
-          openAiKeyConfigured &&
+          modelConfigured &&
           llamaCloudKeyConfigured &&
           !toolsRequireConfig(program.tools) &&
           !program.llamapack
@@ -341,7 +286,7 @@ export const askQuestions = async (
             choices: actionChoices,
             initial: 1,
           },
-          handlers,
+          questionHandlers,
         );
 
         program.postInstallAction = action;
@@ -374,7 +319,7 @@ export const askQuestions = async (
           ],
           initial: 0,
         },
-        handlers,
+        questionHandlers,
       );
       program.template = template;
       preferences.template = template;
@@ -397,7 +342,7 @@ export const askQuestions = async (
         })),
         initial: 0,
       },
-      handlers,
+      questionHandlers,
     );
     const projectConfig = JSON.parse(communityProjectConfig);
     program.communityProjectConfig = projectConfig;
@@ -418,7 +363,7 @@ export const askQuestions = async (
         })),
         initial: 0,
       },
-      handlers,
+      questionHandlers,
     );
     program.llamapack = llamapack;
     preferences.llamapack = llamapack;
@@ -444,7 +389,7 @@ export const askQuestions = async (
           choices,
           initial: 0,
         },
-        handlers,
+        questionHandlers,
       );
       program.framework = framework;
       preferences.framework = framework;
@@ -504,7 +449,7 @@ export const askQuestions = async (
           ],
           initial: 0,
         },
-        handlers,
+        questionHandlers,
       );
 
       program.observability = observability;
@@ -512,67 +457,13 @@ export const askQuestions = async (
     }
   }
 
-  if (!program.openAiKey) {
-    const { key } = await prompts(
-      {
-        type: "text",
-        name: "key",
-        message: program.askModels
-          ? "Please provide your OpenAI API key (or leave blank to reuse OPENAI_API_KEY env variable):"
-          : "Please provide your OpenAI API key (leave blank to skip):",
-        validate: (value: string) => {
-          if (program.askModels && !value) {
-            if (process.env.OPENAI_API_KEY) {
-              return true;
-            }
-            return "OpenAI API key is required";
-          }
-          return true;
-        },
-      },
-      handlers,
-    );
-
-    program.openAiKey = key || process.env.OPENAI_API_KEY;
-    preferences.openAiKey = key || process.env.OPENAI_API_KEY;
-  }
-
-  if (!program.model) {
-    if (ciInfo.isCI || !program.askModels) {
-      program.model = defaults.model;
-    } else {
-      const { model } = await prompts(
-        {
-          type: "select",
-          name: "model",
-          message: "Which LLM model would you like to use?",
-          choices: await getAvailableModelChoices(false, program.openAiKey),
-          initial: 0,
-        },
-        handlers,
-      );
-      program.model = model;
-      preferences.model = model;
-    }
-  }
-
-  if (!program.embeddingModel) {
-    if (ciInfo.isCI || !program.askModels) {
-      program.embeddingModel = defaults.embeddingModel;
-    } else {
-      const { embeddingModel } = await prompts(
-        {
-          type: "select",
-          name: "embeddingModel",
-          message: "Which embedding model would you like to use?",
-          choices: await getAvailableModelChoices(true, program.openAiKey),
-          initial: 0,
-        },
-        handlers,
-      );
-      program.embeddingModel = embeddingModel;
-      preferences.embeddingModel = embeddingModel;
-    }
+  if (!program.modelConfig) {
+    const modelConfig = await askModelConfig({
+      openAiKey,
+      askModels: program.askModels ?? false,
+    });
+    program.modelConfig = modelConfig;
+    preferences.modelConfig = modelConfig;
   }
 
   if (!program.dataSources) {
@@ -596,7 +487,7 @@ export const askQuestions = async (
             ),
             initial: firstQuestion ? 1 : 0,
           },
-          handlers,
+          questionHandlers,
         );
 
         if (selectedSource === "no" || selectedSource === "none") {
@@ -642,7 +533,7 @@ export const askQuestions = async (
                   return true;
                 },
               },
-              handlers,
+              questionHandlers,
             );
 
             program.dataSources.push({
@@ -687,7 +578,7 @@ export const askQuestions = async (
             ];
             program.dataSources.push({
               type: "db",
-              config: await prompts(dbPrompts, handlers),
+              config: await prompts(dbPrompts, questionHandlers),
             });
           }
         }
@@ -714,7 +605,7 @@ export const askQuestions = async (
           active: "yes",
           inactive: "no",
         },
-        handlers,
+        questionHandlers,
       );
       program.useLlamaParse = useLlamaParse;
 
@@ -727,7 +618,7 @@ export const askQuestions = async (
             message:
               "Please provide your LlamaIndex Cloud API key (leave blank to skip):",
           },
-          handlers,
+          questionHandlers,
         );
         program.llamaCloudKey = llamaCloudKey;
       }
@@ -746,7 +637,7 @@ export const askQuestions = async (
           choices: getVectorDbChoices(program.framework),
           initial: 0,
         },
-        handlers,
+        questionHandlers,
       );
       program.vectorDb = vectorDb;
       preferences.vectorDb = vectorDb;
@@ -781,3 +672,7 @@ export const askQuestions = async (
 
   await askPostInstallAction();
 };
+
+export const toChoice = (value: string) => {
+  return { title: value, value };
+};
diff --git a/templates/components/vectordbs/typescript/pg/shared.ts b/templates/components/vectordbs/typescript/pg/shared.ts
index 88774df1..e241f907 100644
--- a/templates/components/vectordbs/typescript/pg/shared.ts
+++ b/templates/components/vectordbs/typescript/pg/shared.ts
@@ -2,7 +2,7 @@ export const PGVECTOR_COLLECTION = "data";
 export const PGVECTOR_SCHEMA = "public";
 export const PGVECTOR_TABLE = "llamaindex_embedding";
 
-const REQUIRED_ENV_VARS = ["PG_CONNECTION_STRING", "OPENAI_API_KEY"];
+const REQUIRED_ENV_VARS = ["PG_CONNECTION_STRING"];
 
 export function checkRequiredEnvVars() {
   const missingEnvVars = REQUIRED_ENV_VARS.filter((envVar) => {
diff --git a/templates/types/streaming/express/package.json b/templates/types/streaming/express/package.json
index 834841f5..612076e5 100644
--- a/templates/types/streaming/express/package.json
+++ b/templates/types/streaming/express/package.json
@@ -14,7 +14,7 @@
     "cors": "^2.8.5",
     "dotenv": "^16.3.1",
     "express": "^4.18.2",
-    "llamaindex": "0.2.9"
+    "llamaindex": "0.2.10"
   },
   "devDependencies": {
     "@types/cors": "^2.8.16",
diff --git a/templates/types/streaming/express/src/controllers/engine/settings.ts b/templates/types/streaming/express/src/controllers/engine/settings.ts
index 1f5f397a..25804399 100644
--- a/templates/types/streaming/express/src/controllers/engine/settings.ts
+++ b/templates/types/streaming/express/src/controllers/engine/settings.ts
@@ -1,19 +1,52 @@
-import { OpenAI, OpenAIEmbedding, Settings } from "llamaindex";
+import {
+  Ollama,
+  OllamaEmbedding,
+  OpenAI,
+  OpenAIEmbedding,
+  Settings,
+} from "llamaindex";
 
 const CHUNK_SIZE = 512;
 const CHUNK_OVERLAP = 20;
 
 export const initSettings = async () => {
+  // HINT: you can delete the initialization code for unused model providers
+  console.log(`Using '${process.env.MODEL_PROVIDER}' model provider`);
+  switch (process.env.MODEL_PROVIDER) {
+    case "ollama":
+      initOllama();
+      break;
+    default:
+      initOpenAI();
+      break;
+  }
+  Settings.chunkSize = CHUNK_SIZE;
+  Settings.chunkOverlap = CHUNK_OVERLAP;
+};
+
+function initOpenAI() {
   Settings.llm = new OpenAI({
     model: process.env.MODEL ?? "gpt-3.5-turbo",
     maxTokens: 512,
   });
-  Settings.chunkSize = CHUNK_SIZE;
-  Settings.chunkOverlap = CHUNK_OVERLAP;
   Settings.embedModel = new OpenAIEmbedding({
     model: process.env.EMBEDDING_MODEL,
     dimensions: process.env.EMBEDDING_DIM
       ? parseInt(process.env.EMBEDDING_DIM)
       : undefined,
   });
-};
+}
+
+function initOllama() {
+  if (!process.env.MODEL || !process.env.EMBEDDING_MODEL) {
+    throw new Error(
+      "Using Ollama as model provider, 'MODEL' and 'EMBEDDING_MODEL' env variables must be set.",
+    );
+  }
+  Settings.llm = new Ollama({
+    model: process.env.MODEL ?? "",
+  });
+  Settings.embedModel = new OllamaEmbedding({
+    model: process.env.EMBEDDING_MODEL ?? "",
+  });
+}
diff --git a/templates/types/streaming/fastapi/README-template.md b/templates/types/streaming/fastapi/README-template.md
index 6af9712b..7969ff0e 100644
--- a/templates/types/streaming/fastapi/README-template.md
+++ b/templates/types/streaming/fastapi/README-template.md
@@ -11,13 +11,7 @@ poetry install
 poetry shell
 ```
 
-By default, we use the OpenAI LLM (though you can customize, see `app/settings.py`). As a result, you need to specify an `OPENAI_API_KEY` in an .env file in this directory.
-
-Example `.env` file:
-
-```
-OPENAI_API_KEY=<openai_api_key>
-```
+Then check the parameters that have been pre-configured in the `.env` file in this directory (e.g., you might need to configure an `OPENAI_API_KEY` if you're using OpenAI as the model provider).
 
 If you are using any tools or data sources, you can update their config files in the `config` folder.
 
diff --git a/templates/types/streaming/fastapi/app/settings.py b/templates/types/streaming/fastapi/app/settings.py
index 3f2c5e07..be272d54 100644
--- a/templates/types/streaming/fastapi/app/settings.py
+++ b/templates/types/streaming/fastapi/app/settings.py
@@ -1,41 +1,44 @@
 import os
 from typing import Dict
 from llama_index.core.settings import Settings
-from llama_index.llms.openai import OpenAI
-from llama_index.embeddings.openai import OpenAIEmbedding
 
 
-def llm_config_from_env() -> Dict:
+def init_settings():
+    model_provider = os.getenv("MODEL_PROVIDER")
+    if model_provider == "openai":
+        init_openai()
+    elif model_provider == "ollama":
+        init_ollama()
+    else:
+        raise ValueError(f"Invalid model provider: {model_provider}")
+    Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
+    Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
+
+
+def init_ollama():
+    from llama_index.llms.ollama import Ollama
+    from llama_index.embeddings.ollama import OllamaEmbedding
+
+    Settings.embed_model = OllamaEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
+    Settings.llm = Ollama(model=os.getenv("MODEL"))
+
+
+def init_openai():
+    from llama_index.llms.openai import OpenAI
+    from llama_index.embeddings.openai import OpenAIEmbedding
     from llama_index.core.constants import DEFAULT_TEMPERATURE
 
-    model = os.getenv("MODEL")
-    temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
     max_tokens = os.getenv("LLM_MAX_TOKENS")
-
     config = {
-        "model": model,
-        "temperature": float(temperature),
+        "model": os.getenv("MODEL"),
+        "temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
         "max_tokens": int(max_tokens) if max_tokens is not None else None,
     }
-    return config
+    Settings.llm = OpenAI(**config)
 
-
-def embedding_config_from_env() -> Dict:
-    model = os.getenv("EMBEDDING_MODEL")
     dimension = os.getenv("EMBEDDING_DIM")
-
     config = {
-        "model": model,
+        "model": os.getenv("EMBEDDING_MODEL"),
         "dimension": int(dimension) if dimension is not None else None,
     }
-    return config
-
-
-def init_settings():
-    llm_configs = llm_config_from_env()
-    embedding_configs = embedding_config_from_env()
-
-    Settings.llm = OpenAI(**llm_configs)
-    Settings.embed_model = OpenAIEmbedding(**embedding_configs)
-    Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
-    Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
+    Settings.embed_model = OpenAIEmbedding(**config)
diff --git a/templates/types/streaming/fastapi/pyproject.toml b/templates/types/streaming/fastapi/pyproject.toml
index 30cf267f..93176212 100644
--- a/templates/types/streaming/fastapi/pyproject.toml
+++ b/templates/types/streaming/fastapi/pyproject.toml
@@ -15,7 +15,6 @@ uvicorn = { extras = ["standard"], version = "^0.23.2" }
 python-dotenv = "^1.0.0"
 llama-index = "0.10.28"
 llama-index-core = "0.10.28"
-llama-index-agent-openai = "0.2.2"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/templates/types/streaming/nextjs/app/api/chat/engine/settings.ts b/templates/types/streaming/nextjs/app/api/chat/engine/settings.ts
index 1f5f397a..25804399 100644
--- a/templates/types/streaming/nextjs/app/api/chat/engine/settings.ts
+++ b/templates/types/streaming/nextjs/app/api/chat/engine/settings.ts
@@ -1,19 +1,52 @@
-import { OpenAI, OpenAIEmbedding, Settings } from "llamaindex";
+import {
+  Ollama,
+  OllamaEmbedding,
+  OpenAI,
+  OpenAIEmbedding,
+  Settings,
+} from "llamaindex";
 
 const CHUNK_SIZE = 512;
 const CHUNK_OVERLAP = 20;
 
 export const initSettings = async () => {
+  // HINT: you can delete the initialization code for unused model providers
+  console.log(`Using '${process.env.MODEL_PROVIDER}' model provider`);
+  switch (process.env.MODEL_PROVIDER) {
+    case "ollama":
+      initOllama();
+      break;
+    default:
+      initOpenAI();
+      break;
+  }
+  Settings.chunkSize = CHUNK_SIZE;
+  Settings.chunkOverlap = CHUNK_OVERLAP;
+};
+
+function initOpenAI() {
   Settings.llm = new OpenAI({
     model: process.env.MODEL ?? "gpt-3.5-turbo",
     maxTokens: 512,
   });
-  Settings.chunkSize = CHUNK_SIZE;
-  Settings.chunkOverlap = CHUNK_OVERLAP;
   Settings.embedModel = new OpenAIEmbedding({
     model: process.env.EMBEDDING_MODEL,
     dimensions: process.env.EMBEDDING_DIM
       ? parseInt(process.env.EMBEDDING_DIM)
       : undefined,
   });
-};
+}
+
+function initOllama() {
+  if (!process.env.MODEL || !process.env.EMBEDDING_MODEL) {
+    throw new Error(
+      "Using Ollama as model provider, 'MODEL' and 'EMBEDDING_MODEL' env variables must be set.",
+    );
+  }
+  Settings.llm = new Ollama({
+    model: process.env.MODEL ?? "",
+  });
+  Settings.embedModel = new OllamaEmbedding({
+    model: process.env.EMBEDDING_MODEL ?? "",
+  });
+}
diff --git a/templates/types/streaming/nextjs/app/components/chat-section.tsx b/templates/types/streaming/nextjs/app/components/chat-section.tsx
index e9860746..afb59960 100644
--- a/templates/types/streaming/nextjs/app/components/chat-section.tsx
+++ b/templates/types/streaming/nextjs/app/components/chat-section.tsx
@@ -36,7 +36,7 @@ export default function ChatSection() {
         handleSubmit={handleSubmit}
         handleInputChange={handleInputChange}
         isLoading={isLoading}
-        multiModal={process.env.NEXT_PUBLIC_MODEL === "gpt-4-turbo"}
+        multiModal={true}
       />
     </div>
   );
diff --git a/templates/types/streaming/nextjs/package.json b/templates/types/streaming/nextjs/package.json
index baddb358..221c6998 100644
--- a/templates/types/streaming/nextjs/package.json
+++ b/templates/types/streaming/nextjs/package.json
@@ -16,7 +16,7 @@
     "class-variance-authority": "^0.7.0",
     "clsx": "^1.2.1",
     "dotenv": "^16.3.1",
-    "llamaindex": "0.2.9",
+    "llamaindex": "0.2.10",
     "lucide-react": "^0.294.0",
     "next": "^14.0.3",
     "react": "^18.2.0",
-- 
GitLab