From a0aab032267ba8a49ba6841d7cc9ef8458dc69d5 Mon Sep 17 00:00:00 2001
From: Mohammad Amir <amir.zilli@gmail.com>
Date: Tue, 2 Jul 2024 20:42:42 +0530
Subject: [PATCH] T-System's LLMHUB is added as model provider backend. (#139)

---------

Co-authored-by: Duc Anh Ho <ducanh.ho2296@gmail.com>
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
---
 .changeset/silent-buses-dance.md            |   5 +
 helpers/env-variables.ts                    |  17 ++
 helpers/providers/index.ts                  |  30 ++--
 helpers/providers/llmhub.ts                 | 169 ++++++++++++++++++
 helpers/python.ts                           |  10 ++
 helpers/types.ts                            |   3 +-
 questions.ts                                |   1 +
 .../types/streaming/fastapi/app/llmhub.py   |  61 +++++++
 .../types/streaming/fastapi/app/settings.py |   7 +-
 9 files changed, 290 insertions(+), 13 deletions(-)
 create mode 100644 .changeset/silent-buses-dance.md
 create mode 100644 helpers/providers/llmhub.ts
 create mode 100644 templates/types/streaming/fastapi/app/llmhub.py

diff --git a/.changeset/silent-buses-dance.md b/.changeset/silent-buses-dance.md
new file mode 100644
index 00000000..613fa740
--- /dev/null
+++ b/.changeset/silent-buses-dance.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Add T-System's LLMHUB as a model provider
diff --git a/helpers/env-variables.ts b/helpers/env-variables.ts
index dcde1f8f..6e19a117 100644
--- a/helpers/env-variables.ts
+++ b/helpers/env-variables.ts
@@ -8,6 +8,8 @@ import {
   TemplateVectorDB,
 } from "./types";
 
+import { TSYSTEMS_LLMHUB_API_URL } from "./providers/llmhub";
+
 export type EnvVar = {
   name?: string;
   description?: string;
@@ -262,6 +264,21 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
         },
       ]
     : []),
+    ...(modelConfig.provider === "t-systems"
+      ? [
+          {
+            name: "T_SYSTEMS_LLMHUB_BASE_URL",
+            description:
+              "The base URL for the T-Systems AI Foundation Model API. Eg: http://localhost:11434",
+            value: TSYSTEMS_LLMHUB_API_URL,
+          },
+          {
+            name: "T_SYSTEMS_LLMHUB_API_KEY",
+            description: "API Key for T-System's AI Foundation Model.",
+            value: modelConfig.apiKey,
+          },
+        ]
+      : []),
   ];
 };
 
diff --git a/helpers/providers/index.ts b/helpers/providers/index.ts
index ce873deb..22e4de3e 100644
--- a/helpers/providers/index.ts
+++ b/helpers/providers/index.ts
@@ -1,10 +1,11 @@
 import ciInfo from "ci-info";
 import prompts from "prompts";
 import { questionHandlers } from "../../questions";
-import { ModelConfig, ModelProvider } from "../types";
+import { ModelConfig, ModelProvider, TemplateFramework } from "../types";
 import { askAnthropicQuestions } from "./anthropic";
 import { askGeminiQuestions } from "./gemini";
 import { askGroqQuestions } from "./groq";
+import { askLLMHubQuestions } from "./llmhub";
 import { askOllamaQuestions } from "./ollama";
 import { askOpenAIQuestions } from "./openai";
 
@@ -13,6 +14,7 @@ const DEFAULT_MODEL_PROVIDER = "openai";
 export type ModelConfigQuestionsParams = {
   openAiKey?: string;
   askModels: boolean;
+  framework?: TemplateFramework;
 };
 
 export type ModelConfigParams = Omit<ModelConfig, "provider">;
@@ -20,24 +22,27 @@ export type ModelConfigParams = Omit<ModelConfig, "provider">;
 export async function askModelConfig({
   askModels,
   openAiKey,
+  framework,
 }: ModelConfigQuestionsParams): Promise<ModelConfig> {
   let modelProvider: ModelProvider = DEFAULT_MODEL_PROVIDER;
   if (askModels && !ciInfo.isCI) {
+    let choices = [
+      { title: "OpenAI", value: "openai" },
+      { title: "Groq", value: "groq" },
+      { title: "Ollama", value: "ollama" },
+      { title: "Anthropic", value: "anthropic" },
+      { title: "Gemini", value: "gemini" },
+    ];
+
+    if (framework === "fastapi") {
+      choices.push({ title: "T-Systems", value: "t-systems" });
+    }
     const { provider } = await prompts(
       {
         type: "select",
         name: "provider",
         message: "Which model provider would you like to use",
-        choices: [
-          {
-            title: "OpenAI",
-            value: "openai",
-          },
-          { title: "Groq", value: "groq" },
-          { title: "Ollama", value: "ollama" },
-          { title: "Anthropic", value: "anthropic" },
-          { title: "Gemini", value: "gemini" },
-        ],
+        choices: choices,
         initial: 0,
       },
       questionHandlers,
@@ -59,6 +64,9 @@ export async function askModelConfig({
     case "gemini":
       modelConfig = await askGeminiQuestions({ askModels });
       break;
+    case "t-systems":
+      modelConfig = await askLLMHubQuestions({ askModels });
+      break;
     default:
       modelConfig = await askOpenAIQuestions({
         openAiKey,
diff --git a/helpers/providers/llmhub.ts b/helpers/providers/llmhub.ts
new file mode 100644
index 00000000..b1563325
--- /dev/null
+++ b/helpers/providers/llmhub.ts
@@ -0,0 +1,169 @@
+import ciInfo from "ci-info";
+import got from "got";
+import ora from "ora";
+import { red } from "picocolors";
+import prompts from "prompts";
+import { ModelConfigParams } from ".";
+import { questionHandlers } from "../../questions";
+
+export const TSYSTEMS_LLMHUB_API_URL =
+  "https://llm-server.llmhub.t-systems.net/v2";
+
+const DEFAULT_MODEL = "gpt-3.5-turbo";
+const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large";
+
+const LLMHUB_MODELS = [
+  "gpt-35-turbo",
+  "gpt-4-32k-1",
+  "gpt-4-32k-canada",
+  "gpt-4-32k-france",
+  "gpt-4-turbo-128k-france",
+  "Llama2-70b-Instruct",
+  "Llama-3-70B-Instruct",
+  "Mixtral-8x7B-Instruct-v0.1",
+  "mistral-large-32k-france",
+  "CodeLlama-2",
+];
+const LLMHUB_EMBEDDING_MODELS = [
+  "text-embedding-ada-002",
+  "text-embedding-ada-002-france",
+  "jina-embeddings-v2-base-de",
+  "jina-embeddings-v2-base-code",
+  "text-embedding-bge-m3",
+];
+
+type LLMHubQuestionsParams = {
+  apiKey?: string;
+  askModels: boolean;
+};
+
+export async function askLLMHubQuestions({
+  askModels,
+  apiKey,
+}: LLMHubQuestionsParams): Promise<ModelConfigParams> {
+  const config: ModelConfigParams = {
+    apiKey,
+    model: DEFAULT_MODEL,
+    embeddingModel: DEFAULT_EMBEDDING_MODEL,
+    dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL),
+    isConfigured(): boolean {
+      if (config.apiKey) {
+        return true;
+      }
+      if (process.env["T_SYSTEMS_LLMHUB_API_KEY"]) {
+        return true;
+      }
+      return false;
+    },
+  };
+
+  if (!config.apiKey) {
+    const { key } = await prompts(
+      {
+        type: "text",
+        name: "key",
+        message: askModels
+          ? "Please provide your LLMHub API key (or leave blank to use T_SYSTEMS_LLMHUB_API_KEY env variable):"
+          : "Please provide your LLMHub API key (leave blank to skip):",
+        validate: (value: string) => {
+          if (askModels && !value) {
+            if (process.env.T_SYSTEMS_LLMHUB_API_KEY) {
+              return true;
+            }
+            return "T_SYSTEMS_LLMHUB_API_KEY env variable is not set - key is required";
+          }
+          return true;
+        },
+      },
+      questionHandlers,
+    );
+    config.apiKey = key || process.env.T_SYSTEMS_LLMHUB_API_KEY;
+  }
+
+  // use default model values in CI or if user should not be asked
+  const useDefaults = ciInfo.isCI || !askModels;
+  if (!useDefaults) {
+    const { model } = await prompts(
+      {
+        type: "select",
+        name: "model",
+        message: "Which LLM model would you like to use?",
+        choices: await getAvailableModelChoices(false, config.apiKey),
+        initial: 0,
+      },
+      questionHandlers,
+    );
+    config.model = model;
+
+    const { embeddingModel } = await prompts(
+      {
+        type: "select",
+        name: "embeddingModel",
+        message: "Which embedding model would you like to use?",
+        choices: await getAvailableModelChoices(true, config.apiKey),
+        initial: 0,
+      },
+      questionHandlers,
+    );
+    config.embeddingModel = embeddingModel;
+    config.dimensions = getDimensions(embeddingModel);
+  }
+
+  return config;
+}
+
+async function getAvailableModelChoices(
+  selectEmbedding: boolean,
+  apiKey?: string,
+) {
+  if (!apiKey) {
+    throw new Error("Need LLMHub key to retrieve model choices");
+  }
+  const isLLMModel = (modelId: string) => {
+    return LLMHUB_MODELS.includes(modelId);
+  };
+
+  const isEmbeddingModel = (modelId: string) => {
+    return LLMHUB_EMBEDDING_MODELS.includes(modelId);
+  };
+
+  const spinner = ora("Fetching available models").start();
+  try {
+    const response = await got(`${TSYSTEMS_LLMHUB_API_URL}/models`, {
+      headers: {
+        Authorization: "Bearer " + apiKey,
+      },
+      timeout: 5000,
+      responseType: "json",
+    });
+    const data: any = await response.body;
+    spinner.stop();
+    return data.data
+      .filter((model: any) =>
+        selectEmbedding ? isEmbeddingModel(model.id) : isLLMModel(model.id),
+      )
+      .map((el: any) => {
+        return {
+          title: el.id,
+          value: el.id,
+        };
+      });
+  } catch (error) {
+    spinner.stop();
+    if ((error as any).response?.statusCode === 401) {
+      console.log(
+        red(
+          "Invalid LLMHub API key provided! Please provide a valid key and try again!",
+        ),
+      );
+    } else {
+      console.log(red("Request failed: " + error));
+    }
+    process.exit(1);
+  }
+}
+
+function getDimensions(modelName: string) {
+  // Assuming dimensions similar to OpenAI for simplicity. Update if different.
+  return modelName === "text-embedding-004" ? 768 : 1536;
+}
diff --git a/helpers/python.ts b/helpers/python.ts
index c95ab7dc..64faa4a0 100644
--- a/helpers/python.ts
+++ b/helpers/python.ts
@@ -173,6 +173,16 @@ const getAdditionalDependencies = (
         version: "0.1.6",
       });
       break;
+    case "t-systems":
+      dependencies.push({
+        name: "llama-index-agent-openai",
+        version: "0.2.2",
+      });
+      dependencies.push({
+        name: "llama-index-llms-openai-like",
+        version: "0.1.3",
+      });
+      break;
   }
 
   return dependencies;
diff --git a/helpers/types.ts b/helpers/types.ts
index 4bc3d10b..16debfc7 100644
--- a/helpers/types.ts
+++ b/helpers/types.ts
@@ -6,7 +6,8 @@ export type ModelProvider =
   | "groq"
   | "ollama"
   | "anthropic"
-  | "gemini";
+  | "gemini"
+  | "t-systems";
 export type ModelConfig = {
   provider: ModelProvider;
   apiKey?: string;
diff --git a/questions.ts b/questions.ts
index 4a61a106..d2200909 100644
--- a/questions.ts
+++ b/questions.ts
@@ -484,6 +484,7 @@ export const askQuestions = async (
   const modelConfig = await askModelConfig({
     openAiKey,
     askModels: program.askModels ?? false,
+    framework: program.framework,
   });
   program.modelConfig = modelConfig;
   preferences.modelConfig = modelConfig;
diff --git a/templates/types/streaming/fastapi/app/llmhub.py b/templates/types/streaming/fastapi/app/llmhub.py
new file mode 100644
index 00000000..69e0e324
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/llmhub.py
@@ -0,0 +1,61 @@
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.core.settings import Settings
+from typing import Dict
+import os
+
+DEFAULT_MODEL = "gpt-3.5-turbo"
+DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large"
+
+class TSIEmbedding(OpenAIEmbedding):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._query_engine = self._text_engine = self.model_name
+
+def llm_config_from_env() -> Dict:
+    from llama_index.core.constants import DEFAULT_TEMPERATURE
+
+    model = os.getenv("MODEL", DEFAULT_MODEL)
+    temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
+    max_tokens = os.getenv("LLM_MAX_TOKENS")
+    api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY")
+    api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL")
+
+    config = {
+        "model": model,
+        "api_key": api_key,
+        "api_base": api_base,
+        "temperature": float(temperature),
+        "max_tokens": int(max_tokens) if max_tokens is not None else None,
+    }
+    return config
+
+
+def embedding_config_from_env() -> Dict:
+    from llama_index.core.constants import DEFAULT_EMBEDDING_DIM
+
+    model = os.getenv("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL)
+    dimension = os.getenv("EMBEDDING_DIM", DEFAULT_EMBEDDING_DIM)
+    api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY")
+    api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL")
+
+    config = {
+        "model_name": model,
+        "dimension": int(dimension) if dimension is not None else None,
+        "api_key": api_key,
+        "api_base": api_base,
+    }
+    return config
+
+def init_llmhub():
+    from llama_index.llms.openai_like import OpenAILike
+
+    llm_configs = llm_config_from_env()
+    embedding_configs = embedding_config_from_env()
+
+    Settings.embed_model = TSIEmbedding(**embedding_configs)
+    Settings.llm = OpenAILike(
+        **llm_configs,
+        is_chat_model=True,
+        is_function_calling_model=False,
+        context_window=4096,
+    )
\ No newline at end of file
diff --git a/templates/types/streaming/fastapi/app/settings.py b/templates/types/streaming/fastapi/app/settings.py
index c37c13b7..7ec21975 100644
--- a/templates/types/streaming/fastapi/app/settings.py
+++ b/templates/types/streaming/fastapi/app/settings.py
@@ -2,7 +2,7 @@ import os
 from typing import Dict
 
 from llama_index.core.settings import Settings
-
+from .llmhub import init_llmhub
 
 def init_settings():
     model_provider = os.getenv("MODEL_PROVIDER")
@@ -19,8 +19,11 @@ def init_settings():
             init_gemini()
         case "azure-openai":
             init_azure_openai()
+        case "t-systems":
+            init_llmhub()
         case _:
             raise ValueError(f"Invalid model provider: {model_provider}")
+
     Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
     Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
 
@@ -144,3 +147,5 @@ def init_gemini():
 
     Settings.llm = Gemini(model=model_name)
     Settings.embed_model = GeminiEmbedding(model_name=embed_model_name)
+
+
-- 
GitLab
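
For anyone trying the change locally, here is a minimal sketch (not part of the patch) of how a project generated from the FastAPI streaming template picks up the new provider once the patch is applied. The env var names come from the diff above; the API key value is a placeholder, and EMBEDDING_DIM=768 for the Jina model is an assumption, not something the patch specifies.

# Minimal sketch: drive the new "t-systems" branch of init_settings().
# Assumes a project generated with this patch applied; the key below is a
# placeholder and EMBEDDING_DIM=768 is an assumed value for the chosen model.
import os

os.environ["MODEL_PROVIDER"] = "t-systems"
os.environ["MODEL"] = "gpt-35-turbo"
os.environ["EMBEDDING_MODEL"] = "jina-embeddings-v2-base-de"
os.environ["EMBEDDING_DIM"] = "768"
os.environ["T_SYSTEMS_LLMHUB_BASE_URL"] = "https://llm-server.llmhub.t-systems.net/v2"
os.environ["T_SYSTEMS_LLMHUB_API_KEY"] = "<your-llmhub-api-key>"

from llama_index.core.settings import Settings
from app.settings import init_settings

init_settings()
# Settings.llm is now an OpenAILike client pointed at LLMHub, and
# Settings.embed_model is the TSIEmbedding wrapper from app/llmhub.py.
print(type(Settings.llm).__name__, Settings.llm.model)
print(type(Settings.embed_model).__name__, Settings.embed_model.model_name)

Because LLMHub exposes an OpenAI-compatible API, the template reuses OpenAILike and an OpenAIEmbedding subclass rather than introducing a new client, which is why only llama-index-llms-openai-like and llama-index-agent-openai are added as extra Python dependencies.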