From 1135853740ab69776eb50693a96a3f6aff9a5f9c Mon Sep 17 00:00:00 2001 From: Timothy Carambat <rambat1010@gmail.com> Date: Fri, 22 Mar 2024 14:39:30 -0700 Subject: [PATCH] Patch LMStudio Inference server bug integration (#957) --- docker/.env.example | 1 + .../LLMSelection/LMStudioOptions/index.jsx | 80 +++++++++++++++++++ server/.env.example | 1 + server/models/systemSettings.js | 1 + server/utils/AiProviders/lmStudio/index.js | 11 ++- server/utils/helpers/customModels.js | 25 ++++++ server/utils/helpers/updateENV.js | 4 + 7 files changed, 120 insertions(+), 3 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index ed6fd3bce..5efb2c049 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -27,6 +27,7 @@ GID='1000' # LLM_PROVIDER='lmstudio' # LMSTUDIO_BASE_PATH='http://your-server:1234/v1' +# LMSTUDIO_MODEL_PREF='Loaded from Chat UI' # this is a bug in LMStudio 0.2.17 # LMSTUDIO_MODEL_TOKEN_LIMIT=4096 # LLM_PROVIDER='localai' diff --git a/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx b/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx index fbba7666f..200c77a6e 100644 --- a/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx +++ b/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx @@ -1,7 +1,14 @@ +import { useEffect, useState } from "react"; import { Info } from "@phosphor-icons/react"; import paths from "@/utils/paths"; +import System from "@/models/system"; export default function LMStudioOptions({ settings, showAlert = false }) { + const [basePathValue, setBasePathValue] = useState( + settings?.LMStudioBasePath + ); + const [basePath, setBasePath] = useState(settings?.LMStudioBasePath); + return ( <div className="w-full flex flex-col"> {showAlert && ( @@ -35,8 +42,11 @@ export default function LMStudioOptions({ settings, showAlert = false }) { required={true} autoComplete="off" spellCheck={false} + onChange={(e) => setBasePathValue(e.target.value)} + onBlur={() => setBasePath(basePathValue)} /> </div> + <LMStudioModelSelection settings={settings} basePath={basePath} /> <div className="flex flex-col w-60"> <label className="text-white text-sm font-semibold block mb-4"> Token context window @@ -57,3 +67,73 @@ export default function LMStudioOptions({ settings, showAlert = false }) { </div> ); } + +function LMStudioModelSelection({ settings, basePath = null }) { + const [customModels, setCustomModels] = useState([]); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function findCustomModels() { + if (!basePath || !basePath.includes("/v1")) { + setCustomModels([]); + setLoading(false); + return; + } + setLoading(true); + const { models } = await System.customModels("lmstudio", null, basePath); + setCustomModels(models || []); + setLoading(false); + } + findCustomModels(); + }, [basePath]); + + if (loading || customModels.length == 0) { + return ( + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-4"> + Chat Model Selection + </label> + <select + name="LMStudioModelPref" + disabled={true} + className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" + > + <option disabled={true} selected={true}> + {basePath?.includes("/v1") + ? 
"-- loading available models --" + : "-- waiting for URL --"} + </option> + </select> + </div> + ); + } + + return ( + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-4"> + Chat Model Selection + </label> + <select + name="LMStudioModelPref" + required={true} + className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" + > + {customModels.length > 0 && ( + <optgroup label="Your loaded models"> + {customModels.map((model) => { + return ( + <option + key={model.id} + value={model.id} + selected={settings.LMStudioModelPref === model.id} + > + {model.id} + </option> + ); + })} + </optgroup> + )} + </select> + </div> + ); +} diff --git a/server/.env.example b/server/.env.example index c5681db4a..21887d09c 100644 --- a/server/.env.example +++ b/server/.env.example @@ -24,6 +24,7 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # LLM_PROVIDER='lmstudio' # LMSTUDIO_BASE_PATH='http://your-server:1234/v1' +# LMSTUDIO_MODEL_PREF='Loaded from Chat UI' # this is a bug in LMStudio 0.2.17 # LMSTUDIO_MODEL_TOKEN_LIMIT=4096 # LLM_PROVIDER='localai' diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index dbf95238e..f27434ca9 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -137,6 +137,7 @@ const SystemSettings = { ? { LMStudioBasePath: process.env.LMSTUDIO_BASE_PATH, LMStudioTokenLimit: process.env.LMSTUDIO_MODEL_TOKEN_LIMIT, + LMStudioModelPref: process.env.LMSTUDIO_MODEL_PREF, // For embedding credentials when lmstudio is selected. OpenAiKey: !!process.env.OPEN_AI_KEY, diff --git a/server/utils/AiProviders/lmStudio/index.js b/server/utils/AiProviders/lmStudio/index.js index 455edce3a..a5a8df118 100644 --- a/server/utils/AiProviders/lmStudio/index.js +++ b/server/utils/AiProviders/lmStudio/index.js @@ -12,9 +12,14 @@ class LMStudioLLM { basePath: process.env.LMSTUDIO_BASE_PATH?.replace(/\/+$/, ""), // here is the URL to your LMStudio instance }); this.lmstudio = new OpenAIApi(config); - // When using LMStudios inference server - the model param is not required so - // we can stub it here. LMStudio can only run one model at a time. - this.model = "model-placeholder"; + + // Prior to LMStudio 0.2.17 the `model` param was not required and you could pass anything + // into that field and it would work. On 0.2.17 LMStudio introduced multi-model chat + // which now has a bug that reports the server model id as "Loaded from Chat UI" + // and any other value will crash inferencing. So until this is patched we will + // try to fetch the `/models` and have the user set it, or just fallback to "Loaded from Chat UI" + // which will not impact users with <v0.2.17 and should work as well once the bug is fixed. 
+ this.model = process.env.LMSTUDIO_MODEL_PREF || "Loaded from Chat UI"; this.limits = { history: this.promptWindowLimit() * 0.15, system: this.promptWindowLimit() * 0.15, diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js index f434ac078..5dfa30e31 100644 --- a/server/utils/helpers/customModels.js +++ b/server/utils/helpers/customModels.js @@ -10,6 +10,7 @@ const SUPPORT_CUSTOM_MODELS = [ "mistral", "perplexity", "openrouter", + "lmstudio", ]; async function getCustomModels(provider = "", apiKey = null, basePath = null) { @@ -33,6 +34,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) { return await getPerplexityModels(); case "openrouter": return await getOpenRouterModels(); + case "lmstudio": + return await getLMStudioModels(basePath); default: return { models: [], error: "Invalid provider for custom models" }; } @@ -81,6 +84,28 @@ async function localAIModels(basePath = null, apiKey = null) { return { models, error: null }; } +async function getLMStudioModels(basePath = null) { + try { + const { Configuration, OpenAIApi } = require("openai"); + const config = new Configuration({ + basePath: basePath || process.env.LMSTUDIO_BASE_PATH, + }); + const openai = new OpenAIApi(config); + const models = await openai + .listModels() + .then((res) => res.data.data) + .catch((e) => { + console.error(`LMStudio:listModels`, e.message); + return []; + }); + + return { models, error: null }; + } catch (e) { + console.error(`LMStudio:getLMStudioModels`, e.message); + return { models: [], error: "Could not fetch LMStudio Models" }; + } +} + async function ollamaAIModels(basePath = null) { let url; try { diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index f8e3d9cec..29fa210ef 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -59,6 +59,10 @@ const KEY_MAPPING = { envKey: "LMSTUDIO_BASE_PATH", checks: [isNotEmpty, validLLMExternalBasePath, validDockerizedUrl], }, + LMStudioModelPref: { + envKey: "LMSTUDIO_MODEL_PREF", + checks: [], + }, LMStudioTokenLimit: { envKey: "LMSTUDIO_MODEL_TOKEN_LIMIT", checks: [nonZero], -- GitLab
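
A quick way to sanity-check the new lookup outside the app: the `getLMStudioModels()` helper added in server/utils/helpers/customModels.js can be reproduced as a short standalone Node script. The sketch below is illustrative and not code from the patch; it reuses the same openai v3 SDK calls the patch relies on (`Configuration`, `OpenAIApi`, `listModels`), while the function name, port, and URL are placeholder assumptions for a local LMStudio instance.

// Sketch: list the models an LMStudio server reports via its
// OpenAI-compatible /v1 endpoint. Assumes the openai v3 SDK already used by
// the patch; the URL below is a placeholder for your own LMSTUDIO_BASE_PATH.
const { Configuration, OpenAIApi } = require("openai");

async function listLMStudioModels(basePath = "http://localhost:1234/v1") {
  // LMStudio needs no API key, only a base path ending in /v1.
  const config = new Configuration({ basePath });
  const openai = new OpenAIApi(config);
  return openai
    .listModels()
    .then((res) => res.data.data) // array of { id, ... } model records
    .catch((e) => {
      console.error("LMStudio:listModels", e.message);
      return [];
    });
}

listLMStudioModels().then((models) => {
  // Each `id` is a valid value for LMSTUDIO_MODEL_PREF; on the buggy
  // LMStudio 0.2.17 build the server reports it as "Loaded from Chat UI".
  console.log(models.map((m) => m.id));
});

With a real model id in hand, the server can be pinned explicitly by setting LMSTUDIO_MODEL_PREF alongside LMSTUDIO_BASE_PATH, matching the new entries in server/.env.example and docker/.env.example; otherwise the provider falls back to "Loaded from Chat UI" as the patch comment describes.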