diff --git a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
index 0aa2900d97dc8b9b5caaa380e73055847175742b..16855b359269782ceb11c7eeeca680a59b04df5f 100644
--- a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@@ -111,6 +111,36 @@ export default function OllamaLLMOptions({ settings }) {
             Enter the URL where Ollama is running.
           </p>
         </div>
+
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-2">
+            Ollama Keep Alive
+          </label>
+          <select
+            name="OllamaLLMKeepAliveSeconds"
+            required={true}
+            className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+            defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
+          >
+            <option value="0">No cache</option>
+            <option value="300">5 minutes</option>
+            <option value="3600">1 hour</option>
+            <option value="-1">Forever</option>
+          </select>
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            Choose how long Ollama should keep your model in memory before
+            unloading.
+            <a
+              className="underline text-blue-300"
+              href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
+              target="_blank"
+              rel="noreferrer"
+            >
+              {" "}
+              Learn more →
+            </a>
+          </p>
+        </div>
       </div>
     </div>
   </div>
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 70ed526e781b53d11b5ad1a3a63b2d4fe0105514..cf5219d37a7a286e7ba2606533ea25aa0db066b4 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -411,6 +411,7 @@ const SystemSettings = {
       OllamaLLMBasePath: process.env.OLLAMA_BASE_PATH,
       OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
       OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
+      OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
 
       // TogetherAI Keys
       TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY,
diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js
index 5c9f24f1e71ccef0b495cfccddb8eb98966a0daa..174670f2c79827f6f978a5a941928b107883c3e2 100644
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@@ -13,6 +13,9 @@ class OllamaAILLM {
 
     this.basePath = process.env.OLLAMA_BASE_PATH;
     this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
+    this.keepAlive = process.env.OLLAMA_KEEP_ALIVE_TIMEOUT
+      ? Number(process.env.OLLAMA_KEEP_ALIVE_TIMEOUT)
+      : 300; // Default 5-minute timeout for Ollama model loading.
     this.limits = {
       history: this.promptWindowLimit() * 0.15,
       system: this.promptWindowLimit() * 0.15,
@@ -28,6 +31,7 @@ class OllamaAILLM {
     return new ChatOllama({
       baseUrl: this.basePath,
       model: this.model,
+      keepAlive: this.keepAlive,
       useMLock: true,
       temperature,
     });
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index a1304b453a128f278ce12dd38ee6b4e1db3d0b36..5b3e2f66d05653e775f7419c2f10a6b0d2dea6db 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -101,6 +101,10 @@ const KEY_MAPPING = {
     envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },
+  OllamaLLMKeepAliveSeconds: {
+    envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
+    checks: [isInteger],
+  },
 
   // Mistral AI API Settings
   MistralApiKey: {
@@ -454,6 +458,11 @@ function nonZero(input = "") {
   return Number(input) <= 0 ? "Value must be greater than zero" : null;
 }
 
+function isInteger(input = "") {
+  if (Number.isNaN(Number(input))) return "Value must be a number";
+  return null; // null = valid, matching the nonZero/isValidURL convention.
+}
+
 function isValidURL(input = "") {
   try {
     new URL(input);