From 3fd0fe8fc5884c89200b2f89eceff75ee92c2133 Mon Sep 17 00:00:00 2001
From: Sushanth Srivatsa <65614803+ssbodapati@users.noreply.github.com>
Date: Wed, 19 Feb 2025 05:30:17 +0530
Subject: [PATCH] 2749 ollama client auth token (#3005)

* ollama auth token provision

* auth token provision

* ollama auth provision

* ollama auth token

* ollama auth provision

* token input field css fix

* Fix provider handler not using key
sensible fallback to not break existing installs
re-order of input fields
null-check for API key and header optional insert on request
linting

* apply header and auth to agent invocations

* upgrading to ollama 5.10 for passing headers to constructor

* rename Auth systemSetting key to be more descriptive
linting and copy

* remove untracked files + update gitignore

* remove debug

* patch lockfile

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
---
 docker/.env.example                           |   1 +
 .../LLMSelection/OllamaLLMOptions/index.jsx   | 260 ++++++++++--------
 .../hooks/useProviderEndpointAutoDiscovery.js |  27 +-
 server/.env.example                           |   1 +
 server/models/systemSettings.js               |   1 +
 server/package.json                           |   4 +-
 server/storage/models/.gitignore              |   3 +-
 server/utils/AiProviders/ollama/index.js      |  11 +-
 .../utils/agents/aibitat/providers/ollama.js  |   8 +-
 server/utils/helpers/customModels.js          |  11 +-
 server/utils/helpers/updateENV.js             |   4 +
 server/yarn.lock                              |   8 +-
 12 files changed, 211 insertions(+), 128 deletions(-)

diff --git a/docker/.env.example b/docker/.env.example
index 5f1b0e44c..d3cc68e12 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -42,6 +42,7 @@ GID='1000'
 # OLLAMA_BASE_PATH='http://host.docker.internal:11434'
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
+# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
 
 # LLM_PROVIDER='togetherai'
 # TOGETHER_AI_API_KEY='my-together-ai-key'
diff --git a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
index d04f7cb62..c98877c5e 100644
--- a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@@ -11,12 +11,15 @@ export default function OllamaLLMOptions({ settings }) {
     autoDetecting: loading,
     basePath,
     basePathValue,
+    authToken,
+    authTokenValue,
     showAdvancedControls,
     setShowAdvancedControls,
     handleAutoDetectClick,
   } = useProviderEndpointAutoDiscovery({
     provider: "ollama",
     initialBasePath: settings?.OllamaLLMBasePath,
+    initialAuthToken: settings?.OllamaLLMAuthToken,
     ENDPOINTS: OLLAMA_COMMON_URLS,
   });
   const [performanceMode, setPerformanceMode] = useState(
@@ -32,6 +35,7 @@ export default function OllamaLLMOptions({ settings }) {
         <OllamaLLMModelSelection
           settings={settings}
           basePath={basePath.value}
+          authToken={authToken.value}
         />
         <div className="flex flex-col w-60">
           <label className="text-white text-sm font-semibold block mb-2">
@@ -73,120 +77,146 @@ export default function OllamaLLMOptions({ settings }) {
       </div>
 
       <div hidden={!showAdvancedControls}>
-        <div className="w-full flex items-start gap-4">
-          <div className="flex flex-col w-60">
-            <div className="flex justify-between items-center mb-2">
-              <label className="text-white text-sm font-semibold">
-                Ollama Base URL
-              </label>
-              {loading ? (
-                <PreLoader size="6" />
-              ) : (
-                <>
-                  {!basePathValue.value && (
-                    <button
-                      onClick={handleAutoDetectClick}
-                      className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
-                    >
-                      Auto-Detect
-                    </button>
-                  )}
-                </>
-              )}
+        <div className="flex flex-col">
+          <div className="w-full flex items-start gap-4">
+            <div className="flex flex-col w-60">
+              <div className="flex justify-between items-center mb-2">
+                <label className="text-white text-sm font-semibold">
+                  Ollama Base URL
+                </label>
+                {loading ? (
+                  <PreLoader size="6" />
+                ) : (
+                  <>
+                    {!basePathValue.value && (
+                      <button
+                        onClick={handleAutoDetectClick}
+                        className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
+                      >
+                        Auto-Detect
+                      </button>
+                    )}
+                  </>
+                )}
+              </div>
+              <input
+                type="url"
+                name="OllamaLLMBasePath"
+                className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+                placeholder="http://127.0.0.1:11434"
+                value={basePathValue.value}
+                required={true}
+                autoComplete="off"
+                spellCheck={false}
+                onChange={basePath.onChange}
+                onBlur={basePath.onBlur}
+              />
+              <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+                Enter the URL where Ollama is running.
+              </p>
             </div>
-            <input
-              type="url"
-              name="OllamaLLMBasePath"
-              className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
-              placeholder="http://127.0.0.1:11434"
-              value={basePathValue.value}
-              required={true}
-              autoComplete="off"
-              spellCheck={false}
-              onChange={basePath.onChange}
-              onBlur={basePath.onBlur}
-            />
-            <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
-              Enter the URL where Ollama is running.
-            </p>
-          </div>
-
-          <div className="flex flex-col w-60">
-            <label className="text-white text-sm font-semibold block mb-2">
-              Ollama Keep Alive
-            </label>
-            <select
-              name="OllamaLLMKeepAliveSeconds"
-              required={true}
-              className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
-              defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
-            >
-              <option value="0">No cache</option>
-              <option value="300">5 minutes</option>
-              <option value="3600">1 hour</option>
-              <option value="-1">Forever</option>
-            </select>
-            <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
-              Choose how long Ollama should keep your model in memory before
-              unloading.
-              <a
-                className="underline text-blue-300"
-                href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
-                target="_blank"
-                rel="noreferrer"
+            <div className="flex flex-col w-60">
+              <label className="text-white text-sm font-semibold mb-2 flex items-center">
+                Performance Mode
+                <Info
+                  size={16}
+                  className="ml-2 text-white"
+                  data-tooltip-id="performance-mode-tooltip"
+                />
+              </label>
+              <select
+                name="OllamaLLMPerformanceMode"
+                required={true}
+                className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+                value={performanceMode}
+                onChange={(e) => setPerformanceMode(e.target.value)}
               >
-                {" "}
-                Learn more →
-              </a>
-            </p>
-          </div>
-
-          <div className="flex flex-col w-60">
-            <label className="text-white text-sm font-semibold mb-2 flex items-center">
-              Performance Mode
-              <Info
-                size={16}
-                className="ml-2 text-white"
-                data-tooltip-id="performance-mode-tooltip"
-              />
-            </label>
-            <select
-              name="OllamaLLMPerformanceMode"
-              required={true}
-              className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
-              value={performanceMode}
-              onChange={(e) => setPerformanceMode(e.target.value)}
-            >
-              <option value="base">Base (Default)</option>
-              <option value="maximum">Maximum</option>
-            </select>
-            <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
-              Choose the performance mode for the Ollama model.
-            </p>
-            <Tooltip
-              id="performance-mode-tooltip"
-              place="bottom"
-              className="tooltip !text-xs max-w-xs"
-            >
-              <p className="text-red-500">
-                <strong>Note:</strong> Be careful with the Maximum mode. It may
-                increase resource usage significantly.
+                <option value="base">Base (Default)</option>
+                <option value="maximum">Maximum</option>
+              </select>
+              <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+                Choose the performance mode for the Ollama model.
               </p>
-              <br />
-              <p>
-                <strong>Base:</strong> Ollama automatically limits the context
-                to 2048 tokens, keeping resources usage low while maintaining
-                good performance. Suitable for most users and models.
+              <Tooltip
+                id="performance-mode-tooltip"
+                place="bottom"
+                className="tooltip !text-xs max-w-xs"
+              >
+                <p className="text-red-500">
+                  <strong>Note:</strong> Be careful with the Maximum mode. It
+                  may increase resource usage significantly.
+                </p>
+                <br />
+                <p>
+                  <strong>Base:</strong> Ollama automatically limits the context
+                  to 2048 tokens, keeping resources usage low while maintaining
+                  good performance. Suitable for most users and models.
+                </p>
+                <br />
+                <p>
+                  <strong>Maximum:</strong> Uses the full context window (up to
+                  Max Tokens). Will result in increased resource usage but
+                  allows for larger context conversations. <br />
+                  <br />
+                  This is not recommended for most users.
+                </p>
+              </Tooltip>
+            </div>
+            <div className="flex flex-col w-60">
+              <label className="text-white text-sm font-semibold block mb-2">
+                Ollama Keep Alive
+              </label>
+              <select
+                name="OllamaLLMKeepAliveSeconds"
+                required={true}
+                className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+                defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
+              >
+                <option value="0">No cache</option>
+                <option value="300">5 minutes</option>
+                <option value="3600">1 hour</option>
+                <option value="-1">Forever</option>
+              </select>
+              <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+                Choose how long Ollama should keep your model in memory before
+                unloading.
+                <a
+                  className="underline text-blue-300"
+                  href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
+                  target="_blank"
+                  rel="noreferrer"
+                >
+                  {" "}
+                  Learn more →
+                </a>
               </p>
-              <br />
-              <p>
-                <strong>Maximum:</strong> Uses the full context window (up to
-                Max Tokens). Will result in increased resource usage but allows
-                for larger context conversations. <br />
+            </div>
+          </div>
+          <div className="w-full flex items-start gap-4">
+            <div className="flex flex-col w-100">
+              <label className="text-white text-sm font-semibold">
+                Auth Token
+              </label>
+              <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+                Enter a <code>Bearer</code> Auth Token for interacting with your
+                Ollama server.
                 <br />
-                This is not recommended for most users.
+                Used <b>only</b> if running Ollama behind an authentication
+                server.
               </p>
-            </Tooltip>
+              <input
+                type="password"
+                name="OllamaLLMAuthToken"
+                className="border-none bg-theme-settings-input-bg mt-2 text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg outline-none block w-full p-2.5"
+                placeholder="Ollama Auth Token"
+                value={authTokenValue.value}
+                onChange={authToken.onChange}
+                onBlur={authToken.onBlur}
+                required={false}
+                autoComplete="off"
+                spellCheck={false}
+              />
+            </div>
          </div>
        </div>
      </div>
@@ -194,7 +224,11 @@ export default function OllamaLLMOptions({ settings }) {
   );
 }
 
-function OllamaLLMModelSelection({ settings, basePath = null }) {
+function OllamaLLMModelSelection({
+  settings,
+  basePath = null,
+  authToken = null,
+}) {
   const [customModels, setCustomModels] = useState([]);
   const [loading, setLoading] = useState(true);
 
@@ -207,7 +241,11 @@ function OllamaLLMModelSelection({ settings, basePath = null }) {
       }
       setLoading(true);
       try {
-        const { models } = await System.customModels("ollama", null, basePath);
+        const { models } = await System.customModels(
+          "ollama",
+          authToken,
+          basePath
+        );
         setCustomModels(models || []);
       } catch (error) {
         console.error("Failed to fetch custom models:", error);
@@ -216,7 +254,7 @@ function OllamaLLMModelSelection({ settings, basePath = null }) {
       setLoading(false);
     }
     findCustomModels();
-  }, [basePath]);
+  }, [basePath, authToken]);
 
   if (loading || customModels.length == 0) {
     return (
diff --git a/frontend/src/hooks/useProviderEndpointAutoDiscovery.js b/frontend/src/hooks/useProviderEndpointAutoDiscovery.js
index 956b09075..aff13d300 100644
--- a/frontend/src/hooks/useProviderEndpointAutoDiscovery.js
+++ b/frontend/src/hooks/useProviderEndpointAutoDiscovery.js
@@ -5,11 +5,15 @@ import showToast from "@/utils/toast";
 export default function useProviderEndpointAutoDiscovery({
   provider = null,
   initialBasePath = "",
+  initialAuthToken = null,
   ENDPOINTS = [],
 }) {
   const [loading, setLoading] = useState(false);
   const [basePath, setBasePath] = useState(initialBasePath);
   const [basePathValue, setBasePathValue] = useState(initialBasePath);
+
+  const [authToken, setAuthToken] = useState(initialAuthToken);
+  const [authTokenValue, setAuthTokenValue] = useState(initialAuthToken);
   const [autoDetectAttempted, setAutoDetectAttempted] = useState(false);
   const [showAdvancedControls, setShowAdvancedControls] = useState(true);
 
@@ -20,7 +24,7 @@ export default function useProviderEndpointAutoDiscovery({
     ENDPOINTS.forEach((endpoint) => {
       possibleEndpoints.push(
         new Promise((resolve, reject) => {
-          System.customModels(provider, null, endpoint, 2_000)
+          System.customModels(provider, authTokenValue, endpoint, 2_000)
             .then((results) => {
               if (!results?.models || results.models.length === 0)
                 throw new Error("No models");
@@ -74,9 +78,18 @@ export default function useProviderEndpointAutoDiscovery({
     setBasePath(basePathValue);
   }
 
+  function handleAuthTokenChange(e) {
+    const value = e.target.value;
+    setAuthTokenValue(value);
+  }
+
+  function handleAuthTokenBlur() {
+    setAuthToken(authTokenValue);
+  }
+
   useEffect(() => {
     if (!initialBasePath && !autoDetectAttempted) autoDetect(true);
-  }, [initialBasePath, autoDetectAttempted]);
+  }, [initialBasePath, initialAuthToken, autoDetectAttempted]);
 
   return {
     autoDetecting: loading,
@@ -93,6 +106,16 @@ export default function useProviderEndpointAutoDiscovery({
       value: basePathValue,
       set: setBasePathValue,
     },
+    authToken: {
+      value: authToken,
+      set: setAuthTokenValue,
+      onChange: handleAuthTokenChange,
+      onBlur: handleAuthTokenBlur,
+    },
+    authTokenValue: {
+      value: authTokenValue,
+      set: setAuthTokenValue,
+    },
     handleAutoDetectClick,
     runAutoDetect: autoDetect,
   };
diff --git a/server/.env.example b/server/.env.example
index 6ae9d8e63..1975ada8b 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -39,6 +39,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # OLLAMA_BASE_PATH='http://host.docker.internal:11434'
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
+# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
 
 # LLM_PROVIDER='togetherai'
 # TOGETHER_AI_API_KEY='my-together-ai-key'
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 7f7d0ea34..58087f7b7 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -471,6 +471,7 @@ const SystemSettings = {
     OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
     OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
     OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",
+    OllamaLLMAuthToken: process.env.OLLAMA_AUTH_TOKEN ?? null,
 
     // Novita LLM Keys
     NovitaLLMApiKey: !!process.env.NOVITA_LLM_API_KEY,
diff --git a/server/package.json b/server/package.json
index 497a0d00a..25753ad11 100644
--- a/server/package.json
+++ b/server/package.json
@@ -63,7 +63,7 @@
     "mssql": "^10.0.2",
     "multer": "^1.4.5-lts.1",
     "mysql2": "^3.9.8",
-    "ollama": "^0.5.0",
+    "ollama": "^0.5.10",
     "openai": "4.38.5",
     "pg": "^8.11.5",
     "pinecone-client": "^1.1.0",
@@ -97,4 +97,4 @@
     "nodemon": "^2.0.22",
     "prettier": "^3.0.3"
   }
-}
+}
\ No newline at end of file
diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore
index 037663a35..12436b21f 100644
--- a/server/storage/models/.gitignore
+++ b/server/storage/models/.gitignore
@@ -7,4 +7,5 @@ novita
 mixedbread-ai*
 gemini
 togetherAi
-tesseract
\ No newline at end of file
+tesseract
+ppio
\ No newline at end of file
diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js
index 5c53dd5f4..25ea07e1c 100644
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@@ -15,6 +15,7 @@ class OllamaAILLM {
     if (!process.env.OLLAMA_BASE_PATH)
       throw new Error("No Ollama Base Path was set.");
 
+    this.authToken = process.env.OLLAMA_AUTH_TOKEN;
     this.basePath = process.env.OLLAMA_BASE_PATH;
     this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
     this.performanceMode = process.env.OLLAMA_PERFORMANCE_MODE || "base";
@@ -27,7 +28,10 @@ class OllamaAILLM {
       user: this.promptWindowLimit() * 0.7,
     };
 
-    this.client = new Ollama({ host: this.basePath });
+    const headers = this.authToken
+      ? { Authorization: `Bearer ${this.authToken}` }
+      : {};
+    this.client = new Ollama({ host: this.basePath, headers: headers });
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
     this.#log(
@@ -273,9 +277,8 @@ class OllamaAILLM {
           type: "textResponseChunk",
           textResponse: "",
           close: true,
-          error: `Ollama:streaming - could not stream chat. ${
-            error?.cause ?? error.message
-          }`,
+          error: `Ollama:streaming - could not stream chat. ${error?.cause ?? error.message
+            }`,
         });
         response.removeListener("close", handleAbort);
         stream?.endMeasurement(usage);
diff --git a/server/utils/agents/aibitat/providers/ollama.js b/server/utils/agents/aibitat/providers/ollama.js
index 5cb529e51..5261c5b7e 100644
--- a/server/utils/agents/aibitat/providers/ollama.js
+++ b/server/utils/agents/aibitat/providers/ollama.js
@@ -16,7 +16,13 @@ class OllamaProvider extends InheritMultiple([Provider, UnTooled]) {
     } = config;
 
     super();
-    this._client = new Ollama({ host: process.env.OLLAMA_BASE_PATH });
+    const headers = process.env.OLLAMA_AUTH_TOKEN
+      ? { Authorization: `Bearer ${process.env.OLLAMA_AUTH_TOKEN}` }
+      : {};
+    this._client = new Ollama({
+      host: process.env.OLLAMA_BASE_PATH,
+      headers: headers,
+    });
     this.model = model;
     this.verbose = true;
   }
diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js
index 3f211d431..32cb977d5 100644
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@@ -41,7 +41,7 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
     case "localai":
       return await localAIModels(basePath, apiKey);
     case "ollama":
-      return await ollamaAIModels(basePath);
+      return await ollamaAIModels(basePath, apiKey);
     case "togetherai":
       return await getTogetherAiModels(apiKey);
     case "fireworksai":
@@ -292,7 +292,7 @@ async function getKoboldCPPModels(basePath = null) {
   }
 }
 
-async function ollamaAIModels(basePath = null) {
+async function ollamaAIModels(basePath = null, _authToken = null) {
   let url;
   try {
     let urlPath = basePath ?? process.env.OLLAMA_BASE_PATH;
@@ -304,7 +304,9 @@ async function ollamaAIModels(basePath = null) {
     return { models: [], error: "Not a valid URL." };
   }
 
-  const models = await fetch(`${url}/api/tags`)
+  const authToken = _authToken || process.env.OLLAMA_AUTH_TOKEN || null;
+  const headers = authToken ? { Authorization: `Bearer ${authToken}` } : {};
+  const models = await fetch(`${url}/api/tags`, { headers: headers })
     .then((res) => {
       if (!res.ok)
         throw new Error(`Could not reach Ollama server! ${res.status}`);
@@ -321,6 +323,9 @@ async function ollamaAIModels(basePath = null) {
       return [];
     });
 
+  // Api Key was successful so lets save it for future uses
+  if (models.length > 0 && !!authToken)
+    process.env.OLLAMA_AUTH_TOKEN = authToken;
   return { models, error: null };
 }
 
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index e3c1b9140..d0c50a8c0 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -120,6 +120,10 @@ const KEY_MAPPING = {
     envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
     checks: [isInteger],
   },
+  OllamaLLMAuthToken: {
+    envKey: "OLLAMA_AUTH_TOKEN",
+    checks: [],
+  },
 
   // Mistral AI API Settings
   MistralApiKey: {
diff --git a/server/yarn.lock b/server/yarn.lock
index 90e5e5749..a070488b0 100644
--- a/server/yarn.lock
+++ b/server/yarn.lock
@@ -5198,10 +5198,10 @@ object.values@^1.1.6, object.values@^1.1.7:
     define-properties "^1.2.1"
     es-object-atoms "^1.0.0"
 
-ollama@^0.5.0:
-  version "0.5.0"
-  resolved "https://registry.yarnpkg.com/ollama/-/ollama-0.5.0.tgz#cb9bc709d4d3278c9f484f751b0d9b98b06f4859"
-  integrity sha512-CRtRzsho210EGdK52GrUMohA2pU+7NbgEaBG3DcYeRmvQthDO7E2LHOkLlUUeaYUlNmEd8icbjC02ug9meSYnw==
+ollama@^0.5.10:
+  version "0.5.12"
+  resolved "https://registry.yarnpkg.com/ollama/-/ollama-0.5.12.tgz#d8aadfaff076b2852cf826d928a03d9a40f308b9"
+  integrity sha512-flVH1fn1c9NF7VV3bW9kSu0E+bYc40b4DxL/gS2Debhao35osJFRDiPOj9sIWTMvcyj78Paw1OuhfIe7uhDWfQ==
   dependencies:
     whatwg-fetch "^3.6.20"
-- 
GitLab
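
For reference, a minimal sketch of how the new token is consumed once this patch lands (not part of the diff; the model name and base URL are placeholders). With ollama >= 0.5.10 the JS client accepts a `headers` option in its constructor, which both OllamaAILLM and the agent provider use to attach the Bearer token to every request:

    // sketch — assumes `yarn add ollama@^0.5.10` and the env vars introduced above
    const { Ollama } = require("ollama");

    const authToken = process.env.OLLAMA_AUTH_TOKEN; // optional; only set when Ollama sits behind auth
    const client = new Ollama({
      host: process.env.OLLAMA_BASE_PATH ?? "http://127.0.0.1:11434",
      // same pattern as the patch: attach the header only when a token exists
      headers: authToken ? { Authorization: `Bearer ${authToken}` } : {},
    });

    client
      .chat({ model: "llama2", messages: [{ role: "user", content: "ping" }] })
      .then((res) => console.log(res.message.content))
      .catch(console.error);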
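The model-listing path in customModels.js does not go through the client at all; it reduces to a plain fetch of /api/tags with the same optional header. A standalone sketch of that probe (assumes Node 18+ for global fetch; URL and token come from the env vars above):

    const base = process.env.OLLAMA_BASE_PATH ?? "http://127.0.0.1:11434";
    const token = process.env.OLLAMA_AUTH_TOKEN || null;

    fetch(`${new URL(base).origin}/api/tags`, {
      headers: token ? { Authorization: `Bearer ${token}` } : {},
    })
      .then((res) => {
        // mirrors the error handling in ollamaAIModels()
        if (!res.ok) throw new Error(`Could not reach Ollama server! ${res.status}`);
        return res.json();
      })
      .then(({ models }) => console.log((models ?? []).map((m) => m.name)))
      .catch((e) => console.error(e.message));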