diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
index ad7c08d35c2a407c9c78499da0da7f015b8347af..b3ecab4b1738dc9cd7952d1f96e6af29a03986f8 100644
--- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
@@ -21,10 +21,50 @@ import { useModal } from "@/hooks/useModal";
 import ModalWrapper from "@/components/ModalWrapper";
 import CTAButton from "@/components/lib/CTAButton";
 
+const EMBEDDERS = [
+  {
+    name: "AnythingLLM Embedder",
+    value: "native",
+    logo: AnythingLLMIcon,
+    options: (settings) => <NativeEmbeddingOptions settings={settings} />,
+    description:
+      "Use the built-in embedding engine for AnythingLLM. Zero setup!",
+  },
+  {
+    name: "OpenAI",
+    value: "openai",
+    logo: OpenAiLogo,
+    options: (settings) => <OpenAiOptions settings={settings} />,
+    description: "The standard option for most non-commercial use.",
+  },
+  {
+    name: "Azure OpenAI",
+    value: "azure",
+    logo: AzureOpenAiLogo,
+    options: (settings) => <AzureAiOptions settings={settings} />,
+    description: "The enterprise option of OpenAI hosted on Azure services.",
+  },
+  {
+    name: "Local AI",
+    value: "localai",
+    logo: LocalAiLogo,
+    options: (settings) => <LocalAiOptions settings={settings} />,
+    description: "Run embedding models locally on your own machine.",
+  },
+  {
+    name: "Ollama",
+    value: "ollama",
+    logo: OllamaLogo,
+    options: (settings) => <OllamaEmbeddingOptions settings={settings} />,
+    description: "Run embedding models locally on your own machine.",
+  },
+];
+
 export default function GeneralEmbeddingPreference() {
   const [saving, setSaving] = useState(false);
   const [hasChanges, setHasChanges] = useState(false);
   const [hasEmbeddings, setHasEmbeddings] = useState(false);
+  const [hasCachedEmbeddings, setHasCachedEmbeddings] = useState(false);
   const [settings, setSettings] = useState(null);
   const [loading, setLoading] = useState(true);
   const [searchQuery, setSearchQuery] = useState("");
@@ -34,12 +74,24 @@ export default function GeneralEmbeddingPreference() {
   const searchInputRef = useRef(null);
   const { isOpen, openModal, closeModal } = useModal();
 
+  function embedderModelChanged(formEl) {
+    try {
+      const newModel = new FormData(formEl).get("EmbeddingModelPref") ?? null;
+      if (newModel === null) return false;
+      return settings?.EmbeddingModelPref !== newModel;
+    } catch (error) {
+      console.error(error);
+    }
+    return false;
+  }
+
   const handleSubmit = async (e) => {
     e.preventDefault();
     if (
-      selectedEmbedder !== settings?.EmbeddingEngine &&
+      (selectedEmbedder !== settings?.EmbeddingEngine ||
+        embedderModelChanged(e.target)) &&
       hasChanges &&
-      hasEmbeddings
+      (hasEmbeddings || hasCachedEmbeddings)
     ) {
       openModal();
     } else {
@@ -89,50 +141,12 @@ export default function GeneralEmbeddingPreference() {
       setSettings(_settings);
       setSelectedEmbedder(_settings?.EmbeddingEngine || "native");
       setHasEmbeddings(_settings?.HasExistingEmbeddings || false);
+      setHasCachedEmbeddings(_settings?.HasCachedEmbeddings || false);
       setLoading(false);
     }
     fetchKeys();
   }, []);
 
-  const EMBEDDERS = [
-    {
-      name: "AnythingLLM Embedder",
-      value: "native",
-      logo: AnythingLLMIcon,
-      options: <NativeEmbeddingOptions settings={settings} />,
-      description:
-        "Use the built-in embedding engine for AnythingLLM. Zero setup!",
Zero setup!", - }, - { - name: "OpenAI", - value: "openai", - logo: OpenAiLogo, - options: <OpenAiOptions settings={settings} />, - description: "The standard option for most non-commercial use.", - }, - { - name: "Azure OpenAI", - value: "azure", - logo: AzureOpenAiLogo, - options: <AzureAiOptions settings={settings} />, - description: "The enterprise option of OpenAI hosted on Azure services.", - }, - { - name: "Local AI", - value: "localai", - logo: LocalAiLogo, - options: <LocalAiOptions settings={settings} />, - description: "Run embedding models locally on your own machine.", - }, - { - name: "Ollama", - value: "ollama", - logo: OllamaLogo, - options: <OllamaEmbeddingOptions settings={settings} />, - description: "Run embedding models locally on your own machine.", - }, - ]; - useEffect(() => { const filtered = EMBEDDERS.filter((embedder) => embedder.name.toLowerCase().includes(searchQuery.toLowerCase()) @@ -282,7 +296,7 @@ export default function GeneralEmbeddingPreference() { {selectedEmbedder && EMBEDDERS.find( (embedder) => embedder.value === selectedEmbedder - )?.options} + )?.options(settings)} </div> </div> </form> @@ -290,7 +304,7 @@ export default function GeneralEmbeddingPreference() { )} <ModalWrapper isOpen={isOpen}> <ChangeWarningModal - warningText="Switching the vector database will ignore previously embedded documents and future similarity search results. They will need to be re-added to each workspace." + warningText="Switching the embedding model will break previously embedded documents from working during chat. They will need to un-embed from every workspace and fully removed and re-uploaded so they can be embed by the new embedding model." onClose={closeModal} onConfirm={handleSaveSettings} /> diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index a6a7e50f0d9a521db45d0399c0f27fda34af0841..20c161cd52dde343ddd3992d35955911bc4e2db2 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -87,6 +87,7 @@ const SystemSettings = { }, }, currentSettings: async function () { + const { hasVectorCachedFiles } = require("../utils/files"); const llmProvider = process.env.LLM_PROVIDER; const vectorDB = process.env.VECTOR_DB; return { @@ -104,7 +105,8 @@ const SystemSettings = { // Embedder Provider Selection Settings & Configs // -------------------------------------------------------- EmbeddingEngine: process.env.EMBEDDING_ENGINE, - HasExistingEmbeddings: await this.hasEmbeddings(), + HasExistingEmbeddings: await this.hasEmbeddings(), // check if they have any currently embedded documents active in workspaces. + HasCachedEmbeddings: hasVectorCachedFiles(), // check if they any currently cached embedded docs. EmbeddingBasePath: process.env.EMBEDDING_BASE_PATH, EmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF, EmbeddingModelMaxChunkLength: diff --git a/server/utils/files/index.js b/server/utils/files/index.js index 83dd229cca6ddcfebe8cb87c8fe4ad4c8a72a62b..fea6f7f7e139c6357c997f64618384c4c9517c3f 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -192,6 +192,19 @@ function normalizePath(filepath = "") { return result; } +// Check if the vector-cache folder is empty or not +// useful for it the user is changing embedders as this will +// break the previous cache. 
+function hasVectorCachedFiles() {
+  try {
+    return (
+      fs.readdirSync(vectorCachePath)?.filter((name) => name.endsWith(".json"))
+        .length !== 0
+    );
+  } catch {}
+  return false;
+}
+
 module.exports = {
   findDocumentInDocuments,
   cachedVectorInformation,
@@ -203,4 +216,5 @@
   normalizePath,
   isWithin,
   documentsPath,
+  hasVectorCachedFiles,
 };
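
Reviewer note: hasVectorCachedFiles() returns true as soon as any .json file sits in the vector-cache folder, which is what lets the frontend raise the warning modal via (hasEmbeddings || hasCachedEmbeddings) even when no workspace currently has active embeddings. Below is a minimal standalone sketch of the same check for poking at the behavior outside the server. The file name and storage path are assumptions for illustration (the real module resolves vectorCachePath internally, and the default server layout keeps the cache under storage/vector-cache); adjust before running with node.

// probe.js -- hypothetical standalone harness, not part of this PR.
// VECTOR_CACHE stands in for the vectorCachePath constant that
// server/utils/files/index.js resolves internally; adjust as needed.
const fs = require("fs");
const path = require("path");

const VECTOR_CACHE = path.resolve(__dirname, "storage", "vector-cache");

function hasVectorCachedFiles() {
  try {
    // .some() short-circuits on the first cached .json vector file,
    // equivalent to the PR's filter(...).length !== 0 check.
    return fs.readdirSync(VECTOR_CACHE).some((name) => name.endsWith(".json"));
  } catch {
    // A missing folder (e.g. a fresh install) simply means no cache exists.
    return false;
  }
}

console.log(`Cached embeddings present: ${hasVectorCachedFiles()}`);

Design-wise, .some() is a drop-in for the filter-then-count pattern and skips building the intermediate array; for the small directory sizes involved either form is fine, so the sketch is behaviorally equivalent to the code in the diff.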