Unverified commit 1a5aacb0, authored by Timothy Carambat, committed by GitHub

Support multi-model whispers (#1444)

parent 3794ef8d
const fs = require("fs"); const fs = require("fs");
const path = require("path"); const path = require("path");
const { v4 } = require("uuid"); const { v4 } = require("uuid");
const defaultWhisper = "Xenova/whisper-small"; // Model Card: https://huggingface.co/Xenova/whisper-small
const fileSize = {
"Xenova/whisper-small": "250mb",
"Xenova/whisper-large": "1.56GB",
};
class LocalWhisper { class LocalWhisper {
constructor() { constructor({ options }) {
// Model Card: https://huggingface.co/Xenova/whisper-small this.model = options?.WhisperModelPref ?? defaultWhisper;
this.model = "Xenova/whisper-small"; this.fileSize = fileSize[this.model];
this.cacheDir = path.resolve( this.cacheDir = path.resolve(
process.env.STORAGE_DIR process.env.STORAGE_DIR
? path.resolve(process.env.STORAGE_DIR, `models`) ? path.resolve(process.env.STORAGE_DIR, `models`)
: path.resolve(__dirname, `../../../server/storage/models`) : path.resolve(__dirname, `../../../server/storage/models`)
); );
this.modelPath = path.resolve(this.cacheDir, "Xenova", "whisper-small"); this.modelPath = path.resolve(this.cacheDir, ...this.model.split("/"));
// Make directory when it does not exist in existing installations // Make directory when it does not exist in existing installations
if (!fs.existsSync(this.cacheDir)) if (!fs.existsSync(this.cacheDir))
fs.mkdirSync(this.cacheDir, { recursive: true }); fs.mkdirSync(this.cacheDir, { recursive: true });
...@@ -104,7 +108,7 @@ class LocalWhisper { ...@@ -104,7 +108,7 @@ class LocalWhisper {
async client() { async client() {
if (!fs.existsSync(this.modelPath)) { if (!fs.existsSync(this.modelPath)) {
this.#log( this.#log(
`The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)` `The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~${this.fileSize})`
); );
} }
......
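A minimal usage sketch of the new constructor (a hypothetical caller for illustration; the options shape mirrors what the collector attaches to each request). When no preference is set, the model falls back to defaultWhisper:

// Hypothetical instantiation, for illustration only.
const whisper = new LocalWhisper({
  options: { WhisperModelPref: "Xenova/whisper-large" },
});
whisper.model; // "Xenova/whisper-large"
whisper.fileSize; // "1.56GB" (surfaced in the first-download log message)
whisper.modelPath; // resolves to <cacheDir>/Xenova/whisper-large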
import { Gauge } from "@phosphor-icons/react";
import { useState } from "react";

export default function NativeTranscriptionOptions({ settings }) {
  const [model, setModel] = useState(settings?.WhisperModelPref);
  return (
    <div className="w-full flex flex-col gap-y-4">
      <LocalWarning model={model} />
      <div className="w-full flex items-center gap-4">
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-4">
            Model Selection
          </label>
          <select
            name="WhisperModelPref"
            defaultValue={model}
            onChange={(e) => setModel(e.target.value)}
            className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
          >
            {["Xenova/whisper-small", "Xenova/whisper-large"].map(
              (value, i) => {
                return (
                  <option key={i} value={value}>
                    {value}
                  </option>
                );
              }
            )}
          </select>
        </div>
      </div>
    </div>
  );
}
function LocalWarning({ model }) {
  switch (model) {
    case "Xenova/whisper-small":
      return <WhisperSmall />;
    case "Xenova/whisper-large":
      return <WhisperLarge />;
    default:
      return <WhisperSmall />;
  }
}

function WhisperSmall() {
  return (
    <div className="flex flex-col md:flex-row md:items-center gap-x-2 text-white mb-4 bg-blue-800/30 w-fit rounded-lg px-4 py-2">
      <div className="gap-x-2 flex items-center">
        <Gauge size={25} />
        <p className="text-sm">
          Running the <b>whisper-small</b> model on a machine with limited RAM
          or CPU can stall AnythingLLM when processing media files.
          <br />
          We recommend at least 2GB of RAM and upload files &lt;10Mb.
          <br />
          <br />
          <i>
            This model will automatically download on the first use. (250mb)
          </i>
        </p>
      </div>
    </div>
  );
}

function WhisperLarge() {
  return (
    <div className="flex flex-col md:flex-row md:items-center gap-x-2 text-white mb-4 bg-blue-800/30 w-fit rounded-lg px-4 py-2">
      <div className="gap-x-2 flex items-center">
        <Gauge size={25} />
        <p className="text-sm">
          Using the <b>whisper-large</b> model on machines with limited RAM or
          CPU can stall AnythingLLM when processing media files. This model is
          substantially larger than the whisper-small.
          <br />
          We recommend at least 8GB of RAM and upload files &lt;10Mb.
          <br />
          <br />
          <i>
            This model will automatically download on the first use. (1.56GB)
          </i>
        </p>
      </div>
    </div>
  );
}
@@ -12,6 +12,23 @@
import LLMItem from "@/components/LLMSelection/LLMItem";
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
import CTAButton from "@/components/lib/CTAButton";

const PROVIDERS = [
  {
    name: "OpenAI",
    value: "openai",
    logo: OpenAiLogo,
    options: (settings) => <OpenAiWhisperOptions settings={settings} />,
    description: "Leverage the OpenAI Whisper-large model using your API key.",
  },
  {
    name: "AnythingLLM Built-In",
    value: "local",
    logo: AnythingLLMIcon,
    options: (settings) => <NativeTranscriptionOptions settings={settings} />,
    description: "Run a built-in whisper model on this instance privately.",
  },
];

export default function TranscriptionModelPreference() {
  const [saving, setSaving] = useState(false);
  const [hasChanges, setHasChanges] = useState(false);
@@ -68,24 +85,6 @@ export default function TranscriptionModelPreference() {
    fetchKeys();
  }, []);

  useEffect(() => {
    const filtered = PROVIDERS.filter((provider) =>
      provider.name.toLowerCase().includes(searchQuery.toLowerCase())
@@ -228,7 +227,7 @@ export default function TranscriptionModelPreference() {
          {selectedProvider &&
            PROVIDERS.find(
              (provider) => provider.value === selectedProvider
            )?.options(settings)}
        </div>
      </div>
    </form>
...
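One design note on the hunk above: PROVIDERS now lives at module scope, and each entry's options is a function of settings rather than pre-built JSX, so the options panel picks up whatever settings are current at render time. A sketch of the lookup, assuming a fetched settings object:

// Hypothetical call, mirroring the render path above:
const provider = PROVIDERS.find((p) => p.value === "local");
const panel = provider?.options(settings); // <NativeTranscriptionOptions settings={settings} />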
@@ -150,6 +150,8 @@ const SystemSettings = {
      // - then it can be shared.
      // --------------------------------------------------------
      WhisperProvider: process.env.WHISPER_PROVIDER || "local",
      WhisperModelPref:
        process.env.WHISPER_MODEL_PREF || "Xenova/whisper-small",

      // --------------------------------------------------------
      // TTS/STT Selection Settings & Configs
...
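For reference, the corresponding server environment variables (an illustrative sketch only; the variable names come from this diff, the values are examples):

# .env — illustrative values
WHISPER_PROVIDER="local"
WHISPER_MODEL_PREF="Xenova/whisper-large"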
@@ -17,6 +17,7 @@ class CollectorApi {
  #attachOptions() {
    return {
      whisperProvider: process.env.WHISPER_PROVIDER || "local",
      WhisperModelPref: process.env.WHISPER_MODEL_PREF,
      openAiKey: process.env.OPEN_AI_KEY || null,
    };
  }
...
@@ -356,6 +356,11 @@ const KEY_MAPPING = {
    checks: [isNotEmpty, supportedTranscriptionProvider],
    postUpdate: [],
  },
  WhisperModelPref: {
    envKey: "WHISPER_MODEL_PREF",
    checks: [validLocalWhisper],
    postUpdate: [],
  },

  // System Settings
  AuthToken: {
@@ -468,6 +473,16 @@ function supportedTTSProvider(input = "") {
  return validSelection ? null : `${input} is not a valid TTS provider.`;
}

function validLocalWhisper(input = "") {
  const validSelection = [
    "Xenova/whisper-small",
    "Xenova/whisper-large",
  ].includes(input);
  return validSelection
    ? null
    : `${input} is not a valid Whisper model selection.`;
}

function supportedLLM(input = "") {
  const validSelection = [
    "openai",
...
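A quick check of the validator's behavior (the return values follow directly from validLocalWhisper above):

validLocalWhisper("Xenova/whisper-large"); // null — accepted
validLocalWhisper("whisper-tiny"); // "whisper-tiny is not a valid Whisper model selection."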