diff --git a/.hadolint.yaml b/.hadolint.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b76a5107288288a6d6300380d8558b50bcaea578 --- /dev/null +++ b/.hadolint.yaml @@ -0,0 +1,8 @@ +failure-threshold: warning +ignored: + - DL3008 + - DL3013 +format: tty +trustedRegistries: + - docker.io + - gcr.io diff --git a/.vscode/settings.json b/.vscode/settings.json index 096f1c9f30670c4701bfe5651b2c4870715f9433..72b612b862d91612789e701e6feb5f9991844d4e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,16 +4,20 @@ "Astra", "Dockerized", "Embeddable", + "GROQ", "hljs", + "inferencing", "Langchain", "Milvus", "Mintplex", "Ollama", "openai", + "openrouter", "Qdrant", "vectordbs", "Weaviate", "Zilliz" ], - "eslint.experimental.useFlatConfig": true -} \ No newline at end of file + "eslint.experimental.useFlatConfig": true, + "docker.languageserver.formatter.ignoreMultilineInstructions": true +} diff --git a/collector/utils/files/index.js b/collector/utils/files/index.js index 3e6ce3445e19adf42612d435b26b87e4f3439413..4bca62f9f9c51ad4d8f6bc0a725c7c0460927952 100644 --- a/collector/utils/files/index.js +++ b/collector/utils/files/index.js @@ -1,28 +1,16 @@ const fs = require("fs"); const path = require("path"); -const { getType } = require("mime"); +const { MimeDetector } = require("./mime"); function isTextType(filepath) { - if (!fs.existsSync(filepath)) return false; - // These are types of mime primary classes that for sure - // cannot also for forced into a text type. - const nonTextTypes = ["multipart", "image", "model", "audio", "video"]; - // These are full-mimes we for sure cannot parse or interpret as text - // documents - const BAD_MIMES = [ - "application/octet-stream", - "application/zip", - "application/pkcs8", - "application/vnd.microsoft.portable-executable", - "application/x-msdownload", - ]; - try { - const mime = getType(filepath); - if (BAD_MIMES.includes(mime)) return false; + if (!fs.existsSync(filepath)) return false; + const mimeLib = new MimeDetector(); + const mime = mimeLib.getType(filepath); + if (mimeLib.badMimes.includes(mime)) return false; const type = mime.split("/")[0]; - if (nonTextTypes.includes(type)) return false; + if (mimeLib.nonTextTypes.includes(type)) return false; return true; } catch { return false; diff --git a/collector/utils/files/mime.js b/collector/utils/files/mime.js new file mode 100644 index 0000000000000000000000000000000000000000..feabd62092a10bf736219488b6259735813aad1b --- /dev/null +++ b/collector/utils/files/mime.js @@ -0,0 +1,37 @@ +const MimeLib = require("mime"); + +class MimeDetector { + nonTextTypes = ["multipart", "image", "model", "audio", "video"]; + badMimes = [ + "application/octet-stream", + "application/zip", + "application/pkcs8", + "application/vnd.microsoft.portable-executable", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", // XLSX are binaries and need to be handled explicitly. + "application/x-msdownload", + ]; + + constructor() { + this.lib = MimeLib; + this.setOverrides(); + } + + setOverrides() { + // the .ts extension maps to video/mp2t because of https://en.wikipedia.org/wiki/MPEG_transport_stream + // which has had this extension far before TS was invented. So need to force re-map this MIME map. 
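+    // Example of the intended effect (illustrative only, not shipped code):
+    //   MimeLib.getType("index.ts")  -> "video/mp2t" before this override runs,
+    //   and "text/plain" afterwards, since define() below is called with force = true.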
+ this.lib.define( + { + "text/plain": ["ts", "py", "opts", "lock", "jsonl"], + }, + true + ); + } + + getType(filepath) { + return this.lib.getType(filepath); + } +} + +module.exports = { + MimeDetector, +}; diff --git a/docker/.env.example b/docker/.env.example index ba33bd5c0433028bef8c4f00114042a357ef56ab..ae4913dc44c6680ee8fdd74e0f1175714c23a325 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -61,6 +61,10 @@ GID='1000' # HUGGING_FACE_LLM_API_KEY=hf_xxxxxx # HUGGING_FACE_LLM_TOKEN_LIMIT=8000 +# LLM_PROVIDER='groq' +# GROQ_API_KEY=gsk_abcxyz +# GROQ_MODEL_PREF=llama2-70b-4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/docker/Dockerfile b/docker/Dockerfile index b1ea62a63f14d149d8c6c715e939cb34e99941b4..2edbadb2cfd64aff05f30fcb7548384bc791766c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,12 +1,17 @@ # Setup base image -FROM ubuntu:jammy-20230522 AS base +FROM ubuntu:jammy-20230916 AS base +# Build arguments ARG ARG_UID=1000 ARG ARG_GID=1000 FROM base AS build-arm64 RUN echo "Preparing build of AnythingLLM image for arm64 architecture" +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Install system dependencies +# hadolint ignore=DL3008,DL3013 RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -yq --no-install-recommends \ unzip curl gnupg libgfortran5 libgbm1 tzdata netcat \ @@ -25,8 +30,8 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ && rm yarn_1.22.19_all.deb # Create a group and user with specific UID and GID -RUN groupadd -g $ARG_GID anythingllm && \ - useradd -u $ARG_UID -m -d /app -s /bin/bash -g anythingllm anythingllm && \ +RUN groupadd -g "$ARG_GID" anythingllm && \ + useradd -l -u "$ARG_UID" -m -d /app -s /bin/bash -g anythingllm anythingllm && \ mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R anythingllm:anythingllm /app # Copy docker helper scripts @@ -61,6 +66,10 @@ RUN echo "Done running arm64 specific installtion steps" FROM base AS build-amd64 RUN echo "Preparing build of AnythingLLM image for non-ARM architecture" +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Install system dependencies +# hadolint ignore=DL3008,DL3013 RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -yq --no-install-recommends \ curl gnupg libgfortran5 libgbm1 tzdata netcat \ @@ -79,8 +88,8 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ && rm yarn_1.22.19_all.deb # Create a group and user with specific UID and GID -RUN groupadd -g $ARG_GID anythingllm && \ - useradd -u $ARG_UID -m -d /app -s /bin/bash -g anythingllm anythingllm && \ +RUN groupadd -g "$ARG_GID" anythingllm && \ + useradd -l -u "$ARG_UID" -m -d /app -s /bin/bash -g anythingllm anythingllm && \ mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R anythingllm:anythingllm /app # Copy docker helper scripts @@ -95,6 +104,8 @@ RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \ ############################################# # COMMON BUILD FLOW FOR ALL ARCHS ############################################# + +# hadolint ignore=DL3006 FROM build-${TARGETARCH} AS build RUN echo "Running common build flow of AnythingLLM image for all architectures" @@ -102,43 +113,54 @@ USER anythingllm WORKDIR /app # Install frontend dependencies -FROM build as frontend-deps +FROM build AS frontend-deps COPY ./frontend/package.json ./frontend/yarn.lock ./frontend/ -RUN cd 
./frontend/ && yarn install --network-timeout 100000 && yarn cache clean +WORKDIR /app/frontend +RUN yarn install --network-timeout 100000 && yarn cache clean +WORKDIR /app # Install server dependencies -FROM build as server-deps +FROM build AS server-deps COPY ./server/package.json ./server/yarn.lock ./server/ -RUN cd ./server/ && yarn install --production --network-timeout 100000 && yarn cache clean +WORKDIR /app/server +RUN yarn install --production --network-timeout 100000 && yarn cache clean +WORKDIR /app # Compile Llama.cpp bindings for node-llama-cpp for this operating system. USER root -RUN cd ./server && npx --no node-llama-cpp download +WORKDIR /app/server +RUN npx --no node-llama-cpp download +WORKDIR /app USER anythingllm # Build the frontend -FROM frontend-deps as build-stage +FROM frontend-deps AS build-stage COPY ./frontend/ ./frontend/ -RUN cd ./frontend/ && yarn build && yarn cache clean +WORKDIR /app/frontend +RUN yarn build && yarn cache clean +WORKDIR /app # Setup the server -FROM server-deps as production-stage +FROM server-deps AS production-stage COPY --chown=anythingllm:anythingllm ./server/ ./server/ # Copy built static frontend files to the server public directory -COPY --from=build-stage /app/frontend/dist ./server/public +COPY --chown=anythingllm:anythingllm --from=build-stage /app/frontend/dist ./server/public # Copy the collector COPY --chown=anythingllm:anythingllm ./collector/ ./collector/ # Install collector dependencies +WORKDIR /app/collector ENV PUPPETEER_DOWNLOAD_BASE_URL=https://storage.googleapis.com/chrome-for-testing-public -RUN cd /app/collector && yarn install --production --network-timeout 100000 && yarn cache clean +RUN yarn install --production --network-timeout 100000 && yarn cache clean # Migrate and Run Prisma against known schema -RUN cd ./server && npx prisma generate --schema=./prisma/schema.prisma -RUN cd ./server && npx prisma migrate deploy --schema=./prisma/schema.prisma +WORKDIR /app/server +RUN npx prisma generate --schema=./prisma/schema.prisma && \ + npx prisma migrate deploy --schema=./prisma/schema.prisma +WORKDIR /app # Setup the environment ENV NODE_ENV=production @@ -152,4 +174,4 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \ CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1 # Run the server -ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"] diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index 3d890db1deef40ccd1bdd5d7b4000c9f04eed581..1ac69e5baf46ff19c0d817460b63d5f7e73fc52f 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -1,9 +1,10 @@ #!/bin/bash -{ cd /app/server/ &&\ - npx prisma generate --schema=./prisma/schema.prisma &&\ - npx prisma migrate deploy --schema=./prisma/schema.prisma &&\ - node /app/server/index.js +{ + cd /app/server/ && + npx prisma generate --schema=./prisma/schema.prisma && + npx prisma migrate deploy --schema=./prisma/schema.prisma && + node /app/server/index.js } & { node /app/collector/index.js; } & wait -n -exit $? \ No newline at end of file +exit $? 
diff --git a/docker/docker-healthcheck.sh b/docker/docker-healthcheck.sh index 45a88477d9195d322973d2aa1fbd45461310af08..49bee3e1bf0e090754226ffd7baf2f5c10e5eb4f 100644 --- a/docker/docker-healthcheck.sh +++ b/docker/docker-healthcheck.sh @@ -4,10 +4,10 @@ response=$(curl --write-out '%{http_code}' --silent --output /dev/null http://localhost:3001/api/ping) # If the HTTP response code is 200 (OK), the server is up -if [ $response -eq 200 ]; then - echo "Server is up" - exit 0 +if [ "$response" -eq 200 ]; then + echo "Server is up" + exit 0 else - echo "Server is down" - exit 1 + echo "Server is down" + exit 1 fi diff --git a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx index 3d493f1c43679676152b5435adcea1f27edead35..6bc18a5ac6f367fd21650e551107e820ebbc57aa 100644 --- a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx +++ b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx @@ -48,7 +48,13 @@ export default function AnthropicAiOptions({ settings, showAlert = false }) { required={true} className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" > - {["claude-2", "claude-instant-1"].map((model) => { + {[ + "claude-instant-1.2", + "claude-2.0", + "claude-2.1", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + ].map((model) => { return ( <option key={model} value={model}> {model} diff --git a/frontend/src/components/LLMSelection/GroqAiOptions/index.jsx b/frontend/src/components/LLMSelection/GroqAiOptions/index.jsx new file mode 100644 index 0000000000000000000000000000000000000000..cc6fbbcc061960609ef79e6e8108d0264150709c --- /dev/null +++ b/frontend/src/components/LLMSelection/GroqAiOptions/index.jsx @@ -0,0 +1,41 @@ +export default function GroqAiOptions({ settings }) { + return ( + <div className="flex gap-x-4"> + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-4"> + Groq API Key + </label> + <input + type="password" + name="GroqApiKey" + className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5" + placeholder="Groq API Key" + defaultValue={settings?.GroqApiKey ? 
"*".repeat(20) : ""} + required={true} + autoComplete="off" + spellCheck={false} + /> + </div> + + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-4"> + Chat Model Selection + </label> + <select + name="GroqModelPref" + defaultValue={settings?.GroqModelPref || "llama2-70b-4096"} + required={true} + className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" + > + {["llama2-70b-4096", "mixtral-8x7b-32768"].map((model) => { + return ( + <option key={model} value={model}> + {model} + </option> + ); + })} + </select> + </div> + </div> + ); +} diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/index.jsx index 2ed9110266db6c7fd362ea6566ead22b928f16c7..406fda8687fef28f647afff8d3be9498097307f2 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/index.jsx @@ -105,7 +105,7 @@ export default function UploadFile({ workspace, fetchKeys, setLoading }) { </div> </div> ) : ( - <div className="grid grid-cols-2 gap-2 overflow-auto max-h-[400px] p-1 overflow-y-auto"> + <div className="grid grid-cols-2 gap-2 overflow-auto max-h-[180px] p-1 overflow-y-scroll no-scroll"> {files.map((file) => ( <FileUploadProgress key={file.uid} diff --git a/frontend/src/hooks/useGetProvidersModels.js b/frontend/src/hooks/useGetProvidersModels.js index 1f8cce9889f04844f9d058dd06f225ca89b3c24b..57a95ea7aeb2b41e09c04fe7819894ffcfe618f8 100644 --- a/frontend/src/hooks/useGetProvidersModels.js +++ b/frontend/src/hooks/useGetProvidersModels.js @@ -13,12 +13,19 @@ const PROVIDER_DEFAULT_MODELS = { "gpt-4-32k", ], gemini: ["gemini-pro"], - anthropic: ["claude-2", "claude-instant-1"], + anthropic: [ + "claude-instant-1.2", + "claude-2.0", + "claude-2.1", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + ], azure: [], lmstudio: [], localai: [], ollama: [], togetherai: [], + groq: ["llama2-70b-4096", "mixtral-8x7b-32768"], native: [], }; diff --git a/frontend/src/media/llmprovider/groq.png b/frontend/src/media/llmprovider/groq.png new file mode 100644 index 0000000000000000000000000000000000000000..31564145e1068131f3cf0a49766efbd68a0b4d54 Binary files /dev/null and b/frontend/src/media/llmprovider/groq.png differ diff --git a/frontend/src/pages/Admin/Workspaces/WorkspaceRow/EditWorkspaceUsersModal/index.jsx b/frontend/src/pages/Admin/Workspaces/WorkspaceRow/EditWorkspaceUsersModal/index.jsx index cd8d5f01189adcd41f49d0d48f78c3bdcb406b88..1f1ff9a928bd5082f860ee8a63b79a41be17e272 100644 --- a/frontend/src/pages/Admin/Workspaces/WorkspaceRow/EditWorkspaceUsersModal/index.jsx +++ b/frontend/src/pages/Admin/Workspaces/WorkspaceRow/EditWorkspaceUsersModal/index.jsx @@ -52,7 +52,7 @@ export default function EditWorkspaceUsersModal({ </div> <form onSubmit={handleUpdate}> <div className="p-6 space-y-6 flex h-full w-full"> - <div className="w-full flex flex-col gap-y-4"> + <div className="w-full flex flex-col gap-y-4 max-h-[350px] overflow-y-scroll"> {users .filter((user) => user.role !== "admin") .map((user) => { diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx index ac4c1e3d1e4efc830f7d36cef6481a1d6648631d..c7b6fb7bd1254bb9bf33ec3f5a2b804ac5ee9dce 100644 --- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx +++ 
b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx @@ -16,6 +16,7 @@ import MistralLogo from "@/media/llmprovider/mistral.jpeg"; import HuggingFaceLogo from "@/media/llmprovider/huggingface.png"; import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; +import GroqLogo from "@/media/llmprovider/groq.png"; import PreLoader from "@/components/Preloader"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions"; @@ -28,11 +29,12 @@ import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions"; import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions"; import MistralOptions from "@/components/LLMSelection/MistralOptions"; import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions"; +import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions"; +import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions"; +import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import { MagnifyingGlass } from "@phosphor-icons/react"; -import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions"; -import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions"; export default function GeneralLLMPreference() { const [saving, setSaving] = useState(false); @@ -173,6 +175,14 @@ export default function GeneralLLMPreference() { options: <OpenRouterOptions settings={settings} />, description: "A unified interface for LLMs.", }, + { + name: "Groq", + value: "groq", + logo: GroqLogo, + options: <GroqAiOptions settings={settings} />, + description: + "The fastest LLM inferencing available for real-time AI applications.", + }, { name: "Native", value: "native", diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index 5beec3c1764f4dffffd64d4041329c4af634f865..af3b3a9d03b908df78fb6d2c10c161de1786c685 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -13,6 +13,7 @@ import MistralLogo from "@/media/llmprovider/mistral.jpeg"; import HuggingFaceLogo from "@/media/llmprovider/huggingface.png"; import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; +import GroqLogo from "@/media/llmprovider/groq.png"; import ZillizLogo from "@/media/vectordbs/zilliz.png"; import AstraDBLogo from "@/media/vectordbs/astraDB.png"; import ChromaLogo from "@/media/vectordbs/chroma.png"; @@ -127,6 +128,14 @@ const LLM_SELECTION_PRIVACY = { ], logo: OpenRouterLogo, }, + groq: { + name: "Groq", + description: [ + "Your chats will not be used for training", + "Your prompts and document text used in response creation are visible to Groq", + ], + logo: GroqLogo, + }, }; const VECTOR_DB_PRIVACY = { diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index 433914ae66dd315ba4650f4a99c8ed016c20fcff..33883dc7f0cb63b27ac7232e2e9fb17f5b0ba944 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -13,6 +13,7 @@ import MistralLogo from "@/media/llmprovider/mistral.jpeg"; import 
HuggingFaceLogo from "@/media/llmprovider/huggingface.png"; import PerplexityLogo from "@/media/llmprovider/perplexity.png"; import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg"; +import GroqLogo from "@/media/llmprovider/groq.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions"; import AnthropicAiOptions from "@/components/LLMSelection/AnthropicAiOptions"; @@ -25,12 +26,13 @@ import MistralOptions from "@/components/LLMSelection/MistralOptions"; import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions"; import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions"; import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions"; +import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions"; +import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; import paths from "@/utils/paths"; import showToast from "@/utils/toast"; import { useNavigate } from "react-router-dom"; -import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions"; const TITLE = "LLM Preference"; const DESCRIPTION = @@ -147,6 +149,14 @@ export default function LLMPreference({ options: <OpenRouterOptions settings={settings} />, description: "A unified interface for LLMs.", }, + { + name: "Groq", + value: "groq", + logo: GroqLogo, + options: <GroqAiOptions settings={settings} />, + description: + "The fastest LLM inferencing available for real-time AI applications.", + }, { name: "Native", value: "native", diff --git a/server/.env.example b/server/.env.example index 0ca826e89584ceb7c01739eee709ad53f0883f36..88e60182ce0283874bfa19ecb187637cf4fc2bf3 100644 --- a/server/.env.example +++ b/server/.env.example @@ -58,6 +58,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # HUGGING_FACE_LLM_API_KEY=hf_xxxxxx # HUGGING_FACE_LLM_TOKEN_LIMIT=8000 +# LLM_PROVIDER='groq' +# GROQ_API_KEY=gsk_abcxyz +# GROQ_MODEL_PREF=llama2-70b-4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 31d5c59a8d5746b27c9958d523afc4ca4f2e28f6..b06fe123004beaed95bd29350d562145c75ee30a 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -219,12 +219,25 @@ const SystemSettings = { AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF, } : {}), + + ...(llmProvider === "groq" + ? { + GroqApiKey: !!process.env.GROQ_API_KEY, + GroqModelPref: process.env.GROQ_MODEL_PREF, + + // For embedding credentials when groq is selected. + OpenAiKey: !!process.env.OPEN_AI_KEY, + AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT, + AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY, + AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF, + } + : {}), ...(llmProvider === "native" ? { NativeLLMModelPref: process.env.NATIVE_LLM_MODEL_PREF, NativeLLMTokenLimit: process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT, - // For embedding credentials when ollama is selected. + // For embedding credentials when native is selected. 
OpenAiKey: !!process.env.OPEN_AI_KEY, AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT, AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY, diff --git a/server/package.json b/server/package.json index bf1b85c06e866fd1425aa794d833029812ea8fad..e521d4b62411163dae1f0e4475084e195492cea0 100644 --- a/server/package.json +++ b/server/package.json @@ -20,7 +20,7 @@ "seed": "node prisma/seed.js" }, "dependencies": { - "@anthropic-ai/sdk": "^0.8.1", + "@anthropic-ai/sdk": "^0.16.1", "@azure/openai": "1.0.0-beta.10", "@datastax/astra-db-ts": "^0.1.3", "@google/generative-ai": "^0.1.3", diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js index 56d3a80f0a4232bbfa107c1f2290a56a7fdacb06..a48058e81d139501fcfd0c5306dacc4668931376 100644 --- a/server/utils/AiProviders/anthropic/index.js +++ b/server/utils/AiProviders/anthropic/index.js @@ -1,6 +1,6 @@ const { v4 } = require("uuid"); const { chatPrompt } = require("../../chats"); - +const { writeResponseChunk } = require("../../helpers/chat/responses"); class AnthropicLLM { constructor(embedder = null, modelPreference = null) { if (!process.env.ANTHROPIC_API_KEY) @@ -13,7 +13,7 @@ class AnthropicLLM { }); this.anthropic = anthropic; this.model = - modelPreference || process.env.ANTHROPIC_MODEL_PREF || "claude-2"; + modelPreference || process.env.ANTHROPIC_MODEL_PREF || "claude-2.0"; this.limits = { history: this.promptWindowLimit() * 0.15, system: this.promptWindowLimit() * 0.15, @@ -35,17 +35,29 @@ class AnthropicLLM { promptWindowLimit() { switch (this.model) { - case "claude-instant-1": - return 72_000; - case "claude-2": + case "claude-instant-1.2": + return 100_000; + case "claude-2.0": return 100_000; + case "claude-2.1": + return 200_000; + case "claude-3-opus-20240229": + return 200_000; + case "claude-3-sonnet-20240229": + return 200_000; default: - return 72_000; // assume a claude-instant-1 model + return 100_000; // assume a claude-instant-1.2 model } } isValidChatCompletionModel(modelName = "") { - const validModels = ["claude-2", "claude-instant-1"]; + const validModels = [ + "claude-instant-1.2", + "claude-2.0", + "claude-2.1", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + ]; return validModels.includes(modelName); } @@ -62,36 +74,43 @@ class AnthropicLLM { chatHistory = [], userPrompt = "", }) { - return `\n\nHuman: Please read question supplied within the <question> tags. Using all information generate an answer to the question and output it within <${ - this.answerKey - }> tags. Previous conversations can be used within the <history> tags and can be used to influence the output. Content between the <system> tag is additional information and instruction that will impact how answers are formatted or responded to. Additional contextual information retrieved to help answer the users specific query is available to use for answering and can be found between <context> tags. When no <context> tags may are present use the knowledge available and in the conversation to answer. When one or more <context> tags are available you will use those to help answer the question or augment pre-existing knowledge. You should never say "Based on the provided context" or other phrasing that is not related to the user question. 
- <system>${systemPrompt}</system> - ${contextTexts - .map((text, i) => { - return `<context>${text}</context>\n`; - }) - .join("")} - <history>${chatHistory.map((history) => { - switch (history.role) { - case "assistant": - return `\n\nAssistant: ${history.content}`; - case "user": - return `\n\nHuman: ${history.content}`; - default: - return "\n"; - } - })}</history> - <question>${userPrompt}</question> - \n\nAssistant:`; + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + + return [prompt, ...chatHistory, { role: "user", content: userPrompt }]; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!this.isValidChatCompletionModel(this.model)) + throw new Error( + `Anthropic chat: ${this.model} is not valid for chat completion!` + ); + + try { + const response = await this.anthropic.messages.create({ + model: this.model, + max_tokens: 4096, + system: messages[0].content, // Strip out the system message + messages: messages.slice(1), // Pop off the system message + temperature: Number(temperature ?? this.defaultTemp), + }); + + return response.content[0].text; + } catch (error) { + console.log(error); + return error; + } } - async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { + async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { if (!this.isValidChatCompletionModel(this.model)) throw new Error( `Anthropic chat: ${this.model} is not valid for chat completion!` ); - const compressedPrompt = await this.compressMessages( + const messages = await this.compressMessages( { systemPrompt: chatPrompt(workspace), userPrompt: prompt, @@ -99,58 +118,85 @@ class AnthropicLLM { }, rawHistory ); - const { content, error } = await this.anthropic.completions - .create({ - model: this.model, - max_tokens_to_sample: 300, - prompt: compressedPrompt, - }) - .then((res) => { - const { completion } = res; - const re = new RegExp( - "(?:<" + this.answerKey + ">)([\\s\\S]*)(?:</" + this.answerKey + ">)" - ); - const response = completion.match(re)?.[1]?.trim(); - if (!response) - throw new Error("Anthropic: No response could be parsed."); - return { content: response, error: null }; - }) - .catch((e) => { - return { content: null, error: e.message }; - }); - if (error) throw new Error(error); - return content; + const streamRequest = await this.anthropic.messages.stream({ + model: this.model, + max_tokens: 4096, + system: messages[0].content, // Strip out the system message + messages: messages.slice(1), // Pop off the system message + temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), + }); + return streamRequest; } - async getChatCompletion(prompt = "", _opts = {}) { + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { if (!this.isValidChatCompletionModel(this.model)) throw new Error( - `Anthropic chat: ${this.model} is not valid for chat completion!` + `OpenAI chat: ${this.model} is not valid for chat completion!` ); - const { content, error } = await this.anthropic.completions - .create({ - model: this.model, - max_tokens_to_sample: 300, - prompt, - }) - .then((res) => { - const { completion } = res; - const re = new RegExp( - "(?:<" + this.answerKey + ">)([\\s\\S]*)(?:</" + this.answerKey + ">)" - ); - const response = completion.match(re)?.[1]?.trim(); - if (!response) - throw new Error("Anthropic: No response could be parsed."); - return { content: response, error: null }; - }) - .catch((e) => { - return { content: null, error: e.message }; + const streamRequest = await this.anthropic.messages.stream({ + model: this.model, + max_tokens: 4096, + system: messages[0].content, // Strip out the system message + messages: messages.slice(1), // Pop off the system message + temperature: Number(temperature ?? this.defaultTemp), + }); + return streamRequest; + } + + handleStream(response, stream, responseProps) { + return new Promise((resolve) => { + let fullText = ""; + const { uuid = v4(), sources = [] } = responseProps; + + stream.on("streamEvent", (message) => { + const data = message; + if ( + data.type === "content_block_delta" && + data.delta.type === "text_delta" + ) { + const text = data.delta.text; + fullText += text; + + writeResponseChunk(response, { + uuid, + sources, + type: "textResponseChunk", + textResponse: text, + close: false, + error: false, + }); + } + + if ( + message.type === "message_stop" || + (data.stop_reason && data.stop_reason === "end_turn") + ) { + writeResponseChunk(response, { + uuid, + sources, + type: "textResponseChunk", + textResponse: "", + close: true, + error: false, + }); + resolve(fullText); + } }); + }); + } - if (error) throw new Error(error); - return content; + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); } async compressMessages(promptArgs = {}, rawHistory = []) { diff --git a/server/utils/AiProviders/groq/index.js b/server/utils/AiProviders/groq/index.js new file mode 100644 index 0000000000000000000000000000000000000000..1b15fe1fe8f5a786b273fb2e8193dfd33dda0e3b --- /dev/null +++ b/server/utils/AiProviders/groq/index.js @@ -0,0 +1,207 @@ +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); +const { chatPrompt } = require("../../chats"); +const { handleDefaultStreamResponse } = require("../../helpers/chat/responses"); + +class GroqLLM { + constructor(embedder = null, modelPreference = null) { + const { Configuration, OpenAIApi } = require("openai"); + if (!process.env.GROQ_API_KEY) throw new Error("No Groq API key was set."); + + const config = new Configuration({ + basePath: "https://api.groq.com/openai/v1", + apiKey: process.env.GROQ_API_KEY, + }); + + this.openai = new OpenAIApi(config); + this.model = + modelPreference || process.env.GROQ_MODEL_PREF || "llama2-70b-4096"; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + this.embedder = !embedder ? 
new NativeEmbedder() : embedder; + this.defaultTemp = 0.7; + } + + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); + } + + streamingEnabled() { + return "streamChat" in this && "streamGetChatCompletion" in this; + } + + promptWindowLimit() { + switch (this.model) { + case "llama2-70b-4096": + return 4096; + case "mixtral-8x7b-32768": + return 32_768; + default: + return 4096; + } + } + + async isValidChatCompletionModel(modelName = "") { + const validModels = ["llama2-70b-4096", "mixtral-8x7b-32768"]; + const isPreset = validModels.some((model) => modelName === model); + if (isPreset) return true; + + const model = await this.openai + .retrieveModel(modelName) + .then((res) => res.data) + .catch(() => null); + return !!model; + } + + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [prompt, ...chatHistory, { role: "user", content: userPrompt }]; + } + + async isSafe(_input = "") { + // Not implemented so must be stubbed + return { safe: true, reasons: [] }; + } + + async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `Groq chat: ${this.model} is not valid for chat completion!` + ); + + const textResponse = await this.openai + .createChatCompletion({ + model: this.model, + temperature: Number(workspace?.openAiTemp ?? this.defaultTemp), + n: 1, + messages: await this.compressMessages( + { + systemPrompt: chatPrompt(workspace), + userPrompt: prompt, + chatHistory, + }, + rawHistory + ), + }) + .then((json) => { + const res = json.data; + if (!res.hasOwnProperty("choices")) + throw new Error("GroqAI chat: No results!"); + if (res.choices.length === 0) + throw new Error("GroqAI chat: No results length!"); + return res.choices[0].message.content; + }) + .catch((error) => { + throw new Error( + `GroqAI::createChatCompletion failed with: ${error.message}` + ); + }); + + return textResponse; + } + + async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `GroqAI:streamChat: ${this.model} is not valid for chat completion!` + ); + + const streamRequest = await this.openai.createChatCompletion( + { + model: this.model, + stream: true, + temperature: Number(workspace?.openAiTemp ?? 
this.defaultTemp), + n: 1, + messages: await this.compressMessages( + { + systemPrompt: chatPrompt(workspace), + userPrompt: prompt, + chatHistory, + }, + rawHistory + ), + }, + { responseType: "stream" } + ); + return streamRequest; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `GroqAI:chatCompletion: ${this.model} is not valid for chat completion!` + ); + + const { data } = await this.openai + .createChatCompletion({ + model: this.model, + messages, + temperature, + }) + .catch((e) => { + throw new Error(e.response.data.error.message); + }); + + if (!data.hasOwnProperty("choices")) return null; + return data.choices[0].message.content; + } + + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `GroqAI:streamChatCompletion: ${this.model} is not valid for chat completion!` + ); + + const streamRequest = await this.openai.createChatCompletion( + { + model: this.model, + stream: true, + messages, + temperature, + }, + { responseType: "stream" } + ); + return streamRequest; + } + + handleStream(response, stream, responseProps) { + return handleDefaultStreamResponse(response, stream, responseProps); + } + + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + + async compressMessages(promptArgs = {}, rawHistory = []) { + const { messageArrayCompressor } = require("../../helpers/chat"); + const messageArray = this.constructPrompt(promptArgs); + return await messageArrayCompressor(this, messageArray, rawHistory); + } +} + +module.exports = { + GroqLLM, +}; diff --git a/server/utils/helpers/chat/convertTo.js b/server/utils/helpers/chat/convertTo.js index 119c155aeff95d14f9c814f76bf78bce9872cc30..7d2b5f69127b167b05e58422c40c6b14ff0785ba 100644 --- a/server/utils/helpers/chat/convertTo.js +++ b/server/utils/helpers/chat/convertTo.js @@ -21,12 +21,8 @@ async function convertToCSV(preparedData) { return rows.join("\n"); } -async function convertToJSON(workspaceChatsMap) { - const allMessages = [].concat.apply( - [], - Object.values(workspaceChatsMap).map((workspace) => workspace.messages) - ); - return JSON.stringify(allMessages, null, 4); +async function convertToJSON(preparedData) { + return JSON.stringify(preparedData, null, 4); } // ref: https://raw.githubusercontent.com/gururise/AlpacaDataCleaned/main/alpaca_data.json @@ -48,7 +44,7 @@ async function prepareWorkspaceChatsForExport(format = "jsonl") { id: "asc", }); - if (format === "csv") { + if (format === "csv" || format === "json") { const preparedData = chats.map((chat) => { const responseJson = JSON.parse(chat.response); return { diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index a31a3e4f99b4f423157719822cc4335a8e9c6790..7836097256ce75b3a70b931513b0b76d7c49cd34 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -73,6 +73,9 @@ function getLLMProvider(modelPreference = null) { case "huggingface": const { HuggingFaceLLM } = require("../AiProviders/huggingface"); return new HuggingFaceLLM(embedder, modelPreference); + case "groq": + const { GroqLLM } = require("../AiProviders/groq"); + return new GroqLLM(embedder, modelPreference); default: throw new 
Error("ENV: No LLM_PROVIDER value found in environment!"); } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 1ca9368204f23645409e7b7e4e5c49e6a98b2db9..aa814d690463da8fb7c302f77f3525eed6ab1fe0 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -259,6 +259,16 @@ const KEY_MAPPING = { checks: [isNotEmpty], }, + // Groq Options + GroqApiKey: { + envKey: "GROQ_API_KEY", + checks: [isNotEmpty], + }, + GroqModelPref: { + envKey: "GROQ_MODEL_PREF", + checks: [isNotEmpty], + }, + // System Settings AuthToken: { envKey: "AUTH_TOKEN", @@ -336,6 +346,7 @@ function supportedLLM(input = "") { "huggingface", "perplexity", "openrouter", + "groq", ].includes(input); return validSelection ? null : `${input} is not a valid LLM provider.`; } @@ -348,7 +359,13 @@ function validGeminiModel(input = "") { } function validAnthropicModel(input = "") { - const validModels = ["claude-2", "claude-instant-1"]; + const validModels = [ + "claude-instant-1.2", + "claude-2.0", + "claude-2.1", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + ]; return validModels.includes(input) ? null : `Invalid Model type. Must be one of ${validModels.join(", ")}.`; diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index 23f173ddc55057ce3b0fccd47766c84ae805251b..9e3caa7adca018dde5b498c94925aef2f299be5c 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -7,6 +7,7 @@ const { getLLMProvider, getEmbeddingEngineSelection, } = require("../../helpers"); +const { parseAuthHeader } = require("../../http"); const Chroma = { name: "Chroma", diff --git a/server/yarn.lock b/server/yarn.lock index ecc8f1a75555e57078e20566c12d814f70661aba..61b29e3a7cf1f33b55db6f50dfb354e57349ed20 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -7,10 +7,10 @@ resolved "https://registry.yarnpkg.com/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz#bd9154aec9983f77b3a034ecaa015c2e4201f6cf" integrity sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA== -"@anthropic-ai/sdk@^0.8.1": - version "0.8.1" - resolved "https://registry.yarnpkg.com/@anthropic-ai/sdk/-/sdk-0.8.1.tgz#7c7c6cb262abe3e6d0bb8bd1179b4589edd7a6ad" - integrity sha512-59etePenCizVx1O8Qhi1T1ruE04ISfNzCnyhZNcsss1QljsLmYS83jttarMNEvGYcsUF7rwxw2lzcC3Zbxao7g== +"@anthropic-ai/sdk@^0.16.1": + version "0.16.1" + resolved "https://registry.yarnpkg.com/@anthropic-ai/sdk/-/sdk-0.16.1.tgz#7472c42389d9a5323c20afa53995e1c3b922b95d" + integrity sha512-vHgvfWEyFy5ktqam56Nrhv8MVa7EJthsRYNi+1OrFFfyrj9tR2/aji1QbVbQjYU/pPhPFaYrdCEC/MLPFrmKwA== dependencies: "@types/node" "^18.11.18" "@types/node-fetch" "^2.6.4"