diff --git a/.changeset/happy-emus-attend.md b/.changeset/happy-emus-attend.md
new file mode 100644
index 0000000000000000000000000000000000000000..c7e042795b39a7b4222b3c1af58d51d07030097a
--- /dev/null
+++ b/.changeset/happy-emus-attend.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Use Qdrant FastEmbed as local embedding provider
diff --git a/.gitignore b/.gitignore
index 6c50932b7f7f33d3dcfefb68077863ae5fccc2a7..534068cf291631047cda53408a45930e2d11928d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,5 +46,8 @@ e2e/cache
 # intellij
 **/.idea
 
+# Python
+.mypy_cache/
+
 # build artifacts
 create-llama-*.tgz
diff --git a/helpers/python.ts b/helpers/python.ts
index d73f6c62b714588f9fd9552acb0c8b6fb9ad2ee4..fd755440278654a065324ab4d26b4af9a241915f 100644
--- a/helpers/python.ts
+++ b/helpers/python.ts
@@ -153,14 +153,24 @@ const getAdditionalDependencies = (
         version: "0.2.6",
       });
       break;
+    case "groq":
+      dependencies.push({
+        name: "llama-index-llms-groq",
+        version: "0.1.4",
+      });
+      dependencies.push({
+        name: "llama-index-embeddings-fastembed",
+        version: "^0.1.4",
+      });
+      break;
     case "anthropic":
       dependencies.push({
         name: "llama-index-llms-anthropic",
         version: "0.1.10",
       });
       dependencies.push({
-        name: "llama-index-embeddings-huggingface",
-        version: "0.2.0",
+        name: "llama-index-embeddings-fastembed",
+        version: "^0.1.4",
       });
       break;
     case "gemini":
diff --git a/templates/components/settings/python/settings.py b/templates/components/settings/python/settings.py
index e0c974cc5edc2fbdddc54bcf2f56cb84620ec9db..ce427645e0711f45a0e287af7c8cca7f301f2e43 100644
--- a/templates/components/settings/python/settings.py
+++ b/templates/components/settings/python/settings.py
@@ -98,8 +98,25 @@ def init_azure_openai():
     Settings.embed_model = AzureOpenAIEmbedding(**embedding_config)
 
 
+def init_fastembed():
+    """
+    Use Qdrant FastEmbed as the local embedding provider.
+    """
+    from llama_index.embeddings.fastembed import FastEmbedEmbedding
+
+    embed_model_map: Dict[str, str] = {
+        # Small and English-only
+        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
+        # Large and multilingual
+        "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",  # noqa: E501
+    }
+
+    # This will download the model automatically if it is not already downloaded
+    Settings.embed_model = FastEmbedEmbedding(
+        model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
+    )
+
 def init_groq():
-    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
     from llama_index.llms.groq import Groq
 
     model_map: Dict[str, str] = {
@@ -108,19 +125,13 @@ def init_groq():
         "mixtral-8x7b": "mixtral-8x7b-32768",
     }
 
-    embed_model_map: Dict[str, str] = {
-        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
-        "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2",
-    }
 
     Settings.llm = Groq(model=model_map[os.getenv("MODEL")])
-    Settings.embed_model = HuggingFaceEmbedding(
-        model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
-    )
+    # Groq does not provide embeddings, so we use FastEmbed instead
+    init_fastembed()
 
 
 def init_anthropic():
-    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
     from llama_index.llms.anthropic import Anthropic
 
     model_map: Dict[str, str] = {
@@ -131,15 +142,9 @@ def init_anthropic():
         "claude-instant-1.2": "claude-instant-1.2",
     }
 
-    embed_model_map: Dict[str, str] = {
-        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
-        "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2",
-    }
-
     Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
-    Settings.embed_model = HuggingFaceEmbedding(
-        model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
-    )
+    # Anthropic does not provide embeddings, so we use FastEmbed instead
+    init_fastembed()
 
 
 def init_gemini():