From df96159e882d2e5717da460891320847ac2e3b8a Mon Sep 17 00:00:00 2001
From: Huu Le <39040748+leehuwuj@users.noreply.github.com>
Date: Fri, 12 Jul 2024 15:01:41 +0700
Subject: [PATCH] feat: Use Qdrant FastEmbed as local embedding provider (#162)

---
 .changeset/happy-emus-attend.md               |  5 +++
 .gitignore                                    |  3 ++
 helpers/python.ts                             | 14 ++++++-
 .../components/settings/python/settings.py    | 39 +++++++++++--------
 4 files changed, 42 insertions(+), 19 deletions(-)
 create mode 100644 .changeset/happy-emus-attend.md

diff --git a/.changeset/happy-emus-attend.md b/.changeset/happy-emus-attend.md
new file mode 100644
index 00000000..c7e04279
--- /dev/null
+++ b/.changeset/happy-emus-attend.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Use Qdrant FastEmbed as local embedding provider
diff --git a/.gitignore b/.gitignore
index 6c50932b..534068cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,5 +46,8 @@ e2e/cache
 # intellij
 **/.idea
 
+# Python
+.mypy_cache/
+
 # build artifacts
 create-llama-*.tgz
diff --git a/helpers/python.ts b/helpers/python.ts
index d73f6c62..fd755440 100644
--- a/helpers/python.ts
+++ b/helpers/python.ts
@@ -153,14 +153,24 @@ const getAdditionalDependencies = (
         version: "0.2.6",
       });
       break;
+    case "groq":
+      dependencies.push({
+        name: "llama-index-llms-groq",
+        version: "0.1.4",
+      });
+      dependencies.push({
+        name: "llama-index-embeddings-fastembed",
+        version: "^0.1.4",
+      });
+      break;
     case "anthropic":
       dependencies.push({
         name: "llama-index-llms-anthropic",
         version: "0.1.10",
       });
       dependencies.push({
-        name: "llama-index-embeddings-huggingface",
-        version: "0.2.0",
+        name: "llama-index-embeddings-fastembed",
+        version: "^0.1.4",
       });
       break;
     case "gemini":
diff --git a/templates/components/settings/python/settings.py b/templates/components/settings/python/settings.py
index e0c974cc..ce427645 100644
--- a/templates/components/settings/python/settings.py
+++ b/templates/components/settings/python/settings.py
@@ -98,8 +98,25 @@ def init_azure_openai():
     Settings.embed_model = AzureOpenAIEmbedding(**embedding_config)
 
 
+def init_fastembed():
+    """
+    Use Qdrant FastEmbed as the local embedding provider.
+    """
+    from llama_index.embeddings.fastembed import FastEmbedEmbedding
+
+    embed_model_map: Dict[str, str] = {
+        # Small and fast (English-only)
+        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
+        # Large and multilingual
+        "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",   # noqa: E501
+    }
+
+    # This will download the model automatically if it is not already downloaded
+    Settings.embed_model = FastEmbedEmbedding(
+        model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
+    )
+
 def init_groq():
-    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
     from llama_index.llms.groq import Groq
 
     model_map: Dict[str, str] = {
@@ -108,19 +125,13 @@ def init_groq():
         "mixtral-8x7b": "mixtral-8x7b-32768",
     }
 
-    embed_model_map: Dict[str, str] = {
-        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
-        "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2",
-    }
 
     Settings.llm = Groq(model=model_map[os.getenv("MODEL")])
-    Settings.embed_model = HuggingFaceEmbedding(
-        model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
-    )
+    # Groq does not provide embeddings, so we use FastEmbed instead
+    init_fastembed()
 
 
 def init_anthropic():
-    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
     from llama_index.llms.anthropic import Anthropic
 
     model_map: Dict[str, str] = {
@@ -131,15 +142,9 @@ def init_anthropic():
         "claude-instant-1.2": "claude-instant-1.2",
     }
 
-    embed_model_map: Dict[str, str] = {
-        "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
-        "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2",
-    }
-
     Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
-    Settings.embed_model = HuggingFaceEmbedding(
-        model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
-    )
+    # Anthropic does not provide embeddings, so we use FastEmbed instead
+    init_fastembed()
 
 
 def init_gemini():
-- 
GitLab