From bac1b43f10a56d3cd2579ca0a58a17a370eb6aea Mon Sep 17 00:00:00 2001
From: "Huu Le (Lee)" <39040748+leehuwuj@users.noreply.github.com>
Date: Mon, 18 Mar 2024 11:33:53 +0700
Subject: [PATCH] feat: Add Milvus vector database (#4)

---
 .changeset/tasty-hairs-wait.md                |  5 +++
 e2e/utils.ts                                  |  2 +
 helpers/env-variables.ts                      | 23 ++++++++++
 helpers/python.ts                             |  7 +++
 helpers/types.ts                              |  2 +-
 helpers/typescript.ts                         | 25 +++++++----
 index.ts                                      |  4 ++
 questions.ts                                  |  1 +
 .../vectordbs/python/milvus/__init__.py       |  0
 .../vectordbs/python/milvus/generate.py       | 39 +++++++++++++++++
 .../vectordbs/python/milvus/index.py          | 22 ++++++++++
 .../vectordbs/typescript/milvus/generate.mjs  | 43 +++++++++++++++++++
 .../vectordbs/typescript/milvus/index.ts      | 35 +++++++++++++++
 .../vectordbs/typescript/milvus/shared.mjs    | 41 ++++++++++++++++++
 .../src/observability/{init.ts => index.ts}   |  0
 15 files changed, 240 insertions(+), 9 deletions(-)
 create mode 100644 .changeset/tasty-hairs-wait.md
 create mode 100644 templates/components/vectordbs/python/milvus/__init__.py
 create mode 100644 templates/components/vectordbs/python/milvus/generate.py
 create mode 100644 templates/components/vectordbs/python/milvus/index.py
 create mode 100644 templates/components/vectordbs/typescript/milvus/generate.mjs
 create mode 100644 templates/components/vectordbs/typescript/milvus/index.ts
 create mode 100644 templates/components/vectordbs/typescript/milvus/shared.mjs
 rename templates/types/simple/express/src/observability/{init.ts => index.ts} (100%)

diff --git a/.changeset/tasty-hairs-wait.md b/.changeset/tasty-hairs-wait.md
new file mode 100644
index 00000000..deaa3f6f
--- /dev/null
+++ b/.changeset/tasty-hairs-wait.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Add Milvus vector database
diff --git a/e2e/utils.ts b/e2e/utils.ts
index 03b69d07..b9b9ec0d 100644
--- a/e2e/utils.ts
+++ b/e2e/utils.ts
@@ -123,6 +123,8 @@ export async function runCreateLlama(
     "--tools",
     "none",
     "--no-llama-parse",
+    "--observability",
+    "none",
   ].join(" ");
   console.log(`running command '${command}' in ${cwd}`);
   const appProcess = exec(command, {
diff --git a/helpers/env-variables.ts b/helpers/env-variables.ts
index f9f6d370..c5e65442 100644
--- a/helpers/env-variables.ts
+++ b/helpers/env-variables.ts
@@ -71,6 +71,29 @@ const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
           name: "PINECONE_INDEX_NAME",
         },
       ];
+    case "milvus":
+      return [
+        {
+          name: "MILVUS_ADDRESS",
+          description:
+            "The address of the Milvus server. Eg: http://localhost:19530",
+          value: "http://localhost:19530",
+        },
+        {
+          name: "MILVUS_COLLECTION",
+          description:
+            "The name of the Milvus collection to store the vectors.",
+          value: "llamacollection",
+        },
+        {
+          name: "MILVUS_USERNAME",
+          description: "The username to access the Milvus server.",
+        },
+        {
+          name: "MILVUS_PASSWORD",
+          description: "The password to access the Milvus server.",
+        },
+      ];
     default:
       return [];
   }
diff --git a/helpers/python.ts b/helpers/python.ts
index 285beb9a..031e997d 100644
--- a/helpers/python.ts
+++ b/helpers/python.ts
@@ -49,6 +49,13 @@ const getAdditionalDependencies = (
       });
       break;
     }
+    case "milvus": {
+      dependencies.push({
+        name: "llama-index-vector-stores-milvus",
+        version: "^0.1.6",
+      });
+      break;
+    }
   }
 
   // Add data source dependencies
diff --git a/helpers/types.ts b/helpers/types.ts
index dee6a685..bcc969e6 100644
--- a/helpers/types.ts
+++ b/helpers/types.ts
@@ -5,7 +5,7 @@ export type TemplateType = "simple" | "streaming" | "community" | "llamapack";
 export type TemplateFramework = "nextjs" | "express" | "fastapi";
 export type TemplateEngine = "simple" | "context";
 export type TemplateUI = "html" | "shadcn";
-export type TemplateVectorDB = "none" | "mongo" | "pg" | "pinecone";
+export type TemplateVectorDB = "none" | "mongo" | "pg" | "pinecone" | "milvus";
 export type TemplatePostInstallAction =
   | "none"
   | "VSCode"
diff --git a/helpers/typescript.ts b/helpers/typescript.ts
index 92a04be1..902a7d4d 100644
--- a/helpers/typescript.ts
+++ b/helpers/typescript.ts
@@ -85,19 +85,28 @@ export const installTSTemplate = async ({
    * If next.js is used, update its configuration if necessary
    */
   if (framework === "nextjs") {
+    const nextConfigJsonFile = path.join(root, "next.config.json");
+    const nextConfigJson: any = JSON.parse(
+      await fs.readFile(nextConfigJsonFile, "utf8"),
+    );
     if (!backend) {
       // update next.config.json for static site generation
-      const nextConfigJsonFile = path.join(root, "next.config.json");
-      const nextConfigJson: any = JSON.parse(
-        await fs.readFile(nextConfigJsonFile, "utf8"),
-      );
       nextConfigJson.output = "export";
       nextConfigJson.images = { unoptimized: true };
-      await fs.writeFile(
-        nextConfigJsonFile,
-        JSON.stringify(nextConfigJson, null, 2) + os.EOL,
-      );
+      console.log("\nUsing static site generation\n");
+    } else {
+      if (vectorDb === "milvus") {
+        // Externalize the Milvus SDK; create missing config keys first.
+        const experimental = (nextConfigJson.experimental ??= {});
+        (experimental.serverComponentsExternalPackages ??= []).push(
+          "@zilliz/milvus2-sdk-node",
+        );
+      }
     }
+    await fs.writeFile(
+      nextConfigJsonFile,
+      JSON.stringify(nextConfigJson, null, 2) + os.EOL,
+    );
 
     const webpackConfigOtelFile = path.join(root, "webpack.config.o11y.mjs");
     if (observability === "opentelemetry") {
diff --git a/index.ts b/index.ts
index 43ba741d..47ff6c2c 100644
--- a/index.ts
+++ b/index.ts
@@ -176,6 +176,10 @@ const program = new Commander.Command(packageJson.name)
   Provide a LlamaCloud API key.
 `,
   )
+  .option(
+    "--observability <observability>",
+    "Specify observability tools to use. Eg: none, opentelemetry",
+  )
   .allowUnknownOption()
   .parse(process.argv);
 if (process.argv.includes("--no-frontend")) {
diff --git a/questions.ts b/questions.ts
index 46b7d7e9..4118423d 100644
--- a/questions.ts
+++ b/questions.ts
@@ -96,6 +96,7 @@ const getVectorDbChoices = (framework: TemplateFramework) => {
     { title: "MongoDB", value: "mongo" },
     { title: "PostgreSQL", value: "pg" },
     { title: "Pinecone", value: "pinecone" },
+    { title: "Milvus", value: "milvus" },
   ];
 
   const vectordbLang = framework === "fastapi" ? "python" : "typescript";
diff --git a/templates/components/vectordbs/python/milvus/__init__.py b/templates/components/vectordbs/python/milvus/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/templates/components/vectordbs/python/milvus/generate.py b/templates/components/vectordbs/python/milvus/generate.py
new file mode 100644
index 00000000..862ddd84
--- /dev/null
+++ b/templates/components/vectordbs/python/milvus/generate.py
@@ -0,0 +1,39 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import os
+import logging
+from llama_index.core.storage import StorageContext
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.milvus import MilvusVectorStore
+from app.settings import init_settings
+from app.engine.loader import get_documents
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_datasource():
+    """Build a VectorStoreIndex from get_documents() using a Milvus-backed storage context."""
+    logger.info("Creating new index")
+    documents = get_documents()
+    store = MilvusVectorStore(
+        uri=os.environ["MILVUS_ADDRESS"],
+        user=os.getenv("MILVUS_USERNAME"),  # name matches the generated .env entry
+        password=os.getenv("MILVUS_PASSWORD"),
+        collection_name=os.getenv("MILVUS_COLLECTION"),
+        dim=int(os.getenv("EMBEDDING_DIM", "1536")),  # same variable index.py reads
+    )
+    storage_context = StorageContext.from_defaults(vector_store=store)
+    VectorStoreIndex.from_documents(
+        documents,
+        storage_context=storage_context,
+        show_progress=True,  # this will show you a progress bar as the embeddings are created
+    )
+    logger.info("Successfully created embeddings in the Milvus")
+
+
+if __name__ == "__main__":
+    init_settings()  # runs before indexing; presumably configures the models used (confirm in app.settings)
+    generate_datasource()
diff --git a/templates/components/vectordbs/python/milvus/index.py b/templates/components/vectordbs/python/milvus/index.py
new file mode 100644
index 00000000..130840cc
--- /dev/null
+++ b/templates/components/vectordbs/python/milvus/index.py
@@ -0,0 +1,22 @@
+import logging
+import os
+
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.milvus import MilvusVectorStore
+
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    logger.info("Connecting to index from Milvus...")
+    store = MilvusVectorStore(
+        uri=os.getenv("MILVUS_ADDRESS"),
+        user=os.getenv("MILVUS_USERNAME"),  # name matches the generated .env entry
+        password=os.getenv("MILVUS_PASSWORD"),
+        collection_name=os.getenv("MILVUS_COLLECTION"),
+        dim=int(os.getenv("EMBEDDING_DIM", "1536")),
+    )
+    index = VectorStoreIndex.from_vector_store(store)  # wraps the store as an index
+    logger.info("Finished connecting to index from Milvus.")
+    return index
diff --git a/templates/components/vectordbs/typescript/milvus/generate.mjs b/templates/components/vectordbs/typescript/milvus/generate.mjs
new file mode 100644
index 00000000..905a066c
--- /dev/null
+++ b/templates/components/vectordbs/typescript/milvus/generate.mjs
@@ -0,0 +1,43 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
+import * as dotenv from "dotenv";
+import {
+  MilvusVectorStore,
+  SimpleDirectoryReader,
+  VectorStoreIndex,
+  storageContextFromDefaults,
+} from "llamaindex";
+import {
+  STORAGE_DIR,
+  checkRequiredEnvVars,
+  getMilvusClient,
+} from "./shared.mjs";
+
+dotenv.config();
+
+const collectionName = process.env.MILVUS_COLLECTION;
+
+async function loadAndIndex() {
+  // Load the contents of STORAGE_DIR as LlamaIndex Document objects.
+  const reader = new SimpleDirectoryReader();
+  const docs = await reader.loadData({ directoryPath: STORAGE_DIR });
+
+  // Wrap a Milvus client in a vector store.
+  const vectorStore = new MilvusVectorStore({
+    milvusClient: getMilvusClient(),
+  });
+
+  // Index the documents through a storage context that uses the Milvus store.
+  const storageContext = await storageContextFromDefaults({ vectorStore });
+  await VectorStoreIndex.fromDocuments(docs, { storageContext });
+
+  // Report success together with the MILVUS_COLLECTION name.
+  console.log(
+    `Successfully created embeddings in the Milvus collection ${collectionName}.`,
+  );
+}
+
+(async () => {
+  checkRequiredEnvVars(); // throws before any loading starts if a required variable is missing
+  await loadAndIndex();
+  console.log("Finished generating storage.");
+})();
diff --git a/templates/components/vectordbs/typescript/milvus/index.ts b/templates/components/vectordbs/typescript/milvus/index.ts
new file mode 100644
index 00000000..cf487ad3
--- /dev/null
+++ b/templates/components/vectordbs/typescript/milvus/index.ts
@@ -0,0 +1,35 @@
+import {
+  ContextChatEngine,
+  LLM,
+  MilvusVectorStore,
+  serviceContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+import {
+  checkRequiredEnvVars,
+  CHUNK_OVERLAP,
+  CHUNK_SIZE,
+  getMilvusClient,
+} from "./shared.mjs";
+
+/**
+ * Checks the required environment variables, then returns a VectorStoreIndex
+ * created from the Milvus vector store.
+ */
+async function getDataSource(llm: LLM) {
+  checkRequiredEnvVars();
+  const chunking = { chunkSize: CHUNK_SIZE, chunkOverlap: CHUNK_OVERLAP };
+  const serviceContext = serviceContextFromDefaults({ llm, ...chunking });
+  const vectorStore = new MilvusVectorStore({
+    milvusClient: getMilvusClient(),
+  });
+  return await VectorStoreIndex.fromVectorStore(vectorStore, serviceContext);
+}
+
+// Creates a ContextChatEngine for `llm` whose retriever is built from the
+// Milvus-backed index with similarityTopK set to 3.
+export async function createChatEngine(llm: LLM) {
+  const dataSource = await getDataSource(llm);
+  const retriever = dataSource.asRetriever({ similarityTopK: 3 });
+  const engine = new ContextChatEngine({ chatModel: llm, retriever });
+  return engine;
+}
diff --git a/templates/components/vectordbs/typescript/milvus/shared.mjs b/templates/components/vectordbs/typescript/milvus/shared.mjs
new file mode 100644
index 00000000..0a35d715
--- /dev/null
+++ b/templates/components/vectordbs/typescript/milvus/shared.mjs
@@ -0,0 +1,41 @@
+import { MilvusClient } from "@zilliz/milvus2-sdk-node";
+
+export const STORAGE_DIR = "./data";
+export const CHUNK_SIZE = 512;
+export const CHUNK_OVERLAP = 20;
+
+const REQUIRED_ENV_VARS = [
+  "MILVUS_ADDRESS",
+  "MILVUS_USERNAME",
+  "MILVUS_PASSWORD",
+  "MILVUS_COLLECTION",
+];
+
+// Creates a MilvusClient from the MILVUS_* variables; throws without an address.
+export function getMilvusClient() {
+  const env = process.env;
+  const address = env.MILVUS_ADDRESS;
+  if (!address) {
+    throw new Error("MILVUS_ADDRESS environment variable is required");
+  }
+  const username = env.MILVUS_USERNAME;
+  const password = env.MILVUS_PASSWORD;
+  return new MilvusClient({ address, username, password });
+}
+
+/**
+ * Verifies that every name in REQUIRED_ENV_VARS is set to a non-empty value.
+ * Logs the missing names and throws an Error when any are absent.
+ */
+export function checkRequiredEnvVars() {
+  const missing = REQUIRED_ENV_VARS.filter((name) => !process.env[name]);
+  if (missing.length === 0) {
+    return;
+  }
+
+  const names = missing.join(", ");
+  console.log(
+    `The following environment variables are required but missing: ${names}`,
+  );
+  throw new Error(`Missing environment variables: ${names}`);
+}
diff --git a/templates/types/simple/express/src/observability/init.ts b/templates/types/simple/express/src/observability/index.ts
similarity index 100%
rename from templates/types/simple/express/src/observability/init.ts
rename to templates/types/simple/express/src/observability/index.ts
-- 
GitLab