diff --git a/.changeset/tasty-hairs-wait.md b/.changeset/tasty-hairs-wait.md
new file mode 100644
index 0000000000000000000000000000000000000000..deaa3f6f13193c6fc0a2f4e3f753f35e353571c6
--- /dev/null
+++ b/.changeset/tasty-hairs-wait.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Add Milvus vector database
diff --git a/e2e/utils.ts b/e2e/utils.ts
index 03b69d07efcfd414725a080488512f8c7b2c9771..b9b9ec0dba52bd18a11a893e740df2a729182b57 100644
--- a/e2e/utils.ts
+++ b/e2e/utils.ts
@@ -123,6 +123,8 @@ export async function runCreateLlama(
     "--tools",
     "none",
     "--no-llama-parse",
+    "--observability",
+    "none",
   ].join(" ");
   console.log(`running command '${command}' in ${cwd}`);
   const appProcess = exec(command, {
diff --git a/helpers/env-variables.ts b/helpers/env-variables.ts
index f9f6d370a44962efec562c65004d17c5f5aae807..c5e6544249b721cf7587a48c31dba3c8184305ea 100644
--- a/helpers/env-variables.ts
+++ b/helpers/env-variables.ts
@@ -71,6 +71,29 @@ const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
           name: "PINECONE_INDEX_NAME",
         },
       ];
+    case "milvus":
+      return [
+        {
+          name: "MILVUS_ADDRESS",
+          description:
+            "The address of the Milvus server. Eg: http://localhost:19530",
+          value: "http://localhost:19530",
+        },
+        {
+          name: "MILVUS_COLLECTION",
+          description:
+            "The name of the Milvus collection to store the vectors.",
+          value: "llamacollection",
+        },
+        {
+          name: "MILVUS_USERNAME",
+          description: "The username to access the Milvus server.",
+        },
+        {
+          name: "MILVUS_PASSWORD",
+          description: "The password to access the Milvus server.",
+        },
+      ];
     default:
       return [];
   }
diff --git a/helpers/python.ts b/helpers/python.ts
index 285beb9a8b377b1c757c84765fca5e5766bba9f4..031e997d44f330486b29056a0fd8887bbb8bfdd5 100644
--- a/helpers/python.ts
+++ b/helpers/python.ts
@@ -49,6 +49,13 @@ const getAdditionalDependencies = (
       });
       break;
     }
+    case "milvus": {
+      dependencies.push({
+        name: "llama-index-vector-stores-milvus",
+        version: "^0.1.6",
+      });
+      break;
+    }
   }
 
   // Add data source dependencies
diff --git a/helpers/types.ts b/helpers/types.ts
index dee6a68570af7a4b391772151ad03bb6321d6a17..bcc969e685eb56c6b1f730ca8147f43ee4322c70 100644
--- a/helpers/types.ts
+++ b/helpers/types.ts
@@ -5,7 +5,7 @@ export type TemplateType = "simple" | "streaming" | "community" | "llamapack";
 export type TemplateFramework = "nextjs" | "express" | "fastapi";
 export type TemplateEngine = "simple" | "context";
 export type TemplateUI = "html" | "shadcn";
-export type TemplateVectorDB = "none" | "mongo" | "pg" | "pinecone";
+export type TemplateVectorDB = "none" | "mongo" | "pg" | "pinecone" | "milvus";
 export type TemplatePostInstallAction =
   | "none"
   | "VSCode"
diff --git a/helpers/typescript.ts b/helpers/typescript.ts
index 92a04be12d9d00e2f210222ef956181857e53950..902a7d4d3d59f933ac431a1d1bb6eb49686293a8 100644
--- a/helpers/typescript.ts
+++ b/helpers/typescript.ts
@@ -85,19 +85,30 @@ export const installTSTemplate = async ({
    * If next.js is used, update its configuration if necessary
    */
   if (framework === "nextjs") {
+    const nextConfigJsonFile = path.join(root, "next.config.json");
+    const nextConfigJson: any = JSON.parse(
+      await fs.readFile(nextConfigJsonFile, "utf8"),
+    );
     if (!backend) {
       // update next.config.json for static site generation
-      const nextConfigJsonFile = path.join(root, "next.config.json");
-      const nextConfigJson: any = JSON.parse(
-        await fs.readFile(nextConfigJsonFile, "utf8"),
-      );
       nextConfigJson.output = "export";
       nextConfigJson.images = { unoptimized: true };
-      await fs.writeFile(
-        nextConfigJsonFile,
-        JSON.stringify(nextConfigJson, null, 2) + os.EOL,
-      );
+      console.log("\nUsing static site generation\n");
+    } else {
+      if (vectorDb === "milvus") {
+        // `experimental` may be absent from next.config.json; create it first
+        nextConfigJson.experimental = nextConfigJson.experimental ?? {};
+        nextConfigJson.experimental.serverComponentsExternalPackages =
+          nextConfigJson.experimental.serverComponentsExternalPackages ?? [];
+        nextConfigJson.experimental.serverComponentsExternalPackages.push(
+          "@zilliz/milvus2-sdk-node",
+        );
+      }
     }
+    await fs.writeFile(
+      nextConfigJsonFile,
+      JSON.stringify(nextConfigJson, null, 2) + os.EOL,
+    );
 
     const webpackConfigOtelFile = path.join(root, "webpack.config.o11y.mjs");
     if (observability === "opentelemetry") {
diff --git a/index.ts b/index.ts
index 43ba741d2f7f910a6aa58917114ee472c3ee23d1..47ff6c2c839d6cbef27c39ee9adf852d6d4fafdd 100644
--- a/index.ts
+++ b/index.ts
@@ -176,6 +176,10 @@ const program = new Commander.Command(packageJson.name)
   Provide a LlamaCloud API key.
 `,
   )
+  .option(
+    "--observability <observability>",
+    "Specify observability tools to use. Eg: none, opentelemetry",
+  )
   .allowUnknownOption()
   .parse(process.argv);
 if (process.argv.includes("--no-frontend")) {
diff --git a/questions.ts b/questions.ts
index 46b7d7e97a02f3691728d9f3f1a44b2abbc35ab5..4118423d7e2c259a15c99a5e21e5cf49daf6990f 100644
--- a/questions.ts
+++ b/questions.ts
@@ -96,6 +96,7 @@ const getVectorDbChoices = (framework: TemplateFramework) => {
     { title: "MongoDB", value: "mongo" },
     { title: "PostgreSQL", value: "pg" },
     { title: "Pinecone", value: "pinecone" },
+    { title: "Milvus", value: "milvus" },
   ];
 
   const vectordbLang = framework === "fastapi" ? "python" : "typescript";
diff --git a/templates/components/vectordbs/python/milvus/__init__.py b/templates/components/vectordbs/python/milvus/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/templates/components/vectordbs/python/milvus/generate.py b/templates/components/vectordbs/python/milvus/generate.py
new file mode 100644
index 0000000000000000000000000000000000000000..862ddd84181392a10e74054151f3882f7aebbff6
--- /dev/null
+++ b/templates/components/vectordbs/python/milvus/generate.py
@@ -0,0 +1,45 @@
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import os
+import logging
+from llama_index.core.storage import StorageContext
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.milvus import MilvusVectorStore
+from app.settings import init_settings
+from app.engine.loader import get_documents
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_datasource():
+    """Embed the loaded documents and store the vectors in Milvus.
+
+    Reads MILVUS_ADDRESS (required), MILVUS_USERNAME, MILVUS_PASSWORD and
+    MILVUS_COLLECTION from the environment. EMBEDDING_DIM sets the vector
+    dimension and falls back to 1536 when unset.
+    """
+    logger.info("Creating new index")
+    # load the documents and create the index
+    documents = get_documents()
+    store = MilvusVectorStore(
+        uri=os.environ["MILVUS_ADDRESS"],
+        user=os.getenv("MILVUS_USERNAME"),
+        password=os.getenv("MILVUS_PASSWORD"),
+        collection_name=os.getenv("MILVUS_COLLECTION"),
+        dim=int(os.getenv("EMBEDDING_DIM", "1536")),
+    )
+    storage_context = StorageContext.from_defaults(vector_store=store)
+    VectorStoreIndex.from_documents(
+        documents,
+        storage_context=storage_context,
+        show_progress=True,  # this will show you a progress bar as the embeddings are created
+    )
+    logger.info("Successfully created embeddings in the Milvus")
+
+
+if __name__ == "__main__":
+    init_settings()
+    generate_datasource()
diff --git a/templates/components/vectordbs/python/milvus/index.py b/templates/components/vectordbs/python/milvus/index.py
new file mode 100644
index 0000000000000000000000000000000000000000..130840ccb46bdffe009e00d81713c0e818ddc7e0
--- /dev/null
+++ b/templates/components/vectordbs/python/milvus/index.py
@@ -0,0 +1,27 @@
+import logging
+import os
+
+from llama_index.core.indices import VectorStoreIndex
+from llama_index.vector_stores.milvus import MilvusVectorStore
+
+
+logger = logging.getLogger("uvicorn")
+
+
+def get_index():
+    """Connect to the Milvus collection and return it as a VectorStoreIndex.
+
+    Uses the same MILVUS_* and EMBEDDING_DIM environment variables as
+    generate.py so both scripts address the same collection.
+    """
+    logger.info("Connecting to index from Milvus...")
+    store = MilvusVectorStore(
+        uri=os.getenv("MILVUS_ADDRESS"),
+        user=os.getenv("MILVUS_USERNAME"),
+        password=os.getenv("MILVUS_PASSWORD"),
+        collection_name=os.getenv("MILVUS_COLLECTION"),
+        dim=int(os.getenv("EMBEDDING_DIM", "1536")),
+    )
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Finished connecting to index from Milvus.")
+    return index
diff --git a/templates/components/vectordbs/typescript/milvus/generate.mjs b/templates/components/vectordbs/typescript/milvus/generate.mjs
new file mode 100644
index 0000000000000000000000000000000000000000..905a066c2df84f90b5dc19d718939c0c71ac8735
--- /dev/null
+++ b/templates/components/vectordbs/typescript/milvus/generate.mjs
@@ -0,0 +1,43 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
+import * as dotenv from "dotenv";
+import {
+  MilvusVectorStore,
+  SimpleDirectoryReader,
+  VectorStoreIndex,
+  storageContextFromDefaults,
+} from "llamaindex";
+import {
+  STORAGE_DIR,
+  checkRequiredEnvVars,
+  getMilvusClient,
+} from "./shared.mjs";
+
+dotenv.config();
+
+const collectionName = process.env.MILVUS_COLLECTION;
+
+async function loadAndIndex() {
+  // load objects from storage and convert them into LlamaIndex Document objects
+  const documents = await new SimpleDirectoryReader().loadData({
+    directoryPath: STORAGE_DIR,
+  });
+
+  // Connect to Milvus
+  const milvusClient = getMilvusClient();
+  const vectorStore = new MilvusVectorStore({ milvusClient });
+
+  // now create an index from all the Documents and store them in Milvus
+  const storageContext = await storageContextFromDefaults({ vectorStore });
+  await VectorStoreIndex.fromDocuments(documents, {
+    storageContext: storageContext,
+  });
+  console.log(
+    `Successfully created embeddings in the Milvus collection ${collectionName}.`,
+  );
+}
+
+(async () => {
+  checkRequiredEnvVars();
+  await loadAndIndex();
+  console.log("Finished generating storage.");
+})();
diff --git a/templates/components/vectordbs/typescript/milvus/index.ts b/templates/components/vectordbs/typescript/milvus/index.ts
new file mode 100644
index 0000000000000000000000000000000000000000..cf487ad3264ea7424f8b9b34a6c3ebbf98710f4c
--- /dev/null
+++ b/templates/components/vectordbs/typescript/milvus/index.ts
@@ -0,0 +1,35 @@
+import {
+  ContextChatEngine,
+  LLM,
+  MilvusVectorStore,
+  serviceContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+import {
+  checkRequiredEnvVars,
+  CHUNK_OVERLAP,
+  CHUNK_SIZE,
+  getMilvusClient,
+} from "./shared.mjs";
+
+async function getDataSource(llm: LLM) {
+  checkRequiredEnvVars();
+  const serviceContext = serviceContextFromDefaults({
+    llm,
+    chunkSize: CHUNK_SIZE,
+    chunkOverlap: CHUNK_OVERLAP,
+  });
+  const milvusClient = getMilvusClient();
+  const store = new MilvusVectorStore({ milvusClient });
+
+  return await VectorStoreIndex.fromVectorStore(store, serviceContext);
+}
+
+export async function createChatEngine(llm: LLM) {
+  const index = await getDataSource(llm);
+  const retriever = index.asRetriever({ similarityTopK: 3 });
+  return new ContextChatEngine({
+    chatModel: llm,
+    retriever,
+  });
+}
diff --git a/templates/components/vectordbs/typescript/milvus/shared.mjs b/templates/components/vectordbs/typescript/milvus/shared.mjs
new file mode 100644
index 0000000000000000000000000000000000000000..0a35d7152a03863cb7239d06fd98df6baf818819
--- /dev/null
+++ b/templates/components/vectordbs/typescript/milvus/shared.mjs
@@ -0,0 +1,50 @@
+import { MilvusClient } from "@zilliz/milvus2-sdk-node";
+
+export const STORAGE_DIR = "./data";
+export const CHUNK_SIZE = 512;
+export const CHUNK_OVERLAP = 20;
+
+const REQUIRED_ENV_VARS = [
+  "MILVUS_ADDRESS",
+  "MILVUS_USERNAME",
+  "MILVUS_PASSWORD",
+  "MILVUS_COLLECTION",
+];
+
+/**
+ * Build a Milvus client from the MILVUS_ADDRESS, MILVUS_USERNAME and
+ * MILVUS_PASSWORD environment variables.
+ * @throws {Error} when MILVUS_ADDRESS is not set.
+ */
+export function getMilvusClient() {
+  const milvusAddress = process.env.MILVUS_ADDRESS;
+  if (!milvusAddress) {
+    throw new Error("MILVUS_ADDRESS environment variable is required");
+  }
+  return new MilvusClient({
+    address: milvusAddress,
+    username: process.env.MILVUS_USERNAME,
+    password: process.env.MILVUS_PASSWORD,
+  });
+}
+
+/**
+ * Throw if any variable listed in REQUIRED_ENV_VARS is missing or empty,
+ * logging the missing names first.
+ */
+export function checkRequiredEnvVars() {
+  const missingEnvVars = REQUIRED_ENV_VARS.filter((envVar) => {
+    return !process.env[envVar];
+  });
+
+  if (missingEnvVars.length > 0) {
+    console.log(
+      `The following environment variables are required but missing: ${missingEnvVars.join(
+        ", ",
+      )}`,
+    );
+    throw new Error(
+      `Missing environment variables: ${missingEnvVars.join(", ")}`,
+    );
+  }
+}
diff --git a/templates/types/simple/express/src/observability/init.ts b/templates/types/simple/express/src/observability/index.ts
similarity index 100%
rename from templates/types/simple/express/src/observability/init.ts
rename to templates/types/simple/express/src/observability/index.ts