Skip to content
Snippets Groups Projects
Unverified Commit b13fb36d authored by Marcus Schiesser's avatar Marcus Schiesser Committed by GitHub
Browse files

Merge pull request #295 from run-llama/ms/cl-python-add-mongodb

Feat: Add MongoDB Vector DB support for Python projects in create-llama
parents a7eb59f4 d8fe65a2
Branches
Tags
No related merge requests found
Showing
with 458 additions and 282 deletions
......@@ -48,6 +48,7 @@
"picocolors": "1.0.0",
"prompts": "2.1.0",
"rimraf": "^5.0.5",
"smol-toml": "^1.1.3",
"tar": "6.1.15",
"terminal-link": "^3.0.0",
"update-check": "1.5.4",
......
......@@ -239,28 +239,24 @@ export const askQuestions = async (
if (ciInfo.isCI) {
program.vectorDb = getPrefOrDefault("vectorDb");
} else {
if (program.framework === "fastapi") {
program.vectorDb = "none";
} else {
const { vectorDb } = await prompts(
{
type: "select",
name: "vectorDb",
message: "Would you like to use a vector database?",
choices: [
{
title: "No, just store the data in the file system",
value: "none",
},
{ title: "MongoDB", value: "mongo" },
],
initial: 0,
},
handlers,
);
program.vectorDb = vectorDb;
preferences.vectorDb = vectorDb;
}
const { vectorDb } = await prompts(
{
type: "select",
name: "vectorDb",
message: "Would you like to use a vector database?",
choices: [
{
title: "No, just store the data in the file system",
value: "none",
},
{ title: "MongoDB", value: "mongo" },
],
initial: 0,
},
handlers,
);
program.vectorDb = vectorDb;
preferences.vectorDb = vectorDb;
}
}
......
DATA_DIR = "data" # directory containing the documents to index
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 20
from llama_index import ServiceContext
from app.context import create_base_context
from app.engine.constants import CHUNK_SIZE, CHUNK_OVERLAP
def create_service_context():
base = create_base_context()
return ServiceContext.from_defaults(
llm=base.llm,
embed_model=base.embed_model,
chunk_size=CHUNK_SIZE,
chunk_overlap=CHUNK_OVERLAP,
)
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.vector_stores import MongoDBAtlasVectorSearch
from app.engine.constants import DATA_DIR
from app.engine.context import create_service_context
from llama_index import (
SimpleDirectoryReader,
VectorStoreIndex,
StorageContext,
)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource(service_context):
logger.info("Creating new index")
# load the documents and create the index
documents = SimpleDirectoryReader(DATA_DIR).load_data()
store = MongoDBAtlasVectorSearch(
db_name=os.environ["MONGODB_DATABASE"],
collection_name=os.environ["MONGODB_VECTORS"],
index_name=os.environ["MONGODB_VECTOR_INDEX"],
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
service_context=service_context,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(
f"Successfully created embeddings in the MongoDB collection {os.environ['MONGODB_VECTORS']}"
)
logger.info(
"""IMPORTANT: You can't query your index yet because you need to create a vector search index in MongoDB's UI now.
See https://github.com/run-llama/mongodb-demo/tree/main?tab=readme-ov-file#create-a-vector-search-index"""
)
if __name__ == "__main__":
generate_datasource(create_service_context())
import logging
import os
from llama_index import (
VectorStoreIndex,
)
from llama_index.vector_stores import MongoDBAtlasVectorSearch
from app.engine.context import create_service_context
def get_chat_engine():
service_context = create_service_context()
logger = logging.getLogger("uvicorn")
logger.info("Connecting to index from MongoDB...")
store = MongoDBAtlasVectorSearch(
db_name=os.environ["MONGODB_DATABASE"],
collection_name=os.environ["MONGODB_VECTORS"],
index_name=os.environ["MONGODB_VECTOR_INDEX"],
)
index = VectorStoreIndex.from_vector_store(store, service_context)
logger.info("Finished connecting to index from MongoDB.")
return index.as_chat_engine(similarity_top_k=5)
......@@ -11,7 +11,7 @@ import { STORAGE_DIR, checkRequiredEnvVars } from "./shared.mjs";
dotenv.config();
const mongoUri = process.env.MONGODB_URI;
const mongoUri = process.env.MONGO_URI;
const databaseName = process.env.MONGODB_DATABASE;
const vectorCollectionName = process.env.MONGODB_VECTORS;
const indexName = process.env.MONGODB_VECTOR_INDEX;
......
......@@ -11,7 +11,7 @@ import { checkRequiredEnvVars, CHUNK_OVERLAP, CHUNK_SIZE } from "./shared.mjs";
async function getDataSource(llm: LLM) {
checkRequiredEnvVars();
const client = new MongoClient(process.env.MONGODB_URI!);
const client = new MongoClient(process.env.MONGO_URI!);
const serviceContext = serviceContextFromDefaults({
llm,
chunkSize: CHUNK_SIZE,
......
......@@ -3,7 +3,7 @@ export const CHUNK_SIZE = 512;
export const CHUNK_OVERLAP = 20;
const REQUIRED_ENV_VARS = [
"MONGODB_URI",
"MONGO_URI",
"MONGODB_DATABASE",
"MONGODB_VECTORS",
"MONGODB_VECTOR_INDEX",
......
......@@ -2,20 +2,20 @@ import { copy } from "../helpers/copy";
import { callPackageManager } from "../helpers/install";
import fs from "fs/promises";
import os from "os";
import path from "path";
import { bold, cyan } from "picocolors";
import { version } from "../../core/package.json";
import { cyan } from "picocolors";
import { COMMUNITY_OWNER, COMMUNITY_REPO } from "../helpers/constant";
import { PackageManager } from "../helpers/get-pkg-manager";
import { downloadAndExtractRepo } from "../helpers/repo";
import { installPythonTemplate } from "./python";
import {
InstallTemplateArgs,
TemplateEngine,
TemplateFramework,
TemplateVectorDB,
} from "./types";
import { installTSTemplate } from "./typescript";
const createEnvLocalFile = async (
root: string,
......@@ -42,7 +42,8 @@ const createEnvLocalFile = async (
switch (opts?.vectorDb) {
case "mongo": {
content += `MONGODB_URI=\n`;
content += `# For generating a connection URI, see https://www.mongodb.com/docs/guides/atlas/connection-string\n`;
content += `MONGO_URI=\n`;
content += `MONGODB_DATABASE=\n`;
content += `MONGODB_VECTORS=\n`;
content += `MONGODB_VECTOR_INDEX=\n`;
......@@ -101,259 +102,6 @@ const copyTestData = async (
}
};
const rename = (name: string) => {
switch (name) {
case "gitignore":
case "eslintrc.json": {
return `.${name}`;
}
// README.md is ignored by webpack-asset-relocator-loader used by ncc:
// https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227
case "README-template.md": {
return "README.md";
}
default: {
return name;
}
}
};
/**
* Install a LlamaIndex internal template to a given `root` directory.
*/
const installTSTemplate = async ({
appName,
root,
packageManager,
isOnline,
template,
framework,
engine,
ui,
eslint,
customApiPath,
forBackend,
model,
vectorDb,
}: InstallTemplateArgs) => {
console.log(bold(`Using ${packageManager}.`));
/**
* Copy the template files to the target directory.
*/
console.log("\nInitializing project with template:", template, "\n");
const templatePath = path.join(__dirname, "types", template, framework);
const copySource = ["**"];
if (!eslint) copySource.push("!eslintrc.json");
await copy(copySource, root, {
parents: true,
cwd: templatePath,
rename,
});
/**
* If the backend is next.js, rename next.config.app.js to next.config.js
* If not, rename next.config.static.js to next.config.js
*/
if (framework == "nextjs" && forBackend === "nextjs") {
const nextConfigAppPath = path.join(root, "next.config.app.js");
const nextConfigPath = path.join(root, "next.config.js");
await fs.rename(nextConfigAppPath, nextConfigPath);
// delete next.config.static.js
const nextConfigStaticPath = path.join(root, "next.config.static.js");
await fs.rm(nextConfigStaticPath);
} else if (framework == "nextjs" && typeof forBackend === "undefined") {
const nextConfigStaticPath = path.join(root, "next.config.static.js");
const nextConfigPath = path.join(root, "next.config.js");
await fs.rename(nextConfigStaticPath, nextConfigPath);
// delete next.config.app.js
const nextConfigAppPath = path.join(root, "next.config.app.js");
await fs.rm(nextConfigAppPath);
}
/**
* Copy the selected chat engine files to the target directory and reference it.
*/
let relativeEngineDestPath;
const compPath = path.join(__dirname, "components");
if (engine && (framework === "express" || framework === "nextjs")) {
console.log("\nUsing chat engine:", engine, "\n");
let vectorDBFolder: string = engine;
if (engine !== "simple" && vectorDb) {
console.log("\nUsing vector DB:", vectorDb, "\n");
vectorDBFolder = vectorDb;
}
const VectorDBPath = path.join(
compPath,
"vectordbs",
"typescript",
vectorDBFolder,
);
relativeEngineDestPath =
framework === "nextjs"
? path.join("app", "api", "chat")
: path.join("src", "controllers");
await copy("**", path.join(root, relativeEngineDestPath, "engine"), {
parents: true,
cwd: VectorDBPath,
});
}
/**
* Copy the selected UI files to the target directory and reference it.
*/
if (framework === "nextjs" && ui !== "shadcn") {
console.log("\nUsing UI:", ui, "\n");
const uiPath = path.join(compPath, "ui", ui);
const destUiPath = path.join(root, "app", "components", "ui");
// remove the default ui folder
await fs.rm(destUiPath, { recursive: true });
// copy the selected ui folder
await copy("**", destUiPath, {
parents: true,
cwd: uiPath,
rename,
});
}
/**
* Update the package.json scripts.
*/
const packageJsonFile = path.join(root, "package.json");
const packageJson: any = JSON.parse(
await fs.readFile(packageJsonFile, "utf8"),
);
packageJson.name = appName;
packageJson.version = "0.1.0";
packageJson.dependencies = {
...packageJson.dependencies,
llamaindex: version,
};
if (framework === "nextjs" && customApiPath) {
console.log(
"\nUsing external API with custom API path:",
customApiPath,
"\n",
);
// remove the default api folder
const apiPath = path.join(root, "app", "api");
await fs.rm(apiPath, { recursive: true });
// modify the dev script to use the custom api path
packageJson.scripts = {
...packageJson.scripts,
dev: `cross-env NEXT_PUBLIC_CHAT_API=${customApiPath} next dev`,
};
}
if (engine === "context" && relativeEngineDestPath) {
// add generate script if using context engine
packageJson.scripts = {
...packageJson.scripts,
generate: `node ${path.join(
relativeEngineDestPath,
"engine",
"generate.mjs",
)}`,
};
}
if (framework === "nextjs" && ui === "html") {
// remove shadcn dependencies if html ui is selected
packageJson.dependencies = {
...packageJson.dependencies,
"tailwind-merge": undefined,
"@radix-ui/react-slot": undefined,
"class-variance-authority": undefined,
clsx: undefined,
"lucide-react": undefined,
remark: undefined,
"remark-code-import": undefined,
"remark-gfm": undefined,
"remark-math": undefined,
"react-markdown": undefined,
"react-syntax-highlighter": undefined,
};
packageJson.devDependencies = {
...packageJson.devDependencies,
"@types/react-syntax-highlighter": undefined,
};
}
if (!eslint) {
// Remove packages starting with "eslint" from devDependencies
packageJson.devDependencies = Object.fromEntries(
Object.entries(packageJson.devDependencies).filter(
([key]) => !key.startsWith("eslint"),
),
);
}
await fs.writeFile(
packageJsonFile,
JSON.stringify(packageJson, null, 2) + os.EOL,
);
console.log("\nInstalling dependencies:");
for (const dependency in packageJson.dependencies)
console.log(`- ${cyan(dependency)}`);
console.log("\nInstalling devDependencies:");
for (const dependency in packageJson.devDependencies)
console.log(`- ${cyan(dependency)}`);
console.log();
await callPackageManager(packageManager, isOnline);
};
const installPythonTemplate = async ({
root,
template,
framework,
engine,
}: Pick<InstallTemplateArgs, "root" | "framework" | "template" | "engine">) => {
console.log("\nInitializing Python project with template:", template, "\n");
const templatePath = path.join(__dirname, "types", template, framework);
await copy("**", root, {
parents: true,
cwd: templatePath,
rename(name) {
switch (name) {
case "gitignore": {
return `.${name}`;
}
// README.md is ignored by webpack-asset-relocator-loader used by ncc:
// https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227
case "README-template.md": {
return "README.md";
}
default: {
return name;
}
}
},
});
if (engine === "context") {
const compPath = path.join(__dirname, "components");
const VectorDBPath = path.join(compPath, "vectordbs", "python", "none");
await copy("**", path.join(root, "app", "engine"), {
parents: true,
cwd: VectorDBPath,
});
}
console.log(
"\nPython project, dependencies won't be installed automatically.\n",
);
};
const installCommunityProject = async ({
root,
communityProjectPath,
......
import fs from "fs/promises";
import path from "path";
import { cyan } from "picocolors";
import { parse, stringify } from "smol-toml";
import { copy } from "../helpers/copy";
import { InstallTemplateArgs, TemplateVectorDB } from "./types";
interface Dependency {
name: string;
version: string;
}
const getAdditionalDependencies = (vectorDb?: TemplateVectorDB) => {
const dependencies: Dependency[] = [];
switch (vectorDb) {
case "mongo": {
dependencies.push({
name: "pymongo",
version: "^4.6.1",
});
break;
}
}
return dependencies;
};
const addDependencies = async (
projectDir: string,
dependencies: Dependency[],
) => {
if (dependencies.length === 0) return;
const FILENAME = "pyproject.toml";
try {
// Parse toml file
const file = path.join(projectDir, FILENAME);
const fileContent = await fs.readFile(file, "utf8");
const fileParsed = parse(fileContent);
// Modify toml dependencies
const tool = fileParsed.tool as any;
const dependencies = tool.poetry.dependencies as any;
for (const dependency of dependencies) {
dependencies[dependency.name] = dependency.version;
}
// Write toml file
const newFileContent = stringify(fileParsed);
await fs.writeFile(file, newFileContent);
const dependenciesString = dependencies
.map((d: Dependency) => d.name)
.join(", ");
console.log(`\nAdded ${dependenciesString} to ${cyan(FILENAME)}\n`);
} catch (error) {
console.log(
`Error while updating dependencies for Poetry project file ${FILENAME}\n`,
error,
);
console.log(error);
}
};
export const installPythonTemplate = async ({
root,
template,
framework,
engine,
vectorDb,
}: Pick<
InstallTemplateArgs,
"root" | "framework" | "template" | "engine" | "vectorDb"
>) => {
console.log("\nInitializing Python project with template:", template, "\n");
const templatePath = path.join(__dirname, "types", template, framework);
await copy("**", root, {
parents: true,
cwd: templatePath,
rename(name) {
switch (name) {
case "gitignore": {
return `.${name}`;
}
// README.md is ignored by webpack-asset-relocator-loader used by ncc:
// https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227
case "README-template.md": {
return "README.md";
}
default: {
return name;
}
}
},
});
if (engine === "context") {
const compPath = path.join(__dirname, "components");
const VectorDBPath = path.join(
compPath,
"vectordbs",
"python",
vectorDb || "none",
);
await copy("**", path.join(root, "app", "engine"), {
parents: true,
cwd: VectorDBPath,
});
}
const addOnDependencies = getAdditionalDependencies(vectorDb);
await addDependencies(root, addOnDependencies);
console.log(
"\nPython project, dependencies won't be installed automatically.\n",
);
};
import fs from "fs/promises";
import os from "os";
import path from "path";
import { bold, cyan } from "picocolors";
import { version } from "../../core/package.json";
import { copy } from "../helpers/copy";
import { callPackageManager } from "../helpers/install";
import { InstallTemplateArgs } from "./types";
const rename = (name: string) => {
switch (name) {
case "gitignore":
case "eslintrc.json": {
return `.${name}`;
}
// README.md is ignored by webpack-asset-relocator-loader used by ncc:
// https://github.com/vercel/webpack-asset-relocator-loader/blob/e9308683d47ff507253e37c9bcbb99474603192b/src/asset-relocator.js#L227
case "README-template.md": {
return "README.md";
}
default: {
return name;
}
}
};
/**
* Install a LlamaIndex internal template to a given `root` directory.
*/
export const installTSTemplate = async ({
appName,
root,
packageManager,
isOnline,
template,
framework,
engine,
ui,
eslint,
customApiPath,
forBackend,
vectorDb,
}: InstallTemplateArgs) => {
console.log(bold(`Using ${packageManager}.`));
/**
* Copy the template files to the target directory.
*/
console.log("\nInitializing project with template:", template, "\n");
const templatePath = path.join(__dirname, "types", template, framework);
const copySource = ["**"];
if (!eslint) copySource.push("!eslintrc.json");
await copy(copySource, root, {
parents: true,
cwd: templatePath,
rename,
});
/**
* If the backend is next.js, rename next.config.app.js to next.config.js
* If not, rename next.config.static.js to next.config.js
*/
if (framework == "nextjs" && forBackend === "nextjs") {
const nextConfigAppPath = path.join(root, "next.config.app.js");
const nextConfigPath = path.join(root, "next.config.js");
await fs.rename(nextConfigAppPath, nextConfigPath);
// delete next.config.static.js
const nextConfigStaticPath = path.join(root, "next.config.static.js");
await fs.rm(nextConfigStaticPath);
} else if (framework == "nextjs" && typeof forBackend === "undefined") {
const nextConfigStaticPath = path.join(root, "next.config.static.js");
const nextConfigPath = path.join(root, "next.config.js");
await fs.rename(nextConfigStaticPath, nextConfigPath);
// delete next.config.app.js
const nextConfigAppPath = path.join(root, "next.config.app.js");
await fs.rm(nextConfigAppPath);
}
/**
* Copy the selected chat engine files to the target directory and reference it.
*/
let relativeEngineDestPath;
const compPath = path.join(__dirname, "components");
if (engine && (framework === "express" || framework === "nextjs")) {
console.log("\nUsing chat engine:", engine, "\n");
let vectorDBFolder: string = engine;
if (engine !== "simple" && vectorDb) {
console.log("\nUsing vector DB:", vectorDb, "\n");
vectorDBFolder = vectorDb;
}
const VectorDBPath = path.join(
compPath,
"vectordbs",
"typescript",
vectorDBFolder,
);
relativeEngineDestPath =
framework === "nextjs"
? path.join("app", "api", "chat")
: path.join("src", "controllers");
await copy("**", path.join(root, relativeEngineDestPath, "engine"), {
parents: true,
cwd: VectorDBPath,
});
}
/**
* Copy the selected UI files to the target directory and reference it.
*/
if (framework === "nextjs" && ui !== "shadcn") {
console.log("\nUsing UI:", ui, "\n");
const uiPath = path.join(compPath, "ui", ui);
const destUiPath = path.join(root, "app", "components", "ui");
// remove the default ui folder
await fs.rm(destUiPath, { recursive: true });
// copy the selected ui folder
await copy("**", destUiPath, {
parents: true,
cwd: uiPath,
rename,
});
}
/**
* Update the package.json scripts.
*/
const packageJsonFile = path.join(root, "package.json");
const packageJson: any = JSON.parse(
await fs.readFile(packageJsonFile, "utf8"),
);
packageJson.name = appName;
packageJson.version = "0.1.0";
packageJson.dependencies = {
...packageJson.dependencies,
llamaindex: version,
};
if (framework === "nextjs" && customApiPath) {
console.log(
"\nUsing external API with custom API path:",
customApiPath,
"\n",
);
// remove the default api folder
const apiPath = path.join(root, "app", "api");
await fs.rm(apiPath, { recursive: true });
// modify the dev script to use the custom api path
packageJson.scripts = {
...packageJson.scripts,
dev: `cross-env NEXT_PUBLIC_CHAT_API=${customApiPath} next dev`,
};
}
if (engine === "context" && relativeEngineDestPath) {
// add generate script if using context engine
packageJson.scripts = {
...packageJson.scripts,
generate: `node ${path.join(
relativeEngineDestPath,
"engine",
"generate.mjs",
)}`,
};
}
if (framework === "nextjs" && ui === "html") {
// remove shadcn dependencies if html ui is selected
packageJson.dependencies = {
...packageJson.dependencies,
"tailwind-merge": undefined,
"@radix-ui/react-slot": undefined,
"class-variance-authority": undefined,
clsx: undefined,
"lucide-react": undefined,
remark: undefined,
"remark-code-import": undefined,
"remark-gfm": undefined,
"remark-math": undefined,
"react-markdown": undefined,
"react-syntax-highlighter": undefined,
};
packageJson.devDependencies = {
...packageJson.devDependencies,
"@types/react-syntax-highlighter": undefined,
};
}
if (!eslint) {
// Remove packages starting with "eslint" from devDependencies
packageJson.devDependencies = Object.fromEntries(
Object.entries(packageJson.devDependencies).filter(
([key]) => !key.startsWith("eslint"),
),
);
}
await fs.writeFile(
packageJsonFile,
JSON.stringify(packageJson, null, 2) + os.EOL,
);
console.log("\nInstalling dependencies:");
for (const dependency in packageJson.dependencies)
console.log(`- ${cyan(dependency)}`);
console.log("\nInstalling devDependencies:");
for (const dependency in packageJson.devDependencies)
console.log(`- ${cyan(dependency)}`);
console.log();
await callPackageManager(packageManager, isOnline);
};
......@@ -312,6 +312,9 @@ importers:
rimraf:
specifier: ^5.0.5
version: 5.0.5
smol-toml:
specifier: ^1.1.3
version: 1.1.3
tar:
specifier: 6.1.15
version: 6.1.15
......@@ -14478,6 +14481,11 @@ packages:
yargs: 15.4.1
dev: true
 
/smol-toml@1.1.3:
resolution: {integrity: sha512-qTyy6Owjho1ISBmxj4HdrFWB2kMQ5RczU6J04OqslSfdSH656OIHuomHS4ZDvhwm37nig/uXyiTMJxlC9zIVfw==}
engines: {node: '>= 18', pnpm: '>= 8'}
dev: true
/snake-case@2.1.0:
resolution: {integrity: sha512-FMR5YoPFwOLuh4rRz92dywJjyKYZNLpMn1R5ujVpIYkbA9p01fq8RMg0FkO4M+Yobt4MjHeLTJVm5xFFBHSV2Q==}
dependencies:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment