diff --git a/packages/create-llama/create-app.ts b/packages/create-llama/create-app.ts index 6c684d9decf6718a91970f7cbedf333545af6f65..ad3343d30f3ce5ed5e8644bd2c4b35932453320e 100644 --- a/packages/create-llama/create-app.ts +++ b/packages/create-llama/create-app.ts @@ -36,7 +36,7 @@ export async function createApp({ vectorDb, externalPort, postInstallAction, - dataSource, + contextFile, }: InstallAppArgs): Promise<void> { const root = path.resolve(appPath); @@ -80,7 +80,7 @@ export async function createApp({ vectorDb, externalPort, postInstallAction, - dataSource, + contextFile, }; if (frontend) { diff --git a/packages/create-llama/helpers/index.ts b/packages/create-llama/helpers/index.ts index ee3d5a94cfacfd1c05317f6d2440988b1556ed4f..40c1b4eb92fe9356b51474618eb6383f678527ae 100644 --- a/packages/create-llama/helpers/index.ts +++ b/packages/create-llama/helpers/index.ts @@ -13,10 +13,9 @@ import { installPythonTemplate } from "./python"; import { downloadAndExtractRepo } from "./repo"; import { InstallTemplateArgs, - TemplateDataSource, + TemplateEngine, TemplateFramework, TemplateVectorDB, - WebSourceConfig, } from "./types"; import { installTSTemplate } from "./typescript"; @@ -27,7 +26,6 @@ const createEnvLocalFile = async ( vectorDb?: TemplateVectorDB; model?: string; framework?: TemplateFramework; - dataSource?: TemplateDataSource; }, ) => { const envFileName = ".env"; @@ -60,30 +58,48 @@ const createEnvLocalFile = async ( } } - switch (opts?.dataSource?.type) { - case "web": { - let webConfig = opts?.dataSource.config as WebSourceConfig; - content += `# web loader config\n`; - content += `BASE_URL=${webConfig.baseUrl}\n`; - content += `URL_PREFIX=${webConfig.baseUrl}\n`; - content += `MAX_DEPTH=${webConfig.depth}\n`; - break; - } - } - if (content) { await fs.writeFile(path.join(root, envFileName), content); console.log(`Created '${envFileName}' file. Please check the settings.`); } }; -const installDependencies = async ( +const copyTestData = async ( + root: string, framework: TemplateFramework, packageManager?: PackageManager, + engine?: TemplateEngine, openAiKey?: string, vectorDb?: TemplateVectorDB, + contextFile?: string, + // eslint-disable-next-line max-params ) => { - if (packageManager) { + if (engine === "context") { + const destPath = path.join(root, "data"); + if (contextFile) { + console.log(`\nCopying provided file to ${cyan(destPath)}\n`); + await fs.mkdir(destPath, { recursive: true }); + await fs.copyFile( + contextFile, + path.join(destPath, path.basename(contextFile)), + ); + } else { + const srcPath = path.join( + __dirname, + "..", + "templates", + "components", + "data", + ); + console.log(`\nCopying test data to ${cyan(destPath)}\n`); + await copy("**", destPath, { + parents: true, + cwd: srcPath, + }); + } + } + + if (packageManager && engine === "context") { const runGenerate = `${cyan( framework === "fastapi" ? "poetry run python app/engine/generate.py" @@ -115,31 +131,6 @@ const installDependencies = async ( } }; -const copyTestData = async (root: string, contextFile?: string) => { - const destPath = path.join(root, "data"); - if (contextFile) { - console.log(`\nCopying provided file to ${cyan(destPath)}\n`); - await fs.mkdir(destPath, { recursive: true }); - await fs.copyFile( - contextFile, - path.join(destPath, path.basename(contextFile)), - ); - } else { - const srcPath = path.join( - __dirname, - "..", - "templates", - "components", - "data", - ); - console.log(`\nCopying test data to ${cyan(destPath)}\n`); - await copy("**", destPath, { - parents: true, - cwd: srcPath, - }); - } -}; - const installCommunityProject = async ({ root, communityProjectPath, @@ -183,21 +174,18 @@ export const installTemplate = async ( vectorDb: props.vectorDb, model: props.model, framework: props.framework, - dataSource: props.dataSource, }); - if (props.engine === "context") { - if (props.dataSource?.type === "file") { - // Copy test pdf file - await copyTestData(props.root, props.framework); - } - installDependencies( - props.framework, - props.packageManager, - props.openAiKey, - props.vectorDb, - ); - } + // Copy test pdf file + await copyTestData( + props.root, + props.framework, + props.packageManager, + props.engine, + props.openAiKey, + props.vectorDb, + props.contextFile, + ); } else { // this is a frontend for a full-stack app, create .env file with model information const content = `MODEL=${props.model}\nNEXT_PUBLIC_MODEL=${props.model}\n`; diff --git a/packages/create-llama/helpers/python.ts b/packages/create-llama/helpers/python.ts index 8b0d717029c2467790d080bc1cdf47448b80397b..015e2c269033eb3a4afed8e03428cc6959a32503 100644 --- a/packages/create-llama/helpers/python.ts +++ b/packages/create-llama/helpers/python.ts @@ -126,7 +126,6 @@ export const installPythonTemplate = async ({ framework, engine, vectorDb, - dataSource, postInstallAction, }: Pick< InstallTemplateArgs, @@ -135,7 +134,6 @@ export const installPythonTemplate = async ({ | "template" | "engine" | "vectorDb" - | "dataSource" | "postInstallAction" >) => { console.log("\nInitializing Python project with template:", template, "\n"); @@ -175,24 +173,10 @@ export const installPythonTemplate = async ({ "python", vectorDb || "none", ); - const enginePath = path.join(root, "app", "engine"); - await copy("**", path.join(root, "app", "engine"), { parents: true, cwd: VectorDBPath, }); - if (dataSource?.type !== "none" && dataSource?.type !== undefined) { - const loaderPath = path.join( - compPath, - "loaders", - "python", - dataSource.type, - ); - await copy("**", enginePath, { - parents: true, - cwd: loaderPath, - }); - } } const addOnDependencies = getAdditionalDependencies(vectorDb); diff --git a/packages/create-llama/helpers/types.ts b/packages/create-llama/helpers/types.ts index e26608609e3be1402f4454f00640bc127e133b0d..d7e6e92adc6b0fc7d8dd703f80c711a496d1172f 100644 --- a/packages/create-llama/helpers/types.ts +++ b/packages/create-llama/helpers/types.ts @@ -6,18 +6,6 @@ export type TemplateEngine = "simple" | "context"; export type TemplateUI = "html" | "shadcn"; export type TemplateVectorDB = "none" | "mongo" | "pg"; export type TemplatePostInstallAction = "none" | "dependencies" | "runApp"; -export type TemplateDataSource = { - type: "none" | "file" | "web"; - config: TemplateDataSourceConfig; -}; -export type FileSourceConfig = { - contextFile?: string; -}; -export type WebSourceConfig = { - baseUrl?: string; - depth?: number; -}; -export type TemplateDataSourceConfig = FileSourceConfig | WebSourceConfig; export interface InstallTemplateArgs { appName: string; @@ -27,8 +15,8 @@ export interface InstallTemplateArgs { template: TemplateType; framework: TemplateFramework; engine: TemplateEngine; + contextFile?: string; ui: TemplateUI; - dataSource?: TemplateDataSource; eslint: boolean; customApiPath?: string; openAiKey?: string; diff --git a/packages/create-llama/index.ts b/packages/create-llama/index.ts index 81b64d62239edd3248409944fa8ad352e6a801f3..5978dd37af7daa6b8412481904add75165776e13 100644 --- a/packages/create-llama/index.ts +++ b/packages/create-llama/index.ts @@ -241,7 +241,7 @@ async function run(): Promise<void> { vectorDb: program.vectorDb, externalPort: program.externalPort, postInstallAction: program.postInstallAction, - dataSource: program.dataSource, + contextFile: program.contextFile, }); conf.set("preferences", preferences); diff --git a/packages/create-llama/questions.ts b/packages/create-llama/questions.ts index 36d07cc69c2fb8b10f39baa23feb8ec1d6e6edac..531a8642ed80151347f4994b8cd4b436cf71f335 100644 --- a/packages/create-llama/questions.ts +++ b/packages/create-llama/questions.ts @@ -40,10 +40,6 @@ const defaults: QuestionArgs = { communityProjectPath: "", llamapack: "", postInstallAction: "dependencies", - dataSource: { - type: "none", - config: {}, - }, }; const handlers = { @@ -382,9 +378,6 @@ export const askQuestions = async ( if (process.platform === "win32" || process.platform === "darwin") { choices.push({ title: "Use a local PDF file", value: "localFile" }); } - if (program.framework === "fastapi") { - choices.push({ title: "Use website content", value: "web" }); - } const { dataSource } = await prompts( { @@ -396,47 +389,20 @@ export const askQuestions = async ( }, handlers, ); - // Initialize with default config - program.dataSource = getPrefOrDefault("dataSource"); - if (program.dataSource) { - switch (dataSource) { - case "simple": - program.engine = "simple"; - break; - case "exampleFile": - program.engine = "context"; - break; - case "localFile": - program.engine = "context"; - program.dataSource.type = "file"; - // If the user selected the "pdf" option, ask them to select a file - program.dataSource.config = { - contextFile: await selectPDFFile(), - }; - break; - case "web": - program.engine = "context"; - program.dataSource.type = "web"; - break; - } + switch (dataSource) { + case "simple": + program.engine = "simple"; + break; + case "exampleFile": + program.engine = "context"; + break; + case "localFile": + program.engine = "context"; + // If the user selected the "pdf" option, ask them to select a file + program.contextFile = await selectPDFFile(); + break; } } - - if (program.dataSource?.type === "web" && program.framework === "fastapi") { - const { baseUrl } = await prompts( - { - type: "text", - name: "baseUrl", - message: "Please provide base URL of the website:", - initial: "https://ts.llamaindex.ai/modules/", - }, - handlers, - ); - program.dataSource.config = { - baseUrl: baseUrl, - depth: 2, - }; - } if (program.engine !== "simple" && !program.vectorDb) { if (ciInfo.isCI) { program.vectorDb = getPrefOrDefault("vectorDb"); diff --git a/packages/create-llama/templates/components/loaders/python/file/loader.py b/packages/create-llama/templates/components/loaders/python/file/loader.py deleted file mode 100644 index d343cec34a60f4c3004d9c00c53f7f3b7734bf82..0000000000000000000000000000000000000000 --- a/packages/create-llama/templates/components/loaders/python/file/loader.py +++ /dev/null @@ -1,8 +0,0 @@ -import os -from app.engine.constants import DATA_DIR -from llama_index import VectorStoreIndex, download_loader -from llama_index import SimpleDirectoryReader - - -def get_documents(): - return SimpleDirectoryReader(DATA_DIR).load_data() diff --git a/packages/create-llama/templates/components/loaders/python/web/loader.py b/packages/create-llama/templates/components/loaders/python/web/loader.py deleted file mode 100644 index 026dd1019e555d438b896a2cf9a7e7161fa1d189..0000000000000000000000000000000000000000 --- a/packages/create-llama/templates/components/loaders/python/web/loader.py +++ /dev/null @@ -1,15 +0,0 @@ -import os -from llama_index import VectorStoreIndex, download_loader - - -def get_documents(): - WholeSiteReader = download_loader("WholeSiteReader") - - # Initialize the scraper with a prefix URL and maximum depth - scraper = WholeSiteReader( - prefix=os.environ.get("URL_PREFIX"), max_depth=int(os.environ.get("MAX_DEPTH")) - ) - # Start scraping from a base URL - documents = scraper.load_data(base_url=os.environ.get("BASE_URL")) - - return documents diff --git a/packages/create-llama/templates/components/vectordbs/python/mongo/generate.py b/packages/create-llama/templates/components/vectordbs/python/mongo/generate.py index f52b3b48b1fdecd2102b172d1a85d356a2b2338c..fe0ee9aa016d5adab9d95be06f2e0708f8aa903d 100644 --- a/packages/create-llama/templates/components/vectordbs/python/mongo/generate.py +++ b/packages/create-llama/templates/components/vectordbs/python/mongo/generate.py @@ -7,7 +7,6 @@ from llama_index.vector_stores import MongoDBAtlasVectorSearch from app.engine.constants import DATA_DIR from app.engine.context import create_service_context -from app.engine.loader import get_documents from llama_index import ( @@ -23,7 +22,7 @@ logger = logging.getLogger() def generate_datasource(service_context): logger.info("Creating new index") # load the documents and create the index - documents = get_documents() + documents = SimpleDirectoryReader(DATA_DIR).load_data() store = MongoDBAtlasVectorSearch( db_name=os.environ["MONGODB_DATABASE"], collection_name=os.environ["MONGODB_VECTORS"], diff --git a/packages/create-llama/templates/components/vectordbs/python/none/generate.py b/packages/create-llama/templates/components/vectordbs/python/none/generate.py index 7ff20012e9fbd1d3189ccf9527ef928ec1a85a12..3c4cd6a9e310f3f2e2f7e4709e94b9073282f151 100644 --- a/packages/create-llama/templates/components/vectordbs/python/none/generate.py +++ b/packages/create-llama/templates/components/vectordbs/python/none/generate.py @@ -4,7 +4,6 @@ from dotenv import load_dotenv from app.engine.constants import DATA_DIR, STORAGE_DIR from app.engine.context import create_service_context -from app.engine.loader import get_documents load_dotenv() @@ -20,7 +19,7 @@ logger = logging.getLogger() def generate_datasource(service_context): logger.info("Creating new index") # load the documents and create the index - documents = get_documents() + documents = SimpleDirectoryReader(DATA_DIR).load_data() index = VectorStoreIndex.from_documents(documents, service_context=service_context) # store it for later index.storage_context.persist(STORAGE_DIR) diff --git a/packages/create-llama/templates/components/vectordbs/python/pg/generate.py b/packages/create-llama/templates/components/vectordbs/python/pg/generate.py index 5c77ee07e910349c19bd6320c359d3885aff879b..ee07e7a4ec3f19a83e731a265e4451b68915330d 100644 --- a/packages/create-llama/templates/components/vectordbs/python/pg/generate.py +++ b/packages/create-llama/templates/components/vectordbs/python/pg/generate.py @@ -6,7 +6,6 @@ import logging from app.engine.constants import DATA_DIR from app.engine.context import create_service_context from app.engine.utils import init_pg_vector_store_from_env -from app.engine.loader import get_documents from llama_index import ( SimpleDirectoryReader, @@ -21,7 +20,7 @@ logger = logging.getLogger() def generate_datasource(service_context): logger.info("Creating new index") # load the documents and create the index - documents = get_documents() + documents = SimpleDirectoryReader(DATA_DIR).load_data() store = init_pg_vector_store_from_env() storage_context = StorageContext.from_defaults(vector_store=store) VectorStoreIndex.from_documents(