diff --git a/helpers/index.ts b/helpers/index.ts index 60aec506218a5f4c975f46ddd8890191d8a13548..6a2173468b4bec8aaf8890890a651b34728e93af 100644 --- a/helpers/index.ts +++ b/helpers/index.ts @@ -84,13 +84,15 @@ const copyContextData = async ( // Copy file if (dataSource?.type === "file") { - if (dataSourceConfig.path) { - console.log(`\nCopying file to ${cyan(destPath)}\n`); + if (dataSourceConfig.paths) { await fs.mkdir(destPath, { recursive: true }); - await fs.copyFile( - dataSourceConfig.path, - path.join(destPath, path.basename(dataSourceConfig.path)), + console.log( + "Copying data from files:", + dataSourceConfig.paths.toString(), ); + for (const p of dataSourceConfig.paths) { + await fs.copyFile(p, path.join(destPath, path.basename(p))); + } } else { console.log("Missing file path in config"); process.exit(1); @@ -100,13 +102,20 @@ const copyContextData = async ( // Copy folder if (dataSource?.type === "folder") { - const srcPath = - dataSourceConfig.path ?? path.join(templatesDir, "components", "data"); - console.log(`\nCopying data to ${cyan(destPath)}\n`); - await copy("**", destPath, { - parents: true, - cwd: srcPath, - }); + // Example data does not have path config, set the default path + const srcPaths = dataSourceConfig.paths ?? [ + path.join(templatesDir, "components", "data"), + ]; + console.log("Copying data from folders: ", srcPaths); + for (const p of srcPaths) { + const folderName = path.basename(p); + const destFolderPath = path.join(destPath, folderName); + await fs.mkdir(destFolderPath, { recursive: true }); + await copy("**", destFolderPath, { + parents: true, + cwd: p, + }); + } return; } }; diff --git a/helpers/types.ts b/helpers/types.ts index bcc969e685eb56c6b1f730ca8147f43ee4322c70..f8db9a28d76e29356bdbaa4ac43099d25cf77aa3 100644 --- a/helpers/types.ts +++ b/helpers/types.ts @@ -19,7 +19,7 @@ export type TemplateDataSourceType = "none" | "file" | "folder" | "web"; export type TemplateObservability = "none" | "opentelemetry"; // Config for both file and folder export type FileSourceConfig = { - path?: string; + paths?: string[]; useLlamaParse?: boolean; }; export type WebSourceConfig = { diff --git a/questions.ts b/questions.ts index 3134c31515b20d5ee24c441e8c86e02a8f133232..bbf9322af8b680d749a5f8f672378a27b6b3a689 100644 --- a/questions.ts +++ b/questions.ts @@ -40,21 +40,22 @@ const MACOS_FILE_SELECTION_SCRIPT = ` osascript -l JavaScript -e ' a = Application.currentApplication(); a.includeStandardAdditions = true; - a.chooseFile({ withPrompt: "Please select a file to process:" }).toString() + a.chooseFile({ withPrompt: "Please select files to process:", multipleSelectionsAllowed: true }).map(file => file.toString()) '`; const MACOS_FOLDER_SELECTION_SCRIPT = ` osascript -l JavaScript -e ' a = Application.currentApplication(); a.includeStandardAdditions = true; - a.chooseFolder({ withPrompt: "Please select a folder to process:" }).toString() + a.chooseFolder({ withPrompt: "Please select folders to process:", multipleSelectionsAllowed: true }).map(folder => folder.toString()) '`; const WINDOWS_FILE_SELECTION_SCRIPT = ` Add-Type -AssemblyName System.Windows.Forms $openFileDialog = New-Object System.Windows.Forms.OpenFileDialog $openFileDialog.InitialDirectory = [Environment]::GetFolderPath('Desktop') +$openFileDialog.Multiselect = $true $result = $openFileDialog.ShowDialog() if ($result -eq 'OK') { - $openFileDialog.FileName + $openFileDialog.FileNames } `; const WINDOWS_FOLDER_SELECTION_SCRIPT = ` @@ -132,11 +133,14 @@ const getDataSourceChoices = (framework: TemplateFramework) => { ]; if (process.platform === "win32" || process.platform === "darwin") { choices.push({ - title: `Use a local file (${supportedContextFileTypes.join(", ")})`, + title: `Use local files (${supportedContextFileTypes.join(", ")})`, value: "localFile", }); choices.push({ - title: `Use a local folder`, + title: + process.platform === "win32" + ? "Use a local folder" + : "Use local folders", value: "localFolder", }); } @@ -173,9 +177,15 @@ const selectLocalContextData = async (type: TemplateDataSourceType) => { process.exit(1); } selectedPath = execSync(execScript, execOpts).toString().trim(); - if (type === "file") { - const fileType = path.extname(selectedPath); - if (!supportedContextFileTypes.includes(fileType)) { + const paths = + process.platform === "win32" + ? selectedPath.split("\r\n") + : selectedPath.split(", "); + for (const p of paths) { + if ( + type == "file" && + !supportedContextFileTypes.includes(path.extname(p)) + ) { console.log( red( `Please select a supported file type: ${supportedContextFileTypes}`, @@ -184,7 +194,7 @@ const selectLocalContextData = async (type: TemplateDataSourceType) => { process.exit(1); } } - return selectedPath; + return paths; } catch (error) { console.log( red( @@ -617,7 +627,7 @@ export const askQuestions = async ( program.dataSource = { type: fs.lstatSync(program.files).isDirectory() ? "folder" : "file", config: { - path: program.files, + paths: program.files.split(","), }, }; } @@ -655,7 +665,7 @@ export const askQuestions = async ( program.dataSource = { type: "file", config: { - path: await selectLocalContextData("file"), + paths: await selectLocalContextData("file"), }, }; break; @@ -664,7 +674,7 @@ export const askQuestions = async ( program.dataSource = { type: "folder", config: { - path: await selectLocalContextData("folder"), + paths: await selectLocalContextData("folder"), }, }; break; @@ -703,10 +713,9 @@ export const askQuestions = async ( // Is pdf file selected as data source or is it a folder data source const askingLlamaParse = dataSourceConfig.useLlamaParse === undefined && - (program.dataSource.type === "folder" - ? true - : dataSourceConfig.path && - path.extname(dataSourceConfig.path) === ".pdf"); + (program.dataSource.type === "folder" || + (program.dataSource.type === "file" && + dataSourceConfig.paths?.some((p) => path.extname(p) === ".pdf"))); // Ask if user wants to use LlamaParse if (askingLlamaParse) { diff --git a/templates/components/loaders/python/file/loader.py b/templates/components/loaders/python/file/loader.py index c1178444e255602c994b0ce45e84ca7baa61b404..40923709f247cb309715aa5af75b464022b5359f 100644 --- a/templates/components/loaders/python/file/loader.py +++ b/templates/components/loaders/python/file/loader.py @@ -4,4 +4,7 @@ DATA_DIR = "data" # directory containing the documents def get_documents(): - return SimpleDirectoryReader(DATA_DIR).load_data() + return SimpleDirectoryReader( + DATA_DIR, + recursive=True, + ).load_data() diff --git a/templates/components/loaders/python/llama_parse/loader.py b/templates/components/loaders/python/llama_parse/loader.py index 4c9fbf109f32918d70c06d08e0a671c5377b4f0b..efaf342139b8f318939242fa74344a7df9fc0761 100644 --- a/templates/components/loaders/python/llama_parse/loader.py +++ b/templates/components/loaders/python/llama_parse/loader.py @@ -13,5 +13,7 @@ def get_documents(): ) parser = LlamaParse(result_type="markdown", verbose=True, language="en") - reader = SimpleDirectoryReader(DATA_DIR, file_extractor={".pdf": parser}) + reader = SimpleDirectoryReader( + DATA_DIR, recursive=True, file_extractor={".pdf": parser} + ) return reader.load_data()