Skip to content
Snippets Groups Projects
Commit 2eaf11ba authored by Huu Le (Lee)'s avatar Huu Le (Lee) Committed by GitHub
Browse files

feat(create-llama) add folder selection & support more context data types (#489)

parent d7bcefa8
No related branches found
No related tags found
No related merge requests found
......@@ -12,6 +12,7 @@ import { isHavingPoetryLockFile, tryPoetryRun } from "./poetry";
import { installPythonTemplate } from "./python";
import { downloadAndExtractRepo } from "./repo";
import {
FileSourceConfig,
InstallTemplateArgs,
TemplateDataSource,
TemplateFramework,
......@@ -120,28 +121,41 @@ const installDependencies = async (
}
};
const copyContextData = async (root: string, contextFile?: string) => {
const copyContextData = async (
root: string,
dataSource?: TemplateDataSource,
) => {
const destPath = path.join(root, "data");
if (contextFile) {
console.log(`\nCopying provided file to ${cyan(destPath)}\n`);
await fs.mkdir(destPath, { recursive: true });
await fs.copyFile(
contextFile,
path.join(destPath, path.basename(contextFile)),
);
} else {
const srcPath = path.join(
__dirname,
"..",
"templates",
"components",
"data",
);
console.log(`\nCopying test data to ${cyan(destPath)}\n`);
let dataSourceConfig = dataSource?.config as FileSourceConfig;
// Copy file
if (dataSource?.type === "file") {
if (dataSourceConfig.path) {
console.log(`\nCopying file to ${cyan(destPath)}\n`);
await fs.mkdir(destPath, { recursive: true });
await fs.copyFile(
dataSourceConfig.path,
path.join(destPath, path.basename(dataSourceConfig.path)),
);
} else {
console.log("Missing file path in config");
process.exit(1);
}
return;
}
// Copy folder
if (dataSource?.type === "folder") {
let srcPath =
dataSourceConfig.path ??
path.join(__dirname, "..", "templates", "components", "data");
console.log(`\nCopying data to ${cyan(destPath)}\n`);
await copy("**", destPath, {
parents: true,
cwd: srcPath,
});
return;
}
};
......@@ -192,14 +206,7 @@ export const installTemplate = async (
});
if (props.engine === "context") {
if (
props.dataSource?.type === "file" &&
"contextFile" in props.dataSource.config
) {
await copyContextData(props.root, props.dataSource.config.contextFile);
} else {
await copyContextData(props.root);
}
await copyContextData(props.root, props.dataSource);
await installDependencies(
props.framework,
props.packageManager,
......
......@@ -169,6 +169,8 @@ export const installPythonTemplate = async ({
if (engine === "context") {
const compPath = path.join(__dirname, "..", "templates", "components");
// Copy engine code
let vectorDbDirName = vectorDb ?? "none";
const VectorDBPath = path.join(
compPath,
......@@ -177,17 +179,22 @@ export const installPythonTemplate = async ({
vectorDbDirName,
);
const enginePath = path.join(root, "app", "engine");
await copy("**", path.join(root, "app", "engine"), {
parents: true,
cwd: VectorDBPath,
});
let dataSourceDir = dataSource?.type ?? "file";
const loaderPath = path.join(compPath, "loaders", "python", dataSourceDir);
await copy("**", enginePath, {
parents: true,
cwd: loaderPath,
});
const dataSourceType = dataSource?.type;
if (dataSourceType !== undefined && dataSourceType !== "none") {
let loaderPath =
dataSourceType === "folder"
? path.join(compPath, "loaders", "python", "file")
: path.join(compPath, "loaders", "python", dataSourceType);
await copy("**", enginePath, {
parents: true,
cwd: loaderPath,
});
}
}
const addOnDependencies = getAdditionalDependencies(vectorDb);
......
......@@ -7,11 +7,13 @@ export type TemplateUI = "html" | "shadcn";
export type TemplateVectorDB = "none" | "mongo" | "pg";
export type TemplatePostInstallAction = "none" | "dependencies" | "runApp";
export type TemplateDataSource = {
type: "none" | "file" | "web";
type: TemplateDataSourceType;
config: TemplateDataSourceConfig;
};
export type TemplateDataSourceType = "none" | "file" | "folder" | "web";
// Config for both file and folder
export type FileSourceConfig = {
contextFile?: string;
path?: string;
};
export type WebSourceConfig = {
baseUrl?: string;
......
......@@ -83,6 +83,13 @@ const program = new Commander.Command(packageJson.name)
`
Select a framework to bootstrap the application with.
`,
)
.option(
"--files <path>",
`
Specify the path to a local file or folder for chatting.
`,
)
.option(
......
......@@ -5,19 +5,35 @@ import path from "path";
import { blue, green, red } from "picocolors";
import prompts from "prompts";
import { InstallAppArgs } from "./create-app";
import { TemplateFramework } from "./helpers";
import { TemplateDataSourceType, TemplateFramework } from "./helpers";
import { COMMUNITY_OWNER, COMMUNITY_REPO } from "./helpers/constant";
import { getAvailableLlamapackOptions } from "./helpers/llama-pack";
import { getRepoRootFolders } from "./helpers/repo";
export type QuestionArgs = Omit<InstallAppArgs, "appPath" | "packageManager">;
export type QuestionArgs = Omit<
InstallAppArgs,
"appPath" | "packageManager"
> & { files?: string };
// File extensions (with the leading dot) accepted when the user selects a
// single local file; compared against path.extname() of the selected path.
const supportedContextFileTypes = [
".pdf",
".doc",
".docx",
".xls",
".xlsx",
".csv",
];
// Shell command used on macOS ("darwin") to let the user pick a file:
// it runs osascript in JavaScript mode and calls chooseFile(); the command's
// output is read by the caller as the selected path.
const MACOS_FILE_SELECTION_SCRIPT = `
osascript -l JavaScript -e '
a = Application.currentApplication();
a.includeStandardAdditions = true;
a.chooseFile({ withPrompt: "Please select a file to process:" }).toString()
'`;
// Shell command used on macOS ("darwin") to let the user pick a folder:
// same as the file variant but calls chooseFolder(); the command's output is
// read by the caller as the selected path.
const MACOS_FOLDER_SELECTION_SCRIPT = `
osascript -l JavaScript -e '
a = Application.currentApplication();
a.includeStandardAdditions = true;
a.chooseFolder({ withPrompt: "Please select a folder to process:" }).toString()
'`;
const WINDOWS_FILE_SELECTION_SCRIPT = `
Add-Type -AssemblyName System.Windows.Forms
$openFileDialog = New-Object System.Windows.Forms.OpenFileDialog
......@@ -27,6 +43,15 @@ if ($result -eq 'OK') {
$openFileDialog.FileName
}
`;
// PowerShell script used on Windows ("win32") to let the user pick a folder:
// shows a System.Windows.Forms.FolderBrowserDialog and emits SelectedPath only
// when the dialog result is OK (nothing is emitted otherwise).
const WINDOWS_FOLDER_SELECTION_SCRIPT = `
Add-Type -AssemblyName System.windows.forms
$folderBrowser = New-Object System.Windows.Forms.FolderBrowserDialog
$dialogResult = $folderBrowser.ShowDialog()
if ($dialogResult -eq [System.Windows.Forms.DialogResult]::OK)
{
$folderBrowser.SelectedPath
}
`;
const defaults: QuestionArgs = {
template: "streaming",
......@@ -78,39 +103,70 @@ const getVectorDbChoices = (framework: TemplateFramework) => {
return displayedChoices;
};
const selectPDFFile = async () => {
// Popup to select a PDF file
/**
 * Builds the option list for the "Which data source would you like to use?"
 * prompt.
 *
 * The "simple" and "exampleFile" options are always present. The local file
 * and local folder options are added only when running on Windows or macOS,
 * and the website option only when the selected framework is "fastapi".
 *
 * @param framework - The framework chosen for the generated application.
 * @returns The choices, in display order, as `{ title, value }` objects.
 */
const getDataSourceChoices = (framework: TemplateFramework) => {
  const canPickLocalPath =
    process.platform === "win32" || process.platform === "darwin";
  const supportsWebSource = framework === "fastapi";

  return [
    {
      title: "No data, just a simple chat",
      value: "simple",
    },
    { title: "Use an example PDF", value: "exampleFile" },
    // Local selection options are limited to Windows and macOS.
    ...(canPickLocalPath
      ? [
          {
            title: `Use a local file (${supportedContextFileTypes})`,
            value: "localFile",
          },
          {
            title: `Use a local folder`,
            value: "localFolder",
          },
        ]
      : []),
    ...(supportsWebSource
      ? [{ title: "Use website content", value: "web" }]
      : []),
  ];
};
const selectLocalContextData = async (type: TemplateDataSourceType) => {
try {
let selectedFilePath: string = "";
let selectedPath: string = "";
let execScript: string;
let execOpts: any = {};
switch (process.platform) {
case "win32": // Windows
selectedFilePath = execSync(WINDOWS_FILE_SELECTION_SCRIPT, {
shell: "powershell.exe",
})
.toString()
.trim();
execScript =
type === "file"
? WINDOWS_FILE_SELECTION_SCRIPT
: WINDOWS_FOLDER_SELECTION_SCRIPT;
execOpts = { shell: "powershell.exe" };
break;
case "darwin": // MacOS
selectedFilePath = execSync(MACOS_FILE_SELECTION_SCRIPT)
.toString()
.trim();
execScript =
type === "file"
? MACOS_FILE_SELECTION_SCRIPT
: MACOS_FOLDER_SELECTION_SCRIPT;
break;
default: // Unsupported OS
console.log(red("Unsupported OS error!"));
process.exit(1);
}
// Check is pdf file
if (!selectedFilePath.endsWith(".pdf")) {
console.log(
red("Unsupported file error! Please select a valid PDF file!"),
);
process.exit(1);
selectedPath = execSync(execScript, execOpts).toString().trim();
if (type === "file") {
let fileType = path.extname(selectedPath);
if (!supportedContextFileTypes.includes(fileType)) {
console.log(
red(
`Please select a supported file type: ${supportedContextFileTypes}`,
),
);
process.exit(1);
}
}
return selectedFilePath;
return selectedPath;
} catch (error) {
console.log(
red(
"Got error when trying to select file! Please try again or select other options.",
"Got an error when trying to select local context data! Please try again or select another data source option.",
),
);
process.exit(1);
......@@ -369,30 +425,32 @@ export const askQuestions = async (
}
}
if (program.files) {
  // Passing --files implies the context engine. This has to be an assignment:
  // a bare `==` comparison is a no-op statement that leaves program.engine
  // unset, so the interactive data source question that follows would still
  // run and overwrite the data source derived from the given path.
  program.engine = "context";
  if (!fs.existsSync(program.files)) {
    console.log("File or folder not found");
    process.exit(1);
  } else {
    program.dataSource = {
      // A directory becomes a "folder" data source; anything else is a "file".
      type: fs.lstatSync(program.files).isDirectory() ? "folder" : "file",
      config: {
        path: program.files,
      },
    };
  }
}
if (!program.engine) {
if (ciInfo.isCI) {
program.engine = getPrefOrDefault("engine");
} else {
let choices = [
{
title: "No data, just a simple chat",
value: "simple",
},
{ title: "Use an example PDF", value: "exampleFile" },
];
if (process.platform === "win32" || process.platform === "darwin") {
choices.push({ title: "Use a local PDF file", value: "localFile" });
}
if (program.framework === "fastapi") {
choices.push({ title: "Use website content", value: "web" });
}
const { dataSource } = await prompts(
{
type: "select",
name: "dataSource",
message: "Which data source would you like to use?",
choices: choices,
choices: getDataSourceChoices(program.framework),
initial: 1,
},
handlers,
......@@ -403,18 +461,29 @@ export const askQuestions = async (
switch (dataSource) {
case "simple":
program.engine = "simple";
program.dataSource = { type: "none", config: {} };
break;
case "exampleFile":
program.engine = "context";
// example file is a context app with dataSource.type = file but has no config
program.dataSource = { type: "file", config: {} };
// Treat example as a folder data source with no config
program.dataSource = { type: "folder", config: {} };
break;
case "localFile":
program.engine = "context";
program.dataSource.type = "file";
// If the user selected the "pdf" option, ask them to select a file
program.dataSource.config = {
contextFile: await selectPDFFile(),
program.dataSource = {
type: "file",
config: {
path: await selectLocalContextData("file"),
},
};
break;
case "localFolder":
program.engine = "context";
program.dataSource = {
type: "folder",
config: {
path: await selectLocalContextData("folder"),
},
};
break;
case "web":
......@@ -424,56 +493,69 @@ export const askQuestions = async (
}
}
}
} else if (!program.dataSource) {
// Handle a case when engine is specified but dataSource is not
if (program.engine === "context") {
program.dataSource = {
type: "folder",
config: {},
};
} else if (program.engine === "simple") {
program.dataSource = {
type: "none",
config: {},
};
}
}
if (program.dataSource?.type === "web" && program.framework === "fastapi") {
let { baseUrl } = await prompts(
{
type: "text",
name: "baseUrl",
message: "Please provide base URL of the website:",
initial: "https://www.llamaindex.ai",
},
handlers,
);
try {
if (!baseUrl.includes("://")) {
baseUrl = `https://${baseUrl}`;
}
let checkUrl = new URL(baseUrl);
if (checkUrl.protocol !== "https:" && checkUrl.protocol !== "http:") {
throw new Error("Invalid protocol");
}
} catch (error) {
console.log(
red(
"Invalid URL provided! Please provide a valid URL (e.g. https://www.llamaindex.ai)",
),
);
process.exit(1);
}
program.dataSource.config = {
baseUrl: baseUrl,
depth: 1,
};
}
if (program.dataSource?.type === "web" && program.framework === "fastapi") {
let { baseUrl } = await prompts(
if (!program.engine && program.engine !== "simple" && !program.vectorDb) {
if (ciInfo.isCI) {
program.vectorDb = getPrefOrDefault("vectorDb");
} else {
const { vectorDb } = await prompts(
{
type: "text",
name: "baseUrl",
message: "Please provide base URL of the website:",
initial: "https://www.llamaindex.ai",
type: "select",
name: "vectorDb",
message: "Would you like to use a vector database?",
choices: getVectorDbChoices(program.framework),
initial: 0,
},
handlers,
);
try {
if (!baseUrl.includes("://")) {
baseUrl = `https://${baseUrl}`;
}
let checkUrl = new URL(baseUrl);
if (checkUrl.protocol !== "https:" && checkUrl.protocol !== "http:") {
throw new Error("Invalid protocol");
}
} catch (error) {
console.log(
red(
"Invalid URL provided! Please provide a valid URL (e.g. https://www.llamaindex.ai)",
),
);
process.exit(1);
}
program.dataSource.config = {
baseUrl: baseUrl,
depth: 1,
};
}
if (program.engine !== "simple" && !program.vectorDb) {
if (ciInfo.isCI) {
program.vectorDb = getPrefOrDefault("vectorDb");
} else {
const { vectorDb } = await prompts(
{
type: "select",
name: "vectorDb",
message: "Would you like to use a vector database?",
choices: getVectorDbChoices(program.framework),
initial: 0,
},
handlers,
);
program.vectorDb = vectorDb;
preferences.vectorDb = vectorDb;
}
program.vectorDb = vectorDb;
preferences.vectorDb = vectorDb;
}
}
......
......@@ -12,7 +12,7 @@ uvicorn = { extras = ["standard"], version = "^0.23.2" }
llama-index = "^0.9.19"
pypdf = "^3.17.0"
python-dotenv = "^1.0.0"
docx2txt = "^0.8"
[build-system]
requires = ["poetry-core"]
......
......@@ -12,7 +12,7 @@ uvicorn = { extras = ["standard"], version = "^0.23.2" }
llama-index = "^0.9.19"
pypdf = "^3.17.0"
python-dotenv = "^1.0.0"
docx2txt = "^0.8"
[build-system]
requires = ["poetry-core"]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment