From 3808d078e1120bbc8d359ff703d6f9468ea5e7d9 Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Thu, 14 Mar 2024 13:48:14 +0700
Subject: [PATCH] feat: ability to download community submodules (#3)

---
 create-app.ts    |  4 +--
 helpers/index.ts | 19 ++++++------
 helpers/repo.ts  | 75 ++++++++++++++++++++++++++++++++++++++++++++----
 helpers/types.ts |  9 +++++-
 index.ts         |  2 +-
 questions.ts     | 21 +++++++-------
 6 files changed, 101 insertions(+), 29 deletions(-)

diff --git a/create-app.ts b/create-app.ts
index 8cb6f287..e453deb1 100644
--- a/create-app.ts
+++ b/create-app.ts
@@ -36,7 +36,7 @@ export async function createApp({
   llamaCloudKey,
   model,
   embeddingModel,
-  communityProjectPath,
+  communityProjectConfig,
   llamapack,
   vectorDb,
   externalPort,
@@ -84,7 +84,7 @@ export async function createApp({
     llamaCloudKey,
     model,
     embeddingModel,
-    communityProjectPath,
+    communityProjectConfig,
     llamapack,
     vectorDb,
     externalPort,
diff --git a/helpers/index.ts b/helpers/index.ts
index 91280af3..7e1b2d31 100644
--- a/helpers/index.ts
+++ b/helpers/index.ts
@@ -5,7 +5,6 @@ import fs from "fs/promises";
 import path from "path";
 import { cyan } from "picocolors";
 
-import { COMMUNITY_OWNER, COMMUNITY_REPO } from "./constant";
 import { templatesDir } from "./dir";
 import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables";
 import { PackageManager } from "./get-pkg-manager";
@@ -14,6 +13,7 @@ import { isHavingPoetryLockFile, tryPoetryRun } from "./poetry";
 import { installPythonTemplate } from "./python";
 import { downloadAndExtractRepo } from "./repo";
 import {
+  CommunityProjectConfig,
   FileSourceConfig,
   InstallTemplateArgs,
   TemplateDataSource,
@@ -117,14 +117,15 @@ const copyContextData = async (
 
 const installCommunityProject = async ({
   root,
-  communityProjectPath,
-}: Pick<InstallTemplateArgs, "root" | "communityProjectPath">) => {
-  console.log("\nInstalling community project:", communityProjectPath!);
+  communityProjectConfig,
+}: Pick<InstallTemplateArgs, "root" | "communityProjectConfig">) => {
+  const { owner, repo, branch, filePath } = communityProjectConfig!;
+  console.log("\nInstalling community project:", filePath || repo); // submodule configs have no filePath
   await downloadAndExtractRepo(root, {
-    username: COMMUNITY_OWNER,
-    name: COMMUNITY_REPO,
-    branch: "main",
-    filePath: communityProjectPath!,
+    username: owner,
+    name: repo,
+    branch,
+    filePath: filePath || "", // a missing filePath (submodule) is passed on as an empty path
   });
 };
 
@@ -133,7 +134,7 @@ export const installTemplate = async (
 ) => {
   process.chdir(props.root);
 
-  if (props.template === "community" && props.communityProjectPath) {
+  if (props.template === "community" && props.communityProjectConfig) {
     await installCommunityProject(props);
     return;
   }
diff --git a/helpers/repo.ts b/helpers/repo.ts
index 3942c28c..7a33ecb3 100644
--- a/helpers/repo.ts
+++ b/helpers/repo.ts
@@ -6,6 +6,7 @@ import { Stream } from "stream";
 import tar from "tar";
 import { promisify } from "util";
 import { makeDir } from "./make-dir";
+import { CommunityProjectConfig } from "./types";
 
 export type RepoInfo = {
   username: string;
@@ -47,19 +48,81 @@ export async function downloadAndExtractRepo(
   await promises.unlink(tempFile);
 }
 
-export async function getRepoRootFolders(
+/**
+ * Fetches a repository's metadata from the GitHub REST API.
+ * Resolves to the parsed JSON body of `GET /repos/{owner}/{repo}`; rejects
+ * when `got` rejects.
+ */
+const getRepoInfo = async (owner: string, repo: string) => {
+  const endpoint = `https://api.github.com/repos/${owner}/${repo}`;
+  const { body } = await got(endpoint, { responseType: "json" });
+  return body as any;
+};
+
+/**
+ * Lists the selectable community projects found at the root of `owner/repo`.
+ * Top-level folders are projects on the `main` branch; GitHub-hosted
+ * submodules are projects of their own, on their default branch.
+ */
+export async function getProjectOptions(
   owner: string,
   repo: string,
-): Promise<string[]> {
-  const url = `https://api.github.com/repos/${owner}/${repo}/contents`;
+): Promise<{ value: CommunityProjectConfig; title: string }[]> {
+  // TODO: consider using octokit (https://github.com/octokit) if more changes are needed in the future
+  const getCommunityProjectConfig = async (
+    item: any,
+  ): Promise<CommunityProjectConfig | null> => {
+    // if item is a folder, return the path with default owner, repo, and main branch
+    if (item.type === "dir")
+      return {
+        owner,
+        repo,
+        branch: "main",
+        filePath: item.path,
+      };
+
+    // check if it's a GitHub-hosted submodule (listed as a zero-size "file")
+    if (item.type === "file") {
+      if (item.size !== 0) return null; // submodules have size = 0
+
+      // get owner and repo from git_url (null when hosted outside GitHub;
+      // plain empty files point at /git/blobs/ and are skipped as well)
+      const submodulePattern = /\/repos\/([^/]+)\/([^/]+)\/git\/trees\//;
+      const match = submodulePattern.exec(item.git_url ?? "");
+      if (!match) return null;
+      const [, owner, repo] = match;
+
+      // quick fetch repo info to get the default branch
+      const { default_branch } = await getRepoInfo(owner, repo);
 
+      // return the submodule's own owner, repo, and default branch (filePath is omitted for submodules)
+      return {
+        owner,
+        repo,
+        branch: default_branch,
+      };
+    }
+
+    return null;
+  };
+
+  const url = `https://api.github.com/repos/${owner}/${repo}/contents`;
   const response = await got(url, {
     responseType: "json",
   });
-
   const data = response.body as any[];
-  const folders = data.filter((item) => item.type === "dir");
-  return folders.map((item) => item.name);
+
+  const projectConfigs: CommunityProjectConfig[] = [];
+  for (const item of data) {
+    const communityProjectConfig = await getCommunityProjectConfig(item);
+    if (communityProjectConfig) projectConfigs.push(communityProjectConfig);
+  }
+  return projectConfigs.map((config) => {
+    return {
+      value: config,
+      title: config.filePath || config.repo, // for submodules, use repo name as title
+    };
+  });
 }
 
 export async function getRepoRawContent(repoFilePath: string) {
diff --git a/helpers/types.ts b/helpers/types.ts
index 0d359423..dee6a685 100644
--- a/helpers/types.ts
+++ b/helpers/types.ts
@@ -28,6 +28,13 @@ export type WebSourceConfig = {
 };
 export type TemplateDataSourceConfig = FileSourceConfig | WebSourceConfig;
 
+export type CommunityProjectConfig = {
+  owner: string; // GitHub owner of the repository
+  repo: string; // repository name under `owner`
+  branch: string; // branch to download
+  filePath?: string; // folder inside the repo; omitted for submodules
+};
+
 export interface InstallTemplateArgs {
   appName: string;
   root: string;
@@ -44,7 +51,7 @@ export interface InstallTemplateArgs {
   llamaCloudKey?: string;
   model: string;
   embeddingModel: string;
-  communityProjectPath?: string;
+  communityProjectConfig?: CommunityProjectConfig;
   llamapack?: string;
   vectorDb?: TemplateVectorDB;
   externalPort?: number;
diff --git a/index.ts b/index.ts
index 131d6eba..057b30ce 100644
--- a/index.ts
+++ b/index.ts
@@ -290,7 +290,7 @@ async function run(): Promise<void> {
     llamaCloudKey: program.llamaCloudKey,
     model: program.model,
     embeddingModel: program.embeddingModel,
-    communityProjectPath: program.communityProjectPath,
+    communityProjectConfig: program.communityProjectConfig,
     llamapack: program.llamapack,
     vectorDb: program.vectorDb,
     externalPort: program.externalPort,
diff --git a/questions.ts b/questions.ts
index 275c2d0a..f9eb2539 100644
--- a/questions.ts
+++ b/questions.ts
@@ -13,7 +13,7 @@ import {
 import { COMMUNITY_OWNER, COMMUNITY_REPO } from "./helpers/constant";
 import { templatesDir } from "./helpers/dir";
 import { getAvailableLlamapackOptions } from "./helpers/llama-pack";
-import { getRepoRootFolders } from "./helpers/repo";
+import { getProjectOptions } from "./helpers/repo";
 import { supportedTools, toolsRequireConfig } from "./helpers/tools";
 
 export type QuestionArgs = Omit<
@@ -70,7 +70,7 @@ const defaults: QuestionArgs = {
   llamaCloudKey: "",
   model: "gpt-3.5-turbo",
   embeddingModel: "text-embedding-ada-002",
-  communityProjectPath: "",
+  communityProjectConfig: undefined,
   llamapack: "",
   postInstallAction: "dependencies",
   dataSource: {
@@ -299,25 +299,26 @@ export const askQuestions = async (
   }
 
   if (program.template === "community") {
-    const rootFolderNames = await getRepoRootFolders(
+    const projectOptions = await getProjectOptions(
       COMMUNITY_OWNER,
       COMMUNITY_REPO,
     );
-    const { communityProjectPath } = await prompts(
+    const { communityProjectConfig } = await prompts(
       {
         type: "select",
-        name: "communityProjectPath",
+        name: "communityProjectConfig",
         message: "Select community template",
-        choices: rootFolderNames.map((name) => ({
-          title: name,
-          value: name,
+        choices: projectOptions.map(({ title, value }) => ({
+          title,
+          value: JSON.stringify(value), // serialize value to string in terminal
         })),
         initial: 0,
       },
       handlers,
     );
-    program.communityProjectPath = communityProjectPath;
-    preferences.communityProjectPath = communityProjectPath;
+    const projectConfig = JSON.parse(communityProjectConfig);
+    program.communityProjectConfig = projectConfig;
+    preferences.communityProjectConfig = projectConfig;
     return; // early return - no further questions needed for community projects
   }
 
-- 
GitLab