From 40c5c8412c1d0710ce258d893da022c7617353d0 Mon Sep 17 00:00:00 2001
From: Huu Le <39040748+leehuwuj@users.noreply.github.com>
Date: Mon, 23 Sep 2024 13:02:29 +0700
Subject: [PATCH] feat: add test and fix python dependencies (#304)

---------

Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
---
 e2e/resolve_python_dependencies.spec.ts       | 139 ++++++++++++++++++
 e2e/utils.ts                                  |  20 ++-
 helpers/python.ts                             |  18 +--
 index.ts                                      |  35 +++++
 .../types/multiagent/fastapi/pyproject.toml   |   2 +-
 .../types/streaming/fastapi/pyproject.toml    |   2 +-
 6 files changed, 202 insertions(+), 14 deletions(-)
 create mode 100644 e2e/resolve_python_dependencies.spec.ts

diff --git a/e2e/resolve_python_dependencies.spec.ts b/e2e/resolve_python_dependencies.spec.ts
new file mode 100644
index 00000000..d48e82d5
--- /dev/null
+++ b/e2e/resolve_python_dependencies.spec.ts
@@ -0,0 +1,130 @@
+import { expect, test } from "@playwright/test";
+import { exec } from "child_process";
+import fs from "fs";
+import path from "path";
+import util from "util";
+import { TemplateFramework, TemplateVectorDB } from "../helpers/types";
+import { createTestDir, runCreateLlama } from "./utils";
+
+const execAsync = util.promisify(exec);
+
+// Framework and data source under test, taken from the environment (presumably set by the CI matrix).
+const templateFramework: TemplateFramework =
+  (process.env.FRAMEWORK as TemplateFramework) || "fastapi";
+const dataSource = process.env.DATASOURCE || "--example-file";
+
+// Only register the suite for one environment combination; it iterates its own option matrix below.
+if (
+  templateFramework === "fastapi" && // test is only relevant for fastapi
+  process.version.startsWith("v20.") && // XXX: Only run for Node.js version 20 (CI matrix will trigger other versions)
+  dataSource === "--example-file" // XXX: this test provides its own data source - only trigger it on one data source (usually the CI matrix will trigger multiple data sources)
+) {
+  // vectorDBs, tools, and data source combinations to test
+  const vectorDbs: TemplateVectorDB[] = [
+    "mongo",
+    "pg",
+    "pinecone",
+    "milvus",
+    "astra",
+    "qdrant",
+    "chroma",
+    "weaviate",
+  ];
+
+  const toolOptions = [
+    "wikipedia.WikipediaToolSpec",
+    "google.GoogleSearchToolSpec",
+  ];
+
+  // Each entry is either a bare flag or "<flag> <value>".
+  const dataSources = [
+    "--example-file",
+    "--web-source https://www.example.com",
+    "--db-source mysql+pymysql://user:pass@localhost:3306/mydb",
+  ];
+
+  test.describe("Test resolve python dependencies", () => {
+    for (const vectorDb of vectorDbs) {
+      for (const tool of toolOptions) {
+        // Named differently from the module-level `dataSource` to avoid shadowing it.
+        for (const dataSourceOption of dataSources) {
+          const dataSourceType = dataSourceOption.split(" ")[0];
+          const optionDescription = `vectorDb: ${vectorDb}, tools: ${tool}, dataSource: ${dataSourceType}`;
+
+          test(`options: ${optionDescription}`, async () => {
+            const cwd = await createTestDir();
+
+            const result = await runCreateLlama(
+              cwd,
+              "streaming",
+              "fastapi",
+              dataSourceOption,
+              vectorDb,
+              3000, // port
+              8000, // externalPort
+              "none", // postInstallAction
+              undefined, // ui
+              "--no-frontend", // appType
+              undefined, // llamaCloudProjectName
+              undefined, // llamaCloudIndexName
+              tool,
+            );
+            const projectDir = path.join(cwd, result.projectName);
+
+            // Check if the app folder exists
+            expect(fs.existsSync(projectDir)).toBeTruthy();
+
+            // Check if pyproject.toml exists
+            const pyprojectPath = path.join(projectDir, "pyproject.toml");
+            expect(fs.existsSync(pyprojectPath)).toBeTruthy();
+
+            // Run poetry lock; a non-zero exit rejects the promise and fails the test
+            try {
+              const { stdout, stderr } = await execAsync(
+                "poetry config virtualenvs.in-project true && poetry lock --no-update",
+                { cwd: projectDir },
+              );
+              console.log("poetry lock stdout:", stdout);
+              console.error("poetry lock stderr:", stderr);
+            } catch (error) {
+              console.error("Error running poetry lock:", error);
+              throw error;
+            }
+
+            // Check if poetry.lock file was created
+            expect(
+              fs.existsSync(path.join(projectDir, "poetry.lock")),
+            ).toBeTruthy();
+
+            // Verify that specific dependencies are in pyproject.toml
+            const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
+            if (vectorDb !== "none") {
+              // "pg" maps to the "postgres" package; other options are matched by substring (e.g. "mongo" matches "-mongodb")
+              const vectorStorePackage =
+                vectorDb === "pg"
+                  ? "llama-index-vector-stores-postgres"
+                  : `llama-index-vector-stores-${vectorDb}`;
+              expect(pyprojectContent).toContain(vectorStorePackage);
+            }
+            if (tool === "wikipedia.WikipediaToolSpec") {
+              expect(pyprojectContent).toContain("wikipedia");
+            }
+            if (tool === "google.GoogleSearchToolSpec") {
+              expect(pyprojectContent).toContain("google");
+            }
+
+            // Check for data source specific dependencies
+            if (dataSourceOption.includes("--web-source")) {
+              expect(pyprojectContent).toContain("llama-index-readers-web");
+            }
+            if (dataSourceOption.includes("--db-source")) {
+              expect(pyprojectContent).toContain(
+                "llama-index-readers-database ",
+              );
+            }
+          });
+        }
+      }
+    }
+  });
+}
diff --git a/e2e/utils.ts b/e2e/utils.ts
index 9437c815..afd450b7 100644
--- a/e2e/utils.ts
+++ b/e2e/utils.ts
@@ -32,6 +32,7 @@ export async function runCreateLlama(
   appType?: AppType,
   llamaCloudProjectName?: string,
   llamaCloudIndexName?: string,
+  tools?: string,
 ): Promise<CreateLlamaResult> {
   if (!process.env.OPENAI_API_KEY || !process.env.LLAMA_CLOUD_API_KEY) {
     throw new Error(
@@ -41,10 +42,23 @@ export async function runCreateLlama(
   const name = [
     templateType,
     templateFramework,
-    dataSource,
+    dataSource.split(" ")[0],
     templateUI,
     appType,
   ].join("-");
+
+  // Split "<flag> <value>" data sources into separate CLI arguments; bare flags pass through unchanged
+  const dataSourceArgs: string[] = [];
+  if (dataSource.includes("--web-source")) {
+    const webSource = dataSource.split(" ")[1];
+    dataSourceArgs.push("--web-source", webSource);
+  } else if (dataSource.includes("--db-source")) {
+    const dbSource = dataSource.split(" ")[1];
+    dataSourceArgs.push("--db-source", dbSource);
+  } else {
+    dataSourceArgs.push(dataSource);
+  }
+
   const commandArgs = [
     "create-llama",
     name,
@@ -52,7 +66,7 @@ export async function runCreateLlama(
     templateType,
     "--framework",
     templateFramework,
-    dataSource,
+    ...dataSourceArgs,
     "--vector-db",
     vectorDb,
     "--open-ai-key",
@@ -65,7 +79,7 @@ export async function runCreateLlama(
     "--post-install-action",
     postInstallAction,
     "--tools",
-    "none",
+    tools ?? "none",
     "--no-llama-parse",
     "--observability",
     "none",
diff --git a/helpers/python.ts b/helpers/python.ts
index b9f8b200..f5dac282 100644
--- a/helpers/python.ts
+++ b/helpers/python.ts
@@ -36,28 +36,28 @@ const getAdditionalDependencies = (
     case "mongo": {
       dependencies.push({
         name: "llama-index-vector-stores-mongodb",
-        version: "^0.1.3",
+        version: "^0.3.1",
       });
       break;
     }
     case "pg": {
       dependencies.push({
         name: "llama-index-vector-stores-postgres",
-        version: "^0.1.1",
+        version: "^0.2.5",
       });
       break;
     }
     case "pinecone": {
       dependencies.push({
         name: "llama-index-vector-stores-pinecone",
-        version: "^0.1.3",
+        version: "^0.2.1",
       });
       break;
     }
     case "milvus": {
       dependencies.push({
         name: "llama-index-vector-stores-milvus",
-        version: "^0.1.20",
+        version: "^0.2.0",
       });
       dependencies.push({
         name: "pymilvus",
@@ -68,28 +68,28 @@ const getAdditionalDependencies = (
     case "astra": {
       dependencies.push({
         name: "llama-index-vector-stores-astra-db",
-        version: "^0.1.5",
+        version: "^0.2.0",
       });
       break;
     }
     case "qdrant": {
       dependencies.push({
         name: "llama-index-vector-stores-qdrant",
-        version: "^0.2.8",
+        version: "^0.3.0",
       });
       break;
     }
     case "chroma": {
       dependencies.push({
         name: "llama-index-vector-stores-chroma",
-        version: "^0.1.8",
+        version: "^0.2.0",
       });
       break;
     }
     case "weaviate": {
       dependencies.push({
         name: "llama-index-vector-stores-weaviate",
-        version: "^1.0.2",
+        version: "^1.1.1",
       });
       break;
     }
@@ -130,7 +130,7 @@ const getAdditionalDependencies = (
         case "llamacloud":
           dependencies.push({
             name: "llama-index-indices-managed-llama-cloud",
-            version: "^0.3.0",
+            version: "^0.3.1",
           });
           break;
       }
diff --git a/index.ts b/index.ts
index 8ced8b3f..e187aedf 100644
--- a/index.ts
+++ b/index.ts
@@ -90,6 +90,20 @@ const program = new Commander.Command(packageJson.name)
     `
 
   Select to use an example PDF as data source.
+`,
+  )
+  .option(
+    "--web-source <url>",
+    `
+  
+  Specify a website URL to use as a data source.
+`,
+  )
+  .option(
+    "--db-source <connection-string>",
+    `
+  
+  Specify a database connection string to use as a data source.
 `,
   )
   .option(
@@ -215,6 +229,27 @@ if (process.argv.includes("--no-files")) {
     },
     EXAMPLE_FILE,
   ];
+} else if (process.argv.includes("--web-source")) {
+  program.dataSources = [
+    {
+      type: "web",
+      config: {
+        baseUrl: program.webSource,
+        prefix: program.webSource,
+        depth: 1,
+      },
+    },
+  ];
+} else if (process.argv.includes("--db-source")) {
+  program.dataSources = [
+    {
+      type: "db",
+      config: {
+        uri: program.dbSource,
+        queries: program.dbQuery || "SELECT * FROM mytable",
+      },
+    },
+  ];
 }
 
 const packageManager = !!program.useNpm
diff --git a/templates/types/multiagent/fastapi/pyproject.toml b/templates/types/multiagent/fastapi/pyproject.toml
index 5e5f6a98..5c779f27 100644
--- a/templates/types/multiagent/fastapi/pyproject.toml
+++ b/templates/types/multiagent/fastapi/pyproject.toml
@@ -10,7 +10,7 @@ readme = "README.md"
 generate = "app.engine.generate:generate_datasource"
 
 [tool.poetry.dependencies]
-python = "^3.11"
+python = ">=3.11,<3.13"
 llama-index-agent-openai = ">=0.3.0,<0.4.0"
 llama-index = "0.11.11"
 fastapi = "^0.112.2"
diff --git a/templates/types/streaming/fastapi/pyproject.toml b/templates/types/streaming/fastapi/pyproject.toml
index 69d87ba6..9d857a08 100644
--- a/templates/types/streaming/fastapi/pyproject.toml
+++ b/templates/types/streaming/fastapi/pyproject.toml
@@ -9,7 +9,7 @@ readme = "README.md"
 generate = "app.engine.generate:generate_datasource"
 
 [tool.poetry.dependencies]
-python = "^3.11,<4.0"
+python = ">=3.11,<3.13"
 fastapi = "^0.109.1"
 uvicorn = { extras = ["standard"], version = "^0.23.2" }
 python-dotenv = "^1.0.0"
-- 
GitLab