import fs from "fs/promises";
import path from "path";
import yaml, { Document } from "yaml";
import { templatesDir } from "./dir";
import { DbSourceConfig, TemplateDataSource, WebSourceConfig } from "./types";

/**
 * File data source pointing at the bundled example document `101.pdf`,
 * located under `components/data` inside the templates directory.
 */
export const EXAMPLE_FILE: TemplateDataSource = {
  type: "file",
  config: { path: path.join(templatesDir, "components", "data", "101.pdf") },
};

/**
 * Builds the list of data sources selected by the user.
 *
 * @param files - Comma-separated list of file paths. Whitespace around each
 *   path is trimmed and empty entries (e.g. produced by a trailing comma) are
 *   skipped, so `"a.pdf, b.pdf,"` yields the two paths `a.pdf` and `b.pdf`.
 * @param exampleFile - When truthy, `EXAMPLE_FILE` is appended to the result.
 * @returns The collected data sources, or `undefined` when neither option
 *   yields a data source.
 */
export function getDataSources(
  files?: string,
  exampleFile?: boolean,
): TemplateDataSource[] | undefined {
  let dataSources: TemplateDataSource[] | undefined = undefined;
  if (files) {
    // If user specified files option, then the program should use context engine
    const filePaths = files
      .split(",")
      .map((filePath) => filePath.trim())
      .filter((filePath) => filePath.length > 0);
    // A value made only of separators/whitespace is treated like "no files".
    if (filePaths.length > 0) {
      dataSources = filePaths.map((filePath) => ({
        type: "file",
        config: {
          path: filePath,
        },
      }));
    }
  }
  if (exampleFile) {
    dataSources = [...(dataSources ?? []), EXAMPLE_FILE];
  }
  return dataSources;
}

/**
 * Builds the `web` section of the loaders config: the webdriver arguments and
 * one URL entry (`base_url`, `prefix`, `depth`) per web data source.
 */
function buildWebLoaderConfig(webSources: TemplateDataSource[]) {
  const webLoaderConfig = new Document({});

  // Create config for browser driver arguments
  const driverArgNodeValue = webLoaderConfig.createNode([
    "--no-sandbox",
    "--disable-dev-shm-usage",
  ]);
  driverArgNodeValue.commentBefore =
    " The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode";
  webLoaderConfig.set("driver_arguments", driverArgNodeValue);

  // Create config for urls
  const urlConfigs = webSources.map((ds) => {
    const dsConfig = ds.config as WebSourceConfig;
    return {
      base_url: dsConfig.baseUrl,
      prefix: dsConfig.prefix,
      depth: dsConfig.depth,
    };
  });
  const urlConfigNode = webLoaderConfig.createNode(urlConfigs);
  urlConfigNode.commentBefore = ` base_url: The URL to start crawling with
 prefix: Only crawl URLs matching the specified prefix
 depth: The maximum depth for BFS traversal
 You can add more websites by adding more entries (don't forget the - prefix from YAML)`;
  webLoaderConfig.set("urls", urlConfigNode);

  return webLoaderConfig;
}

/**
 * Builds the `db` section of the loaders config: one `{ uri, queries }` entry
 * per database data source, with the source's `queries` value wrapped in a
 * single-element list.
 */
function buildDbLoaderNode(dbSources: TemplateDataSource[]) {
  const dbLoaderConfig = new Document({});
  const configEntries = dbSources.map((ds) => {
    const dsConfig = ds.config as DbSourceConfig;
    return {
      uri: dsConfig.uri,
      queries: [dsConfig.queries],
    };
  });

  const node = dbLoaderConfig.createNode(configEntries);
  // The documented key is `queries` so that it matches the key emitted above.
  node.commentBefore = ` The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
 uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
 queries: The queries to fetch data from the database. E.g.: SELECT * FROM table`;
  return node;
}

/**
 * Writes `<root>/config/loaders.yaml` describing the loaders needed for the
 * given data sources. Sections are emitted in the order `web`, `file`, `db`,
 * and only for source types that are present.
 *
 * @param root - Project root; the `config` directory is created below it if
 *   it does not exist yet.
 * @param dataSources - Data sources to configure. Nothing is written when the
 *   list is empty.
 * @param useLlamaParse - Value stored as `use_llama_parse` in the `file`
 *   section.
 */
export async function writeLoadersConfig(
  root: string,
  dataSources: TemplateDataSource[],
  useLlamaParse?: boolean,
): Promise<void> {
  if (dataSources.length === 0) return; // no datasources, no config needed
  const loaderConfig = new Document({});

  // Web loader config
  const webSources = dataSources.filter((ds) => ds.type === "web");
  if (webSources.length > 0) {
    loaderConfig.set("web", buildWebLoaderConfig(webSources));
  }

  // File loader config
  if (dataSources.some((ds) => ds.type === "file")) {
    // NOTE(review): when useLlamaParse is undefined the key is presumably
    // omitted by the yaml library's default settings - confirm.
    const node = loaderConfig.createNode({
      use_llama_parse: useLlamaParse,
    });
    node.commentBefore = ` use_llama_parse: Use LlamaParse if \`true\`. Needs a \`LLAMA_CLOUD_API_KEY\` from https://cloud.llamaindex.ai set as environment variable`;
    loaderConfig.set("file", node);
  }

  // DB loader config
  const dbSources = dataSources.filter((ds) => ds.type === "db");
  if (dbSources.length > 0) {
    loaderConfig.set("db", buildDbLoaderNode(dbSources));
  }

  // Write loaders config
  const configDir = path.join(root, "config");
  await fs.mkdir(configDir, { recursive: true });
  await fs.writeFile(
    path.join(configDir, "loaders.yaml"),
    yaml.stringify(loaderConfig),
  );
}