diff --git a/collector/extensions/index.js b/collector/extensions/index.js index 0e91d173161be940fbdf2079a6aef910abc5f733..6a3f3393e131b09e479cc76a0bde8b8be4fc2e22 100644 --- a/collector/extensions/index.js +++ b/collector/extensions/index.js @@ -4,69 +4,112 @@ const { reqBody } = require("../utils/http"); function extensions(app) { if (!app) return; - app.post("/ext/github-repo", [verifyPayloadIntegrity], async function (request, response) { - try { - const loadGithubRepo = require("../utils/extensions/GithubRepo"); - const { success, reason, data } = await loadGithubRepo(reqBody(request)); - response.status(200).json({ - success, - reason, - data - }); - } catch (e) { - console.error(e); - response.status(200).json({ - success: false, - reason: e.message || "A processing error occurred.", - data: {}, - }); + app.post( + "/ext/github-repo", + [verifyPayloadIntegrity], + async function (request, response) { + try { + const loadGithubRepo = require("../utils/extensions/GithubRepo"); + const { success, reason, data } = await loadGithubRepo( + reqBody(request) + ); + response.status(200).json({ + success, + reason, + data, + }); + } catch (e) { + console.error(e); + response.status(200).json({ + success: false, + reason: e.message || "A processing error occurred.", + data: {}, + }); + } + return; } - return; - }); + ); // gets all branches for a specific repo - app.post("/ext/github-repo/branches", [verifyPayloadIntegrity], async function (request, response) { - try { - const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader"); - const allBranches = await (new GithubRepoLoader(reqBody(request))).getRepoBranches() - response.status(200).json({ - success: true, - reason: null, - data: { - branches: allBranches - } - }); - } catch (e) { - console.error(e); - response.status(400).json({ - success: false, - reason: e.message, - data: { - branches: [] - } - }); + app.post( + "/ext/github-repo/branches", + [verifyPayloadIntegrity], + async function 
(request, response) { + try { + const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader"); + const allBranches = await new GithubRepoLoader( + reqBody(request) + ).getRepoBranches(); + response.status(200).json({ + success: true, + reason: null, + data: { + branches: allBranches, + }, + }); + } catch (e) { + console.error(e); + response.status(400).json({ + success: false, + reason: e.message, + data: { + branches: [], + }, + }); + } + return; } - return; - }); + ); - app.post("/ext/youtube-transcript", [verifyPayloadIntegrity], async function (request, response) { - try { - const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript"); - const { success, reason, data } = await loadYouTubeTranscript(reqBody(request)); - response.status(200).json({ success, reason, data }); - } catch (e) { - console.error(e); - response.status(400).json({ - success: false, - reason: e.message, - data: { - title: null, - author: null - } - }); + app.post( + "/ext/youtube-transcript", + [verifyPayloadIntegrity], + async function (request, response) { + try { + const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript"); + const { success, reason, data } = await loadYouTubeTranscript( + reqBody(request) + ); + response.status(200).json({ success, reason, data }); + } catch (e) { + console.error(e); + response.status(400).json({ + success: false, + reason: e.message, + data: { + title: null, + author: null, + }, + }); + } + return; } - return; - }); + ); + + app.post( + "/ext/confluence", + [verifyPayloadIntegrity], + async function (request, response) { + try { + const loadConfluence = require("../utils/extensions/Confluence"); + const { success, reason, data } = await loadConfluence( + reqBody(request) + ); + response.status(200).json({ success, reason, data }); + } catch (e) { + console.error(e); + response.status(400).json({ + success: false, + reason: e.message, + data: { + title: null, + author: null, + }, + }); + } + return; 
+ } + ); } module.exports = extensions; diff --git a/collector/package.json b/collector/package.json index 4a5a99fffdefd09b61b6c5bbec2d35a4ae0e6acd..5d2e5f0f58d1d8d9137ce2e46f9bd6974d0dc746 100644 --- a/collector/package.json +++ b/collector/package.json @@ -49,4 +49,4 @@ "nodemon": "^2.0.22", "prettier": "^2.4.1" } -} \ No newline at end of file +} diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js new file mode 100644 index 0000000000000000000000000000000000000000..1ea642e1aa3bf7493245bb01ae956e56c833463b --- /dev/null +++ b/collector/utils/extensions/Confluence/index.js @@ -0,0 +1,110 @@ +const fs = require("fs"); +const path = require("path"); +const { default: slugify } = require("slugify"); +const { v4 } = require("uuid"); +const { writeToServerDocuments } = require("../../files"); +const { tokenizeString } = require("../../tokenizer"); +const { + ConfluencePagesLoader, +} = require("langchain/document_loaders/web/confluence"); + +function validSpaceUrl(spaceUrl = "") { + const UrlPattern = require("url-pattern"); + const pattern = new UrlPattern( + "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*" + ); + const match = pattern.match(spaceUrl); + if (!match) return { valid: false, result: null }; + return { valid: true, result: match }; +} + +async function loadConfluence({ pageUrl, username, accessToken }) { + if (!pageUrl || !username || !accessToken) { + return { + success: false, + reason: + "You need a Confluence page URL, username, and access token to use the Confluence connector.", + }; + } + + const validSpace = validSpaceUrl(pageUrl); + if (!validSpace.result) { + return { + success: false, + reason: + "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/spaces/~SPACEID/*", + }; + } + + const { subdomain, spaceKey } = validSpace.result; + console.log(`-- Working Confluence ${subdomain}.atlassian.net --`); + const loader = new
ConfluencePagesLoader({ + baseUrl: `https://${subdomain}.atlassian.net/wiki`, + spaceKey, + username, + accessToken, + }); + + const { docs, error } = await loader + .load() + .then((docs) => { + return { docs, error: null }; + }) + .catch((e) => { + return { + docs: [], + error: e.message?.split("Error:")?.[1] || e.message, + }; + }); + + if (!docs.length || !!error) { + return { + success: false, + reason: error ?? "No pages found for that Confluence space.", + }; + } + const outFolder = slugify( + `${subdomain}-confluence-${v4().slice(0, 4)}` + ).toLowerCase(); + const outFolderPath = path.resolve( + __dirname, + `../../../../server/storage/documents/${outFolder}` + ); + fs.mkdirSync(outFolderPath, { recursive: true }); + + docs.forEach((doc) => { + const data = { + id: v4(), + url: doc.metadata.url + ".page", + title: doc.metadata.title || doc.metadata.source, + docAuthor: subdomain, + description: doc.metadata.title, + docSource: `${subdomain} Confluence`, + chunkSource: `confluence://${doc.metadata.url}`, + published: new Date().toLocaleString(), + wordCount: doc.pageContent.split(" ").length, + pageContent: doc.pageContent, + token_count_estimate: tokenizeString(doc.pageContent).length, + }; + + console.log( + `[Confluence Loader]: Saving ${doc.metadata.title} to ${outFolder}` + ); + writeToServerDocuments( + data, + `${slugify(data.title)}-${data.id}`, + outFolderPath + ); + }); + + return { + success: true, + reason: null, + data: { + spaceKey, + destination: outFolder, + }, + }; +} + +module.exports = loadConfluence; diff --git a/frontend/src/components/DataConnectorOption/media/confluence.jpeg b/frontend/src/components/DataConnectorOption/media/confluence.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..7559663a68ac1dfdc3f5a64f4de891d3d449272c Binary files /dev/null and b/frontend/src/components/DataConnectorOption/media/confluence.jpeg differ diff --git a/frontend/src/components/DataConnectorOption/media/index.js
b/frontend/src/components/DataConnectorOption/media/index.js index 543bed5f74b60b67940860088f72943321b44adb..ac8105975ed4dcdf71ba87f9052017a6a5942dbd 100644 --- a/frontend/src/components/DataConnectorOption/media/index.js +++ b/frontend/src/components/DataConnectorOption/media/index.js @@ -1,9 +1,11 @@ import Github from "./github.svg"; import YouTube from "./youtube.svg"; +import Confluence from "./confluence.jpeg"; const ConnectorImages = { github: Github, youtube: YouTube, + confluence: Confluence, }; export default ConnectorImages; diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Confluence/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Confluence/index.jsx new file mode 100644 index 0000000000000000000000000000000000000000..52ca7e63df6d3210932605e24fb87b377816b44e --- /dev/null +++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Confluence/index.jsx @@ -0,0 +1,164 @@ +import { useState } from "react"; +import System from "@/models/system"; +import showToast from "@/utils/toast"; +import { Warning } from "@phosphor-icons/react"; +import { Tooltip } from "react-tooltip"; + +export default function ConfluenceOptions() { + const [loading, setLoading] = useState(false); + + const handleSubmit = async (e) => { + e.preventDefault(); + const form = new FormData(e.target); + + try { + setLoading(true); + showToast( + "Fetching all pages for Confluence space - this may take a while.", + "info", + { + clear: true, + autoClose: false, + } + ); + const { data, error } = await System.dataConnectors.confluence.collect({ + pageUrl: form.get("pageUrl"), + username: form.get("username"), + accessToken: form.get("accessToken"), + }); + + if (!!error) { + showToast(error, "error", { clear: true }); + setLoading(false); + return; + } + + showToast( + `Pages collected from Confluence space ${data.spaceKey}. 
Output folder is ${data.destination}.`, + "success", + { clear: true } + ); + e.target.reset(); + setLoading(false); + } catch (e) { + console.error(e); + showToast(e.message, "error", { clear: true }); + setLoading(false); + } + }; + + return ( + <div className="flex w-full"> + <div className="flex flex-col w-full px-1 md:pb-6 pb-16"> + <form className="w-full" onSubmit={handleSubmit}> + <div className="w-full flex flex-col py-2"> + <div className="w-full flex flex-col gap-4"> + <div className="flex flex-col pr-10"> + <div className="flex flex-col gap-y-1 mb-4"> + <label className="text-white text-sm font-bold flex gap-x-2 items-center"> + <p className="font-bold text-white">Confluence Page URL</p> + </label> + <p className="text-xs font-normal text-white/50"> + URL of a page in the Confluence space. + </p> + </div> + <input + type="url" + name="pageUrl" + className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5" + placeholder="https://example.atlassian.net/wiki/spaces/~7120208c08555d52224113949698b933a3bb56/pages/851969/Test+anythingLLM+page" + required={true} + autoComplete="off" + spellCheck={false} + /> + </div> + <div className="flex flex-col pr-10"> + <div className="flex flex-col gap-y-1 mb-4"> + <label className="text-white text-sm font-bold"> + Confluence Username + </label> + <p className="text-xs font-normal text-white/50"> + Your Confluence username. 
+ </p> + </div> + <input + type="email" + name="username" + className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5" + placeholder="jdoe@example.com" + required={true} + autoComplete="off" + spellCheck={false} + /> + </div> + <div className="flex flex-col pr-10"> + <div className="flex flex-col gap-y-1 mb-4"> + <label className="text-white text-sm font-bold flex gap-x-2 items-center"> + <p className="font-bold text-white"> + Confluence Access Token + </p> + <Warning + size={14} + className="ml-1 text-orange-500 cursor-pointer" + data-tooltip-id="access-token-tooltip" + data-tooltip-place="right" + /> + <Tooltip + delayHide={300} + id="access-token-tooltip" + className="max-w-xs" + clickable={true} + > + <p className="text-sm"> + You need to provide an access token for authentication. + You can generate an access token{" "} + <a + href="https://id.atlassian.com/manage-profile/security/api-tokens" + target="_blank" + rel="noopener noreferrer" + className="underline" + onClick={(e) => e.stopPropagation()} + > + here + </a> + . + </p> + </Tooltip> + </label> + <p className="text-xs font-normal text-white/50"> + Access token for authentication. + </p> + </div> + <input + type="password" + name="accessToken" + className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5" + placeholder="abcd1234" + required={true} + autoComplete="off" + spellCheck={false} + /> + </div> + </div> + </div> + + <div className="flex flex-col gap-y-2 w-full pr-10"> + <button + type="submit" + disabled={loading} + className="mt-2 w-full justify-center border border-slate-200 px-4 py-2 rounded-lg text-[#222628] text-sm font-bold items-center flex gap-x-2 bg-slate-200 hover:bg-slate-300 hover:text-slate-800 disabled:bg-slate-300 disabled:cursor-not-allowed" + > + {loading ? "Collecting pages..." 
: "Submit"} + </button> + {loading && ( + <p className="text-xs text-white/50"> + Once complete, all pages will be available for embedding into + workspaces. + </p> + )} + </div> + </form> + </div> + </div> + ); +} diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx index 419fc1fc9e1c72d451357a31ffdfde4b32b4b813..69d30e281992f1aac77fe877959db853c3e9fcd3 100644 --- a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx @@ -2,6 +2,7 @@ import ConnectorImages from "@/components/DataConnectorOption/media"; import { MagnifyingGlass } from "@phosphor-icons/react"; import GithubOptions from "./Connectors/Github"; import YoutubeOptions from "./Connectors/Youtube"; +import ConfluenceOptions from "./Connectors/Confluence"; import { useState } from "react"; import ConnectorOption from "./ConnectorOption"; @@ -20,6 +21,12 @@ export const DATA_CONNECTORS = { "Import the transcription of an entire YouTube video from a link.", options: <YoutubeOptions />, }, + confluence: { + name: "Confluence", + image: ConnectorImages.confluence, + description: "Import an entire Confluence page in a single click.", + options: <ConfluenceOptions />, + }, }; export default function DataConnectors() { diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx index 1dfeaaaf363be417dbca29ce3f25146268327484..7105901d382c3d2bf711e91810223dcc4eff5361 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx @@ -1,4 +1,4 @@ -import { memo, useState } from "react"; +import React, { memo, useState } from "react"; import { v4 } from "uuid"; import { decode as 
HTMLDecode } from "he"; import truncate from "truncate"; @@ -14,6 +14,7 @@ import { X, YoutubeLogo, } from "@phosphor-icons/react"; +import ConfluenceLogo from "@/media/dataConnectors/confluence.png"; import { Tooltip } from "react-tooltip"; import { toPercentString } from "@/utils/numbers"; @@ -202,13 +203,6 @@ function CitationDetailModal({ source, onClose }) { ); } -const ICONS = { - file: FileText, - link: Link, - youtube: YoutubeLogo, - github: GithubLogo, -}; - // Show the correct title and/or display text for citations // which contain valid outbound links that can be clicked by the // user when viewing a citation. Optionally allows various icons @@ -221,10 +215,17 @@ function parseChunkSource({ title = "", chunks = [] }) { icon: "file", }; - if (!chunks.length || !chunks[0].chunkSource.startsWith("link://")) + if ( + !chunks.length || + (!chunks[0].chunkSource.startsWith("link://") && + !chunks[0].chunkSource.startsWith("confluence://")) + ) return nullResponse; try { - const url = new URL(chunks[0].chunkSource.split("link://")[1]); + const url = new URL( + chunks[0].chunkSource.split("link://")[1] || + chunks[0].chunkSource.split("confluence://")[1] + ); let text = url.host + url.pathname; let icon = "link"; @@ -238,6 +239,11 @@ function parseChunkSource({ title = "", chunks = [] }) { icon = "github"; } + if (url.host.includes("atlassian.net")) { + text = title; + icon = "confluence"; + } + return { isUrl: true, href: url.toString(), @@ -247,3 +253,16 @@ function parseChunkSource({ title = "", chunks = [] }) { } catch {} return nullResponse; } + +// Patch to render Confluence icon as an element like we do with Phosphor +const ConfluenceIcon = ({ ...props }) => ( + <img src={ConfluenceLogo} {...props} /> +); + +const ICONS = { + file: FileText, + link: Link, + youtube: YoutubeLogo, + github: GithubLogo, + confluence: ConfluenceIcon, +}; diff --git a/frontend/src/media/dataConnectors/confluence.png b/frontend/src/media/dataConnectors/confluence.png new file
mode 100644 index 0000000000000000000000000000000000000000..27a5da07bd1402f48dc9cc39d26225505dd85242 Binary files /dev/null and b/frontend/src/media/dataConnectors/confluence.png differ diff --git a/frontend/src/models/dataConnector.js b/frontend/src/models/dataConnector.js index e0b3c0c3e36c7a807d85f3a6184b435931b50125..19fa5f9124fbe9135e9571e88aee9752016d140b 100644 --- a/frontend/src/models/dataConnector.js +++ b/frontend/src/models/dataConnector.js @@ -60,6 +60,29 @@ const DataConnector = { }); }, }, + + confluence: { + collect: async function ({ pageUrl, username, accessToken }) { + return await fetch(`${API_BASE}/ext/confluence`, { + method: "POST", + headers: baseHeaders(), + body: JSON.stringify({ + pageUrl, + username, + accessToken, + }), + }) + .then((res) => res.json()) + .then((res) => { + if (!res.success) throw new Error(res.reason); + return { data: res.data, error: null }; + }) + .catch((e) => { + console.error(e); + return { data: null, error: e.message }; + }); + }, + }, }; export default DataConnector; diff --git a/server/endpoints/extensions/index.js b/server/endpoints/extensions/index.js index bf07ec56c94a669d25c49adfabb9ca2fd7914cf8..07eb7130db6e6d0abd449951e4ed44c423f7fc6e 100644 --- a/server/endpoints/extensions/index.js +++ b/server/endpoints/extensions/index.js @@ -71,6 +71,28 @@ function extensionEndpoints(app) { } } ); + + app.post( + "/ext/confluence", + [validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])], + async (request, response) => { + try { + const responseFromProcessor = + await new CollectorApi().forwardExtensionRequest({ + endpoint: "/ext/confluence", + method: "POST", + body: request.body, + }); + await Telemetry.sendTelemetry("extension_invoked", { + type: "confluence", + }); + response.status(200).json(responseFromProcessor); + } catch (e) { + console.error(e); + response.sendStatus(500).end(); + } + } + ); } module.exports = { extensionEndpoints };