diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js index 5a473f654cc5fa3e75677ac83f8710fa15455113..5bb44deeb759944b5e2097d65ca1a938642f6483 100644 --- a/collector/utils/extensions/Confluence/index.js +++ b/collector/utils/extensions/Confluence/index.js @@ -2,6 +2,7 @@ const fs = require("fs"); const path = require("path"); const { default: slugify } = require("slugify"); const { v4 } = require("uuid"); +const UrlPattern = require("url-pattern"); const { writeToServerDocuments } = require("../../files"); const { tokenizeString } = require("../../tokenizer"); const { @@ -9,13 +10,29 @@ const { } = require("langchain/document_loaders/web/confluence"); function validSpaceUrl(spaceUrl = "") { - const UrlPattern = require("url-pattern"); - const pattern = new UrlPattern( - "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*" + // Atlassian default URL match + const atlassianPattern = new UrlPattern( + "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)/*" ); - const match = pattern.match(spaceUrl); - if (!match) return { valid: false, result: null }; - return { valid: true, result: match }; + const atlassianMatch = atlassianPattern.match(spaceUrl); + if (atlassianMatch) { + return { valid: true, result: atlassianMatch }; + } + + // Custom Confluence URL match + const customPattern = new UrlPattern( + "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)/*" + ); + const customMatch = customPattern.match(spaceUrl); + if (customMatch) { + customMatch.customDomain = + (customMatch.subdomain ? `${customMatch.subdomain}.` : "") + // + `${customMatch.domain}.${customMatch.tld}`; + return { valid: true, result: customMatch, custom: true }; + } + + // No match + return { valid: false, result: null }; } async function loadConfluence({ pageUrl, username, accessToken }) { @@ -32,14 +49,19 @@ async function loadConfluence({ pageUrl, username, accessToken }) { return { success: false, reason: - "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/*", + "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*", }; } - const { subdomain, spaceKey } = validSpace.result; - console.log(`-- Working Confluence ${subdomain}.atlassian.net --`); + const { subdomain, customDomain, spaceKey } = validSpace.result; + let baseUrl = `https://${subdomain}.atlassian.net/wiki`; + if (customDomain) { + baseUrl = `https://${customDomain}/wiki`; + } + + console.log(`-- Working Confluence ${baseUrl} --`); const loader = new ConfluencePagesLoader({ - baseUrl: `https://${subdomain}.atlassian.net/wiki`, + baseUrl, spaceKey, username, accessToken,