diff --git a/collector/processLink/convert/generic.js b/collector/processLink/convert/generic.js
index c24e9dd3befe9eab7ab7c9bf292208f418ff4e65..64fc0a0b7c4bfa7e913fa8b63448db235389819c 100644
--- a/collector/processLink/convert/generic.js
+++ b/collector/processLink/convert/generic.js
@@ -27,7 +27,8 @@ async function scrapeGenericUrl(link, textOnly = false) {
   }
 
   const url = new URL(link);
-  const filename = (url.host + "-" + url.pathname).replace(".", "_");
+  const decodedPathname = decodeURIComponent(url.pathname);
+  const filename = `${url.hostname}${decodedPathname.replace(/\//g, '_')}`;
 
   const data = {
     id: v4(),
diff --git a/collector/utils/extensions/WebsiteDepth/index.js b/collector/utils/extensions/WebsiteDepth/index.js
index d007181297a350811b0ab3c5b8ab2db36c0379b1..e7d26d99a769e93e716657ccdaea202f4e14f30f 100644
--- a/collector/utils/extensions/WebsiteDepth/index.js
+++ b/collector/utils/extensions/WebsiteDepth/index.js
@@ -108,7 +108,8 @@ async function bulkScrapePages(links, outFolderPath) {
   }
 
   const url = new URL(link);
-  const filename = (url.host + "-" + url.pathname).replace(".", "_");
+  const decodedPathname = decodeURIComponent(url.pathname);
+  const filename = `${url.hostname}${decodedPathname.replace(/\//g, '_')}`;
 
   const data = {
     id: v4(),
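
For illustration only (not part of the patch), a minimal sketch of how the old and new filename construction differ for a sample URL; the sample link below is hypothetical:

```js
// Illustrative sketch; the sample URL is made up for comparison purposes.
const link = "https://example.com:8080/docs/getting%20started/intro.html";
const url = new URL(link);

// Old behavior: uses url.host (includes the port) and replaces only the
// FIRST "." with "_" because .replace() with a string matches once.
const oldFilename = (url.host + "-" + url.pathname).replace(".", "_");
// -> "example_com:8080-/docs/getting%20started/intro.html"

// New behavior: uses url.hostname (no port), decodes percent-escapes in the
// path, and replaces every "/" with "_" via a global regex.
const decodedPathname = decodeURIComponent(url.pathname);
const newFilename = `${url.hostname}${decodedPathname.replace(/\//g, '_')}`;
// -> "example.com_docs_getting started_intro.html"
```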