Skip to content
Snippets Groups Projects
Unverified commit 41522cdf, authored by Sean Hatfield and committed by GitHub
Browse files

Handle non-ASCII characters in single and bulk link scraper URLs (#2495)

Handle non-ASCII characters in URLs.
parent 93d7ce6d
No related branches found
No related tags found
No related merge requests found
...@@ -27,7 +27,8 @@ async function scrapeGenericUrl(link, textOnly = false) { ...@@ -27,7 +27,8 @@ async function scrapeGenericUrl(link, textOnly = false) {
} }
const url = new URL(link); const url = new URL(link);
const filename = (url.host + "-" + url.pathname).replace(".", "_"); const decodedPathname = decodeURIComponent(url.pathname);
const filename = `${url.hostname}${decodedPathname.replace(/\//g, '_')}`;
const data = { const data = {
id: v4(), id: v4(),
......
...@@ -108,7 +108,8 @@ async function bulkScrapePages(links, outFolderPath) { ...@@ -108,7 +108,8 @@ async function bulkScrapePages(links, outFolderPath) {
} }
const url = new URL(link); const url = new URL(link);
const filename = (url.host + "-" + url.pathname).replace(".", "_"); const decodedPathname = decodeURIComponent(url.pathname);
const filename = `${url.hostname}${decodedPathname.replace(/\//g, '_')}`;
const data = { const data = {
id: v4(), id: v4(),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.