diff --git a/collector/processLink/convert/generic.js b/collector/processLink/convert/generic.js
index c12d79ade5856b819879786137638b3240d16a8e..a5eb20ca945e23a6dda343ec0ad6a48b92da0cbd 100644
--- a/collector/processLink/convert/generic.js
+++ b/collector/processLink/convert/generic.js
@@ -61,7 +61,7 @@ async function getPageContent(link) {
         ignoreHTTPSErrors: true,
       },
       gotoOptions: {
-        waitUntil: "domcontentloaded",
+        waitUntil: "networkidle2",
       },
       async evaluate(page, browser) {
         const result = await page.evaluate(() => document.body.innerText);
diff --git a/collector/utils/extensions/WebsiteDepth/index.js b/collector/utils/extensions/WebsiteDepth/index.js
index d8b23144dc2a37cff4e1971f53468d440d4be792..e680c0233b7b96cb46703757d9647ac5aa9ff613 100644
--- a/collector/utils/extensions/WebsiteDepth/index.js
+++ b/collector/utils/extensions/WebsiteDepth/index.js
@@ -48,7 +48,7 @@ async function getPageLinks(url, baseUrl) {
   try {
     const loader = new PuppeteerWebBaseLoader(url, {
       launchOptions: { headless: "new" },
-      gotoOptions: { waitUntil: "domcontentloaded" },
+      gotoOptions: { waitUntil: "networkidle2" },
     });
     const docs = await loader.load();
     const html = docs[0].pageContent;
@@ -92,7 +92,7 @@ async function bulkScrapePages(links, outFolderPath) {
     try {
       const loader = new PuppeteerWebBaseLoader(link, {
         launchOptions: { headless: "new" },
-        gotoOptions: { waitUntil: "domcontentloaded" },
+        gotoOptions: { waitUntil: "networkidle2" },
         async evaluate(page, browser) {
           const result = await page.evaluate(() => document.body.innerText);
           await browser.close();