From 619f6b3884e22283c1de571da4dddb7d92553d8b Mon Sep 17 00:00:00 2001
From: timothycarambat <rambat1010@gmail.com>
Date: Wed, 14 Aug 2024 09:11:22 -0700
Subject: [PATCH] Ignore SSL errors for web scraper (resolves #2114)

---
 collector/processLink/convert/generic.js | 1 +
 collector/utils/url/index.js             | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/collector/processLink/convert/generic.js b/collector/processLink/convert/generic.js
index a05463abf..c24e9dd3b 100644
--- a/collector/processLink/convert/generic.js
+++ b/collector/processLink/convert/generic.js
@@ -57,6 +57,7 @@ async function getPageContent(link) {
     const loader = new PuppeteerWebBaseLoader(link, {
       launchOptions: {
         headless: "new",
+        ignoreHTTPSErrors: true,
       },
       gotoOptions: {
         waitUntil: "domcontentloaded",
diff --git a/collector/utils/url/index.js b/collector/utils/url/index.js
index dd0be2e99..8a58dbd7a 100644
--- a/collector/utils/url/index.js
+++ b/collector/utils/url/index.js
@@ -5,11 +5,11 @@
  * and is simply to prevent the user from accidentally putting in non-valid websites, which is all this protects
  * since _all urls must be submitted by the user anyway_ and cannot be done with authentication and manager or admin roles.
  * If an attacker has those roles then the system is already vulnerable and this is not a primary concern.
- * 
+ *
  * We have gotten this report may times, marked them as duplicate or information and continue to get them. We communicate
  * already that deployment (and security) of an instance is on the deployer and system admin deploying it. This would include
  * isolation, firewalls, and the general security of the instance.
-*/
+ */
 
 const VALID_PROTOCOLS = ["https:", "http:"];
 const INVALID_OCTETS = [192, 172, 10, 127];
@@ -32,7 +32,7 @@ function validURL(url) {
     if (!VALID_PROTOCOLS.includes(destination.protocol)) return false;
     if (isInvalidIp(destination)) return false;
     return true;
-  } catch { }
+  } catch {}
   return false;
 }
 
-- 
GitLab