diff --git a/collector/extensions/index.js b/collector/extensions/index.js
index 6a3f3393e131b09e479cc76a0bde8b8be4fc2e22..0772646461d8d26c8eed2a668f0779c88260ef03 100644
--- a/collector/extensions/index.js
+++ b/collector/extensions/index.js
@@ -1,5 +1,6 @@
 const { verifyPayloadIntegrity } = require("../middleware/verifyIntegrity");
 const { reqBody } = require("../utils/http");
+const { validURL } = require("../utils/url");
 
 function extensions(app) {
   if (!app) return;
@@ -86,6 +87,25 @@ function extensions(app) {
     }
   );
 
+  // Scrape `url` plus the links discovered from it; always replies with JSON.
+  app.post(
+    "/ext/website-depth",
+    [verifyPayloadIntegrity],
+    async function (request, response) {
+      try {
+        const websiteDepth = require("../utils/extensions/WebsiteDepth");
+        const { url, depth = 1, maxLinks = 20 } = reqBody(request);
+        // Must throw (not return an object) so the catch block sends a reply.
+        if (!validURL(url)) throw new Error("Not a valid URL.");
+        const scrapedData = await websiteDepth(url, depth, maxLinks);
+        response.status(200).json({ success: true, data: scrapedData });
+      } catch (e) {
+        console.error(e);
+        response.status(400).json({ success: false, reason: e.message });
+      }
+    }
+  );
+
   app.post(
     "/ext/confluence",
     [verifyPayloadIntegrity],
diff --git a/collector/utils/extensions/WebsiteDepth/index.js b/collector/utils/extensions/WebsiteDepth/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..6e561ef74d504ac9c3cfb041de85bef1b144fea3
--- /dev/null
+++ b/collector/utils/extensions/WebsiteDepth/index.js
@@ -0,0 +1,153 @@
+const { v4 } = require("uuid");
+const {
+  PuppeteerWebBaseLoader,
+} = require("langchain/document_loaders/web/puppeteer");
+const { default: slugify } = require("slugify");
+const { parse } = require("node-html-parser");
+const { writeToServerDocuments } = require("../../files");
+const { tokenizeString } = require("../../tokenizer");
+const path = require("path");
+const fs = require("fs");
+
+/**
+ * Breadth-first discovery of the links reachable from `startUrl`.
+ * @param {string} startUrl - Page the crawl starts from.
+ * @param {number} depth - Levels of pages to read links from (at least 1).
+ * @param {number} maxLinks - Stop as soon as this many links are known (>= 1).
+ * @returns {Promise<string[]>} Unique discovered links, at most `maxLinks`.
+ */
+async function discoverLinks(startUrl, depth = 1, maxLinks = 20) {
+  const baseUrl = new URL(startUrl).origin;
+  // `>=` also maps NaN/null to the minimum instead of disabling the bound.
+  const maxDepth = depth >= 1 ? depth : 1;
+  const linkLimit = maxLinks >= 1 ? Math.floor(maxLinks) : 1;
+  const found = new Set();
+  const visited = new Set();
+  let frontier = [startUrl];
+  // One pass per level, so `depth` really bounds how far the crawl goes.
+  for (let level = 0; level < maxDepth && frontier.length > 0; level++) {
+    const nextFrontier = [];
+    for (const pageUrl of frontier) {
+      if (visited.has(pageUrl)) continue; // never load the same page twice
+      visited.add(pageUrl);
+      for (const link of await getPageLinks(pageUrl, baseUrl)) {
+        if (found.has(link)) continue;
+        found.add(link);
+        nextFrontier.push(link);
+        if (found.size >= linkLimit) return Array.from(found);
+      }
+    }
+    frontier = nextFrontier;
+  }
+  return Array.from(found);
+}
+
+// Loads `url` in headless Puppeteer and returns what extractLinks finds in the
+// first loaded document; any failure is logged and yields an empty list.
+async function getPageLinks(url, baseUrl) {
+  const options = {
+    launchOptions: { headless: "new" },
+    gotoOptions: { waitUntil: "domcontentloaded" },
+  };
+  try {
+    const pages = await new PuppeteerWebBaseLoader(url, options).load();
+    return extractLinks(pages[0].pageContent, baseUrl);
+  } catch (error) {
+    console.error(`Failed to get page links from ${url}.`, error);
+    return [];
+  }
+}
+
+// Returns the unique absolute URLs of the anchors in `html` that share the
+// origin of `baseUrl` (exact origin match, so "a.com.evil.io" is rejected).
+function extractLinks(html, baseUrl) {
+  const origin = new URL(baseUrl).origin;
+  const extractedLinks = new Set();
+  for (const anchor of parse(html).querySelectorAll("a")) {
+    const href = anchor.getAttribute("href");
+    if (!href) continue;
+    try {
+      const target = new URL(href, baseUrl);
+      if (target.origin === origin) extractedLinks.add(target.href);
+    } catch (error) {
+      continue; // unparseable href: skip it instead of failing the whole page
+    }
+  }
+  return Array.from(extractedLinks);
+}
+
+/**
+ * Scrapes the links one at a time with headless Puppeteer and stores every
+ * page that has text via writeToServerDocuments under `outputFolder`.
+ * Empty pages and pages that throw are logged and skipped.
+ * @returns {Promise<object[]>} The document records that were written.
+ */
+async function bulkScrapePages(links, outputFolder) {
+  const savedDocuments = [];
+  // Loader hook: read the page's visible text, then close the browser.
+  const readBodyText = async (page, browser) => {
+    const text = await page.evaluate(() => document.body.innerText);
+    await browser.close();
+    return text;
+  };
+
+  for (const [index, link] of links.entries()) {
+    console.log(`Scraping ${index + 1}/${links.length}: ${link}`);
+    try {
+      const docs = await new PuppeteerWebBaseLoader(link, {
+        launchOptions: { headless: "new" },
+        gotoOptions: { waitUntil: "domcontentloaded" },
+        evaluate: readBodyText,
+      }).load();
+      const pageContent = docs[0].pageContent;
+      if (!pageContent.length) {
+        console.warn(`Empty content for ${link}. Skipping.`);
+        continue;
+      }
+      const { host, pathname } = new URL(link);
+      const name = (host + "-" + pathname).replace(".", "_");
+      const title = slugify(name) + ".html";
+      const record = {
+        id: v4(),
+        url: "file://" + title,
+        title,
+        docAuthor: "no author found",
+        description: "No description found.",
+        docSource: "URL link uploaded by the user.",
+        chunkSource: `link://${link}`,
+        published: new Date().toLocaleString(),
+        wordCount: pageContent.split(" ").length,
+        pageContent,
+        token_count_estimate: tokenizeString(pageContent).length,
+      };
+      writeToServerDocuments(record, title, outputFolder);
+      savedDocuments.push(record);
+      console.log(`Successfully scraped ${link}.`);
+    } catch (error) {
+      console.error(`Failed to scrape ${link}.`, error);
+    }
+  }
+  return savedDocuments;
+}
+
+// Entry point: creates the documents folder named after the slugified
+// hostname, discovers links from `startUrl`, scrapes them, returns the records.
+async function websiteScraper(startUrl, depth = 1, maxLinks = 20) {
+  const { hostname } = new URL(startUrl);
+  const outputFolder = path.resolve(
+    __dirname,
+    `../../../../server/storage/documents/${slugify(hostname)}`
+  );
+  fs.mkdirSync(outputFolder, { recursive: true });
+
+  console.log("Discovering links...");
+  const links = await discoverLinks(startUrl, depth, maxLinks);
+  console.log(`Found ${links.length} links to scrape.`);
+
+  console.log("Starting bulk scraping...");
+  const pages = await bulkScrapePages(links, outputFolder);
+  console.log(`Scraped ${pages.length} pages.`);
+  return pages;
+}
+
+module.exports = websiteScraper;
diff --git a/frontend/src/components/DataConnectorOption/media/index.js b/frontend/src/components/DataConnectorOption/media/index.js
index ac8105975ed4dcdf71ba87f9052017a6a5942dbd..dee46a12b5e7d4efe8efbf3df64846f7611b7498 100644
--- a/frontend/src/components/DataConnectorOption/media/index.js
+++ b/frontend/src/components/DataConnectorOption/media/index.js
@@ -1,10 +1,12 @@
 import Github from "./github.svg";
 import YouTube from "./youtube.svg";
+import Link from "./link.svg";
 import Confluence from "./confluence.jpeg";
 
 const ConnectorImages = {
   github: Github,
   youtube: YouTube,
+  websiteDepth: Link,
   confluence: Confluence,
 };
 
diff --git a/frontend/src/components/DataConnectorOption/media/link.svg b/frontend/src/components/DataConnectorOption/media/link.svg
new file mode 100644
index 0000000000000000000000000000000000000000..c957e542ebd6acec879c0c4f1a392e94528996e1
--- /dev/null
+++ b/frontend/src/components/DataConnectorOption/media/link.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="330" zoomAndPan="magnify" viewBox="0 0 247.5 247.500006" height="330" preserveAspectRatio="xMidYMid meet" version="1.0"><defs><filter x="0%" y="0%" width="100%" height="100%" id="9045983972"><feColorMatrix values="0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0" color-interpolation-filters="sRGB"/></filter><filter x="0%" y="0%" width="100%" height="100%" id="111345b854"><feColorMatrix values="0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0.2126 0.7152 0.0722 0 0" color-interpolation-filters="sRGB"/></filter><clipPath id="41acadd750"><path d="M 23 23 L 224.648438 23 L 224.648438 224.648438 L 23 224.648438 Z M 23 23 " clip-rule="nonzero"/></clipPath><image x="0" y="0" width="200" xlink:href="" id="95b5eeec8e" height="200" preserveAspectRatio="xMidYMid meet"/><mask id="bd5e341d33"><g filter="url(#9045983972)"><g filter="url(#111345b854)" transform="matrix(1.010459, 0, 0, 1.010459, 22.55585, 22.555871)"><image x="0" y="0" width="200" xlink:href="" height="200" preserveAspectRatio="xMidYMid meet"/></g></g></mask><image x="0" y="0" width="200" xlink:href="" id="f7fc71df02" height="200" preserveAspectRatio="xMidYMid meet"/></defs><path fill="#ffffff" d="M 0 0 L 247 0 L 247 247 L 0 247 Z M 0 0 " fill-opacity="1" fill-rule="nonzero"/><path fill="#ffffff" d="M 0 0 L 247 0 L 247 247 L 0 247 Z M 0 0 " fill-opacity="1" fill-rule="nonzero"/><g clip-path="url(#41acadd750)"><g mask="url(#bd5e341d33)"><g transform="matrix(1.010459, 0, 0, 1.010459, 22.55585, 22.555871)"><image x="0" y="0" width="200" xlink:href="" height="200" preserveAspectRatio="xMidYMid meet"/></g></g></g></svg>
\ No newline at end of file
diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/WebsiteDepth/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/WebsiteDepth/index.jsx
new file mode 100644
index 0000000000000000000000000000000000000000..b3fc454530698ca9f8e35076332c26fe3a8d5ee2
--- /dev/null
+++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/WebsiteDepth/index.jsx
@@ -0,0 +1,134 @@
+import React, { useState } from "react";
+import System from "@/models/system";
+import showToast from "@/utils/toast";
+import pluralize from "pluralize";
+
+// Connector form: collects a URL, crawl depth and link limit, submits them
+// via System.dataConnectors.websiteDepth.scrape and toasts the outcome.
+export default function WebsiteDepthOptions() {
+  const [loading, setLoading] = useState(false);
+
+  const handleSubmit = async (e) => {
+    e.preventDefault();
+    // Keep the form: pooled React events (<= 16) lose `target` after an await.
+    const formEl = e.target;
+    const form = new FormData(formEl);
+
+    try {
+      setLoading(true);
+      showToast("Scraping website - this may take a while.", "info", {
+        clear: true,
+        autoClose: false,
+      });
+      const { data, error } = await System.dataConnectors.websiteDepth.scrape({
+        url: form.get("url"),
+        depth: Number.parseInt(form.get("depth"), 10),
+        maxLinks: Number.parseInt(form.get("maxLinks"), 10),
+      });
+      if (error) {
+        showToast(error, "error", { clear: true });
+        return;
+      }
+      showToast(
+        `Successfully scraped ${data.length} ${pluralize(
+          "page",
+          data.length
+        )}!`,
+        "success",
+        { clear: true }
+      );
+      formEl.reset();
+    } catch (err) {
+      console.error(err);
+      showToast(err.message, "error", { clear: true });
+    } finally {
+      setLoading(false); // single exit point for every outcome
+    }
+  };
+
+  return (
+    <div className="flex w-full">
+      <div className="flex flex-col w-full px-1 md:pb-6 pb-16">
+        <form className="w-full" onSubmit={handleSubmit}>
+          <div className="w-full flex flex-col py-2">
+            <div className="w-full flex flex-col gap-4">
+              <div className="flex flex-col pr-10">
+                <div className="flex flex-col gap-y-1 mb-4">
+                  <label className="text-white text-sm font-bold">
+                    Website URL
+                  </label>
+                  <p className="text-xs font-normal text-white/50">
+                    URL of the website you want to scrape.
+                  </p>
+                </div>
+                <input
+                  type="url"
+                  name="url"
+                  className="border-none bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+                  placeholder="https://example.com"
+                  required={true}
+                  autoComplete="off"
+                  spellCheck={false}
+                />
+              </div>
+              <div className="flex flex-col pr-10">
+                <div className="flex flex-col gap-y-1 mb-4">
+                  <label className="text-white text-sm font-bold">Depth</label>
+                  <p className="text-xs font-normal text-white/50">
+                    This is the number of child-links that the worker should
+                    follow from the origin URL.
+                  </p>
+                </div>
+                <input
+                  type="number"
+                  name="depth"
+                  min="1"
+                  max="5"
+                  className="border-none bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+                  required={true}
+                  defaultValue="1"
+                />
+              </div>
+              <div className="flex flex-col pr-10">
+                <div className="flex flex-col gap-y-1 mb-4">
+                  <label className="text-white text-sm font-bold">
+                    Max Links
+                  </label>
+                  <p className="text-xs font-normal text-white/50">
+                    Maximum number of links to scrape.
+                  </p>
+                </div>
+                <input
+                  type="number"
+                  name="maxLinks"
+                  min="1"
+                  className="border-none bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+                  required={true}
+                  defaultValue="20"
+                />
+              </div>
+            </div>
+          </div>
+
+          <div className="flex flex-col gap-y-2 w-full pr-10">
+            <button
+              type="submit"
+              disabled={loading}
+              className={`mt-2 w-full ${
+                loading ? "cursor-not-allowed animate-pulse" : ""
+              } justify-center border border-slate-200 px-4 py-2 rounded-lg text-[#222628] text-sm font-bold items-center flex gap-x-2 bg-slate-200 hover:bg-slate-300 hover:text-slate-800 disabled:bg-slate-300 disabled:cursor-not-allowed`}
+            >
+              {loading ? "Scraping website..." : "Submit"}
+            </button>
+            {loading && (
+              <p className="text-xs text-white/50">
+                Once complete, all scraped pages will be available for embedding
+                into workspaces in the document picker.
+              </p>
+            )}
+          </div>
+        </form>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx
index 69d30e281992f1aac77fe877959db853c3e9fcd3..5a03e19c1899acbbfd45cd994ab8a06dab2b7fa2 100644
--- a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx
+++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx
@@ -5,6 +5,7 @@ import YoutubeOptions from "./Connectors/Youtube";
 import ConfluenceOptions from "./Connectors/Confluence";
 import { useState } from "react";
 import ConnectorOption from "./ConnectorOption";
+import WebsiteDepthOptions from "./Connectors/WebsiteDepth";
 
 export const DATA_CONNECTORS = {
   github: {
@@ -21,6 +22,12 @@ export const DATA_CONNECTORS = {
       "Import the transcription of an entire YouTube video from a link.",
     options: <YoutubeOptions />,
   },
+  "website-depth": {
+    name: "Bulk Link Scraper",
+    image: ConnectorImages.websiteDepth,
+    description: "Scrape a website and its sub-links up to a certain depth.",
+    options: <WebsiteDepthOptions />,
+  },
   confluence: {
     name: "Confluence",
     image: ConnectorImages.confluence,
diff --git a/frontend/src/models/dataConnector.js b/frontend/src/models/dataConnector.js
index 19fa5f9124fbe9135e9571e88aee9752016d140b..d01c3c8b8074a82bc62d3de5e1287933ab245cdb 100644
--- a/frontend/src/models/dataConnector.js
+++ b/frontend/src/models/dataConnector.js
@@ -60,6 +60,24 @@ const DataConnector = {
         });
     },
   },
+  websiteDepth: {
+    // POSTs the crawl settings to the API's /ext/website-depth route and
+    // resolves to { data, error }; request failures are logged, not thrown.
+    scrape: async ({ url, depth, maxLinks }) => {
+      const endpoint = `${API_BASE}/ext/website-depth`;
+      const headers = baseHeaders();
+      const body = JSON.stringify({ url, depth, maxLinks });
+      try {
+        const res = await fetch(endpoint, { method: "POST", headers, body });
+        const payload = await res.json();
+        if (!payload.success) throw new Error(payload.reason);
+        return { data: payload.data, error: null };
+      } catch (e) {
+        console.error(e);
+        return { data: null, error: e.message };
+      }
+    },
+  },
 
   confluence: {
     collect: async function ({ pageUrl, username, accessToken }) {
diff --git a/server/endpoints/extensions/index.js b/server/endpoints/extensions/index.js
index 07eb7130db6e6d0abd449951e4ed44c423f7fc6e..cf8e1191c229c2232abcc2c2e718690da858c40d 100644
--- a/server/endpoints/extensions/index.js
+++ b/server/endpoints/extensions/index.js
@@ -93,6 +93,27 @@ function extensionEndpoints(app) {
       }
     }
   );
+  // Forwards the body to the collector and relays its reply (500 on error).
+  app.post(
+    "/ext/website-depth",
+    [validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])],
+    async (request, response) => {
+      try {
+        const reply = await new CollectorApi().forwardExtensionRequest({
+          endpoint: "/ext/website-depth",
+          method: "POST",
+          body: request.body,
+        });
+        await Telemetry.sendTelemetry("extension_invoked", {
+          type: "website_depth",
+        });
+        response.status(200).json(reply);
+      } catch (e) {
+        console.error(e);
+        response.sendStatus(500).end();
+      }
+    }
+  );
 }
 
 module.exports = { extensionEndpoints };