diff --git a/.vscode/settings.json b/.vscode/settings.json
index aafdb17d8b6c5338abb57438af11822955f08247..ce350ca2fedc8b42ff804d492f399549c0b1e739 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -29,11 +29,13 @@
     "Milvus",
     "Mintplex",
     "moderations",
+    "numpages",
     "Ollama",
     "Oobabooga",
     "openai",
     "opendocument",
     "openrouter",
+    "pagerender",
     "Qdrant",
     "searxng",
     "Serper",
diff --git a/collector/package.json b/collector/package.json
index 72deb4abd8ecea3264b6dedf4be9c1ca2d4bc30e..5e3873d1eb7f351b6dcc51fb220652f185033f0f 100644
--- a/collector/package.json
+++ b/collector/package.json
@@ -37,7 +37,7 @@
     "node-html-parser": "^6.1.13",
     "officeparser": "^4.0.5",
     "openai": "4.38.5",
-    "pdfjs-dist": "3.4.120",
+    "pdf-parse": "^1.1.1",
     "puppeteer": "~21.5.2",
     "slugify": "^1.6.6",
     "url-pattern": "^1.0.3",
diff --git a/collector/processSingleFile/convert/asPDF/PDFLoader/index.js b/collector/processSingleFile/convert/asPDF/PDFLoader/index.js
new file mode 100644
index 0000000000000000000000000000000000000000..6987690626618a01894f5b35deba13b98b8ac029
--- /dev/null
+++ b/collector/processSingleFile/convert/asPDF/PDFLoader/index.js
@@ -0,0 +1,96 @@
+const fs = require("fs").promises;
+const pdf = require("pdf-parse");
+
+/**
+ * Reads a PDF from disk with pdf-parse and returns its text as document
+ * objects: one per page, or a single one for the whole file.
+ */
+class PDFLoader {
+  /**
+   * Text of each page rendered by the most recent `load()` call, in the
+   * order the pages were rendered.
+   */
+  pages = [];
+
+  /**
+   * @param {string} filePath - Path of the PDF file to read.
+   * @param {{ splitPages?: boolean }} [options] - When `splitPages` is true
+   *   (the default) `load()` returns one document per page; otherwise it
+   *   returns a single document holding the full text.
+   */
+  constructor(filePath, { splitPages = true } = {}) {
+    this.filePath = filePath;
+    this.splitPages = splitPages;
+  }
+
+  /**
+   * Parses the PDF and builds the document list.
+   *
+   * @returns {Promise<Array<{ pageContent: string, metadata: object }>>}
+   *   Documents with trimmed text plus `source` and `pdf` metadata;
+   *   per-page documents also carry a 1-based `loc.pageNumber`.
+   * @throws If reading the file or parsing it with pdf-parse rejects.
+   */
+  async load() {
+    // Start from a clean slate so calling load() again on the same instance
+    // does not append to (and duplicate) the previous call's pages.
+    this.pages = [];
+
+    const buffer = await fs.readFile(this.filePath);
+    const { text, numpages, info, metadata, version } = await pdf(buffer, {
+      pagerender: this.splitPages ? this.renderPage : null,
+    });
+
+    // Each document gets its own metadata object.
+    const buildMetadata = (extra = {}) => ({
+      source: this.filePath,
+      pdf: { version, info, metadata, totalPages: numpages },
+      ...extra,
+    });
+
+    if (!this.splitPages) {
+      return [{ pageContent: text.trim(), metadata: buildMetadata() }];
+    }
+
+    return this.pages.map((pageContent, index) => ({
+      pageContent: pageContent.trim(),
+      metadata: buildMetadata({ loc: { pageNumber: index + 1 } }),
+    }));
+  }
+
+  /**
+   * Page renderer handed to pdf-parse: flattens a page's text items into a
+   * string and records it in `this.pages`.
+   *
+   * @param {object} pageData - Page object passed in by pdf-parse; must
+   *   expose `getTextContent()`.
+   * @returns {Promise<string>} The page text.
+   */
+  renderPage = async (pageData) => {
+    let text = "";
+    try {
+      const textContent = await pageData.getTextContent();
+      let lastY;
+      for (const item of textContent.items) {
+        // transform[5] is treated as the item's vertical position (hence
+        // `lastY`); a change from the previous item starts a new line.
+        if (lastY !== undefined && lastY !== item.transform[5]) {
+          text += "\n";
+        }
+        text += item.str;
+        lastY = item.transform[5];
+      }
+    } catch (error) {
+      // Record a placeholder so `index + 1` in load() still matches the page
+      // number if parsing carries on after this page, then rethrow.
+      // NOTE(review): pdf-parse appears to continue after a failed page —
+      // confirm against the installed version.
+      this.pages.push("");
+      throw error;
+    }
+    this.pages.push(text);
+    return text;
+  };
+}
+
+module.exports = PDFLoader;
diff --git a/collector/processSingleFile/convert/asPDF.js b/collector/processSingleFile/convert/asPDF/index.js
similarity index 54%
rename from collector/processSingleFile/convert/asPDF.js
rename to collector/processSingleFile/convert/asPDF/index.js
index 0521a8fbb620f510515f8d2ffe96751214c7e0b8..bf14516419e266ade79b245fece87287198b0ed0 100644
--- a/collector/processSingleFile/convert/asPDF.js
+++ b/collector/processSingleFile/convert/asPDF/index.js
@@ -3,29 +3,28 @@ const {
   createdDate,
   trashFile,
   writeToServerDocuments,
-} = require("../../utils/files");
-const { tokenizeString } = require("../../utils/tokenizer");
+} = require("../../../utils/files");
+const { tokenizeString } = require("../../../utils/tokenizer");
 const { default: slugify } = require("slugify");
+const PDFLoader = require("./PDFLoader");
 
-async function asPDF({ fullFilePath = "", filename = "" }) {
-  const pdfjsLib = await import("pdfjs-dist");
-  console.log(`-- Working ${filename} --`);
-
-  const loadingTask = pdfjsLib.default.getDocument(fullFilePath);
-  const pdf = await loadingTask.promise;
+async function asPdf({ fullFilePath = "", filename = "" }) {
+  const pdfLoader = new PDFLoader(fullFilePath, {
+    splitPages: true,
+  });
 
-  const numPages = pdf.numPages;
+  console.log(`-- Working ${filename} --`);
   const pageContent = [];
+  const docs = await pdfLoader.load();
 
-  for (let i = 1; i <= numPages; i++) {
-    console.log(`-- Parsing content from pg ${i} --`);
-    const page = await pdf.getPage(i);
-    const content = await page.getTextContent();
-    const text = content.items.map((item) => item.str).join(" ");
-
-    if (text.length) {
-      pageContent.push(text);
-    }
+  for (const doc of docs) {
+    console.log(
+      `-- Parsing content from pg ${
+        doc.metadata?.loc?.pageNumber || "unknown"
+      } --`
+    );
+    if (!doc.pageContent || !doc.pageContent.length) continue;
+    pageContent.push(doc.pageContent);
   }
 
   if (!pageContent.length) {
@@ -38,15 +37,13 @@ async function asPDF({ fullFilePath = "", filename = "" }) {
     };
   }
 
-  const content = pageContent.join(" ");
-  const metadata = await pdf.getMetadata();
-
+  const content = pageContent.join("");
   const data = {
     id: v4(),
     url: "file://" + fullFilePath,
     title: filename,
-    docAuthor: metadata?.info?.Creator || "no author found",
-    description: metadata?.info?.Title || "No description found.",
+    docAuthor: docs[0]?.metadata?.pdf?.info?.Creator || "no author found",
+    description: docs[0]?.metadata?.pdf?.info?.Title || "No description found.",
     docSource: "pdf file uploaded by the user.",
     chunkSource: "",
     published: createdDate(fullFilePath),
@@ -64,4 +61,4 @@ async function asPDF({ fullFilePath = "", filename = "" }) {
   return { success: true, reason: null, documents: [document] };
 }
 
-module.exports = asPDF;
+module.exports = asPdf;
diff --git a/collector/utils/constants.js b/collector/utils/constants.js
index ddcee800fd51e359d2bd0ddb033c66ff9457b3a3..ee9ad22ae0d609460b6865b1e29300fd40c826f3 100644
--- a/collector/utils/constants.js
+++ b/collector/utils/constants.js
@@ -33,7 +33,7 @@ const SUPPORTED_FILETYPE_CONVERTERS = {
   ".rst": "./convert/asTxt.js",
 
   ".html": "./convert/asTxt.js",
-  ".pdf": "./convert/asPDF.js",
+  ".pdf": "./convert/asPDF/index.js",
 
   ".docx": "./convert/asDocx.js",
   ".pptx": "./convert/asOfficeMime.js",
diff --git a/collector/yarn.lock b/collector/yarn.lock
index 394d8954de27128e1686829eaf68d377866fd2ea..24dfd435f92618b30008d9d45ba940a65d661f0f 100644
--- a/collector/yarn.lock
+++ b/collector/yarn.lock
@@ -122,7 +122,7 @@
     "@langchain/core" "~0.1"
     js-tiktoken "^1.0.11"
 
-"@mapbox/node-pre-gyp@^1.0.0", "@mapbox/node-pre-gyp@^1.0.11":
+"@mapbox/node-pre-gyp@^1.0.11":
   version "1.0.11"
   resolved "https://registry.yarnpkg.com/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz#417db42b7f5323d79e93b34a6d7a2a12c0df43fa"
   integrity sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==
@@ -662,15 +662,6 @@ camelcase@6:
   resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-6.3.0.tgz#5685b95eb209ac9c0c177467778c9c84df58ba9a"
   integrity sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==
 
-canvas@^2.11.0:
-  version "2.11.2"
-  resolved "https://registry.yarnpkg.com/canvas/-/canvas-2.11.2.tgz#553d87b1e0228c7ac0fc72887c3adbac4abbd860"
-  integrity sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==
-  dependencies:
-    "@mapbox/node-pre-gyp" "^1.0.0"
-    nan "^2.17.0"
-    simple-get "^3.0.3"
-
 chalk@^2.4.2:
   version "2.4.2"
   resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424"
@@ -936,13 +927,6 @@ decamelize@1.2.0:
   resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290"
   integrity sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==
 
-decompress-response@^4.2.0:
-  version "4.2.1"
-  resolved "https://registry.yarnpkg.com/decompress-response/-/decompress-response-4.2.1.tgz#414023cc7a302da25ce2ec82d0d5238ccafd8986"
-  integrity sha512-jOSne2qbyE+/r8G1VU+G/82LBs2Fs4LAsTiLSHOCOMZQl2OKZ6i8i4IyHemTe+/yIXOtTcRQMzPcgyhoFlqPkw==
-  dependencies:
-    mimic-response "^2.0.0"
-
 decompress-response@^6.0.0:
   version "6.0.0"
   resolved "https://registry.yarnpkg.com/decompress-response/-/decompress-response-6.0.0.tgz#ca387612ddb7e104bd16d85aab00d5ecf09c66fc"
@@ -2237,11 +2221,6 @@ mime@^3.0.0:
   resolved "https://registry.yarnpkg.com/mime/-/mime-3.0.0.tgz#b374550dca3a0c18443b0c950a6a58f1931cf7a7"
   integrity sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A==
 
-mimic-response@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/mimic-response/-/mimic-response-2.1.0.tgz#d13763d35f613d09ec37ebb30bac0469c0ee8f43"
-  integrity sha512-wXqjST+SLt7R009ySCglWBCFpjUygmCIfD790/kVbiGmUgfYGuB14PiTd5DwVxSV4NcYHjzMkoj5LjQZwTQLEA==
-
 mimic-response@^3.1.0:
   version "3.1.0"
   resolved "https://registry.yarnpkg.com/mimic-response/-/mimic-response-3.1.0.tgz#2d1d59af9c1b129815accc2c46a022a5ce1fa3c9"
@@ -2375,11 +2354,6 @@ mustache@^4.2.0:
   resolved "https://registry.yarnpkg.com/mustache/-/mustache-4.2.0.tgz#e5892324d60a12ec9c2a73359edca52972bf6f64"
   integrity sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==
 
-nan@^2.17.0:
-  version "2.20.0"
-  resolved "https://registry.yarnpkg.com/nan/-/nan-2.20.0.tgz#08c5ea813dd54ed16e5bd6505bf42af4f7838ca3"
-  integrity sha512-bk3gXBZDGILuuo/6sKtr0DQmSThYHLtNCdSdXk9YkxD/jK6X2vmCyyXBBxyqZ4XcnzTyYEAThfX3DCEnLf6igw==
-
 napi-build-utils@^1.0.1:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/napi-build-utils/-/napi-build-utils-1.0.2.tgz#b1fddc0b2c46e380a0b7a76f984dd47c41a13806"
@@ -2715,18 +2689,6 @@ path-type@^4.0.0:
   resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b"
   integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==
 
-path2d-polyfill@^2.0.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/path2d-polyfill/-/path2d-polyfill-2.1.1.tgz#6098b7bf2fc24c306c6377bcd558b17ba437ea27"
-  integrity sha512-4Rka5lN+rY/p0CdD8+E+BFv51lFaFvJOrlOhyQ+zjzyQrzyh3ozmxd1vVGGDdIbUFSBtIZLSnspxTgPT0iJhvA==
-  dependencies:
-    path2d "0.1.1"
-
-path2d@0.1.1:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/path2d/-/path2d-0.1.1.tgz#d3c3886cd2252fb2a7830c27ea7bb9a862d937ea"
-  integrity sha512-/+S03c8AGsDYKKBtRDqieTJv2GlkMb0bWjnqOgtF6MkjdUQ9a8ARAtxWf9NgKLGm2+WQr6+/tqJdU8HNGsIDoA==
-
 pdf-parse@^1.1.1:
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/pdf-parse/-/pdf-parse-1.1.1.tgz#745e07408679548b3995ff896fd38e96e19d14a7"
@@ -2735,16 +2697,6 @@ pdf-parse@^1.1.1:
     debug "^3.1.0"
     node-ensure "^0.0.0"
 
-pdfjs-dist@3.4.120:
-  version "3.4.120"
-  resolved "https://registry.yarnpkg.com/pdfjs-dist/-/pdfjs-dist-3.4.120.tgz#6f4222117157498f179c95dc4569fad6336a8fdd"
-  integrity sha512-B1hw9ilLG4m/jNeFA0C2A0PZydjxslP8ylU+I4XM7Bzh/xWETo9EiBV848lh0O0hLut7T6lK1V7cpAXv5BhxWw==
-  dependencies:
-    path2d-polyfill "^2.0.1"
-    web-streams-polyfill "^3.2.1"
-  optionalDependencies:
-    canvas "^2.11.0"
-
 peberminta@^0.9.0:
   version "0.9.0"
   resolved "https://registry.yarnpkg.com/peberminta/-/peberminta-0.9.0.tgz#8ec9bc0eb84b7d368126e71ce9033501dca2a352"
@@ -3175,15 +3127,6 @@ simple-concat@^1.0.0:
   resolved "https://registry.yarnpkg.com/simple-concat/-/simple-concat-1.0.1.tgz#f46976082ba35c2263f1c8ab5edfe26c41c9552f"
   integrity sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==
 
-simple-get@^3.0.3:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/simple-get/-/simple-get-3.1.1.tgz#cc7ba77cfbe761036fbfce3d021af25fc5584d55"
-  integrity sha512-CQ5LTKGfCpvE1K0n2us+kuMPbk/q0EKl82s4aheV9oXjFEz6W/Y7oQFVJuU6QG77hRT4Ghb5RURteF5vnWjupA==
-  dependencies:
-    decompress-response "^4.2.0"
-    once "^1.3.1"
-    simple-concat "^1.0.0"
-
 simple-get@^4.0.0, simple-get@^4.0.1:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/simple-get/-/simple-get-4.0.1.tgz#4a39db549287c979d352112fa03fd99fd6bc3543"