From eeaa6b0151bfc857c97328d2679164d6d1848896 Mon Sep 17 00:00:00 2001
From: Shanmukeshwar <shanmukeshwar@icloud.com>
Date: Wed, 19 Feb 2025 03:31:19 +0530
Subject: [PATCH] feat: Add endpoint to retrieve documents by folder name
 (#3258)

* feat: Add endpoint to retrieve documents by folder name

* isWithin Check on path to prevent path traversal

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
---
 server/endpoints/api/document/index.js | 54 ++++++++++++++++++++++++++
 server/swagger/openapi.json            | 24 ++++++++++++
 server/utils/files/index.js            | 45 +++++++++++++++++++++
 3 files changed, 123 insertions(+)

diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js
index f49cf0dd4..014784fe4 100644
--- a/server/endpoints/api/document/index.js
+++ b/server/endpoints/api/document/index.js
@@ -4,6 +4,7 @@ const { handleAPIFileUpload } = require("../../../utils/files/multer");
 const {
   viewLocalFiles,
   findDocumentInDocuments,
+  getDocumentsByFolder,
   normalizePath,
   isWithin,
 } = require("../../../utils/files");
@@ -395,6 +396,59 @@ function apiDocumentEndpoints(app) {
     }
   });
 
+  // Lists the documents of a single storage folder for API-key clients.
+  // Any error thrown by getDocumentsByFolder is logged and answered with a 500.
+  app.get(
+    "/v1/documents/folder/:folderName",
+    [validApiKey],
+    async (request, response) => {
+      /*
+    #swagger.tags = ['Documents']
+    #swagger.description = 'Get all documents stored in a specific folder.'
+    #swagger.parameters['folderName'] = {
+      in: 'path',
+      description: 'Name of the folder to retrieve documents from',
+      required: true,
+      type: 'string'
+    }
+    #swagger.responses[200] = {
+      content: {
+        "application/json": {
+          schema: {
+            type: 'object',
+            example: {
+              folder: "custom-documents",
+              documents: [
+                {
+                  name: "document1.json",
+                  type: "file",
+                  cached: false,
+                  pinnedWorkspaces: [],
+                  watched: false
+                }
+              ]
+            }
+          }
+        }
+      }
+    }
+    #swagger.responses[403] = {
+      schema: {
+        "$ref": "#/definitions/InvalidAPIKey"
+      }
+    }
+    */
+      try {
+        const { folderName } = request.params;
+        const result = await getDocumentsByFolder(folderName);
+        response.status(200).json(result);
+      } catch (e) {
+        console.error(e.message, e);
+        response.sendStatus(500).end();
+      }
+    }
+  );
+
   app.get(
     "/v1/document/accepted-file-types",
     [validApiKey],
diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json
index 7a459c9cf..3c52dc164 100644
--- a/server/swagger/openapi.json
+++ b/server/swagger/openapi.json
@@ -1124,6 +1124,30 @@
         }
       }
     },
+    "/v1/documents/folder/{folderName}": {
+      "get": {
+        "tags": [
+          "Documents"
+        ],
+        "description": "Get all documents stored in a specific folder.",
+        "parameters": [
+          {
+            "name": "folderName",
+            "in": "path",
+            "required": true,
+            "schema": {
+              "type": "string"
+            },
+            "description": "Name of the folder to retrieve documents from"
+          }
+        ],
+        "responses": {
+          "403": {
+            "description": "Forbidden"
+          }
+        }
+      }
+    },
     "/v1/document/accepted-file-types": {
       "get": {
         "tags": [
diff --git a/server/utils/files/index.js b/server/utils/files/index.js
index 625d8582c..4b33fbc0c 100644
--- a/server/utils/files/index.js
+++ b/server/utils/files/index.js
@@ -91,6 +91,50 @@ async function viewLocalFiles() {
   return directory;
 }
 
+/**
+ * Lists the JSON documents stored directly inside one folder of `documentsPath`.
+ * @param {string} folderName - Folder to read, relative to `documentsPath`.
+ * @returns {Promise<{folder: string, documents: object[]}>} Each document is its
+ * file metadata (minus `pageContent`) plus `cached`, `pinnedWorkspaces`, `watched`.
+ * @throws {Error} When the name is empty, or the resolved path is missing, is not
+ * a directory, or is not within `documentsPath`.
+ */
+async function getDocumentsByFolder(folderName = "") {
+  if (!folderName) throw new Error("Folder name must be provided.");
+  const folderPath = path.resolve(documentsPath, normalizePath(folderName));
+  if (
+    !isWithin(documentsPath, folderPath) ||
+    !fs.existsSync(folderPath) ||
+    !fs.lstatSync(folderPath).isDirectory()
+  )
+    throw new Error(`Folder "${folderName}" does not exist.`);
+
+  const documents = [];
+  const filenames = {}; // `<folder>/<file>` lookup key -> bare filename
+  for (const file of fs.readdirSync(folderPath)) {
+    if (path.extname(file) !== ".json") continue;
+    const rawData = fs.readFileSync(path.join(folderPath, file), "utf8");
+    const cachefilename = `${folderName}/${file}`;
+    // pageContent is split off so only the remaining metadata is returned.
+    const { pageContent, ...metadata } = JSON.parse(rawData);
+    const cached = await cachedVectorInformation(cachefilename, true);
+    documents.push({ name: file, type: "file", ...metadata, cached });
+    filenames[cachefilename] = file;
+  }
+
+  // The pinned and watched lookups are independent, so run them concurrently.
+  const [pinned, watched] = await Promise.all([
+    getPinnedWorkspacesByDocument(filenames),
+    getWatchedDocumentFilenames(filenames),
+  ]);
+  for (const doc of documents) {
+    doc.pinnedWorkspaces = pinned[doc.name] || [];
+    doc.watched = Object.prototype.hasOwnProperty.call(watched, doc.name);
+  }
+
+  return { folder: folderName, documents };
+}
+
 /**
  * Searches the vector-cache folder for existing information so we dont have to re-embed a
  * document and can instead push directly to vector db.
@@ -304,4 +348,5 @@ module.exports = {
   documentsPath,
   hasVectorCachedFiles,
   purgeEntireVectorCache,
+  getDocumentsByFolder,
 };
-- 
GitLab