From fa4ab0f65f290f79ee968210cb88ae145c2845d1 Mon Sep 17 00:00:00 2001
From: Jason Zhang <xzha4350@gmail.com>
Date: Wed, 26 Jun 2024 08:15:09 +0930
Subject: [PATCH] fix: sanitize filename before writing (#1743)

* fix: sanitize filename before writing

Fixes: https://github.com/Mintplex-Labs/anything-llm/issues/1737

* fixup

* fixup
---
 collector/utils/extensions/Confluence/index.js | 10 +++++-----
 collector/utils/files/index.js                 |  6 ++++++
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js
index 6df063107..22df1c7fe 100644
--- a/collector/utils/extensions/Confluence/index.js
+++ b/collector/utils/extensions/Confluence/index.js
@@ -3,7 +3,7 @@ const path = require("path");
 const { default: slugify } = require("slugify");
 const { v4 } = require("uuid");
 const UrlPattern = require("url-pattern");
-const { writeToServerDocuments } = require("../../files");
+const { writeToServerDocuments, sanitizeFileName } = require("../../files");
 const { tokenizeString } = require("../../tokenizer");
 const {
   ConfluencePagesLoader,
@@ -98,11 +98,11 @@ async function loadConfluence({ pageUrl, username, accessToken }, response) {
     console.log(
       `[Confluence Loader]: Saving ${doc.metadata.title} to ${outFolder}`
     );
-    writeToServerDocuments(
-      data,
-      `${slugify(doc.metadata.title)}-${data.id}`,
-      outFolderPath
+
+    const fileName = sanitizeFileName(
+      `${slugify(doc.metadata.title)}-${data.id}`
     );
+    writeToServerDocuments(data, fileName, outFolderPath);
   });
 
   return {
diff --git a/collector/utils/files/index.js b/collector/utils/files/index.js
index 9b56bb5b4..86b50c364 100644
--- a/collector/utils/files/index.js
+++ b/collector/utils/files/index.js
@@ -129,6 +129,11 @@ function normalizePath(filepath = "") {
   return result;
 }
 
+function sanitizeFileName(fileName) { // Returns fileName with angle brackets, colon, double quote, slash, backslash, pipe, question mark and asterisk removed.
+  if (!fileName) return fileName; // Falsy input (undefined, null, empty string) is handed back unchanged.
+  return fileName.replace(/[<>:"\/\\|?*]/g, ""); // The g flag removes every occurrence; matches are deleted, not substituted.
+}
+
 module.exports = {
   trashFile,
   isTextType,
@@ -137,4 +142,5 @@ module.exports = {
   wipeCollectorStorage,
   normalizePath,
   isWithin,
+  sanitizeFileName,
 };
-- 
GitLab