diff --git a/collector/processSingleFile/index.js b/collector/processSingleFile/index.js index 9efd3a70f81512e8c191d1958ddcfc957f6a23ec..569a2cde27ad996932633b55c3214e3a5afe3c15 100644 --- a/collector/processSingleFile/index.js +++ b/collector/processSingleFile/index.js @@ -4,7 +4,7 @@ const { WATCH_DIRECTORY, SUPPORTED_FILETYPE_CONVERTERS, } = require("../utils/constants"); -const { trashFile } = require("../utils/files"); +const { trashFile, isTextType } = require("../utils/files"); const RESERVED_FILES = ["__HOTDIR__.md"]; async function processSingleFile(targetFilename) { @@ -31,17 +31,25 @@ async function processSingleFile(targetFilename) { }; } - if (!Object.keys(SUPPORTED_FILETYPE_CONVERTERS).includes(fileExtension)) { - trashFile(fullFilePath); - return { - success: false, - reason: `File extension ${fileExtension} not supported for parsing.`, - documents: [], - }; + let processFileAs = fileExtension; + if (!SUPPORTED_FILETYPE_CONVERTERS.hasOwnProperty(fileExtension)) { + if (isTextType(fullFilePath)) { + console.log( + `\x1b[33m[Collector]\x1b[0m The provided filetype of ${fileExtension} does not have a preset and will be processed as .txt.` + ); + processFileAs = ".txt"; + } else { + trashFile(fullFilePath); + return { + success: false, + reason: `File extension ${fileExtension} not supported for parsing and cannot be assumed as text file type.`, + documents: [], + }; + } } const FileTypeProcessor = require(SUPPORTED_FILETYPE_CONVERTERS[ - fileExtension + processFileAs ]); return await FileTypeProcessor({ fullFilePath, diff --git a/collector/utils/files/index.js b/collector/utils/files/index.js index caf33c888a2bc3dbc836e070eda5bdd78941e7fe..3e6ce3445e19adf42612d435b26b87e4f3439413 100644 --- a/collector/utils/files/index.js +++ b/collector/utils/files/index.js @@ -1,5 +1,33 @@ const fs = require("fs"); const path = require("path"); +const { getType } = require("mime"); + +function isTextType(filepath) { + if (!fs.existsSync(filepath)) return false; + // 
These are types of mime primary classes that for sure + // cannot also be forced into a text type. + const nonTextTypes = ["multipart", "image", "model", "audio", "video"]; + // These are full-mimes we for sure cannot parse or interpret as text + // documents + const BAD_MIMES = [ + "application/octet-stream", + "application/zip", + "application/pkcs8", + "application/vnd.microsoft.portable-executable", + "application/x-msdownload", + ]; + + try { + const mime = getType(filepath); + if (BAD_MIMES.includes(mime)) return false; + + const type = mime.split("/")[0]; + if (nonTextTypes.includes(type)) return false; + return true; + } catch { + return false; + } +} function trashFile(filepath) { if (!fs.existsSync(filepath)) return; @@ -94,6 +122,7 @@ async function wipeCollectorStorage() { module.exports = { trashFile, + isTextType, createdDate, writeToServerDocuments, wipeCollectorStorage, diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx index 557fe418145a10ea1c63f4fe18b4c9731d15c2f8..158719445ac004b67228c49c0ec6aac1f3c056bc 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx @@ -8,7 +8,6 @@ function Directory({ files, loading, setLoading, - fileTypes, workspace, fetchKeys, selectedItems, @@ -135,9 +134,7 @@ function Directory({ </div> )} </div> - <UploadFile - fileTypes={fileTypes} workspace={workspace} fetchKeys={fetchKeys} setLoading={setLoading} diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/FileUploadProgress/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/FileUploadProgress/index.jsx index 31cf57943bb4d1401c8d844e3f295b12fc7e14ef..c375aa2e3635d2fe959ed7c082d6055163afb38f 100644 --- 
a/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/FileUploadProgress/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/FileUploadProgress/index.jsx @@ -61,7 +61,7 @@ function FileUploadProgressComponent({ if (status === "failed") { return ( <div className="h-14 px-2 py-2 flex items-center gap-x-4 rounded-lg bg-white/5 border border-white/40 overflow-y-auto"> - <div className="w-6 h-6"> + <div className="w-6 h-6 flex-shrink-0"> <XCircle className="w-6 h-6 stroke-white bg-red-500 rounded-full p-1 w-full h-full" /> </div> <div className="flex flex-col"> @@ -76,7 +76,7 @@ function FileUploadProgressComponent({ return ( <div className="h-14 px-2 py-2 flex items-center gap-x-4 rounded-lg bg-white/5 border border-white/40"> - <div className="w-6 h-6"> + <div className="w-6 h-6 flex-shrink-0"> {status !== "complete" ? ( <div className="flex items-center justify-center"> <PreLoader size="6" /> diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/index.jsx index a6cee8c8035bd7055c52be8b84adcc1ee32e2846..182cebcd214e58701061437035d9c4835ad946dd 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/UploadFile/index.jsx @@ -7,12 +7,7 @@ import { v4 } from "uuid"; import FileUploadProgress from "./FileUploadProgress"; import Workspace from "../../../../../models/workspace"; -export default function UploadFile({ - workspace, - fileTypes, - fetchKeys, - setLoading, -}) { +export default function UploadFile({ workspace, fetchKeys, setLoading }) { const [ready, setReady] = useState(false); const [files, setFiles] = useState([]); const [fetchingUrl, setFetchingUrl] = useState(false); @@ -76,9 +71,6 @@ export default function UploadFile({ const { getRootProps, getInputProps } = useDropzone({ onDrop, - accept: { - ...fileTypes, - }, 
disabled: !ready, }); @@ -109,9 +101,7 @@ export default function UploadFile({ Click to upload or drag and drop </div> <div className="text-white text-opacity-60 text-xs font-medium py-1"> - {Object.values(fileTypes ?? []) - .flat() - .join(" ")} + supports text files, csv's, spreadsheets, audio files, and more! </div> </div> ) : ( diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/index.jsx index e8b63c903ca7c6981db6dbe4aa715fb1602acee0..736a1476f6b1d61a3084f0640d5c6fa980402b95 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/index.jsx @@ -15,11 +15,7 @@ const MODEL_COSTS = { "text-embedding-3-large": 0.00000013, // $0.00013 / 1K tokens }; -export default function DocumentSettings({ - workspace, - fileTypes, - systemSettings, -}) { +export default function DocumentSettings({ workspace, systemSettings }) { const [highlightWorkspace, setHighlightWorkspace] = useState(false); const [availableDocs, setAvailableDocs] = useState([]); const [loading, setLoading] = useState(true); @@ -201,7 +197,6 @@ export default function DocumentSettings({ loading={loading} loadingMessage={loadingMessage} setLoading={setLoading} - fileTypes={fileTypes} workspace={workspace} fetchKeys={fetchKeys} selectedItems={selectedItems} diff --git a/frontend/src/components/Modals/MangeWorkspace/index.jsx b/frontend/src/components/Modals/MangeWorkspace/index.jsx index 6696a875697ee0d7fe0bd442f6810cc744f05c0d..ef3a58afb7be4ac52bdddf90a0cdb5a3b707f69f 100644 --- a/frontend/src/components/Modals/MangeWorkspace/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/index.jsx @@ -11,17 +11,14 @@ const noop = () => {}; const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => { const { slug } = useParams(); const [workspace, setWorkspace] = useState(null); - const [fileTypes, setFileTypes] = useState(null); 
const [settings, setSettings] = useState({}); useEffect(() => { - async function checkSupportedFiletypes() { - const acceptedTypes = await System.acceptedDocumentTypes(); + async function getSettings() { const _settings = await System.keys(); - setFileTypes(acceptedTypes ?? {}); setSettings(_settings ?? {}); } - checkSupportedFiletypes(); + getSettings(); }, []); useEffect(() => { @@ -78,11 +75,7 @@ const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => { <X className="text-gray-300 text-lg" /> </button> </div> - <DocumentSettings - workspace={workspace} - fileTypes={fileTypes} - systemSettings={settings} - /> + <DocumentSettings workspace={workspace} systemSettings={settings} /> </div> </div> </div>