From 60a00843dfc2ceb012cb4c8c62257270ae28e893 Mon Sep 17 00:00:00 2001 From: Timothy Carambat <rambat1010@gmail.com> Date: Mon, 26 Jun 2023 17:20:09 -0700 Subject: [PATCH] add ability to purge document from custom documents as well as cleanup its associated cache file (#113) * add ability to purge document from custom documents as well as cleanup its assoicated cache file * update alert text --- .../Documents/Directory/index.jsx | 41 ++++++++++++++----- frontend/src/models/system.js | 12 ++++++ server/endpoints/system.js | 12 ++++++ server/utils/files/index.js | 41 +++++++++++++++++++ server/utils/files/purgeDocument.js | 17 ++++++++ .../utils/vectorDbProviders/chroma/index.js | 8 ++-- .../utils/vectorDbProviders/pinecone/index.js | 8 ++-- 7 files changed, 121 insertions(+), 18 deletions(-) create mode 100644 server/utils/files/purgeDocument.js diff --git a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx index fe5e38a3f..b838d0b10 100644 --- a/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/Documents/Directory/index.jsx @@ -8,6 +8,7 @@ import { Zap, } from "react-feather"; import { nFormatter } from "../../../../../utils/numbers"; +import System from "../../../../../models/system"; export default function Directory({ files, @@ -19,6 +20,16 @@ export default function Directory({ const [isExpanded, toggleExpanded] = useState(false); const [showDetails, toggleDetails] = useState(false); const [showZap, setShowZap] = useState(false); + const handleDelete = async (name, meta) => { + if ( + !window.confirm( + "Are you sure you want to delete this document?\nThis will require you to re-upload and re-embed it.\nThis document will be removed from any workspace that is currently referencing it.\nThis action is not reversible." + ) + ) + return false; + document?.getElementById(meta?.id)?.remove(); + await System.deleteDocument(name, meta); + }; if (files.type === "folder") { return ( @@ -73,7 +84,7 @@ export default function Directory({ const { name, type: _type, ...meta } = files; return ( - <div className="ml-[20px] my-2"> + <div className="ml-[20px] my-2" id={meta.id}> <div className="flex items-center"> {meta?.cached && ( <button @@ -134,15 +145,25 @@ export default function Directory({ </div> </div> {showDetails && ( - <div className="ml-[20px] flex flex-col gap-y-1 my-1 p-2 rounded-md bg-slate-200 font-mono text-sm overflow-x-scroll"> - {Object.entries(meta).map(([key, value], i) => { - if (key === "cached") return null; - return ( - <p key={i} className="whitespace-pre"> - {key}: {value} - </p> - ); - })} + <div className="w-full flex flex-col"> + <div className="ml-[20px] flex flex-col gap-y-1 my-1 p-2 rounded-md bg-slate-200 font-mono text-sm overflow-x-scroll"> + {Object.entries(meta).map(([key, value], i) => { + if (key === "cached") return null; + return ( + <p key={i} className="whitespace-pre"> + {key}: {value} + </p> + ); + })} + </div> + <div + onClick={() => handleDelete(`${parent}/${name}`, meta)} + className="flex items-center justify-end w-full" + > + <button className="text-sm text-slate-400 dark:text-stone-500 hover:text-red-500"> + Purge Document + </button> + </div> </div> )} </div> diff --git a/frontend/src/models/system.js b/frontend/src/models/system.js index 1ce003d51..99e462694 100644 --- a/frontend/src/models/system.js +++ b/frontend/src/models/system.js @@ -86,6 +86,18 @@ const System = { return { newValues: null, error: e.message }; }); }, + deleteDocument: async (name, meta) => { + return await fetch(`${API_BASE}/system/remove-document`, { + method: "DELETE", + headers: baseHeaders(), + body: JSON.stringify({ name, meta }), + }) + .then((res) => res.ok) + .catch((e) => { + console.error(e); + return false; + }); + }, }; export default System; diff --git a/server/endpoints/system.js b/server/endpoints/system.js index ba16b2e5b..5abb5c717 100644 --- a/server/endpoints/system.js +++ b/server/endpoints/system.js @@ -7,6 +7,7 @@ const { checkPythonAppAlive, acceptedFileTypes, } = require("../utils/files/documentProcessor"); +const { purgeDocument } = require("../utils/files/purgeDocument"); const { getVectorDbClass } = require("../utils/helpers"); const { updateENV } = require("../utils/helpers/updateENV"); const { reqBody, makeJWT } = require("../utils/http"); @@ -94,6 +95,17 @@ function systemEndpoints(app) { } }); + app.delete("/system/remove-document", async (request, response) => { + try { + const { name, meta } = reqBody(request); + await purgeDocument(name, meta); + response.sendStatus(200).end(); + } catch (e) { + console.log(e.message, e); + response.sendStatus(500).end(); + } + }); + app.get("/system/local-files", async (_, response) => { try { const localFiles = await viewLocalFiles(); diff --git a/server/utils/files/index.js b/server/utils/files/index.js index 0e49b540a..83505f8b4 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -141,10 +141,51 @@ async function storeVectorResult(vectorData = [], filename = null) { return; } +// Purges a file from the documents/ folder. +async function purgeSourceDocument(filename = null) { + if (!filename) return; + console.log(`Purging document of ${filename}.`); + + const filePath = + process.env.NODE_ENV === "development" + ? path.resolve(__dirname, `../../storage/documents`, filename) + : path.resolve(process.env.STORAGE_DIR, `documents`, filename); + + if (!fs.existsSync(filePath)) { + console.log(`Could not located cachefile for ${filename}`, filePath); + return; + } + + fs.rmSync(filePath); + return; +} + +// Purges a vector-cache file from the vector-cache/ folder. +async function purgeVectorCache(filename = null) { + if (!filename) return; + console.log(`Purging cached vectorized results of ${filename}.`); + + const digest = uuidv5(filename, uuidv5.URL); + const filePath = + process.env.NODE_ENV === "development" + ? path.resolve(__dirname, `../../storage/vector-cache`, `${digest}.json`) + : path.resolve(process.env.STORAGE_DIR, `vector-cache`, `${digest}.json`); + + if (!fs.existsSync(filePath)) { + console.log(`Could not located cache file for ${filename}`, filePath); + return; + } + + fs.rmSync(filePath); + return; +} + module.exports = { cachedVectorInformation, collectDocumentData, viewLocalFiles, + purgeSourceDocument, + purgeVectorCache, storeVectorResult, fileData, }; diff --git a/server/utils/files/purgeDocument.js b/server/utils/files/purgeDocument.js new file mode 100644 index 000000000..a584a4261 --- /dev/null +++ b/server/utils/files/purgeDocument.js @@ -0,0 +1,17 @@ +const { purgeVectorCache, purgeSourceDocument } = require("."); +const { Document } = require("../../models/documents"); +const { Workspace } = require("../../models/workspace"); + +async function purgeDocument(filename, meta) { + const workspaces = await Workspace.where(); + for (const workspace of workspaces) { + await Document.removeDocuments(workspace, [filename]); + } + await purgeVectorCache(filename); + await purgeSourceDocument(filename); + return; +} + +module.exports = { + purgeDocument, +}; diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index 532f629cd..1fb324aa6 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -350,10 +350,10 @@ const Chroma = { content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed. Context: ${contextTexts - .map((text, i) => { - return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; - }) - .join("")}`, + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, }; const memory = [prompt, ...chatHistory, { role: "user", content: input }]; const responseText = await this.getChatCompletion(this.openai(), memory, { diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index 37198ca49..dc984f7fa 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -321,10 +321,10 @@ const Pinecone = { content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed. Context: ${contextTexts - .map((text, i) => { - return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; - }) - .join("")}`, + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, }; const memory = [prompt, ...chatHistory, { role: "user", content: input }]; -- GitLab