Skip to content
Snippets Groups Projects
Unverified commit d005107e, authored by Shanmukeshwar and committed by GitHub
Browse files

feat: Add endpoint to upload documents to a specified folder (#3276)


* feat: Add endpoint to retrieve documents by folder name

* isWithin Check on path to prevent path traversal

* feat: Add endpoint to upload documents to a specified folder

* refactor upload to folder endpoint + update jsdoc for swagger

* linting

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
parent 2f5ed6c7
No related branches found
No related tags found
No related merge requests found
......@@ -36,6 +36,7 @@ function apiDocumentEndpoints(app) {
"multipart/form-data": {
schema: {
type: 'object',
required: ['file'],
properties: {
file: {
type: 'string',
......@@ -66,7 +67,7 @@ function apiDocumentEndpoints(app) {
"description": "Unknown",
"docSource": "a text file uploaded by the user.",
"chunkSource": "anythingllm.txt",
"published": "1/16/2024, 3:07:00 PM",
"published": "1/16/2024, 3:07:00 PM",
"wordCount": 93,
"token_count_estimate": 115,
}
......@@ -123,6 +124,167 @@ function apiDocumentEndpoints(app) {
}
);
// POST /v1/document/upload/:folderName
// Sends the uploaded file through the collector, then relocates every document
// the collector reports so that it lives in the requested folder beneath
// `documentsPath`. The folder is created when it does not exist yet.
app.post(
  "/v1/document/upload/:folderName",
  [validApiKey, handleAPIFileUpload],
  async (request, response) => {
    /*
    #swagger.tags = ['Documents']
    #swagger.description = 'Upload a new file to a specific folder in AnythingLLM to be parsed and prepared for embedding. If the folder does not exist, it will be created.'
    #swagger.parameters['folderName'] = {
    in: 'path',
    description: 'Target folder path (defaults to "custom-documents" if not provided)',
    required: true,
    type: 'string',
    example: 'my-folder'
    }
    #swagger.requestBody = {
    description: 'File to be uploaded.',
    required: true,
    content: {
    "multipart/form-data": {
    schema: {
    type: 'object',
    required: ['file'],
    properties: {
    file: {
    type: 'string',
    format: 'binary',
    description: 'The file to upload'
    }
    }
    }
    }
    }
    }
    #swagger.responses[200] = {
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    success: true,
    error: null,
    documents: [
    {
    "location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
    "name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
    "url": "file:///Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt",
    "title": "anythingllm.txt",
    "docAuthor": "Unknown",
    "description": "Unknown",
    "docSource": "a text file uploaded by the user.",
    "chunkSource": "anythingllm.txt",
    "published": "1/16/2024, 3:07:00 PM",
    "wordCount": 93,
    "token_count_estimate": 115
    }
    ]
    }
    }
    }
    }
    }
    #swagger.responses[403] = {
    schema: {
    "$ref": "#/definitions/InvalidAPIKey"
    }
    }
    #swagger.responses[500] = {
    description: "Internal Server Error",
    content: {
    "application/json": {
    schema: {
    type: 'object',
    example: {
    success: false,
    error: "Document processing API is not online. Document will not be processed automatically."
    }
    }
    }
    }
    }
    */
    try {
      const uploadedName = request.file.originalname;
      const folder = normalizePath(
        request.params?.folderName || "custom-documents"
      );
      const folderOnDisk = path.join(documentsPath, folder);

      // Refuse any folder that would resolve outside of the documents storage root.
      const folderIsContained = isWithin(
        path.resolve(documentsPath),
        path.resolve(folderOnDisk)
      );
      if (!folderIsContained) {
        throw new Error("Invalid folder name");
      }

      if (!fs.existsSync(folderOnDisk)) {
        fs.mkdirSync(folderOnDisk, { recursive: true });
      }

      const collector = new CollectorApi();
      if (!(await collector.online())) {
        response
          .status(500)
          .json({
            success: false,
            error: `Document processing API is not online. Document ${uploadedName} will not be processed automatically.`,
          })
          .end();
        return;
      }

      // Hand the upload to the collector; a failed run is reported back as-is.
      const processed = await collector.processDocument(uploadedName);
      const documents = processed.documents;
      if (!processed.success) {
        response
          .status(500)
          .json({ success: false, error: processed.reason, documents })
          .end();
        return;
      }

      // Relocate each processed document whose current directory is not the
      // requested folder, and rewrite its `location`/`name` to match.
      for (const doc of documents) {
        if (path.dirname(doc.location) === folder) continue;

        const fromPath = path.join(documentsPath, normalizePath(doc.location));
        const fileName = path.basename(doc.location);
        const toPath = path.join(folderOnDisk, fileName);

        // Both ends of the move must stay under the documents storage root.
        const moveIsContained =
          isWithin(documentsPath, fromPath) && isWithin(documentsPath, toPath);
        if (!moveIsContained) {
          throw new Error("Invalid file location");
        }

        fs.renameSync(fromPath, toPath);
        doc.location = path.join(folder, fileName);
        doc.name = path.basename(doc.location);
      }

      collector.log(
        `Document ${uploadedName} uploaded, processed, and moved to folder ${folder} successfully.`
      );
      await Telemetry.sendTelemetry("document_uploaded");
      await EventLogs.logEvent("api_document_uploaded", {
        documentName: uploadedName,
        folder,
      });
      response.status(200).json({ success: true, error: null, documents });
    } catch (error) {
      // Any thrown error (including the validation errors above) ends as a bare 500.
      console.error(error.message, error);
      response.sendStatus(500).end();
    }
  }
);
app.post(
"/v1/document/upload-link",
[validApiKey],
......@@ -161,7 +323,7 @@ function apiDocumentEndpoints(app) {
"description": "No description found.",
"docSource": "URL link uploaded by the user.",
"chunkSource": "https:anythingllm.com.html",
"published": "1/16/2024, 3:46:33 PM",
"published": "1/16/2024, 3:46:33 PM",
"wordCount": 252,
"pageContent": "AnythingLLM is the best....",
"token_count_estimate": 447,
......@@ -264,7 +426,7 @@ function apiDocumentEndpoints(app) {
"description": "No description found.",
"docSource": "My custom description set during upload",
"chunkSource": "no chunk source specified",
"published": "1/16/2024, 3:46:33 PM",
"published": "1/16/2024, 3:46:33 PM",
"wordCount": 252,
"pageContent": "AnythingLLM is the best....",
"token_count_estimate": 447,
......
......@@ -865,7 +865,7 @@
"description": "Unknown",
"docSource": "a text file uploaded by the user.",
"chunkSource": "anythingllm.txt",
"published": "1/16/2024, 3:07:00 PM",
"published": "1/16/2024, 3:07:00 PM",
"wordCount": 93,
"token_count_estimate": 115
}
......@@ -901,16 +901,115 @@
"multipart/form-data": {
"schema": {
"type": "object",
"required": [
"file"
],
"properties": {
"file": {
"type": "string",
"format": "binary",
"description": "The file to upload"
}
},
}
}
}
}
}
}
},
"/v1/document/upload/{folderName}": {
"post": {
"tags": [
"Documents"
],
"description": "Upload a new file to a specific folder in AnythingLLM to be parsed and prepared for embedding. If the folder does not exist, it will be created.",
"parameters": [
{
"name": "folderName",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"type": "object",
"example": {
"success": true,
"error": null,
"documents": [
{
"location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
"name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
"url": "file://Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt",
"title": "anythingllm.txt",
"docAuthor": "Unknown",
"description": "Unknown",
"docSource": "a text file uploaded by the user.",
"chunkSource": "anythingllm.txt",
"published": "1/16/2024, 3:07:00 PM",
"wordCount": 93,
"token_count_estimate": 115
}
]
}
}
}
}
},
"403": {
"description": "Forbidden",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/InvalidAPIKey"
}
},
"application/xml": {
"schema": {
"$ref": "#/components/schemas/InvalidAPIKey"
}
}
}
},
"500": {
"description": "Internal Server Error",
"content": {
"application/json": {
"schema": {
"type": "object",
"example": {
"success": false,
"error": "Document processing API is not online. Document will not be processed automatically."
}
}
}
}
}
},
"requestBody": {
"description": "File to be uploaded.",
"required": true,
"content": {
"multipart/form-data": {
"schema": {
"type": "object",
"required": [
"file"
]
],
"properties": {
"file": {
"type": "string",
"format": "binary",
"description": "The file to upload"
}
}
}
}
}
......@@ -943,7 +1042,7 @@
"description": "No description found.",
"docSource": "URL link uploaded by the user.",
"chunkSource": "https:anythingllm.com.html",
"published": "1/16/2024, 3:46:33 PM",
"published": "1/16/2024, 3:46:33 PM",
"wordCount": 252,
"pageContent": "AnythingLLM is the best....",
"token_count_estimate": 447,
......@@ -1016,7 +1115,7 @@
"description": "No description found.",
"docSource": "My custom description set during upload",
"chunkSource": "no chunk source specified",
"published": "1/16/2024, 3:46:33 PM",
"published": "1/16/2024, 3:46:33 PM",
"wordCount": 252,
"pageContent": "AnythingLLM is the best....",
"token_count_estimate": 447,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment