diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js index 60090bddcbd9ce48e3cd9bd8c0b084e81131955a..faa6b85e4899e2cb0c8d6d6811d3202e6367eede 100644 --- a/server/endpoints/api/document/index.js +++ b/server/endpoints/api/document/index.js @@ -36,6 +36,7 @@ function apiDocumentEndpoints(app) { "multipart/form-data": { schema: { type: 'object', + required: ['file'], properties: { file: { type: 'string', @@ -66,7 +67,7 @@ function apiDocumentEndpoints(app) { "description": "Unknown", "docSource": "a text file uploaded by the user.", "chunkSource": "anythingllm.txt", - "published": "1/16/2024, 3:07:00 PM", + "published": "1/16/2024, 3:07:00 PM", "wordCount": 93, "token_count_estimate": 115, } @@ -123,6 +124,167 @@ function apiDocumentEndpoints(app) { } ); + app.post( + "/v1/document/upload/:folderName", + [validApiKey, handleAPIFileUpload], + async (request, response) => { + /* + #swagger.tags = ['Documents'] + #swagger.description = 'Upload a new file to a specific folder in AnythingLLM to be parsed and prepared for embedding. If the folder does not exist, it will be created.' 
+ #swagger.parameters['folderName'] = { + in: 'path', + description: 'Name of the target folder, relative to the documents directory. Created if it does not exist.', + required: true, + type: 'string', + example: 'my-folder' + } + #swagger.requestBody = { + description: 'File to be uploaded.', + required: true, + content: { + "multipart/form-data": { + schema: { + type: 'object', + required: ['file'], + properties: { + file: { + type: 'string', + format: 'binary', + description: 'The file to upload' + } + } + } + } + } + } + #swagger.responses[200] = { + content: { + "application/json": { + schema: { + type: 'object', + example: { + success: true, + error: null, + documents: [ + { + "location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json", + "name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json", + "url": "file:///Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt", + "title": "anythingllm.txt", + "docAuthor": "Unknown", + "description": "Unknown", + "docSource": "a text file uploaded by the user.", + "chunkSource": "anythingllm.txt", + "published": "1/16/2024, 3:07:00 PM", + "wordCount": 93, + "token_count_estimate": 115 + } + ] + } + } + } + } + } + #swagger.responses[403] = { + schema: { + "$ref": "#/definitions/InvalidAPIKey" + } + } + #swagger.responses[500] = { + description: "Internal Server Error", + content: { + "application/json": { + schema: { + type: 'object', + example: { + success: false, + error: "Document processing API is not online. Document will not be processed automatically." 
+ } + } + } + } + } + */ + try { + const { originalname } = request.file; + let folder = request.params?.folderName || "custom-documents"; + folder = normalizePath(folder); + const targetFolderPath = path.join(documentsPath, folder); + + if ( + !isWithin(path.resolve(documentsPath), path.resolve(targetFolderPath)) + ) + throw new Error("Invalid folder name"); + if (!fs.existsSync(targetFolderPath)) + fs.mkdirSync(targetFolderPath, { recursive: true }); + + const Collector = new CollectorApi(); + const processingOnline = await Collector.online(); + if (!processingOnline) { + response + .status(500) + .json({ + success: false, + error: `Document processing API is not online. Document ${originalname} will not be processed automatically.`, + }) + .end(); + return; + } + + // Process the uploaded document + const { success, reason, documents } = + await Collector.processDocument(originalname); + if (!success) { + response + .status(500) + .json({ success: false, error: reason, documents }) + .end(); + return; + } + + // For each processed document, check if it is already in the desired folder. + // If not, move it using similar logic as in the move-files endpoint. 
+ for (const doc of documents) { + const currentFolder = path.dirname(doc.location); + if (currentFolder !== folder) { + const sourcePath = path.join( + documentsPath, + normalizePath(doc.location) + ); + const destinationPath = path.join( + targetFolderPath, + path.basename(doc.location) + ); + + if ( + !isWithin(documentsPath, sourcePath) || + !isWithin(documentsPath, destinationPath) + ) + throw new Error("Invalid file location"); + + fs.renameSync(sourcePath, destinationPath); + doc.location = path.join(folder, path.basename(doc.location)); + doc.name = path.basename(doc.location); + } + } + + Collector.log( + `Document ${originalname} uploaded, processed, and moved to folder ${folder} successfully.` + ); + + await Telemetry.sendTelemetry("document_uploaded"); + await EventLogs.logEvent("api_document_uploaded", { + documentName: originalname, + folder, + }); + response.status(200).json({ success: true, error: null, documents }); + } catch (e) { + console.error(e.message, e); + response.sendStatus(500).end(); + } + } + ); + app.post( "/v1/document/upload-link", [validApiKey], @@ -161,7 +323,7 @@ function apiDocumentEndpoints(app) { "description": "No description found.", "docSource": "URL link uploaded by the user.", "chunkSource": "https:anythingllm.com.html", - "published": "1/16/2024, 3:46:33 PM", + "published": "1/16/2024, 3:46:33 PM", "wordCount": 252, "pageContent": "AnythingLLM is the best....", "token_count_estimate": 447, @@ -264,7 +426,7 @@ function apiDocumentEndpoints(app) { "description": "No description found.", "docSource": "My custom description set during upload", "chunkSource": "no chunk source specified", - "published": "1/16/2024, 3:46:33 PM", + "published": "1/16/2024, 3:46:33 PM", "wordCount": 252, "pageContent": "AnythingLLM is the best....", "token_count_estimate": 447, diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index 91d0aa53db41d1cc63628e893a98f13657c3a57e..e7b3582d36163fe81ef42728da95cf57de3e64c4 100644 --- 
a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -865,7 +865,7 @@ "description": "Unknown", "docSource": "a text file uploaded by the user.", "chunkSource": "anythingllm.txt", - "published": "1/16/2024, 3:07:00 PM", + "published": "1/16/2024, 3:07:00 PM", "wordCount": 93, "token_count_estimate": 115 } @@ -901,16 +901,115 @@ "multipart/form-data": { "schema": { "type": "object", + "required": [ + "file" + ], "properties": { "file": { "type": "string", "format": "binary", "description": "The file to upload" } - }, + } + } + } + } + } + } + }, + "/v1/document/upload/{folderName}": { + "post": { + "tags": [ + "Documents" + ], + "description": "Upload a new file to a specific folder in AnythingLLM to be parsed and prepared for embedding. If the folder does not exist, it will be created.", + "parameters": [ + { + "name": "folderName", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "example": { + "success": true, + "error": null, + "documents": [ + { + "location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json", + "name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json", + "url": "file://Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt", + "title": "anythingllm.txt", + "docAuthor": "Unknown", + "description": "Unknown", + "docSource": "a text file uploaded by the user.", + "chunkSource": "anythingllm.txt", + "published": "1/16/2024, 3:07:00 PM", + "wordCount": 93, + "token_count_estimate": 115 + } + ] + } + } + } + } + }, + "403": { + "description": "Forbidden", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InvalidAPIKey" + } + }, + "application/xml": { + "schema": { + "$ref": "#/components/schemas/InvalidAPIKey" + } + } + } + }, + "500": { + "description": "Internal Server Error", + "content": { + 
"application/json": { + "schema": { + "type": "object", + "example": { + "success": false, + "error": "Document processing API is not online. Document will not be processed automatically." + } + } + } + } + } + }, + "requestBody": { + "description": "File to be uploaded.", + "required": true, + "content": { + "multipart/form-data": { + "schema": { + "type": "object", "required": [ "file" - ] + ], + "properties": { + "file": { + "type": "string", + "format": "binary", + "description": "The file to upload" + } + } } } } @@ -943,7 +1042,7 @@ "description": "No description found.", "docSource": "URL link uploaded by the user.", "chunkSource": "https:anythingllm.com.html", - "published": "1/16/2024, 3:46:33 PM", + "published": "1/16/2024, 3:46:33 PM", "wordCount": 252, "pageContent": "AnythingLLM is the best....", "token_count_estimate": 447, @@ -1016,7 +1115,7 @@ "description": "No description found.", "docSource": "My custom description set during upload", "chunkSource": "no chunk source specified", - "published": "1/16/2024, 3:46:33 PM", + "published": "1/16/2024, 3:46:33 PM", "wordCount": 252, "pageContent": "AnythingLLM is the best....", "token_count_estimate": 447,