Skip to content
Snippets Groups Projects
Unverified commit a5059289, authored by Timothy Carambat and committed by GitHub
Browse files

Display better error messages from document processor (#243)

pass messages to frontend on success/failure
resolves #242
parent 3e784767
No related branches found
No related tags found
No related merge requests found
......@@ -16,6 +16,10 @@ def as_docx(**kwargs):
data = loader.load()[0]
content = data.page_content
if len(content) == 0:
print(f"Resulting text content was empty for {filename}{ext}.")
return(False, f"No text content found in {filename}{ext}")
print(f"-- Working {fullpath} --")
data = {
'id': guid(),
......@@ -33,7 +37,9 @@ def as_docx(**kwargs):
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
move_source(parent_dir, f"{filename}{ext}", remove=remove)
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
return(True, None)
def as_odt(**kwargs):
parent_dir = kwargs.get('directory', 'hotdir')
......@@ -46,6 +52,10 @@ def as_odt(**kwargs):
data = loader.load()[0]
content = data.page_content
if len(content) == 0:
print(f"Resulting text content was empty for {filename}{ext}.")
return(False, f"No text content found in {filename}{ext}")
print(f"-- Working {fullpath} --")
data = {
'id': guid(),
......@@ -63,4 +73,6 @@ def as_odt(**kwargs):
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
move_source(parent_dir, f"{filename}{ext}", remove=remove)
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
return(True, None)
\ No newline at end of file
......@@ -16,6 +16,10 @@ def as_markdown(**kwargs):
data = loader.load()[0]
content = data.page_content
if len(content) == 0:
print(f"Resulting page content was empty - no text could be extracted from {filename}{ext}.")
return(False, f"No text could be extracted from {filename}{ext}.")
print(f"-- Working {fullpath} --")
data = {
'id': guid(),
......@@ -33,4 +37,6 @@ def as_markdown(**kwargs):
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
move_source(parent_dir, f"{filename}{ext}", remove=remove)
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
return(True, None)
......@@ -55,5 +55,7 @@ def as_mbox(**kwargs):
}
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
move_source(parent_dir, f"{filename}{ext}", remove=remove)
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
return(True, None)
......@@ -19,7 +19,7 @@ def as_pdf(**kwargs):
if len(pages) == 0:
print(f"{fullpath} parsing resulted in no pages - nothing to do.")
return False
return(False, f"No pages found for {filename}{ext}!")
# Set doc to the first page so we can still get the metadata from PyMuPDF but without all the unicode issues.
doc = pages[0]
......@@ -31,6 +31,10 @@ def as_pdf(**kwargs):
print(f"-- Parsing content from pg {page.number} --")
page_content += unidecode(page.get_text('text'))
if len(page_content) == 0:
print(f"Resulting page content was empty - no text could be extracted from the document.")
return(False, f"No text content could be extracted from {filename}{ext}!")
title = doc.metadata.get('title')
author = doc.metadata.get('author')
subject = doc.metadata.get('subject')
......@@ -50,4 +54,6 @@ def as_pdf(**kwargs):
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
move_source(parent_dir, f"{filename}{ext}", remove=remove)
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
return(True, None)
......@@ -12,6 +12,10 @@ def as_text(**kwargs):
fullpath = f"{parent_dir}/{filename}{ext}"
content = open(fullpath).read()
if len(content) == 0:
print(f"Resulting text content was empty for {filename}{ext}.")
return(False, f"No text content found in {filename}{ext}")
print(f"-- Working {fullpath} --")
data = {
'id': guid(),
......@@ -28,4 +32,6 @@ def as_text(**kwargs):
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
move_source(parent_dir, f"{filename}{ext}", remove=remove)
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
return(True, None)
......@@ -25,11 +25,11 @@ def process_single(directory, target_doc):
move_source(new_destination_filename=target_doc, failed=True, remove=True)
return (False, f"{fileext} not a supported file type for conversion. It will not be processed.")
FILETYPES[fileext](
# Returns Tuple of (Boolean, String|None) of success status and possible error message.
# Error message will display to user.
return FILETYPES[fileext](
directory=directory,
filename=filename,
ext=fileext,
remove_on_complete=True # remove source document to save disk space.
)
return (True, None)
)
\ No newline at end of file
......@@ -14,7 +14,8 @@ function FileUploadProgressComponent({
onUploadError,
}) {
const [timerMs, setTimerMs] = useState(10);
const [status, setStatus] = useState(file?.rejected ? "uploading" : "failed");
const [status, setStatus] = useState("pending");
const [error, setError] = useState("");
useEffect(() => {
async function uploadFile() {
......@@ -31,6 +32,7 @@ function FileUploadProgressComponent({
setStatus("failed");
clearInterval(timer);
onUploadError(data.error);
setError(data.error);
} else {
setStatus("complete");
clearInterval(timer);
......@@ -58,6 +60,24 @@ function FileUploadProgressComponent({
);
}
if (status === "failed") {
return (
<div className="w-fit px-2 py-2 flex items-center gap-x-4 rounded-lg bg-blue-100 border-blue-600 dark:bg-stone-800 bg-opacity-50 border dark:border-stone-600">
<div className="w-6 h-6">
<XCircle className="w-6 h-6 stroke-white bg-red-500 rounded-full p-1 w-full h-full" />
</div>
<div className="flex flex-col">
<p className="text-black dark:text-stone-200 text-sm font-mono overflow-x-scroll">
{truncate(file.name, 30)}
</p>
<p className="text-red-700 dark:text-red-400 text-xs font-mono">
{error}
</p>
</div>
</div>
);
}
return (
<div className="w-fit px-2 py-2 flex items-center gap-x-4 rounded-lg bg-blue-100 border-blue-600 dark:bg-stone-800 bg-opacity-50 border dark:border-stone-600">
<div className="w-6 h-6">
......@@ -77,6 +97,8 @@ function FileUploadProgressComponent({
</div>
</div>
);
return null;
}
export default memo(FileUploadProgressComponent);
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.