diff --git a/collector/scripts/watch/convert/as_docx.py b/collector/scripts/watch/convert/as_docx.py index 33aaaaaeb0b9b619a6aee4c02f428f61e604db8e..b37786179324a70828d69b086116270acc79efb5 100644 --- a/collector/scripts/watch/convert/as_docx.py +++ b/collector/scripts/watch/convert/as_docx.py @@ -61,7 +61,7 @@ def as_odt(**kwargs): 'id': guid(), 'url': "file://"+os.path.abspath(f"{parent_dir}/processed/{filename}{ext}"), 'title': f"{filename}{ext}", - 'author': 'Unknown', # TODO: Find a better author + 'docAuthor': 'Unknown', # TODO: Find a better author 'description': 'Unknown', # TODO: Find a better bescription 'docSource': 'ODT Text file uploaded by the user.', 'chunkSource': f"{filename}{ext}", diff --git a/collector/scripts/watch/convert/as_mbox.py b/collector/scripts/watch/convert/as_mbox.py index f5a645eaa70770271ddb4dac4a32987182fbfe96..2d7c08e637c7b4d5d281377072d4c568eb63c064 100644 --- a/collector/scripts/watch/convert/as_mbox.py +++ b/collector/scripts/watch/convert/as_mbox.py @@ -110,11 +110,8 @@ def as_mbox(**kwargs): "docAuthor": message["From"], "description": f"email from {message['From']} to {message['To']}", "docSource": "mbox file uploaded by the user.", + "chunkSource": subject, "published": file_creation_time(fullpath), - "sender": message["From"], - "recipient": message["To"], - "subject": subject, - "date_sent": date_sent, "wordCount": len(content), "pageContent": content, "token_count_estimate": len(tokenize(content)), diff --git a/collector/scripts/watch/convert/as_text.py b/collector/scripts/watch/convert/as_text.py index e6ad85140d88e076ca8f57edccdd64a117c2d6cf..1b897874b4fadc5f9e73cdc5ebbd148de447cc27 100644 --- a/collector/scripts/watch/convert/as_text.py +++ b/collector/scripts/watch/convert/as_text.py @@ -23,6 +23,7 @@ def as_text(**kwargs): 'title': f"{filename}{ext}", 'docAuthor': 'Unknown', # TODO: Find a better author 'description': 'Unknown', # TODO: Find a better description + 'docSource': 'a text file uploaded by the user.', 'chunkSource': f"{filename}{ext}", 'published': file_creation_time(fullpath), 'wordCount': len(content),