Skip to content
Snippets Groups Projects
Unverified Commit 23f1491d authored by Huu Le (Lee)'s avatar Huu Le (Lee) Committed by GitHub
Browse files

fix null value in default file document metadata (#11501)

parent b66f94c5
No related branches found
No related tags found
No related merge requests found
...@@ -56,6 +56,21 @@ def _try_loading_included_file_formats() -> Dict[str, Type[BaseReader]]: ...@@ -56,6 +56,21 @@ def _try_loading_included_file_formats() -> Dict[str, Type[BaseReader]]:
return default_file_reader_cls return default_file_reader_cls
def _format_file_timestamp(timestamp: float) -> Optional[str]:
"""Format file timestamp to a %Y-%m-%d string.
Args:
timestamp (float): timestamp in float
Returns:
str: formatted timestamp
"""
try:
return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d")
except Exception:
return None
def default_file_metadata_func( def default_file_metadata_func(
file_path: str, fs: Optional[fsspec.AbstractFileSystem] = None file_path: str, fs: Optional[fsspec.AbstractFileSystem] = None
) -> Dict: ) -> Dict:
...@@ -66,20 +81,10 @@ def default_file_metadata_func( ...@@ -66,20 +81,10 @@ def default_file_metadata_func(
""" """
fs = fs or get_default_fs() fs = fs or get_default_fs()
stat_result = fs.stat(file_path) stat_result = fs.stat(file_path)
creation_date = stat_result.get("created") creation_date = _format_file_timestamp(stat_result.get("created"))
last_modified_date = stat_result.get("mtime") last_modified_date = _format_file_timestamp(stat_result.get("mtime"))
last_accessed_date = stat_result.get("atime") last_accessed_date = _format_file_timestamp(stat_result.get("atime"))
try: default_meta = {
creation_date = datetime.fromtimestamp(creation_date).strftime("%Y-%m-%d")
last_modified_date = datetime.fromtimestamp(last_modified_date).strftime(
"%Y-%m-%d"
)
last_accessed_date = datetime.fromtimestamp(last_accessed_date).strftime(
"%Y-%m-%d"
)
except Exception:
pass
return {
"file_path": file_path, "file_path": file_path,
"file_name": stat_result["name"], "file_name": stat_result["name"],
"file_type": mimetypes.guess_type(file_path)[0], "file_type": mimetypes.guess_type(file_path)[0],
...@@ -89,6 +94,13 @@ def default_file_metadata_func( ...@@ -89,6 +94,13 @@ def default_file_metadata_func(
"last_accessed_date": last_accessed_date, "last_accessed_date": last_accessed_date,
} }
# Return not null value
return {
meta_key: meta_value
for meta_key, meta_value in default_meta.items()
if meta_value is not None
}
class _DefaultFileMetadataFunc: class _DefaultFileMetadataFunc:
""" """
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment