Skip to content
Snippets Groups Projects
Unverified Commit c24b9650 authored by Sai Ramana Reddy's avatar Sai Ramana Reddy Committed by GitHub
Browse files

Fix path implementation for non-local FS (#12141)

parent ae382b7f
No related branches found
No related tags found
No related merge requests found
...@@ -8,7 +8,7 @@ import warnings ...@@ -8,7 +8,7 @@ import warnings
from datetime import datetime from datetime import datetime
from functools import reduce from functools import reduce
from itertools import repeat from itertools import repeat
from pathlib import Path from pathlib import Path, PurePosixPath
import fsspec import fsspec
from fsspec.implementations.local import LocalFileSystem from fsspec.implementations.local import LocalFileSystem
from typing import Any, Callable, Dict, Generator, List, Optional, Type from typing import Any, Callable, Dict, Generator, List, Optional, Type
...@@ -196,18 +196,19 @@ class SimpleDirectoryReader(BaseReader): ...@@ -196,18 +196,19 @@ class SimpleDirectoryReader(BaseReader):
self.exclude_hidden = exclude_hidden self.exclude_hidden = exclude_hidden
self.required_exts = required_exts self.required_exts = required_exts
self.num_files_limit = num_files_limit self.num_files_limit = num_files_limit
_Path = Path if is_default_fs(self.fs) else PurePosixPath
if input_files: if input_files:
self.input_files = [] self.input_files = []
for path in input_files: for path in input_files:
if not self.fs.isfile(path): if not self.fs.isfile(path):
raise ValueError(f"File {path} does not exist.") raise ValueError(f"File {path} does not exist.")
input_file = Path(path) input_file = _Path(path)
self.input_files.append(input_file) self.input_files.append(input_file)
elif input_dir: elif input_dir:
if not self.fs.isdir(input_dir): if not self.fs.isdir(input_dir):
raise ValueError(f"Directory {input_dir} does not exist.") raise ValueError(f"Directory {input_dir} does not exist.")
self.input_dir = Path(input_dir) self.input_dir = _Path(input_dir)
self.exclude = exclude self.exclude = exclude
self.input_files = self._add_files(self.input_dir) self.input_files = self._add_files(self.input_dir)
...@@ -229,20 +230,22 @@ class SimpleDirectoryReader(BaseReader): ...@@ -229,20 +230,22 @@ class SimpleDirectoryReader(BaseReader):
all_files = set() all_files = set()
rejected_files = set() rejected_files = set()
rejected_dirs = set() rejected_dirs = set()
# Default to POSIX paths for non-default file systems (e.g. S3)
_Path = Path if is_default_fs(self.fs) else PurePosixPath
if self.exclude is not None: if self.exclude is not None:
for excluded_pattern in self.exclude: for excluded_pattern in self.exclude:
if self.recursive: if self.recursive:
# Recursive glob # Recursive glob
excluded_glob = Path(input_dir) / Path("**") / excluded_pattern excluded_glob = _Path(input_dir) / _Path("**") / excluded_pattern
else: else:
# Non-recursive glob # Non-recursive glob
excluded_glob = Path(input_dir) / excluded_pattern excluded_glob = _Path(input_dir) / excluded_pattern
for file in self.fs.glob(str(excluded_glob)): for file in self.fs.glob(str(excluded_glob)):
if self.fs.isdir(file): if self.fs.isdir(file):
rejected_dirs.add(Path(file)) rejected_dirs.add(_Path(file))
else: else:
rejected_files.add(Path(file)) rejected_files.add(_Path(file))
file_refs: List[str] = [] file_refs: List[str] = []
if self.recursive: if self.recursive:
...@@ -253,7 +256,7 @@ class SimpleDirectoryReader(BaseReader): ...@@ -253,7 +256,7 @@ class SimpleDirectoryReader(BaseReader):
for ref in file_refs: for ref in file_refs:
# Manually check if file is hidden or directory instead of # Manually check if file is hidden or directory instead of
# in glob for backwards compatibility. # in glob for backwards compatibility.
ref = Path(ref) ref = _Path(ref)
is_dir = self.fs.isdir(ref) is_dir = self.fs.isdir(ref)
skip_because_hidden = self.exclude_hidden and self.is_hidden(ref) skip_because_hidden = self.exclude_hidden and self.is_hidden(ref)
skip_because_bad_ext = ( skip_because_bad_ext = (
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment