From b730fb67b66870eec7a9bd430750735e4125ec31 Mon Sep 17 00:00:00 2001 From: Sourabh Desai <sourabhdesai@gmail.com> Date: Fri, 22 Mar 2024 14:58:24 -0700 Subject: [PATCH] fix PDFReader for remote fs (#12186) --- .../llama_index/readers/file/docs/base.py | 4 ++-- .../readers/llama-index-readers-file/pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py index a2db764561..a3c168d61e 100644 --- a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py +++ b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py @@ -53,7 +53,7 @@ class PDFReader(BaseReader): # This block returns a whole PDF as a single Document if self.return_full_document: text = "" - metadata = {"file_name": fp.name} + metadata = {"file_name": file.name} for page in range(num_pages): # Extract the text from the page @@ -71,7 +71,7 @@ class PDFReader(BaseReader): page_text = pdf.pages[page].extract_text() page_label = pdf.page_labels[page] - metadata = {"page_label": page_label, "file_name": fp.name} + metadata = {"page_label": page_label, "file_name": file.name} if extra_info is not None: metadata.update(extra_info) diff --git a/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml index 71561c017a..92528f5930 100644 --- a/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-file/pyproject.toml @@ -50,7 +50,7 @@ license = "MIT" maintainers = ["FarisHijazi", "Haowjy", "ephe-meral", "hursh-desai", "iamarunbrahma", "jon-chuang", "mmaatouk", "ravi03071991", "sangwongenip", "thejessezhang"] name = "llama-index-readers-file" readme = "README.md" -version = "0.1.11" +version = "0.1.12" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -- GitLab