From 69427f9fddbee738907df188c5798e70b13be184 Mon Sep 17 00:00:00 2001
From: Javier Torres <javierandrestorresreyes@gmail.com>
Date: Mon, 8 Apr 2024 15:27:28 -0500
Subject: [PATCH] Add in-memory loading for non-default filesystems in
 PDFReader (#12659)

* Add in-memory loading for non-default filesystems in PDFReader

* bump version
---
 .../llama_index/readers/file/docs/base.py                | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py
index 441ee5e054..71b03c1469 100644
--- a/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py
+++ b/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/docs/base.py
@@ -10,9 +10,10 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional
 from fsspec import AbstractFileSystem
 import logging
+import io
 
 from llama_index.core.readers.base import BaseReader
-from llama_index.core.readers.file.base import get_default_fs
+from llama_index.core.readers.file.base import get_default_fs, is_default_fs
 from llama_index.core.schema import Document
 
 logger = logging.getLogger(__name__)
@@ -45,8 +46,12 @@ class PDFReader(BaseReader):
             )
         fs = fs or get_default_fs()
         with fs.open(file, "rb") as fp:
+            # Load the file in memory if the filesystem is not the default one to avoid
+            # issues with pypdf
+            stream = fp if is_default_fs(fs) else io.BytesIO(fp.read())
+
             # Create a PDF object
-            pdf = pypdf.PdfReader(fp)
+            pdf = pypdf.PdfReader(stream)
 
             # Get the number of pages in the PDF document
             num_pages = len(pdf.pages)
-- 
GitLab