From aa8633e1e194478ff6d42fec9592d150951f6ddb Mon Sep 17 00:00:00 2001 From: Nick Fiacco <nicholas.r.fiacco@gmail.com> Date: Mon, 25 Mar 2024 19:20:58 -0600 Subject: [PATCH] Remove unnecessary PyDrive dependency from Google Drive Reader (#12257) * Remove unnecessary deps from Google Drive Reader And fix incorrect usage of client secrets argument as the service account key. * Fix order of params to prevent breaking * Use original argument names --- .../llama-index-readers-google/CHANGELOG.md | 5 ++ .../llama_index/readers/google/drive/base.py | 53 ++++--------------- .../llama-index-readers-google/pyproject.toml | 2 +- 3 files changed, 17 insertions(+), 43 deletions(-) diff --git a/llama-index-integrations/readers/llama-index-readers-google/CHANGELOG.md b/llama-index-integrations/readers/llama-index-readers-google/CHANGELOG.md index a9f026fb6..6b55fba29 100644 --- a/llama-index-integrations/readers/llama-index-readers-google/CHANGELOG.md +++ b/llama-index-integrations/readers/llama-index-readers-google/CHANGELOG.md @@ -1,5 +1,10 @@ # CHANGELOG +## [0.2.0] - 2024-03-26 + +- Use separate arg for service account key file, don't conflate client secrets with service account key +- Remove unused PyDrive dependency and code + ## [0.1.5] - 2024-03-06 - Add missing README.md for all readers folder lost during the last migration from llamahub diff --git a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/drive/base.py b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/drive/base.py index a68272561..493407450 100644 --- a/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/drive/base.py +++ b/llama-index-integrations/readers/llama-index-readers-google/llama_index/readers/google/drive/base.py @@ -9,7 +9,6 @@ from typing import List, Optional, Tuple from google.auth.transport.requests import Request from google.oauth2 import service_account from google.oauth2.credentials 
import Credentials -from pydrive.drive import GoogleDrive from llama_index.core.readers import SimpleDirectoryReader from llama_index.core.readers.base import BaseReader @@ -28,15 +27,14 @@ class GoogleDriveReader(BaseReader): self, credentials_path: str = "credentials.json", token_path: str = "token.json", - pydrive_creds_path: str = "creds.txt", + service_account_key_path: str = "service_account_key.json", ) -> None: """Initialize with parameters.""" + self.service_account_key_path = service_account_key_path self.credentials_path = credentials_path self.token_path = token_path - self.pydrive_creds_path = pydrive_creds_path self._creds = None - self._drive = None # Download Google Docs/Slides/Sheets as actual files # See https://developers.google.com/drive/v3/web/mime-types @@ -57,41 +55,31 @@ class GoogleDriveReader(BaseReader): }, } - def _get_credentials(self) -> Tuple[Credentials, GoogleDrive]: + def _get_credentials(self) -> Tuple[Credentials]: """Authenticate with Google and save credentials. - Download the credentials.json file with these instructions: https://developers.google.com/drive/api/v3/quickstart/python. - Copy credentials.json file and rename it to client_secrets.json file which will be used by pydrive for downloading files. - So, we need two files: - 1. credentials.json - 2. client_secrets.json - Both 1, 2 are essentially same but needed with two different names according to google-api-python-client, google-auth-httplib2, google-auth-oauthlib and pydrive libraries. + Download the service_account_key.json file with these instructions: https://cloud.google.com/iam/docs/keys-create-delete. Returns: - credentials, pydrive object. 
+ credentials """ from google_auth_oauthlib.flow import InstalledAppFlow - from pydrive.auth import GoogleAuth # First, we need the Google API credentials for the app creds = None if Path(self.token_path).exists(): creds = Credentials.from_authorized_user_file(self.token_path, SCOPES) - elif Path(self.credentials_path).exists(): - creds = service_account.Credentials.from_service_account_file( - self.credentials_path, scopes=SCOPES + elif Path(self.service_account_key_path).exists(): + return service_account.Credentials.from_service_account_file( + self.service_account_key_path, scopes=SCOPES ) - gauth = GoogleAuth() - gauth.credentials = creds - drive = GoogleDrive(gauth) - return creds, drive # If there are no (valid) credentials available, let the user log in. if not creds or not creds.valid: if creds and creds.expired and creds.refresh_token: creds.refresh(Request()) else: - flow = InstalledAppFlow.from_client_secrets_file( + flow = InstalledAppFlow.from_client_secrets_file( self.credentials_path, SCOPES ) creds = flow.run_local_server(port=0) @@ -99,26 +87,7 @@ class GoogleDriveReader(BaseReader): with open(self.token_path, "w", encoding="utf-8") as token: token.write(creds.to_json()) - # Next, we need user authentication to download files (via pydrive) - # Uses client_secrets.json file for authorization.
- gauth = GoogleAuth() - # Try to load saved client credentials - gauth.LoadCredentialsFile(self.pydrive_creds_path) - if gauth.credentials is None: - # Authenticate if they're not there - gauth.LocalWebserverAuth() - elif gauth.access_token_expired: - # Refresh them if expired - gauth.Refresh() - else: - # Initialize the saved creds - gauth.Authorize() - # Save the current credentials to a file so user doesn't have to auth every time - gauth.SaveCredentialsFile(self.pydrive_creds_path) - - drive = GoogleDrive(gauth) - - return creds, drive + return creds def _get_fileids_meta( self, @@ -414,7 +383,7 @@ class GoogleDriveReader(BaseReader): Returns: List[Document]: A list of documents. """ - self._creds, self._drive = self._get_credentials() + self._creds = self._get_credentials() if folder_id: return self._load_from_folder(folder_id, mime_types, query_string) diff --git a/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml index 9a444a9ad..fd24e91a6 100644 --- a/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-google/pyproject.toml @@ -45,7 +45,7 @@ maintainers = [ ] name = "llama-index-readers-google" readme = "README.md" -version = "0.1.7" +version = "0.2.0" [tool.poetry.dependencies] python = ">=3.10,<4.0" -- GitLab