Skip to content
Snippets Groups Projects
Unverified commit aa8633e1, authored by Nick Fiacco, committed by GitHub
Browse files

Remove unnecessary PyDrive dependency from Google Drive Reader (#12257)

* Remove unnecessary deps from Google Drive Reader

And fix incorrect usage of client secrets argument as the service account key.

* Fix order of params to prevent breaking

* Use original argument names
parent a088e391
No related branches found
No related tags found
No related merge requests found
# CHANGELOG # CHANGELOG
## [0.2.0] - 2024-03-26
- Use separate arg for service account key file, don't conflate client secrets with service account key
- Remove unused PyDrive dependency and code
## [0.1.5] - 2024-03-06 ## [0.1.5] - 2024-03-06
- Add missing README.md for all readers folder lost during the last migration from llamahub - Add missing README.md for all readers folder lost during the last migration from llamahub
......
...@@ -9,7 +9,6 @@ from typing import List, Optional, Tuple ...@@ -9,7 +9,6 @@ from typing import List, Optional, Tuple
from google.auth.transport.requests import Request from google.auth.transport.requests import Request
from google.oauth2 import service_account from google.oauth2 import service_account
from google.oauth2.credentials import Credentials from google.oauth2.credentials import Credentials
from pydrive.drive import GoogleDrive
from llama_index.core.readers import SimpleDirectoryReader from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.readers.base import BaseReader from llama_index.core.readers.base import BaseReader
...@@ -28,15 +27,14 @@ class GoogleDriveReader(BaseReader): ...@@ -28,15 +27,14 @@ class GoogleDriveReader(BaseReader):
self, self,
credentials_path: str = "credentials.json", credentials_path: str = "credentials.json",
token_path: str = "token.json", token_path: str = "token.json",
pydrive_creds_path: str = "creds.txt", service_account_key_path: str = "service_account_key.json",
) -> None: ) -> None:
"""Initialize with parameters.""" """Initialize with parameters."""
self.service_account_key_path = service_account_key_path
self.credentials_path = credentials_path self.credentials_path = credentials_path
self.token_path = token_path self.token_path = token_path
self.pydrive_creds_path = pydrive_creds_path
self._creds = None self._creds = None
self._drive = None
# Download Google Docs/Slides/Sheets as actual files # Download Google Docs/Slides/Sheets as actual files
# See https://developers.google.com/drive/v3/web/mime-types # See https://developers.google.com/drive/v3/web/mime-types
...@@ -57,41 +55,31 @@ class GoogleDriveReader(BaseReader): ...@@ -57,41 +55,31 @@ class GoogleDriveReader(BaseReader):
}, },
} }
def _get_credentials(self) -> Tuple[Credentials, GoogleDrive]: def _get_credentials(self) -> Tuple[Credentials]:
"""Authenticate with Google and save credentials. """Authenticate with Google and save credentials.
Download the credentials.json file with these instructions: https://developers.google.com/drive/api/v3/quickstart/python. Download the service_account_key.json file with these instructions: https://cloud.google.com/iam/docs/keys-create-delete.
Copy credentials.json file and rename it to client_secrets.json file which will be used by pydrive for downloading files.
So, we need two files:
1. credentials.json
2. client_secrets.json
Both 1, 2 are essentially same but needed with two different names according to google-api-python-client, google-auth-httplib2, google-auth-oauthlib and pydrive libraries.
Returns: Returns:
credentials, pydrive object. credentials
""" """
from google_auth_oauthlib.flow import InstalledAppFlow from google_auth_oauthlib.flow import InstalledAppFlow
from pydrive.auth import GoogleAuth
# First, we need the Google API credentials for the app # First, we need the Google API credentials for the app
creds = None creds = None
if Path(self.token_path).exists(): if Path(self.token_path).exists():
creds = Credentials.from_authorized_user_file(self.token_path, SCOPES) creds = Credentials.from_authorized_user_file(self.token_path, SCOPES)
elif Path(self.credentials_path).exists(): elif Path(self.service_account_key_path).exists():
creds = service_account.Credentials.from_service_account_file( return service_account.Credentials.from_service_account_file(
self.credentials_path, scopes=SCOPES self.service_account_key_path, scopes=SCOPES
) )
gauth = GoogleAuth()
gauth.credentials = creds
drive = GoogleDrive(gauth)
return creds, drive
# If there are no (valid) credentials available, let the user log in. # If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid: if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token: if creds and creds.expired and creds.refresh_token:
creds.refresh(Request()) creds.refresh(Request())
else: else:
flow = InstalledAppFlow.from_client_secrets_file( flow = InstalledAppFlow.from_credentials_file(
self.credentials_path, SCOPES self.credentials_path, SCOPES
) )
creds = flow.run_local_server(port=0) creds = flow.run_local_server(port=0)
...@@ -99,26 +87,7 @@ class GoogleDriveReader(BaseReader): ...@@ -99,26 +87,7 @@ class GoogleDriveReader(BaseReader):
with open(self.token_path, "w", encoding="utf-8") as token: with open(self.token_path, "w", encoding="utf-8") as token:
token.write(creds.to_json()) token.write(creds.to_json())
# Next, we need user authentication to download files (via pydrive) return creds
# Uses client_secrets.json file for authorization.
gauth = GoogleAuth()
# Try to load saved client credentials
gauth.LoadCredentialsFile(self.pydrive_creds_path)
if gauth.credentials is None:
# Authenticate if they're not there
gauth.LocalWebserverAuth()
elif gauth.access_token_expired:
# Refresh them if expired
gauth.Refresh()
else:
# Initialize the saved creds
gauth.Authorize()
# Save the current credentials to a file so user doesn't have to auth every time
gauth.SaveCredentialsFile(self.pydrive_creds_path)
drive = GoogleDrive(gauth)
return creds, drive
def _get_fileids_meta( def _get_fileids_meta(
self, self,
...@@ -414,7 +383,7 @@ class GoogleDriveReader(BaseReader): ...@@ -414,7 +383,7 @@ class GoogleDriveReader(BaseReader):
Returns: Returns:
List[Document]: A list of documents. List[Document]: A list of documents.
""" """
self._creds, self._drive = self._get_credentials() self._creds = self._get_credentials()
if folder_id: if folder_id:
return self._load_from_folder(folder_id, mime_types, query_string) return self._load_from_folder(folder_id, mime_types, query_string)
......
...@@ -45,7 +45,7 @@ maintainers = [ ...@@ -45,7 +45,7 @@ maintainers = [
] ]
name = "llama-index-readers-google" name = "llama-index-readers-google"
readme = "README.md" readme = "README.md"
version = "0.1.7" version = "0.2.0"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.10,<4.0" python = ">=3.10,<4.0"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment