From f0ea093de867e5f099a4b5de2bfa24d788b79133 Mon Sep 17 00:00:00 2001 From: Nicholas Liu Date: Wed, 24 May 2023 22:26:17 -0700 Subject: [PATCH] Change Default GoogleDriveLoader Behavior to not Load Trashed Files (issue #5104) (#5220) # Change Default GoogleDriveLoader Behavior to not Load Trashed Files (issue #5104) Fixes #5104 If you need the previous behavior of loading files that used to live in the folder but are now trashed, you can use the `load_trashed_files` parameter: ``` loader = GoogleDriveLoader( folder_id="1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5", recursive=False, load_trashed_files=True ) ``` As not loading trashed files should be expected behavior, should we 1. even provide the `load_trashed_files` parameter? 2. add documentation? It feels like most users will stick with the default behavior. ## Who can review? Community members can review the PR once tests pass. Tag maintainers/contributors who might be interested: DataLoaders - @eyurtsev Twitter: [@nicholasliu77](https://twitter.com/nicholasliu77) --- langchain/document_loaders/googledrive.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/langchain/document_loaders/googledrive.py b/langchain/document_loaders/googledrive.py index 176191f7..dc00eb4a 100644 --- a/langchain/document_loaders/googledrive.py +++ b/langchain/document_loaders/googledrive.py @@ -31,6 +31,7 @@ class GoogleDriveLoader(BaseLoader, BaseModel): file_ids: Optional[List[str]] = None recursive: bool = False file_types: Optional[Sequence[str]] = None + load_trashed_files: bool = False @root_validator def validate_inputs(cls, values: Dict[str, Any]) -> Dict[str, Any]: @@ -215,8 +216,10 @@ class GoogleDriveLoader(BaseLoader, BaseModel): _files = files returns = [] - for file in _files: - if file["mimeType"] == "application/vnd.google-apps.document": + for file in files: + if file["trashed"] and not self.load_trashed_files: + continue + elif file["mimeType"] == "application/vnd.google-apps.document": 
returns.append(self._load_document_from_id(file["id"])) # type: ignore elif file["mimeType"] == "application/vnd.google-apps.spreadsheet": returns.extend(self._load_sheet_from_id(file["id"])) # type: ignore @@ -224,7 +227,6 @@ class GoogleDriveLoader(BaseLoader, BaseModel): returns.extend(self._load_file_from_id(file["id"])) # type: ignore else: pass - return returns def _fetch_files_recursive( @@ -238,7 +240,7 @@ class GoogleDriveLoader(BaseLoader, BaseModel): pageSize=1000, includeItemsFromAllDrives=True, supportsAllDrives=True, - fields="nextPageToken, files(id, name, mimeType, parents)", + fields="nextPageToken, files(id, name, mimeType, parents, trashed)", ) .execute() )