From f6d3a3546f1ad049ab9c30aff75b23978ce4ac10 Mon Sep 17 00:00:00 2001 From: Shawn Date: Wed, 14 Feb 2024 14:48:31 -0500 Subject: [PATCH] community[patch]: document_loaders: modified athena key logic to handle s3 uris without a prefix (#17526) https://github.com/langchain-ai/langchain/issues/17525 ### Example Code ```python from langchain_community.document_loaders.athena import AthenaLoader database_name = "database" s3_output_path = "s3://bucket-no-prefix" query="""SELECT CAST(extract(hour FROM current_timestamp) AS INTEGER) AS current_hour, CAST(extract(minute FROM current_timestamp) AS INTEGER) AS current_minute, CAST(extract(second FROM current_timestamp) AS INTEGER) AS current_second; """ profile_name = "AdministratorAccess" loader = AthenaLoader( query=query, database=database_name, s3_output_uri=s3_output_path, profile_name=profile_name, ) documents = loader.load() print(documents) ``` ### Error Message and Stack Trace (if applicable) NoSuchKey: An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist ### Description AthenaLoader fails when the result S3 bucket URI has no prefix (bucket name only). Running the loader in that case results in a "NoSuchKey: An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist." error. If s3_output_path contains a prefix, for example: ```python s3_output_path = "s3://bucket-with-prefix/prefix" ``` execution works without an error. 
## Suggested solution Modify: ```python key = "/".join(tokens[1:]) + "/" + query_execution_id + ".csv" ``` to ```python key = "/".join(tokens[1:]) + ("/" if tokens[1:] else "") + query_execution_id + ".csv" ``` https://github.com/langchain-ai/langchain/blob/9e8a3fc4fff8e20ab5d1f113515ded14906eb6f3/libs/community/langchain_community/document_loaders/athena.py#L128 ### System Info System Information ------------------ > OS: Darwin > OS Version: Darwin Kernel Version 22.6.0: Fri Sep 15 13:41:30 PDT 2023; root:xnu-8796.141.3.700.8~1/RELEASE_ARM64_T8103 > Python Version: 3.9.9 (main, Jan 9 2023, 11:42:03) [Clang 14.0.0 (clang-1400.0.29.102)] Package Information ------------------- > langchain_core: 0.1.23 > langchain: 0.1.7 > langchain_community: 0.0.20 > langsmith: 0.0.87 > langchain_openai: 0.0.6 > langchainhub: 0.1.14 Packages not installed (Not Necessarily a Problem) -------------------------------------------------- The following packages were not found: > langgraph > langserve --------- Co-authored-by: Bagatur --- libs/community/langchain_community/document_loaders/athena.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/document_loaders/athena.py b/libs/community/langchain_community/document_loaders/athena.py index fc3c947dea..e3ed42e44d 100644 --- a/libs/community/langchain_community/document_loaders/athena.py +++ b/libs/community/langchain_community/document_loaders/athena.py @@ -125,7 +125,7 @@ class AthenaLoader(BaseLoader): self._remove_suffix(output_uri, "/"), "s3://" ).split("/") bucket = tokens[0] - key = "/".join(tokens[1:]) + "/" + query_execution_id + ".csv" + key = "/".join(tokens[1:] + [query_execution_id]) + ".csv" obj = self.s3_client.get_object(Bucket=bucket, Key=key) df = pd.read_csv(io.BytesIO(obj["Body"].read()), encoding="utf8")