From 784d24a1d5f77697e2d9799befe0359bae08a96f Mon Sep 17 00:00:00 2001
From: Alpri Else
Date: Wed, 8 Mar 2023 19:17:26 -0500
Subject: [PATCH] Support S3 Object keys with `/` in `S3FileLoader` (#1517)

Resolves https://github.com/hwchase17/langchain/issues/1510

### Problem
When loading S3 Objects with `/` in the object key (e.g.
`folder/some-document.txt`) using `S3FileLoader`, the objects are downloaded
into a temporary directory and saved as a file.

This errors out when the parent directory does not exist within the temporary
directory.

See https://github.com/hwchase17/langchain/issues/1510#issuecomment-1459583696
for how to reproduce this bug.

### What this PR does
Creates parent directories based on object key.

This also works with deeply nested keys: `folder/subfolder/some-document.txt`
---
 langchain/document_loaders/s3_file.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/langchain/document_loaders/s3_file.py b/langchain/document_loaders/s3_file.py
index 797d503a..f78913b2 100644
--- a/langchain/document_loaders/s3_file.py
+++ b/langchain/document_loaders/s3_file.py
@@ -1,4 +1,5 @@
 """Loading logic for loading documents from an s3 file."""
+import os
 import tempfile
 from typing import List
 
@@ -27,6 +28,7 @@ class S3FileLoader(BaseLoader):
         s3 = boto3.client("s3")
         with tempfile.TemporaryDirectory() as temp_dir:
             file_path = f"{temp_dir}/{self.key}"
+            os.makedirs(os.path.dirname(file_path), exist_ok=True)
             s3.download_file(self.bucket, self.key, file_path)
             loader = UnstructuredFileLoader(file_path)
             return loader.load()