"""Loading logic for loading documents from an s3 file.""" import tempfile from typing import List from langchain.docstore.document import Document from langchain.document_loaders.base import BaseLoader from langchain.document_loaders.unstructured import UnstructuredFileLoader class S3FileLoader(BaseLoader): """Loading logic for loading documents from s3.""" def __init__(self, bucket: str, key: str): """Initialize with bucket and key name.""" self.bucket = bucket self.key = key def load(self) -> List[Document]: """Load documents.""" try: import boto3 except ImportError: raise ValueError( "Could not import boto3 python package. " "Please it install it with `pip install boto3`." ) s3 = boto3.client("s3") with tempfile.TemporaryDirectory() as temp_dir: file_path = f"{temp_dir}/{self.key}" s3.download_file(self.bucket, self.key, file_path) loader = UnstructuredFileLoader(file_path) return loader.load()