GCSFileLoader: retrieve blob custom metadata and append it to document metadata (#11066)

- **Description:** GCSFileLoader now retrieves the blob's custom metadata and
appends it to each document's metadata.
- **Issue:** #9975
- **Tag maintainer:** @baskaryan please review

Co-authored-by: b0l00ib <bharat.lal@walmart.com>
This commit is contained in:
M Bharat lal 2023-10-18 00:47:59 +05:30 committed by GitHub
parent 23c261ba57
commit 67300567d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -62,6 +62,8 @@ class GCSFileLoader(BaseLoader):
bucket = storage_client.get_bucket(self.bucket)
# Create a blob object from the filepath
blob = bucket.blob(self.blob)
# retrieve custom metadata associated with the blob
metadata = bucket.get_blob(self.blob).metadata
with tempfile.TemporaryDirectory() as temp_dir:
file_path = f"{temp_dir}/{self.blob}"
os.makedirs(os.path.dirname(file_path), exist_ok=True)
@@ -72,4 +74,6 @@ class GCSFileLoader(BaseLoader):
for doc in docs:
if "source" in doc.metadata:
doc.metadata["source"] = f"gs://{self.bucket}/{self.blob}"
if metadata:
doc.metadata.update(metadata)
return docs