From 67300567d35b7c608a1c25d00869fe11edaea3b6 Mon Sep 17 00:00:00 2001
From: M Bharat lal
Date: Wed, 18 Oct 2023 00:47:59 +0530
Subject: [PATCH] GCSFileLoader retrieve blob custom metadata and append to document metadata (#11066)

- **Description:** GCSFileLoader retrieve blob's custom metadata and append to document's metadata
- **Issue:** #9975,
- **Tag maintainer:** @baskaryan please review

Co-authored-by: b0l00ib
---
 libs/langchain/langchain/document_loaders/gcs_file.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libs/langchain/langchain/document_loaders/gcs_file.py b/libs/langchain/langchain/document_loaders/gcs_file.py
index efab6c38e8..5fd6519bf1 100644
--- a/libs/langchain/langchain/document_loaders/gcs_file.py
+++ b/libs/langchain/langchain/document_loaders/gcs_file.py
@@ -62,6 +62,8 @@ class GCSFileLoader(BaseLoader):
         bucket = storage_client.get_bucket(self.bucket)
         # Create a blob object from the filepath
         blob = bucket.blob(self.blob)
+        # retrieve custom metadata associated with the blob
+        metadata = bucket.get_blob(self.blob).metadata
         with tempfile.TemporaryDirectory() as temp_dir:
             file_path = f"{temp_dir}/{self.blob}"
             os.makedirs(os.path.dirname(file_path), exist_ok=True)
@@ -72,4 +74,6 @@ class GCSFileLoader(BaseLoader):
             for doc in docs:
                 if "source" in doc.metadata:
                     doc.metadata["source"] = f"gs://{self.bucket}/{self.blob}"
+                if metadata:
+                    doc.metadata.update(metadata)
             return docs