From 8b7721ebbbe37ac0915598bdcb8a3b089d5feb23 Mon Sep 17 00:00:00 2001 From: Gael Grosch <3279847+Digma@users.noreply.github.com> Date: Mon, 29 May 2023 15:36:50 +0200 Subject: [PATCH] fix: Blob.from_data mimetype is lost (#5395) # Fix lost mimetype when using Blob.from_data method The mimetype is lost due to a typo in the class attribute name Fixes # - (no issue opened but I can open one if needed) ## Changes * Fixed typo in name * Added unit-tests to validate the output Blob ## Review @eyurtsev --- .../document_loaders/blob_loaders/schema.py | 2 +- .../blob_loaders/test_schema.py | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/langchain/document_loaders/blob_loaders/schema.py b/langchain/document_loaders/blob_loaders/schema.py index 6ea20fdb..6548352c 100644 --- a/langchain/document_loaders/blob_loaders/schema.py +++ b/langchain/document_loaders/blob_loaders/schema.py @@ -137,7 +137,7 @@ class Blob(BaseModel): Returns: Blob instance """ - return cls(data=data, mime_type=mime_type, encoding=encoding, path=path) + return cls(data=data, mimetype=mime_type, encoding=encoding, path=path) def __repr__(self) -> str: """Define the blob representation.""" diff --git a/tests/unit_tests/document_loaders/blob_loaders/test_schema.py b/tests/unit_tests/document_loaders/blob_loaders/test_schema.py index fa4a3dca..2f18549b 100644 --- a/tests/unit_tests/document_loaders/blob_loaders/test_schema.py +++ b/tests/unit_tests/document_loaders/blob_loaders/test_schema.py @@ -70,6 +70,29 @@ def test_blob_from_str_path() -> None: assert bytes_io.read() == content +def test_blob_from_str_data() -> None: + """Test creating a blob from in-memory data.""" + content = b"Hello, World!" + blob = Blob.from_data(content) + assert blob.encoding == "utf-8" # Default encoding + assert blob.path is None + assert blob.mimetype is None + assert blob.source is None + assert blob.data == b"Hello, World!" 
+ assert blob.as_bytes() == content + assert blob.as_string() == "Hello, World!" + with blob.as_bytes_io() as bytes_io: + assert bytes_io.read() == content + + +def test_blob_mimetype_from_str_data() -> None: + """Test that Blob.from_data keeps the given mime type.""" + content = b"Hello, World!" + mimetype = "text/html" + blob = Blob.from_data(content, mime_type=mimetype) + assert blob.mimetype == mimetype + + @pytest.mark.parametrize( "path, mime_type, guess_type, expected_mime_type", [