langchain/tests/unit_tests/document_loader/test_base.py
Eugene Yurtsev 423f497168
Add BlobParser abstraction (#3979)
This PR adds the BlobParser abstraction.

It follows the proposal described here:
https://github.com/hwchase17/langchain/pull/2833#issuecomment-1509097756
2023-05-05 21:43:38 -04:00

29 lines
921 B
Python

"""Test the base blob parser abstraction."""
from typing import Iterator
from langchain.document_loaders.base import BaseBlobParser
from langchain.document_loaders.blob_loaders import Blob
from langchain.schema import Document
def test_base_blob_parser() -> None:
    """Check that the default eager ``parse`` is backed by ``lazy_parse``."""

    class MyParser(BaseBlobParser):
        """Minimal parser that emits exactly one fixed document."""

        def lazy_parse(self, blob: Blob) -> Iterator[Document]:
            """Yield a single document, ignoring the blob's content."""
            yield Document(page_content="foo")

    parser = MyParser()

    # The lazy entry point must hand back an iterator, not a materialized list.
    lazy_result = parser.lazy_parse(Blob(data="who?"))
    assert isinstance(lazy_result, Iterator)

    # MyParser overrides only lazy_parse, so whatever parse() returns here
    # comes from the base class's default eager implementation.
    documents = parser.parse(Blob(data="who?"))
    assert len(documents) == 1
    assert documents[0].page_content == "foo"