mirror of https://github.com/hwchase17/langchain
Add BlobParser abstraction (#3979)
This PR adds the BlobParser abstraction. It follows the proposal described here: https://github.com/hwchase17/langchain/pull/2833#issuecomment-1509097756
pull/4210/head
parent
5ca13cc1f0
commit
423f497168
@ -0,0 +1,28 @@
|
||||
"""Test Base Schema of documents."""
|
||||
from typing import Iterator
|
||||
|
||||
from langchain.document_loaders.base import BaseBlobParser
|
||||
from langchain.document_loaders.blob_loaders import Blob
|
||||
from langchain.schema import Document
|
||||
|
||||
|
||||
def test_base_blob_parser() -> None:
    """Check that the default eager ``parse`` is backed by ``lazy_parse``."""

    class MyParser(BaseBlobParser):
        """Minimal parser that emits exactly one fixed document."""

        def lazy_parse(self, blob: Blob) -> Iterator[Document]:
            """Yield a single document, ignoring the blob's contents."""
            yield Document(page_content="foo")

    parser = MyParser()

    # The lazy interface should hand back an iterator.
    lazy_result = parser.lazy_parse(Blob(data="who?"))
    assert isinstance(lazy_result, Iterator)

    # MyParser only defines lazy_parse, so calling parse here verifies that
    # the eager method is hooked up to the lazy method by default.
    parsed_docs = parser.parse(Blob(data="who?"))
    assert len(parsed_docs) == 1

    first_doc = parsed_docs[0]
    assert first_doc.page_content == "foo"
|
Loading…
Reference in New Issue