Merge pull request #18423

* Implement lazy_load() for BSHTMLLoader
This commit is contained in:
Christophe Bornet 2024-03-06 19:25:01 +01:00 committed by GitHub
parent b3a0c44838
commit 9a6f7e213b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,5 +1,5 @@
import logging
-from typing import Dict, List, Union
+from typing import Dict, Iterator, Union
from langchain_core.documents import Document
@@ -42,7 +42,7 @@ class BSHTMLLoader(BaseLoader):
self.bs_kwargs = bs_kwargs
self.get_text_separator = get_text_separator
-def load(self) -> List[Document]:
+def lazy_load(self) -> Iterator[Document]:
"""Load HTML document into document objects."""
from bs4 import BeautifulSoup
@@ -60,4 +60,4 @@ class BSHTMLLoader(BaseLoader):
"source": self.file_path,
"title": title,
}
-return [Document(page_content=text, metadata=metadata)]
+yield Document(page_content=text, metadata=metadata)