mirror of
https://github.com/hwchase17/langchain
synced 2024-10-31 15:20:26 +00:00
parent
b3a0c44838
commit
9a6f7e213b
@ -1,5 +1,5 @@
|
||||
import logging
|
||||
from typing import Dict, List, Union
|
||||
from typing import Dict, Iterator, Union
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
@ -42,7 +42,7 @@ class BSHTMLLoader(BaseLoader):
|
||||
self.bs_kwargs = bs_kwargs
|
||||
self.get_text_separator = get_text_separator
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
"""Load HTML document into document objects."""
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
@ -60,4 +60,4 @@ class BSHTMLLoader(BaseLoader):
|
||||
"source": self.file_path,
|
||||
"title": title,
|
||||
}
|
||||
return [Document(page_content=text, metadata=metadata)]
|
||||
yield Document(page_content=text, metadata=metadata)
|
||||
|
Loading…
Reference in New Issue
Block a user