diff --git a/langchain/document_loaders/web_base.py b/langchain/document_loaders/web_base.py index 50cf549d..1d4e90c5 100644 --- a/langchain/document_loaders/web_base.py +++ b/langchain/document_loaders/web_base.py @@ -169,6 +169,7 @@ class WebBaseLoader(BaseLoader): self._check_parser(parser) html_doc = self.session.get(url) + html_doc.encoding = html_doc.apparent_encoding return BeautifulSoup(html_doc.text, parser) def scrape(self, parser: Union[str, None] = None) -> Any: