mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
WebBaseLoader: optionally raise exception in the case of http error (#6823)
- **Description**: this PR adds an option to raise an exception when the HTTP request returns an error status code (4xx or 5xx). This is particularly useful when the URL points to a non-existent web page: the server returns an HTTP status of 404 NOT FOUND, but WebBaseLoader nevertheless parses and returns the HTTP body of the error response. - **Dependencies**: none, - **Tag maintainer**: @rlancemartin, @eyurtsev, - **Twitter handle**: jtolgyesi
This commit is contained in:
parent
ef72a7cf26
commit
f1070de038
@ -50,6 +50,9 @@ class WebBaseLoader(BaseLoader):
|
||||
requests_kwargs: Dict[str, Any] = {}
|
||||
"""kwargs for requests"""
|
||||
|
||||
raise_for_status: bool = False
|
||||
"""Raise an exception if http status code denotes an error."""
|
||||
|
||||
bs_get_text_kwargs: Dict[str, Any] = {}
|
||||
"""kwargs for beatifulsoup4 get_text"""
|
||||
|
||||
@ -189,6 +192,8 @@ class WebBaseLoader(BaseLoader):
|
||||
self._check_parser(parser)
|
||||
|
||||
html_doc = self.session.get(url, verify=self.verify, **self.requests_kwargs)
|
||||
if self.raise_for_status:
|
||||
html_doc.raise_for_status()
|
||||
html_doc.encoding = html_doc.apparent_encoding
|
||||
return BeautifulSoup(html_doc.text, parser)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user