diff --git a/langchain/document_loaders/web_base.py b/langchain/document_loaders/web_base.py
index cee218dadb..bd699eb831 100644
--- a/langchain/document_loaders/web_base.py
+++ b/langchain/document_loaders/web_base.py
@@ -51,7 +51,10 @@ class WebBaseLoader(BaseLoader):
     """kwargs for requests"""
 
     def __init__(
-        self, web_path: Union[str, List[str]], header_template: Optional[dict] = None
+        self,
+        web_path: Union[str, List[str]],
+        header_template: Optional[dict] = None,
+        verify: Optional[bool] = True,
     ):
         """Initialize with webpage path."""
 
@@ -71,6 +74,10 @@ class WebBaseLoader(BaseLoader):
                 "bs4 package not found, please install it with " "`pip install bs4`"
             )
 
+        # Whether to verify TLS certificates when fetching pages; applied to
+        # both the synchronous (requests) and asynchronous (aiohttp) paths.
+        self.verify = verify
+
         headers = header_template or default_header_template
         if not headers.get("User-Agent"):
             try:
@@ -98,7 +105,11 @@ class WebBaseLoader(BaseLoader):
             for i in range(retries):
                 try:
                     async with session.get(
-                        url, headers=self.session.headers
+                        url,
+                        headers=self.session.headers,
+                        # aiohttp has no `verify` keyword; `ssl=False` skips
+                        # certificate checks and `None` keeps the default.
+                        ssl=False if self.verify is False else None,
                     ) as response:
                         return await response.text()
                 except aiohttp.ClientConnectionError as e:
@@ -173,7 +184,10 @@ class WebBaseLoader(BaseLoader):
 
         self._check_parser(parser)
 
-        html_doc = self.session.get(url, **self.requests_kwargs)
+        # Let an explicit `verify` in requests_kwargs override self.verify
+        # instead of raising a duplicate-keyword TypeError.
+        request_kwargs = {"verify": self.verify, **self.requests_kwargs}
+        html_doc = self.session.get(url, **request_kwargs)
         html_doc.encoding = html_doc.apparent_encoding
         return BeautifulSoup(html_doc.text, parser)
 