From b2b9ded12facf3ae205eb4b1cbb455eca6af8977 Mon Sep 17 00:00:00 2001 From: Dhruvil Shah <48760936+jackfrost1411@users.noreply.github.com> Date: Sun, 18 Jun 2023 18:34:18 -0700 Subject: [PATCH] Update web_base.py _fetch() method For SiteMapLoader (#6256) A must-include for SiteMap Loader to avoid the SSL verification error. Setting 'verify' to False via ``` sitemap_loader.requests_kwargs = {"verify": False}``` does not bypass the SSL verification for some websites. There are websites (https://researchadmin.asu.edu/sitemap.xml) where setting "verify" to False as shown below would not work: sitemap_loader.requests_kwargs = {"verify": False} We need this merge to tell the Session to use a connector with a specific argument about SSL: \# For SiteMap SSL verification if not self.requests_kwargs.get("verify", True): connector = aiohttp.TCPConnector(ssl=False) else: connector = None Fixes #5483 #### Before submitting #### Who can review? Tag maintainers/contributors who might be interested: @hwchase17 @eyurtsev --------- Co-authored-by: Harrison Chase --- langchain/document_loaders/web_base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/langchain/document_loaders/web_base.py b/langchain/document_loaders/web_base.py index bd699eb8..7e3ff0bb 100644 --- a/langchain/document_loaders/web_base.py +++ b/langchain/document_loaders/web_base.py @@ -100,7 +100,13 @@ class WebBaseLoader(BaseLoader): async def _fetch( self, url: str, retries: int = 3, cooldown: int = 2, backoff: float = 1.5 ) -> str: - async with aiohttp.ClientSession() as session: + # For SiteMap SSL verification + if not self.requests_kwargs.get("verify", True): + connector = aiohttp.TCPConnector(ssl=False) + else: + connector = None + + async with aiohttp.ClientSession(connector=connector) as session: for i in range(retries): try: async with session.get(