@ -60,7 +60,7 @@ class WebBaseLoader(BaseLoader):
self ,
web_path : Union [ str , List [ str ] ] ,
header_template : Optional [ dict ] = None ,
verify : Optional [ bool ] = True ,
verify _ssl : Optional [ bool ] = True ,
proxies : Optional [ dict ] = None ,
) :
""" Initialize with webpage path. """
@ -73,7 +73,6 @@ class WebBaseLoader(BaseLoader):
elif isinstance ( web_path , List ) :
self . web_paths = web_path
self . session = requests . Session ( )
try :
import bs4 # noqa:F401
except ImportError :
@ -81,9 +80,6 @@ class WebBaseLoader(BaseLoader):
" bs4 package not found, please install it with " " `pip install bs4` "
)
# Choose to verify
self . verify = verify
headers = header_template or default_header_template
if not headers . get ( " User-Agent " ) :
try :
@ -96,7 +92,10 @@ class WebBaseLoader(BaseLoader):
" To get a realistic header for requests, "
" `pip install fake_useragent`. "
)
self . session = requests . Session ( )
self . session . headers = dict ( headers )
self . session . verify = verify_ssl
if proxies :
self . session . proxies . update ( proxies )
@ -110,17 +109,13 @@ class WebBaseLoader(BaseLoader):
async def _fetch (
self , url : str , retries : int = 3 , cooldown : int = 2 , backoff : float = 1.5
) - > str :
# For SiteMap SSL verification
if not self . requests_kwargs . get ( " verify " , True ) :
connector = aiohttp . TCPConnector ( ssl = False )
else :
connector = None
async with aiohttp . ClientSession ( connector = connector ) as session :
async with aiohttp . ClientSession ( ) as session :
for i in range ( retries ) :
try :
async with session . get (
url , headers = self . session . headers , verify = self . verify
url ,
headers = self . session . headers ,
ssl = None if self . session . verify else False ,
) as response :
return await response . text ( )
except aiohttp . ClientConnectionError as e :
@ -195,7 +190,7 @@ class WebBaseLoader(BaseLoader):
self . _check_parser ( parser )
html_doc = self . session . get ( url , verify = self . verify , * * self . requests_kwargs )
html_doc = self . session . get ( url , * * self . requests_kwargs )
if self . raise_for_status :
html_doc . raise_for_status ( )
html_doc . encoding = html_doc . apparent_encoding