Update web_base.py to add a `verify` option (#6107)

This change adds a `verify` option to the initializer of the web-based
loader (`WebBaseLoader`). It addresses the SSL verification errors
encountered on certain web pages: the loader forwards `verify` to its
HTTP requests, so users can pass `verify=False` to skip SSL verification
for such pages. The default is `True`, so existing callers are unaffected.
<!--
Thank you for contributing to LangChain! Your PR will appear in our
release under the title you set. Please make sure it highlights your
valuable contribution.

Replace this with a description of the change, the issue it fixes (if
applicable), and relevant context. List any dependencies required for
this change.

After you're done, someone will review your PR. They may suggest
improvements. If no one reviews your PR within a few days, feel free to
@-mention the same people again, as notifications can get lost.

Finally, we'd love to show appreciation for your contribution - if you'd
like us to shout you out on Twitter, please also include your handle!
-->

### Fixes #6079 

#### Who can review?
@eyurtsev @hwchase17

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
searx_updates
Dhruvil Shah 11 months ago committed by GitHub
parent e194dc5306
commit 2eec687474
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -51,7 +51,10 @@ class WebBaseLoader(BaseLoader):
"""kwargs for requests"""
def __init__(
self, web_path: Union[str, List[str]], header_template: Optional[dict] = None
self,
web_path: Union[str, List[str]],
header_template: Optional[dict] = None,
verify: Optional[bool] = True,
):
"""Initialize with webpage path."""
@ -71,6 +74,9 @@ class WebBaseLoader(BaseLoader):
"bs4 package not found, please install it with " "`pip install bs4`"
)
# Choose to verify
self.verify = verify
headers = header_template or default_header_template
if not headers.get("User-Agent"):
try:
@ -98,7 +104,7 @@ class WebBaseLoader(BaseLoader):
for i in range(retries):
try:
async with session.get(
url, headers=self.session.headers
url, headers=self.session.headers, verify=self.verify
) as response:
return await response.text()
except aiohttp.ClientConnectionError as e:
@ -173,7 +179,7 @@ class WebBaseLoader(BaseLoader):
self._check_parser(parser)
html_doc = self.session.get(url, **self.requests_kwargs)
html_doc = self.session.get(url, verify=self.verify, **self.requests_kwargs)
html_doc.encoding = html_doc.apparent_encoding
return BeautifulSoup(html_doc.text, parser)

Loading…
Cancel
Save