mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
update web_base.py to have verify option (#6107)
We propose adding a `verify` option to the `WebBaseLoader` initializer in web_base.py. Some web pages fail to load because of SSL verification errors; by letting users set `verify=False`, the loader gives them greater flexibility and control over how those pages are fetched.

<!-- Thank you for contributing to LangChain! Your PR will appear in our release under the title you set. Please make sure it highlights your valuable contribution. Replace this with a description of the change, the issue it fixes (if applicable), and relevant context. List any dependencies required for this change. After you're done, someone will review your PR. They may suggest improvements. If no one reviews your PR within a few days, feel free to @-mention the same people again, as notifications can get lost. Finally, we'd love to show appreciation for your contribution - if you'd like us to shout you out on Twitter, please also include your handle! -->

### Fixes #6079

#### Who can review?

@eyurtsev @hwchase17

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
e194dc5306
commit
2eec687474
@ -51,7 +51,10 @@ class WebBaseLoader(BaseLoader):
|
|||||||
"""kwargs for requests"""
|
"""kwargs for requests"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, web_path: Union[str, List[str]], header_template: Optional[dict] = None
|
self,
|
||||||
|
web_path: Union[str, List[str]],
|
||||||
|
header_template: Optional[dict] = None,
|
||||||
|
verify: Optional[bool] = True,
|
||||||
):
|
):
|
||||||
"""Initialize with webpage path."""
|
"""Initialize with webpage path."""
|
||||||
|
|
||||||
@ -71,6 +74,9 @@ class WebBaseLoader(BaseLoader):
|
|||||||
"bs4 package not found, please install it with " "`pip install bs4`"
|
"bs4 package not found, please install it with " "`pip install bs4`"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Choose to verify
|
||||||
|
self.verify = verify
|
||||||
|
|
||||||
headers = header_template or default_header_template
|
headers = header_template or default_header_template
|
||||||
if not headers.get("User-Agent"):
|
if not headers.get("User-Agent"):
|
||||||
try:
|
try:
|
||||||
@ -98,7 +104,7 @@ class WebBaseLoader(BaseLoader):
|
|||||||
for i in range(retries):
|
for i in range(retries):
|
||||||
try:
|
try:
|
||||||
async with session.get(
|
async with session.get(
|
||||||
url, headers=self.session.headers
|
url, headers=self.session.headers, verify=self.verify
|
||||||
) as response:
|
) as response:
|
||||||
return await response.text()
|
return await response.text()
|
||||||
except aiohttp.ClientConnectionError as e:
|
except aiohttp.ClientConnectionError as e:
|
||||||
@ -173,7 +179,7 @@ class WebBaseLoader(BaseLoader):
|
|||||||
|
|
||||||
self._check_parser(parser)
|
self._check_parser(parser)
|
||||||
|
|
||||||
html_doc = self.session.get(url, **self.requests_kwargs)
|
html_doc = self.session.get(url, verify=self.verify, **self.requests_kwargs)
|
||||||
html_doc.encoding = html_doc.apparent_encoding
|
html_doc.encoding = html_doc.apparent_encoding
|
||||||
return BeautifulSoup(html_doc.text, parser)
|
return BeautifulSoup(html_doc.text, parser)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user