mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
community[patch]: Force opt-in for WebResearchRetriever (CVE-2024-3095) (#24451)
This PR addresses the issue raised by CVE-2024-3095 (https://huntr.com/bounties/e62d4895-2901-405b-9559-38276b6a5273). Unfortunately, we didn't do a good job writing the initial report: it points at both the wrong package and the wrong code. The affected code is the WebResearchRetriever, not the AsyncHTMLLoader, and the WebResearchRetriever lives in langchain-community. The vulnerable code lives here: 0bd3f4e129/libs/community/langchain_community/retrievers/web_research.py (L233-L233)
This PR adds a forced opt-in for users to make sure they are aware of the risk and can mitigate it by configuring a proxy: 0bd3f4e129/libs/community/langchain_community/retrievers/web_research.py (L84-L84)
This commit is contained in:
parent
f101c759ed
commit
604dfe2d99
@ -1,6 +1,6 @@
|
||||
import logging
|
||||
import re
|
||||
from typing import List, Optional
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from langchain.chains import LLMChain
|
||||
from langchain.chains.prompt_selector import ConditionalPromptSelector
|
||||
@ -81,6 +81,35 @@ class WebResearchRetriever(BaseRetriever):
|
||||
"check .netrc for proxy configuration",
|
||||
)
|
||||
|
||||
allow_dangerous_requests: bool = False
|
||||
"""A flag to force users to acknowledge the risks of SSRF attacks when using
|
||||
this retriever.
|
||||
|
||||
Users should set this flag to `True` if they have taken the necessary precautions
|
||||
to prevent SSRF attacks when using this retriever.
|
||||
|
||||
For example, users can run the requests through a properly configured
|
||||
proxy and prevent the crawler from accidentally crawling internal resources.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs: Any) -> None:
    """Initialize the retriever, requiring an explicit SSRF opt-in.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to the parent
            initializer. Must include a truthy ``allow_dangerous_requests``.

    Raises:
        ValueError: If ``allow_dangerous_requests`` is missing or falsy.
    """
    allow_dangerous_requests = kwargs.get("allow_dangerous_requests", False)
    if not allow_dangerous_requests:
        # Adjacent string literals are concatenated with no separator, so
        # every fragment carries its own trailing space.
        raise ValueError(
            "WebResearchRetriever crawls URLs surfaced through "
            "the provided search engine. It is possible that some of those URLs "
            "will end up pointing to machines residing on an internal network, "
            "leading "
            "to an SSRF (Server-Side Request Forgery) attack. "
            "To protect yourself against that risk, you can run the requests "
            "through a proxy and prevent the crawler from accidentally crawling "
            "internal resources. "
            "If you've taken the necessary precautions, you can set "
            "`allow_dangerous_requests` to `True`."
        )
    super().__init__(**kwargs)
|
||||
|
||||
@classmethod
|
||||
def from_llm(
|
||||
cls,
|
||||
|
Loading…
Reference in New Issue
Block a user