community[major], experimental[patch]: Remove Python REPL from community (#22904)

Remove the REPL from community, and suggest an alternative import from
langchain_experimental.

Fix for this issue:
https://github.com/langchain-ai/langchain/issues/14345

This is not a bug in the code or an actual security risk. The Python
REPL itself is behaving as expected.

This PR is done to appease blanket security policies that just
look for the presence of `exec` in the code.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Eugene Yurtsev 2024-06-14 13:53:29 -04:00 committed by GitHub
parent 9a877c7adb
commit c72bcda4f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 36 additions and 90 deletions

View File

@ -122,9 +122,6 @@ if TYPE_CHECKING:
from langchain_community.utilities.pubmed import ( from langchain_community.utilities.pubmed import (
PubMedAPIWrapper, PubMedAPIWrapper,
) )
from langchain_community.utilities.python import (
PythonREPL,
)
from langchain_community.utilities.rememberizer import RememberizerAPIWrapper from langchain_community.utilities.rememberizer import RememberizerAPIWrapper
from langchain_community.utilities.requests import ( from langchain_community.utilities.requests import (
Requests, Requests,
@ -215,7 +212,6 @@ __all__ = [
"Portkey", "Portkey",
"PowerBIDataset", "PowerBIDataset",
"PubMedAPIWrapper", "PubMedAPIWrapper",
"PythonREPL",
"RememberizerAPIWrapper", "RememberizerAPIWrapper",
"Requests", "Requests",
"RequestsWrapper", "RequestsWrapper",
@ -279,7 +275,6 @@ _module_lookup = {
"Portkey": "langchain_community.utilities.portkey", "Portkey": "langchain_community.utilities.portkey",
"PowerBIDataset": "langchain_community.utilities.powerbi", "PowerBIDataset": "langchain_community.utilities.powerbi",
"PubMedAPIWrapper": "langchain_community.utilities.pubmed", "PubMedAPIWrapper": "langchain_community.utilities.pubmed",
"PythonREPL": "langchain_community.utilities.python",
"RememberizerAPIWrapper": "langchain_community.utilities.rememberizer", "RememberizerAPIWrapper": "langchain_community.utilities.rememberizer",
"Requests": "langchain_community.utilities.requests", "Requests": "langchain_community.utilities.requests",
"RequestsWrapper": "langchain_community.utilities.requests", "RequestsWrapper": "langchain_community.utilities.requests",
@ -302,8 +297,21 @@ _module_lookup = {
"ZapierNLAWrapper": "langchain_community.utilities.zapier", "ZapierNLAWrapper": "langchain_community.utilities.zapier",
} }
REMOVED = {
"PythonREPL": (
"PythonREPL has been deprecated from langchain_community "
"due to being flagged by security scanners. See: "
"https://github.com/langchain-ai/langchain/issues/14345 "
"If you need to use it, please use the version "
"from langchain_experimental. "
"from langchain_experimental.utilities.python import PythonREPL."
)
}
def __getattr__(name: str) -> Any: def __getattr__(name: str) -> Any:
if name in REMOVED:
raise AssertionError(REMOVED[name])
if name in _module_lookup: if name in _module_lookup:
module = importlib.import_module(_module_lookup[name]) module = importlib.import_module(_module_lookup[name])
return getattr(module, name) return getattr(module, name)

View File

@ -1,71 +1,17 @@
import functools
import logging import logging
import multiprocessing from typing import Any
import sys
from io import StringIO
from typing import Dict, Optional
from langchain_core.pydantic_v1 import BaseModel, Field
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@functools.lru_cache(maxsize=None) def __getattr__(name: str) -> Any:
def warn_once() -> None: if name in "PythonREPL":
"""Warn once about the dangers of PythonREPL.""" raise AssertionError(
logger.warning("Python REPL can execute arbitrary code. Use with caution.") "PythonREPL has been deprecated from langchain_community due to being "
"flagged by security scanners. See: "
"https://github.com/langchain-ai/langchain/issues/14345 "
class PythonREPL(BaseModel): "If you need to use it, please use the version "
"""Simulates a standalone Python REPL.""" "from langchain_experimental. "
"from langchain_experimental.utilities.python import PythonREPL."
globals: Optional[Dict] = Field(default_factory=dict, alias="_globals") )
locals: Optional[Dict] = Field(default_factory=dict, alias="_locals") raise AttributeError(f"module {__name__} has no attribute {name}")
@classmethod
def worker(
cls,
command: str,
globals: Optional[Dict],
locals: Optional[Dict],
queue: multiprocessing.Queue,
) -> None:
old_stdout = sys.stdout
sys.stdout = mystdout = StringIO()
try:
exec(command, globals, locals)
sys.stdout = old_stdout
queue.put(mystdout.getvalue())
except Exception as e:
sys.stdout = old_stdout
queue.put(repr(e))
def run(self, command: str, timeout: Optional[int] = None) -> str:
"""Run command with own globals/locals and returns anything printed.
Timeout after the specified number of seconds."""
# Warn against dangers of PythonREPL
warn_once()
queue: multiprocessing.Queue = multiprocessing.Queue()
# Only use multiprocessing if we are enforcing a timeout
if timeout is not None:
# create a Process
p = multiprocessing.Process(
target=self.worker, args=(command, self.globals, self.locals, queue)
)
# start it
p.start()
# wait for the process to finish or kill it after timeout seconds
p.join(timeout)
if p.is_alive():
p.terminate()
return "Execution timed out"
else:
self.worker(command, self.globals, self.locals, queue)
# get the result from the worker function
return queue.get()

View File

@ -41,7 +41,6 @@ EXPECTED_ALL = [
"Portkey", "Portkey",
"PowerBIDataset", "PowerBIDataset",
"PubMedAPIWrapper", "PubMedAPIWrapper",
"PythonREPL",
"Requests", "Requests",
"RequestsWrapper", "RequestsWrapper",
"RememberizerAPIWrapper", "RememberizerAPIWrapper",

View File

@ -12,13 +12,13 @@ from typing import Any, Dict, List, Optional
from langchain.chains.base import Chain from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain from langchain.chains.llm import LLMChain
from langchain_community.utilities import PythonREPL
from langchain_core.callbacks.manager import CallbackManagerForChainRun from langchain_core.callbacks.manager import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel from langchain_core.language_models import BaseLanguageModel
from langchain_experimental.pal_chain.colored_object_prompt import COLORED_OBJECT_PROMPT from langchain_experimental.pal_chain.colored_object_prompt import COLORED_OBJECT_PROMPT
from langchain_experimental.pal_chain.math_prompt import MATH_PROMPT from langchain_experimental.pal_chain.math_prompt import MATH_PROMPT
from langchain_experimental.pydantic_v1 import Extra, Field, root_validator from langchain_experimental.pydantic_v1 import Extra, Field, root_validator
from langchain_experimental.utilities import PythonREPL
COMMAND_EXECUTION_FUNCTIONS = ["system", "exec", "execfile", "eval", "__import__"] COMMAND_EXECUTION_FUNCTIONS = ["system", "exec", "execfile", "eval", "__import__"]
COMMAND_EXECUTION_ATTRIBUTES = [ COMMAND_EXECUTION_ATTRIBUTES = [

View File

@ -1,11 +1,11 @@
"""For backwards compatibility.""" """For backwards compatibility."""
from typing import TYPE_CHECKING, Any from typing import Any
from langchain._api import create_importer from langchain._api import create_importer
if TYPE_CHECKING: # Code has been removed from the community package as well.
from langchain_community.utilities.python import PythonREPL # We'll proxy to community package, which will raise an appropriate exception,
# but we'll not include this in __all__, so it won't be listed as importable.
_importer = create_importer( _importer = create_importer(
__package__, __package__,
@ -16,6 +16,3 @@ _importer = create_importer(
def __getattr__(name: str) -> Any: def __getattr__(name: str) -> Any:
"""Look up attributes dynamically.""" """Look up attributes dynamically."""
return _importer(name) return _importer(name)
__all__ = ["PythonREPL"]

View File

@ -38,7 +38,6 @@ if TYPE_CHECKING:
Portkey, Portkey,
PowerBIDataset, PowerBIDataset,
PubMedAPIWrapper, PubMedAPIWrapper,
PythonREPL,
Requests, Requests,
RequestsWrapper, RequestsWrapper,
SceneXplainAPIWrapper, SceneXplainAPIWrapper,
@ -90,6 +89,8 @@ DEPRECATED_LOOKUP = {
"Portkey": "langchain_community.utilities", "Portkey": "langchain_community.utilities",
"PowerBIDataset": "langchain_community.utilities", "PowerBIDataset": "langchain_community.utilities",
"PubMedAPIWrapper": "langchain_community.utilities", "PubMedAPIWrapper": "langchain_community.utilities",
# We will not list PythonREPL in __all__ since it has been removed from community
# it'll proxy to community package, which will raise an appropriate exception.
"PythonREPL": "langchain_community.utilities", "PythonREPL": "langchain_community.utilities",
"Requests": "langchain_community.utilities", "Requests": "langchain_community.utilities",
"SteamWebAPIWrapper": "langchain_community.utilities", "SteamWebAPIWrapper": "langchain_community.utilities",
@ -147,7 +148,6 @@ __all__ = [
"Portkey", "Portkey",
"PowerBIDataset", "PowerBIDataset",
"PubMedAPIWrapper", "PubMedAPIWrapper",
"PythonREPL",
"Requests", "Requests",
"SteamWebAPIWrapper", "SteamWebAPIWrapper",
"SQLDatabase", "SQLDatabase",

View File

@ -1,11 +1,11 @@
"""For backwards compatibility.""" """For backwards compatibility."""
from typing import TYPE_CHECKING, Any from typing import Any
from langchain._api import create_importer from langchain._api import create_importer
if TYPE_CHECKING: # Code has been removed from the community package as well.
from langchain_community.utilities.python import PythonREPL # We'll proxy to community package, which will raise an appropriate exception,
# but we'll not include this in __all__, so it won't be listed as importable.
_importer = create_importer( _importer = create_importer(
__package__, __package__,
@ -16,6 +16,3 @@ _importer = create_importer(
def __getattr__(name: str) -> Any: def __getattr__(name: str) -> Any:
"""Look up attributes dynamically.""" """Look up attributes dynamically."""
return _importer(name) return _importer(name)
__all__ = ["PythonREPL"]

View File

@ -29,7 +29,6 @@ EXPECTED_ALL = [
"Portkey", "Portkey",
"PowerBIDataset", "PowerBIDataset",
"PubMedAPIWrapper", "PubMedAPIWrapper",
"PythonREPL",
"Requests", "Requests",
"RequestsWrapper", "RequestsWrapper",
"SQLDatabase", "SQLDatabase",