langchain/libs/experimental/langchain_experimental/utilities/python.py

import functools
import logging
import multiprocessing
import re
import sys
from io import StringIO
from typing import Dict, Optional

from langchain.pydantic_v1 import BaseModel, Field

logger = logging.getLogger(__name__)


@functools.lru_cache(maxsize=None)
def warn_once() -> None:
    """Log the PythonREPL safety warning.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    body (and therefore the log record) is only executed on the first call;
    later calls are served from the cache.
    """
    message = "Python REPL can execute arbitrary code. Use with caution."
    logger.warning(message)


class PythonREPL(BaseModel):
    """Simulates a standalone Python REPL.

    Commands are executed with ``exec`` against this instance's ``globals``
    and ``locals`` dictionaries, and whatever the command writes to
    ``sys.stdout`` is returned as a string.

    WARNING: ``exec`` runs arbitrary code with the privileges of the calling
    process. ``sanitize_input`` only strips formatting noise; it is not a
    security boundary. Only run commands from sources you trust.
    """

    # Namespaces handed to ``exec``; each instance gets its own fresh dict
    # unless one is supplied through the ``_globals`` / ``_locals`` aliases.
    globals: Optional[Dict] = Field(default_factory=dict, alias="_globals")
    locals: Optional[Dict] = Field(default_factory=dict, alias="_locals")

    @staticmethod
    def sanitize_input(query: str) -> str:
        """Sanitize input to the python REPL.

        Remove leading/trailing whitespace and backticks, plus a leading
        ``python`` token (if llm mistakes python console as terminal, or
        wraps the code in a fenced block tagged ``python``).

        The ``python`` token is only removed when it is a whole word, so code
        that merely starts with an identifier such as ``python_version`` is
        left intact.

        Args:
            query: The query to sanitize

        Returns:
            str: The sanitized query
        """
        # ``\b`` keeps "python_version = 1" from being mangled into
        # "_version = 1" while still stripping "python\n..." and "```python".
        query = re.sub(r"^(\s|`)*(?:(?i:python)\b)?\s*", "", query)
        query = re.sub(r"(\s|`)*$", "", query)
        return query

    @classmethod
    def worker(
        cls,
        command: str,
        globals: Optional[Dict],
        locals: Optional[Dict],
        queue: multiprocessing.Queue,
    ) -> None:
        """Execute ``command`` and post its outcome on ``queue``.

        ``sys.stdout`` is redirected to an in-memory buffer while the command
        runs. Exactly one item is put on ``queue``: the captured output on
        success, or ``repr(exception)`` if the command raised an ``Exception``
        (output printed before the failure is discarded).

        Exceptions that do not derive from ``Exception`` (``SystemExit``,
        ``KeyboardInterrupt``) propagate to the caller and nothing is put on
        ``queue``; ``sys.stdout`` is still restored in that case.

        Args:
            command: Raw command text; it is passed through
                ``sanitize_input`` before execution.
            globals: Global namespace passed to ``exec``.
            locals: Local namespace passed to ``exec``.
            queue: Queue that receives the result string.
        """
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            cleaned_command = cls.sanitize_input(command)
            # NOTE(security): executes arbitrary code by design.
            exec(cleaned_command, globals, locals)
            result = mystdout.getvalue()
        except Exception as e:
            result = repr(e)
        finally:
            # Restore stdout on every exit path, including SystemExit and
            # KeyboardInterrupt, so the process is never left redirected.
            sys.stdout = old_stdout
        queue.put(result)

    def run(self, command: str, timeout: Optional[int] = None) -> str:
        """Run command with own globals/locals and returns anything printed.
        Timeout after the specified number of seconds.

        Without a timeout the command runs in the current process. With a
        timeout it runs in a child process that is terminated if it has not
        produced a result in time.
        NOTE(review): in the child-process case, changes the command makes to
        ``globals``/``locals`` presumably do not carry back to this instance;
        confirm before relying on state across calls.

        Args:
            command: The Python source to execute.
            timeout: Maximum number of seconds to wait for a result, or
                ``None`` to run in-process without a limit.

        Returns:
            The text the command printed, ``repr`` of the exception it raised,
            ``"Execution timed out"`` if the time limit was hit, or a short
            diagnostic if the child process ended without reporting a result.
        """

        # Warn against dangers of PythonREPL
        warn_once()

        queue: multiprocessing.Queue = multiprocessing.Queue()

        # Only use multiprocessing if we are enforcing a timeout
        if timeout is None:
            self.worker(command, self.globals, self.locals, queue)
            # get the result from the worker function
            return queue.get()

        # Imported locally; note the local variable above is also named
        # ``queue``, which this import does not rebind.
        from queue import Empty

        # Upper bound (seconds) on how long we wait for a child to exit once
        # we are done with it.
        reap_seconds = 1.0

        p = multiprocessing.Process(
            target=self.worker, args=(command, self.globals, self.locals, queue)
        )
        p.start()

        try:
            # Read the result *before* joining: a child that has put a large
            # item on the queue cannot exit until the parent has consumed it,
            # so joining first would turn big outputs into false timeouts.
            result = queue.get(timeout=max(timeout, 0))
        except Empty:
            if p.is_alive():
                p.terminate()
                p.join(reap_seconds)
                return "Execution timed out"
            # The child has already exited. Pick up a result that may have
            # landed right at the deadline; otherwise it died without posting
            # one (e.g. the command called ``sys.exit()``).
            p.join()
            try:
                return queue.get_nowait()
            except Empty:
                return f"Execution ended without a result (exit code {p.exitcode})"

        # Result received: reap the child, and do not let a lingering child
        # (e.g. one kept alive by threads the command started) outlive the call.
        p.join(reap_seconds)
        if p.is_alive():
            p.terminate()
        return result
Add python,pandas,xorbits,spark agents to experimental (#11774) See for context https://github.com/langchain-ai/langchain/discussions/11680 2023-10-13 21:36:44 +00:00			`import functools`
			`import logging`
			`import multiprocessing`
experimental: clean python repl input（experimental：Added code for PythonREPL） (#20930) Update python.py（experimental：Added code for PythonREPL） Added code for PythonREPL, defining a static method 'sanitize_input' that takes the string 'query' as input and returns a sanitizing string. The purpose of this method is to remove unwanted characters from the input string, Specifically: 1. Delete the whitespace at the beginning and end of the string (' \s'). 2. Remove the quotation marks (`` ` ``) at the beginning and end of the string. 3. Remove the keyword "python" at the beginning of the string (case insensitive) because the user may have typed it. This method uses regular expressions (regex) to implement sanitizing. It all started with this code： from langchain.agents import Tool from langchain_experimental.utilities import PythonREPL python_repl = PythonREPL() repl_tool = Tool( name="python_repl", description="Remove redundant formatting marks at the beginning and end of source code from input.Use a Python shell to execute python commands. If you want to see the output of a value, you should print it out with `print(...)`.", func=python_repl.run, ) When I call the agent to write a piece of code for me and execute it with the defined code, I must get an error: SyntaxError('invalid syntax', ('<string>', 1, 1,'In', 1, 2)) After checking, I found that pythonREPL has less formatting of input code than the soon-to-be deprecated pythonREPL tool, so I added this step to it, so that no matter what code I ask the agent to write for me, it can be executed smoothly and get the output result. I have tried modifying the prompt words to solve this problem before, but it did not work, and by adding a simple format check, the problem is well resolved. 
<img width="1271" alt="image" src="https://github.com/langchain-ai/langchain/assets/164149097/c49a685f-d246-4b11-b655-fd952fc2f04c"> --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com> 2024-05-01 05:19:09 +00:00			`import re`
Add python,pandas,xorbits,spark agents to experimental (#11774) See for context https://github.com/langchain-ai/langchain/discussions/11680 2023-10-13 21:36:44 +00:00			`import sys`
			`from io import StringIO`
			`from typing import Dict, Optional`

			`from langchain.pydantic_v1 import BaseModel, Field`

			`logger = logging.getLogger(__name__)`


			`@functools.lru_cache(maxsize=None)`
			`def warn_once() -> None:`
			`"""Warn once about the dangers of PythonREPL."""`
			`logger.warning("Python REPL can execute arbitrary code. Use with caution.")`


			`class PythonREPL(BaseModel):`
			`"""Simulates a standalone Python REPL."""`

			`globals: Optional[Dict] = Field(default_factory=dict, alias="_globals")`
			`locals: Optional[Dict] = Field(default_factory=dict, alias="_locals")`

experimental: clean python repl input（experimental：Added code for PythonREPL） (#20930) Update python.py（experimental：Added code for PythonREPL） Added code for PythonREPL, defining a static method 'sanitize_input' that takes the string 'query' as input and returns a sanitizing string. The purpose of this method is to remove unwanted characters from the input string, Specifically: 1. Delete the whitespace at the beginning and end of the string (' \s'). 2. Remove the quotation marks (`` ` ``) at the beginning and end of the string. 3. Remove the keyword "python" at the beginning of the string (case insensitive) because the user may have typed it. This method uses regular expressions (regex) to implement sanitizing. It all started with this code： from langchain.agents import Tool from langchain_experimental.utilities import PythonREPL python_repl = PythonREPL() repl_tool = Tool( name="python_repl", description="Remove redundant formatting marks at the beginning and end of source code from input.Use a Python shell to execute python commands. If you want to see the output of a value, you should print it out with `print(...)`.", func=python_repl.run, ) When I call the agent to write a piece of code for me and execute it with the defined code, I must get an error: SyntaxError('invalid syntax', ('<string>', 1, 1,'In', 1, 2)) After checking, I found that pythonREPL has less formatting of input code than the soon-to-be deprecated pythonREPL tool, so I added this step to it, so that no matter what code I ask the agent to write for me, it can be executed smoothly and get the output result. I have tried modifying the prompt words to solve this problem before, but it did not work, and by adding a simple format check, the problem is well resolved. 
<img width="1271" alt="image" src="https://github.com/langchain-ai/langchain/assets/164149097/c49a685f-d246-4b11-b655-fd952fc2f04c"> --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com> 2024-05-01 05:19:09 +00:00			`@staticmethod`
			`def sanitize_input(query: str) -> str:`
			`"""Sanitize input to the python REPL.`

			`Remove whitespace, backtick & python`
			`(if llm mistakes python console as terminal)`

			`Args:`
			`query: The query to sanitize`

			`Returns:`
			`str: The sanitized query`
			`"""`
			query = re.sub(r"^(\s|`)*(?i:python)?\s*", "", query)
			query = re.sub(r"(\s|`)*$", "", query)
			`return query`

Add python,pandas,xorbits,spark agents to experimental (#11774) See for context https://github.com/langchain-ai/langchain/discussions/11680 2023-10-13 21:36:44 +00:00			`@classmethod`
			`def worker(`
			`cls,`
			`command: str,`
			`globals: Optional[Dict],`
			`locals: Optional[Dict],`
			`queue: multiprocessing.Queue,`
			`) -> None:`
			`old_stdout = sys.stdout`
			`sys.stdout = mystdout = StringIO()`
			`try:`
experimental: clean python repl input（experimental：Added code for PythonREPL） (#20930) Update python.py（experimental：Added code for PythonREPL） Added code for PythonREPL, defining a static method 'sanitize_input' that takes the string 'query' as input and returns a sanitizing string. The purpose of this method is to remove unwanted characters from the input string, Specifically: 1. Delete the whitespace at the beginning and end of the string (' \s'). 2. Remove the quotation marks (`` ` ``) at the beginning and end of the string. 3. Remove the keyword "python" at the beginning of the string (case insensitive) because the user may have typed it. This method uses regular expressions (regex) to implement sanitizing. It all started with this code： from langchain.agents import Tool from langchain_experimental.utilities import PythonREPL python_repl = PythonREPL() repl_tool = Tool( name="python_repl", description="Remove redundant formatting marks at the beginning and end of source code from input.Use a Python shell to execute python commands. If you want to see the output of a value, you should print it out with `print(...)`.", func=python_repl.run, ) When I call the agent to write a piece of code for me and execute it with the defined code, I must get an error: SyntaxError('invalid syntax', ('<string>', 1, 1,'In', 1, 2)) After checking, I found that pythonREPL has less formatting of input code than the soon-to-be deprecated pythonREPL tool, so I added this step to it, so that no matter what code I ask the agent to write for me, it can be executed smoothly and get the output result. I have tried modifying the prompt words to solve this problem before, but it did not work, and by adding a simple format check, the problem is well resolved. 
<img width="1271" alt="image" src="https://github.com/langchain-ai/langchain/assets/164149097/c49a685f-d246-4b11-b655-fd952fc2f04c"> --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com> 2024-05-01 05:19:09 +00:00			`cleaned_command = cls.sanitize_input(command)`
			`exec(cleaned_command, globals, locals)`
Add python,pandas,xorbits,spark agents to experimental (#11774) See for context https://github.com/langchain-ai/langchain/discussions/11680 2023-10-13 21:36:44 +00:00			`sys.stdout = old_stdout`
			`queue.put(mystdout.getvalue())`
			`except Exception as e:`
			`sys.stdout = old_stdout`
			`queue.put(repr(e))`

			`def run(self, command: str, timeout: Optional[int] = None) -> str:`
			`"""Run command with own globals/locals and returns anything printed.`
			`Timeout after the specified number of seconds."""`

			`# Warn against dangers of PythonREPL`
			`warn_once()`

			`queue: multiprocessing.Queue = multiprocessing.Queue()`

			`# Only use multiprocessing if we are enforcing a timeout`
			`if timeout is not None:`
			`# create a Process`
			`p = multiprocessing.Process(`
			`target=self.worker, args=(command, self.globals, self.locals, queue)`
			`)`

			`# start it`
			`p.start()`

			`# wait for the process to finish or kill it after timeout seconds`
			`p.join(timeout)`

			`if p.is_alive():`
			`p.terminate()`
			`return "Execution timed out"`
			`else:`
			`self.worker(command, self.globals, self.locals, queue)`
			`# get the result from the worker function`
			`return queue.get()`