Feature/terminal strace (#10)

* feat: exp * feat: watch syscall * feat: terminal tool with syscall tracer * refactor: move editor into tools * fix: errors * refactor: syscall and terminal * fix: USE_GPU option * fix: hard code playground dir * fix: toolsets * fix: terminal cwd playground * feat: wait syscall with timeout * fix: reset timer on wait_until_stop_or_exit ends * fix: handle in case exception is tuple
1 year ago · 1fdf63741f
parent 072e16b122
commit 1fdf63741f
13 changed files with 367 additions and 198 deletions
--- a/.env.example
+++ b/.env.example
@ -1,3 +1,4 @@
 USE_GPU=True
 BOT_NAME=<your-bot-name>
 OPENAI_API_KEY=***
 BING_SEARCH_URL=***
--- a/api/main.py
+++ b/api/main.py
@ -13,9 +13,9 @@ from env import settings
 from core.prompts.error import ERROR_PROMPT
 from core.agents.manager import AgentManager
 from core.tools.base import BaseToolSet
 from core.tools.terminal import Terminal
 from core.tools.editor import CodeEditor
 from core.tools.cpu import (
    Terminal,
    CodeEditor,
    RequestsGet,
    WineDB,
    ExitConversation,
@ -38,26 +38,28 @@ app = FastAPI()
 app.mount("/static", StaticFiles(directory=StaticUploader.STATIC_DIR), name="static")
 uploader = StaticUploader.from_settings(settings)
-toolsets: List[BaseToolSet] = (
+use_gpu = settings["USE_GPU"] and torch.cuda.is_available()
-    [
+
 toolsets: List[BaseToolSet] = [
    Terminal(),
    CodeEditor(),
    RequestsGet(),
    ExitConversation(),
-    ]
+]
-    + [
+
 if use_gpu:
    toolsets.extend(
        [
            Text2Image("cuda"),
            ImageEditing("cuda"),
            InstructPix2Pix("cuda"),
            VisualQuestionAnswering("cuda"),
        ]
-    if torch.cuda.is_available()
+    )
    else []
 )
 handlers: Dict[FileType, BaseHandler] = {}
 handlers[FileType.DATAFRAME] = CsvToDataframe()
-if torch.cuda.is_available():
+if use_gpu:
    handlers[FileType.IMAGE] = ImageCaptioning("cuda")
 if settings["WINEDB_HOST"] and settings["WINEDB_PASSWORD"]:
@ -102,7 +104,7 @@ async def command(request: Request) -> Response:
    try:
        res = executor({"input": promptedQuery})
    except Exception as e:
-        logger.error(f"error while processing request: ", str(e))
+        logger.error(f"error while processing request: {str(e)}")
        try:
            res = executor(
                {
--- a/core/editor/init.py
+++ b/core/editor/init.py
@ -1,3 +0,0 @@
 from .patch import CodePatcher
 from .read import CodeReader
 from .write import CodeWriter
--- a/core/tools/cpu.py
+++ b/core/tools/cpu.py
@ -7,180 +7,10 @@ from llama_index import GPTSimpleVectorIndex
 from bs4 import BeautifulSoup
 import subprocess
 from core.editor import CodePatcher, CodeReader, CodeWriter
 from .base import tool, BaseToolSet, ToolScope, SessionGetter
 from logger import logger
 class Terminal(BaseToolSet):
    """Tool set exposing a single shell-execution tool."""

    @tool(
        name="Terminal",
        description="Executes commands in a terminal."
        "If linux errno occurs, we have to solve the problem with the terminal. "
        "It can't execute interactive operations or blocking operations. "
        "Input should be valid commands, "
        "and the output will be any output from running that command.",
    )
    def execute(self, commands: str) -> str:
        """Run ``commands`` through the shell and return the combined output.

        stderr is merged into stdout. Any exception raised while running or
        decoding is returned as its string form instead of propagating.
        Results longer than 1000 characters are cut and suffixed with "...".
        """
        try:
            completed = subprocess.run(
                commands,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            output = completed.stdout.decode()
        except Exception as err:
            output = str(err)

        # Keep the reply short enough to hand back to the caller.
        if len(output) > 1000:
            output = f"{output[:1000]}..."

        logger.debug(
            f"\nProcessed Terminal, Input Commands: {commands} Output Answer: {output}"
        )
        return output
 class CodeEditor(BaseToolSet):
    """Tool set for reading, summarising, writing, patching and clearing code files.

    Every tool returns a string: either the result of the operation or the
    string form of whatever exception the operation raised.
    """

    @tool(
        name="CodeEditor.READ",
        description="Read and understand code. "
        "Input should be filename and line number group. ex. test.py|1-10 "
        "and the output will be code. ",
    )
    def read(self, inputs: str) -> str:
        """Delegate to ``CodeReader.read`` and return its result or the error text."""
        try:
            result = CodeReader.read(inputs)
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.READ, Input Commands: {inputs} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.SUMMARY",
        description="Summary code. "
        "Read the code structured into a tree. "
        "If you set specific line, it will show the code from the specific line. "
        "Input should be filename, depth, and specific line if you want. ex. test.py|2 or test.py|3|print('hello world') "
        "and the output will be list of (line number: code). ",
    )
    def summary(self, inputs: str) -> str:
        """Delegate to ``CodeReader.summary`` and return its result or the error text."""
        try:
            result = CodeReader.summary(inputs)
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.SUMMARY, Input Commands: {inputs} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.APPEND",
        description="Append code to the existing file. "
        "If the code is completed, use the Terminal tool to execute it, if not, append the code through the this tool. "
        "Input should be filename and code to append. "
        "Input code must be the code that should be appended, NOT whole code. "
        "ex. test.py\nprint('hello world')\n "
        "and the output will be last 3 line.",
    )
    def append(self, inputs: str) -> str:
        """Append via ``CodeWriter.append`` and report the last three lines it returned."""
        try:
            code = CodeWriter.append(inputs)
            # Only the tail of the returned code is echoed back.
            tail = "\n".join(code.split("\n")[-3:])
            result = (
                f"Last 3 line was:\n{tail}"
                "\nYou can use CodeEditor.APPEND tool to append the code if it is not completed."
            )
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.APPEND, Input: {inputs} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.WRITE",
        description="Write code to create a new tool. "
        "If the code is completed, use the Terminal tool to execute it, if not, append the code through the CodeEditor.APPEND tool. "
        "Input should be filename and code. This file must be in playground folder. "
        "ex. test.py\nprint('hello world')\n "
        "and the output will be last 3 line.",
    )
    def write(self, inputs: str) -> str:
        """Write via ``CodeWriter.write`` and report the last three lines it returned."""
        try:
            code = CodeWriter.write(inputs)
            # Only the tail of the returned code is echoed back.
            tail = "\n".join(code.split("\n")[-3:])
            result = (
                f"Last 3 line was:\n{tail}"
                "\nYou can use CodeEditor.APPEND tool to append the code if it is not completed."
            )
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.WRITE, Input: {inputs} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.PATCH",
        description="Patch the code to correct the error if an error occurs or to improve it. "
        "Input is a list of patches. The patch is separated by {seperator}. ".format(
            seperator=CodePatcher.separator.replace("\n", "\\n")
        )
        + "Each patch has to be formatted like below.\n"
        "<filepath>|<start_line>,<start_col>|<end_line>,<end_col>|<new_code>"
        "Code between start and end will be replaced with new_code. "
        "The output will be written/deleted bytes or error message. ",
    )
    def patch(self, patches: str) -> str:
        """Apply ``patches`` via ``CodePatcher.patch`` and report the two counts it returns."""
        try:
            written, deleted = CodePatcher.patch(patches)
            result = f"successfully wrote {written}, deleted {deleted}"
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.PATCH, Input Patch: {patches} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.DELETE",
        description="Delete code in file for a new start. "
        "Input should be filename."
        "ex. test.py "
        "Output will be success or error message.",
    )
    def delete(self, inputs: str) -> str:
        """Empty the file named by ``inputs`` (creating it if absent); return "success" or the error text."""
        try:
            # Opening in "w" mode truncates; the file itself is kept.
            with open(inputs, "w") as handle:
                handle.write("")
            result = "success"
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.DELETE, Input filename: {inputs} Output Answer: {result}"
        )
        return result
 class RequestsGet(BaseToolSet):
    @tool(
        name="Requests Get",
--- a/core/tools/editor/init.py
+++ b/core/tools/editor/init.py
@ -0,0 +1,142 @@
 from core.tools.base import tool, BaseToolSet
 from logger import logger
 from .patch import CodePatcher
 from .read import CodeReader
 from .write import CodeWriter
 class CodeEditor(BaseToolSet):
    """Tool set for reading, summarising, writing, patching and clearing code files.

    Every tool returns a string: either the result of the operation or the
    string form of whatever exception the operation raised.
    """

    @tool(
        name="CodeEditor.READ",
        description="Read and understand code. "
        "Input should be filename and line number group. ex. test.py|1-10 "
        "and the output will be code. ",
    )
    def read(self, inputs: str) -> str:
        """Delegate to ``CodeReader.read`` and return its result or the error text."""
        try:
            result = CodeReader.read(inputs)
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.READ, Input Commands: {inputs} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.SUMMARY",
        description="Summary code. "
        "Read the code structured into a tree. "
        "If you set specific line, it will show the code from the specific line. "
        "Input should be filename, depth, and specific line if you want. ex. test.py|2 or test.py|3|print('hello world') "
        "and the output will be list of (line number: code). ",
    )
    def summary(self, inputs: str) -> str:
        """Delegate to ``CodeReader.summary`` and return its result or the error text."""
        try:
            result = CodeReader.summary(inputs)
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.SUMMARY, Input Commands: {inputs} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.APPEND",
        description="Append code to the existing file. "
        "If the code is completed, use the Terminal tool to execute it, if not, append the code through the this tool. "
        "Input should be filename and code to append. "
        "Input code must be the code that should be appended, NOT whole code. "
        "ex. test.py\nprint('hello world')\n "
        "and the output will be last 3 line.",
    )
    def append(self, inputs: str) -> str:
        """Append via ``CodeWriter.append`` and report the last three lines it returned."""
        try:
            code = CodeWriter.append(inputs)
            # Only the tail of the returned code is echoed back.
            tail = "\n".join(code.split("\n")[-3:])
            result = (
                f"Last 3 line was:\n{tail}"
                "\nYou can use CodeEditor.APPEND tool to append the code if it is not completed."
            )
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.APPEND, Input: {inputs} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.WRITE",
        description="Write code to create a new tool. "
        "If the code is completed, use the Terminal tool to execute it, if not, append the code through the CodeEditor.APPEND tool. "
        "Input should be filename and code. This file must be in playground folder. "
        "ex. test.py\nprint('hello world')\n "
        "and the output will be last 3 line.",
    )
    def write(self, inputs: str) -> str:
        """Write via ``CodeWriter.write`` and report the last three lines it returned."""
        try:
            code = CodeWriter.write(inputs)
            # Only the tail of the returned code is echoed back.
            tail = "\n".join(code.split("\n")[-3:])
            result = (
                f"Last 3 line was:\n{tail}"
                "\nYou can use CodeEditor.APPEND tool to append the code if it is not completed."
            )
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.WRITE, Input: {inputs} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.PATCH",
        description="Patch the code to correct the error if an error occurs or to improve it. "
        "Input is a list of patches. The patch is separated by {seperator}. ".format(
            seperator=CodePatcher.separator.replace("\n", "\\n")
        )
        + "Each patch has to be formatted like below.\n"
        "<filepath>|<start_line>,<start_col>|<end_line>,<end_col>|<new_code>"
        "Code between start and end will be replaced with new_code. "
        "The output will be written/deleted bytes or error message. ",
    )
    def patch(self, patches: str) -> str:
        """Apply ``patches`` via ``CodePatcher.patch`` and report the two counts it returns."""
        try:
            written, deleted = CodePatcher.patch(patches)
            result = f"successfully wrote {written}, deleted {deleted}"
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.PATCH, Input Patch: {patches} Output Answer: {result}"
        )
        return result

    @tool(
        name="CodeEditor.DELETE",
        description="Delete code in file for a new start. "
        "Input should be filename."
        "ex. test.py "
        "Output will be success or error message.",
    )
    def delete(self, inputs: str) -> str:
        """Empty the file named by ``inputs`` (creating it if absent); return "success" or the error text."""
        try:
            # Opening in "w" mode truncates; the file itself is kept.
            with open(inputs, "w") as handle:
                handle.write("")
            result = "success"
        except Exception as err:
            result = str(err)

        logger.debug(
            f"\nProcessed CodeEditor.DELETE, Input filename: {inputs} Output Answer: {result}"
        )
        return result
--- a/core/tools/editor/patch.py
+++ b/core/tools/editor/patch.py
--- a/core/tools/editor/read.py
+++ b/core/tools/editor/read.py
--- a/core/tools/editor/write.py
+++ b/core/tools/editor/write.py
@ -6,13 +6,14 @@ write protocol:
 """
 import os
 from pathlib import Path
 from env import settings
 class WriteCommand:
    separator = "\n"
    def __init__(self, filepath: str, content: int):
-        self.filepath: str = filepath
+        self.filepath: str = str(Path(settings["PLAYGROUND_DIR"]) / Path(filepath))
        self.content: str = content
        self.mode: str = "w"
@ -23,9 +24,9 @@ class WriteCommand:
    def execute(self) -> str:
        # make sure the directory exists
        if not str(Path(self.filepath).resolve()).startswith(
-            str(Path("playground/").resolve())
+            str(Path(settings["PLAYGROUND_DIR"]).resolve())
        ):
-            return "You can't write file outside of playground folder."
+            return "You can't write file outside of current directory."
        os.makedirs(os.path.dirname(self.filepath), exist_ok=True)
        with open(self.filepath, self.mode) as f:
--- a/core/tools/terminal/init.py
+++ b/core/tools/terminal/init.py
@ -0,0 +1,67 @@
 import subprocess
 from typing import Dict, List
 from tempfile import TemporaryFile
 from env import settings
 from logger import logger
 from core.tools.base import tool, BaseToolSet, ToolScope, SessionGetter
 from core.tools.terminal.syscall import SyscallTracer
 class Terminal(BaseToolSet):
    """Tool set that runs shell commands under a syscall tracer."""

    def __init__(self):
        # Tracers keyed by session id; nothing in this class fills it yet.
        self.sessions: Dict[str, List[SyscallTracer]] = {}

    @tool(
        name="Terminal",
        description="Executes commands in a terminal."
        "If linux errno occurs, we have to solve the problem with the terminal. "
        "It can't execute interactive operations or blocking operations. "
        "Input should be valid commands, "
        "and the output will be any output from running that command.",
        scope=ToolScope.SESSION,
    )
    def execute(self, commands: str, get_session: SessionGetter) -> str:
        """Run ``commands`` in a shell and return what they printed.

        The shell is started with ``settings["PLAYGROUND_DIR"]`` as its
        working directory, and stdout and stderr are both captured into one
        temporary file. A ``SyscallTracer`` is attached to the shell and this
        method returns once ``wait_until_stop_or_exit`` returns.

        Args:
            commands: Shell command line to execute.
            get_session: Callable returning a ``(session, _)`` pair.

        Returns:
            The captured output, cut to 1000 characters plus "..." when
            longer. If anything raises along the way, the exception's string
            form is returned instead.
        """
        session, _ = get_session()

        try:
            with TemporaryFile() as fp:
                process = subprocess.Popen(
                    commands,
                    shell=True,
                    cwd=settings["PLAYGROUND_DIR"],
                    stdout=fp,
                    stderr=fp,
                )

                # NOTE(review): the tracer is never detached in this method;
                # confirm that leaving the process traced is intended.
                tracer = SyscallTracer(process.pid)
                tracer.attach()
                exitcode, reason = tracer.wait_until_stop_or_exit()
                logger.debug(f"Stopped terminal execution: {exitcode} {reason}")

                fp.seek(0)
                # Commands may emit bytes that are not valid UTF-8; substitute
                # them instead of letting a UnicodeDecodeError discard all of
                # the captured output.
                output = fp.read().decode(errors="replace")
        except Exception as e:
            output = str(e)

        # Keep the reply short enough to hand back to the caller.
        if len(output) > 1000:
            output = output[:1000] + "..."

        logger.debug(
            f"\nProcessed Terminal, Input Commands: {commands} "
            f"Output Answer: {output}"
        )
        return output
 if __name__ == "__main__":
    import time

    # Manual smoke run: a command with pauses between its prints, executed
    # with a stub session getter.
    demo_output = Terminal().execute(
        "sleep 1; echo 1; sleep 2; echo 2; sleep 3; echo 3; sleep 10;",
        lambda: ("", None),
    )
    print(demo_output)

    time.sleep(10)  # see if timer has reset
--- a/core/tools/terminal/syscall.py
+++ b/core/tools/terminal/syscall.py
@ -0,0 +1,103 @@
 from typing import Tuple, Optional
 import signal
 from ptrace.debugger import (
    PtraceDebugger,
    PtraceProcess,
    ProcessExit,
    ProcessSignal,
    NewProcessEvent,
    ProcessExecution,
 )
 from ptrace.syscall import PtraceSyscall
 from ptrace.func_call import FunctionCallOptions
 from ptrace.tools import signal_to_exitcode
 class SyscallTimeoutException(Exception):
    """Signals that waiting for a process's syscall ran past its deadline.

    The first exception argument is a message naming ``pid``; any further
    positional arguments are passed through to ``Exception`` unchanged.
    """

    def __init__(self, pid: int, *args) -> None:
        message = f"deadline exceeded while waiting syscall for {pid}"
        super().__init__(message, *args)
 class SyscallTracer:
    """Trace the system calls of one process through python-ptrace.

    Typical use: construct with a pid, call ``attach()``, then
    ``wait_until_stop_or_exit()``. Timeouts are implemented with SIGALRM.
    """

    def __init__(self, pid: int):
        self.debugger: PtraceDebugger = PtraceDebugger()
        self.pid: int = pid
        # Set by attach(); stays None until then.
        self.process: Optional[PtraceProcess] = None

    def is_waiting(self, syscall: "PtraceSyscall") -> bool:
        """Return True when the syscall's name starts with ``wait``."""
        return syscall.name.startswith("wait")

    def attach(self):
        """Register ``self.pid`` with the debugger and keep the process handle."""
        self.process = self.debugger.addProcess(self.pid, False)

    def detach(self):
        """Detach from the traced process and shut the debugger down."""
        self.process.detach()
        self.debugger.quit()

    def set_timer(self, timeout: int):
        """Arm SIGALRM to raise ``SyscallTimeoutException`` after ``timeout`` seconds.

        This installs a new SIGALRM handler, replacing whichever one was
        registered before.
        """

        def handler(signum, frame):
            raise SyscallTimeoutException(self.process.pid)

        signal.signal(signal.SIGALRM, handler)
        signal.alarm(timeout)

    def reset_timer(self):
        """Cancel any pending alarm."""
        signal.alarm(0)

    def wait_syscall_with_timeout(self, timeout: int):
        """Block until the process's next syscall stop, or time out.

        Raises:
            SyscallTimeoutException: if ``timeout`` seconds elapse first.
            Anything ``waitSyscall()`` raises is propagated unchanged.
        """
        self.set_timer(timeout)
        try:
            self.process.waitSyscall()
        finally:
            # Always disarm: waitSyscall() reports ptrace events by raising,
            # and a leftover alarm would fire later in unrelated code.
            self.reset_timer()

    def wait_until_stop_or_exit(self) -> Tuple[Optional[int], str]:
        """Step the process syscall by syscall until tracing ends.

        The loop ends when the debugger no longer tracks any process, or when
        a wait fails with anything other than a handled ptrace event; that
        includes a wait exceeding its 5-second timeout.

        Returns:
            ``(exitcode, reason)``. ``exitcode`` is the last exit code seen
            (from a process exit, or derived from a received signal) or
            ``None``. ``reason`` is the last signal reason or the text of the
            exception that ended the loop, ``""`` if neither occurred.
        """
        self.process.syscall()
        exitcode = None
        reason = ""

        # A debugger with no remaining processes is falsy.
        while self.debugger:
            try:
                self.wait_syscall_with_timeout(5)
            except ProcessExit as event:
                if event.exitcode is not None:
                    exitcode = event.exitcode
                continue
            except ProcessSignal as event:
                # Resume the process, passing the signal on to it.
                event.process.syscall(event.signum)
                exitcode = signal_to_exitcode(event.signum)
                reason = event.reason
                continue
            except (NewProcessEvent, ProcessExecution):
                continue
            except Exception as e:
                reason = str(e)
                break

            # The returned event is unused; the call is kept so the
            # syscall state is still updated on every stop.
            self.process.syscall_state.event(
                FunctionCallOptions(
                    write_types=False,
                    write_argname=False,
                    string_max_length=300,
                    replace_socketcall=True,
                    write_address=False,
                    max_array_count=20,
                )
            )
            self.process.syscall()

        self.reset_timer()
        return exitcode, reason
--- a/env.py
+++ b/env.py
@ -12,6 +12,8 @@ class DotEnv(TypedDict):
    PORT: int
    SERVER: str
    USE_GPU: bool  # optional
    PLAYGROUND_DIR: str  # optional
    LOG_LEVEL: str  # optional
    BOT_NAME: str  # optional
    AWS_ACCESS_KEY_ID: str  # optional
@ -29,6 +31,8 @@ PORT = int(os.getenv("PORT", 8000))
 settings: DotEnv = {
    "PORT": PORT,
    "SERVER": os.getenv("SERVER", f"http://localhost:{PORT}"),
    "USE_GPU": os.getenv("USE_GPU", "False").lower() == "true",
    "PLAYGROUND_DIR": os.getenv("PLAYGROUND_DIR", "playground"),
    "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
    "LOG_LEVEL": os.getenv("LOG_LEVEL", "INFO"),
    "BOT_NAME": os.getenv("BOT_NAME", "Orca"),
--- a/poetry.lock
+++ b/poetry.lock
@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.4.0 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand.
 [[package]]
 name = "accelerate"
@ -1730,6 +1730,18 @@ files = [
 [package.extras]
 cli = ["click (>=5.0)"]
 [[package]]
 name = "python-ptrace"
 version = "0.9.8"
 description = "python binding of ptrace"
 category = "main"
 optional = false
 python-versions = "*"
 files = [
    {file = "python-ptrace-0.9.8.tar.gz", hash = "sha256:1e3bc6223f626aaacde8a7979732691c11b13012e702fee9ae16c87f71633eaa"},
    {file = "python_ptrace-0.9.8-py2.py3-none-any.whl", hash = "sha256:440c58a47423eb6eeea419854b9c6c28bfd9fd6ab9ae6630a7ea8be4600b1369"},
 ]
 [[package]]
 name = "pytz"
 version = "2022.7.1"
@ -2423,6 +2435,15 @@ category = "main"
 optional = false
 python-versions = "*"
 files = [
    {file = "triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38806ee9663f4b0f7cd64790e96c579374089e58f49aac4a6608121aa55e2505"},
    {file = "triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:226941c7b8595219ddef59a1fdb821e8c744289a132415ddd584facedeb475b1"},
    {file = "triton-2.0.0-1-cp36-cp36m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4c9fc8c89874bc48eb7e7b2107a9b8d2c0bf139778637be5bfccb09191685cfd"},
    {file = "triton-2.0.0-1-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d2684b6a60b9f174f447f36f933e9a45f31db96cb723723ecd2dcfd1c57b778b"},
    {file = "triton-2.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9d4978298b74fcf59a75fe71e535c092b023088933b2f1df933ec32615e4beef"},
    {file = "triton-2.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:74f118c12b437fb2ca25e1a04759173b517582fcf4c7be11913316c764213656"},
    {file = "triton-2.0.0-1-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9618815a8da1d9157514f08f855d9e9ff92e329cd81c0305003eb9ec25cc5add"},
    {file = "triton-2.0.0-1-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1aca3303629cd3136375b82cb9921727f804e47ebee27b2677fef23005c3851a"},
    {file = "triton-2.0.0-1-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e3e13aa8b527c9b642e3a9defcc0fbd8ffbe1c80d8ac8c15a01692478dc64d8a"},
    {file = "triton-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f05a7e64e4ca0565535e3d5d3405d7e49f9d308505bb7773d21fb26a4c008c2"},
    {file = "triton-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb4b99ca3c6844066e516658541d876c28a5f6e3a852286bbc97ad57134827fd"},
    {file = "triton-2.0.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47b4d70dc92fb40af553b4460492c31dc7d3a114a979ffb7a5cdedb7eb546c08"},
@ -2645,4 +2666,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "6983797961948c5893f2b0495acad6ae03cf6d012162a58bee07d671d17acf88"
+content-hash = "fa0a34600af8dc2479d51d16e1b824e50d14e2f0024b750a2b547d9b0312056e"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -25,6 +25,7 @@ accelerate = "^0.17.1"
 transformers = {git = "https://github.com/huggingface/transformers.git", rev = "main"}
 sentencepiece = "^0.1.97"
 bitsandbytes = "^0.37.2"
 python-ptrace = "^0.9.8"
 [tool.poetry.group.tools]
 optional = true