mirror of https://github.com/corca-ai/EVAL
Feature/terminal strace (#10)
* feat: exp
* feat: watch syscall
* feat: terminal tool with syscall tracer
* refactor: move editor into tools
* fix: errors
* refactor: syscall and terminal
* fix: USE_GPU option
* fix: hard code playground dir
* fix: toolsets
* fix: terminal cwd playground
* feat: wait syscall with timeout
* fix: reset timer on wait_until_stop_or_exit ends
* fix: handle in case exception is tuple
feature/alpaca
parent
072e16b122
commit
1fdf63741f
@ -1,3 +0,0 @@
|
|||||||
from .patch import CodePatcher
|
|
||||||
from .read import CodeReader
|
|
||||||
from .write import CodeWriter
|
|
@ -0,0 +1,142 @@
|
|||||||
|
from core.tools.base import tool, BaseToolSet
|
||||||
|
from logger import logger
|
||||||
|
|
||||||
|
from .patch import CodePatcher
|
||||||
|
from .read import CodeReader
|
||||||
|
from .write import CodeWriter
|
||||||
|
|
||||||
|
|
||||||
|
class CodeEditor(BaseToolSet):
    """Tool set exposing file read/summary/append/write/patch/delete operations.

    Every tool takes a single string, delegates to ``CodeReader``,
    ``CodeWriter`` or ``CodePatcher`` (except ``delete``, which truncates the
    file itself), and returns a string. Any exception raised by the delegate
    is caught and its message is returned as the tool output instead of
    propagating.
    """

    @tool(
        name="CodeEditor.READ",
        description="Read and understand code. "
        "Input should be filename and line number group. ex. test.py|1-10 "
        "and the output will be code. ",
    )
    def read(self, inputs: str) -> str:
        """Return the result of ``CodeReader.read(inputs)``, or the error text."""
        try:
            output = CodeReader.read(inputs)
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.READ, Input Commands: {inputs} "
            f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.SUMMARY",
        description="Summary code. "
        "Read the code structured into a tree. "
        "If you set specific line, it will show the code from the specific line. "
        "Input should be filename, depth, and specific line if you want. ex. test.py|2 or test.py|3|print('hello world') "
        "and the output will be list of (line number: code). ",
    )
    def summary(self, inputs: str) -> str:
        """Return the result of ``CodeReader.summary(inputs)``, or the error text."""
        try:
            output = CodeReader.summary(inputs)
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.SUMMARY, Input Commands: {inputs} "
            f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.APPEND",
        description="Append code to the existing file. "
        "If the code is completed, use the Terminal tool to execute it, if not, append the code through this tool. "
        "Input should be filename and code to append. "
        "Input code must be the code that should be appended, NOT whole code. "
        "ex. test.py\nprint('hello world')\n "
        "and the output will be last 3 line.",
    )
    def append(self, inputs: str) -> str:
        """Append via ``CodeWriter.append`` and report the last three lines.

        The string returned by ``CodeWriter.append`` is split on newlines and
        its final three lines are echoed back, followed by a hint about
        continuing with CodeEditor.APPEND.
        """
        try:
            code = CodeWriter.append(inputs)
            output = (
                "Last 3 line was:\n"
                + "\n".join(code.split("\n")[-3:])
                + "\nYou can use CodeEditor.APPEND tool to append the code if it is not completed."
            )
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.APPEND, Input: {inputs} "
            f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.WRITE",
        description="Write code to create a new tool. "
        "If the code is completed, use the Terminal tool to execute it, if not, append the code through the CodeEditor.APPEND tool. "
        "Input should be filename and code. This file must be in playground folder. "
        "ex. test.py\nprint('hello world')\n "
        "and the output will be last 3 line.",
    )
    def write(self, inputs: str) -> str:
        """Write via ``CodeWriter.write`` and report the last three lines.

        The string returned by ``CodeWriter.write`` is split on newlines and
        its final three lines are echoed back, followed by a hint about
        continuing with CodeEditor.APPEND.
        """
        try:
            code = CodeWriter.write(inputs)
            output = (
                "Last 3 line was:\n"
                + "\n".join(code.split("\n")[-3:])
                + "\nYou can use CodeEditor.APPEND tool to append the code if it is not completed."
            )
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.WRITE, Input: {inputs} " f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.PATCH",
        # The separator is shown with its newlines escaped so the description
        # stays readable. A newline now follows the patch format line; it was
        # previously glued to the next sentence ("<new_code>Code between...").
        description="Patch the code to correct the error if an error occurs or to improve it. "
        "Input is a list of patches. The patch is separated by {separator}. ".format(
            separator=CodePatcher.separator.replace("\n", "\\n")
        )
        + "Each patch has to be formatted like below.\n"
        "<filepath>|<start_line>,<start_col>|<end_line>,<end_col>|<new_code>\n"
        "Code between start and end will be replaced with new_code. "
        "The output will be written/deleted bytes or error message. ",
    )
    def patch(self, patches: str) -> str:
        """Apply patches via ``CodePatcher.patch`` and report its two results.

        ``CodePatcher.patch`` is expected to return a pair, reported as
        "wrote" and "deleted" in the success message.
        """
        try:
            w, d = CodePatcher.patch(patches)
            output = f"successfully wrote {w}, deleted {d}"
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.PATCH, Input Patch: {patches} "
            f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.DELETE",
        description="Delete code in file for a new start. "
        "Input should be filename. "
        "ex. test.py "
        "Output will be success or error message.",
    )
    def delete(self, inputs: str) -> str:
        """Truncate the named file to zero length; return "success" or the error text.

        The file is created if it does not exist, because it is opened in
        write mode.
        """
        # NOTE(review): the path is opened exactly as given (relative to the
        # process working directory). Whether it should be resolved against
        # the playground folder like the WRITE tool describes is not visible
        # from here -- confirm against CodeWriter.
        filename = inputs
        try:
            # Opening in "w" mode already truncates; no explicit write needed.
            with open(filename, "w"):
                pass
            output = "success"
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.DELETE, Input filename: {inputs} "
            f"Output Answer: {output}"
        )
        return output
|
@ -0,0 +1,67 @@
|
|||||||
|
import subprocess
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from tempfile import TemporaryFile
|
||||||
|
|
||||||
|
from env import settings
|
||||||
|
from logger import logger
|
||||||
|
from core.tools.base import tool, BaseToolSet, ToolScope, SessionGetter
|
||||||
|
from core.tools.terminal.syscall import SyscallTracer
|
||||||
|
|
||||||
|
|
||||||
|
class Terminal(BaseToolSet):
    """Tool set that runs shell commands in the playground directory.

    Each command is started with ``subprocess.Popen`` and then followed with a
    ``SyscallTracer`` until the tracer reports that the process stopped or
    exited; whatever the command wrote to stdout/stderr by then is returned.
    """

    def __init__(self):
        # Declared per-session tracer registry; nothing in this class
        # populates it yet.
        self.sessions: Dict[str, List[SyscallTracer]] = {}

    @tool(
        name="Terminal",
        description="Executes commands in a terminal. "
        "If linux errno occurs, we have to solve the problem with the terminal. "
        "It can't execute interactive operations or blocking operations. "
        "Input should be valid commands, "
        "and the output will be any output from running that command.",
        scope=ToolScope.SESSION,
    )
    def execute(self, commands: str, get_session: SessionGetter) -> str:
        """Run ``commands`` through the shell and return its combined output.

        Args:
            commands: Shell command line, executed with ``shell=True`` in
                ``settings["PLAYGROUND_DIR"]``.
            get_session: Callable returning a 2-tuple; it is invoked, but the
                returned session is not used by this method.

        Returns:
            The combined stdout/stderr captured so far, truncated to 1000
            characters plus "..." when longer. If anything raises while
            starting or tracing the process, the exception message is
            returned instead.
        """
        session, _ = get_session()

        try:
            # stdout and stderr share one temp file so their output stays
            # interleaved in the order it was written.
            with TemporaryFile() as fp:
                # NOTE: shell=True executes the input verbatim through the
                # shell; this tool is only as safe as its callers.
                process = subprocess.Popen(
                    commands,
                    shell=True,
                    cwd=settings["PLAYGROUND_DIR"],
                    stdout=fp,
                    stderr=fp,
                )

                tracer = SyscallTracer(process.pid)
                tracer.attach()
                exitcode, reason = tracer.wait_until_stop_or_exit()
                logger.debug(f"Stopped terminal execution: {exitcode} {reason}")

                fp.seek(0)
                # Commands may emit bytes that are not valid UTF-8; replace
                # them rather than raising and losing the whole output to the
                # except branch below.
                output = fp.read().decode(errors="replace")
        except Exception as e:
            output = str(e)

        if len(output) > 1000:
            output = output[:1000] + "..."

        logger.debug(
            f"\nProcessed Terminal, Input Commands: {commands} "
            f"Output Answer: {output}"
        )
        return output
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: run a command that prints with increasing pauses and
    # ends in a long sleep, show what was captured, then idle for a while.
    import time

    demo_commands = "sleep 1; echo 1; sleep 2; echo 2; sleep 3; echo 3; sleep 10;"
    captured = Terminal().execute(demo_commands, lambda: ("", None))
    print(captured)

    time.sleep(10)  # see if timer has reset
|
@ -0,0 +1,103 @@
|
|||||||
|
from typing import Tuple, Optional
|
||||||
|
import signal
|
||||||
|
|
||||||
|
from ptrace.debugger import (
|
||||||
|
PtraceDebugger,
|
||||||
|
PtraceProcess,
|
||||||
|
ProcessExit,
|
||||||
|
ProcessSignal,
|
||||||
|
NewProcessEvent,
|
||||||
|
ProcessExecution,
|
||||||
|
)
|
||||||
|
from ptrace.syscall import PtraceSyscall
|
||||||
|
from ptrace.func_call import FunctionCallOptions
|
||||||
|
from ptrace.tools import signal_to_exitcode
|
||||||
|
|
||||||
|
|
||||||
|
class SyscallTimeoutException(Exception):
    """Signals that waiting for a process's next syscall ran past its deadline.

    The first exception argument is always a message naming the pid; any
    extra positional arguments are appended after it.
    """

    def __init__(self, pid: int, *args) -> None:
        message = "deadline exceeded while waiting syscall for {}".format(pid)
        super().__init__(message, *args)
|
||||||
|
|
||||||
|
|
||||||
|
class SyscallTracer:
    """Follow the syscalls of one process via python-ptrace, with a deadline.

    Typical use is ``attach()`` followed by ``wait_until_stop_or_exit()``.
    The deadline is implemented with ``signal.alarm``/``SIGALRM``.
    NOTE(review): Python only allows installing signal handlers from the main
    thread -- confirm this is never driven from a worker thread.
    """

    def __init__(self, pid: int):
        self.debugger: PtraceDebugger = PtraceDebugger()
        self.pid: int = pid
        # Populated by attach(); None until then.
        self.process: Optional[PtraceProcess] = None

    def is_waiting(self, syscall: "PtraceSyscall") -> bool:
        """Return True when the syscall's name starts with "wait"."""
        return syscall.name.startswith("wait")

    def attach(self):
        """Register ``self.pid`` with the debugger and remember the process."""
        self.process = self.debugger.addProcess(self.pid, False)

    def detach(self):
        """Detach from the process (if attached) and shut the debugger down."""
        # Guard so that detach() before attach() still quits the debugger
        # instead of failing on a None process.
        if self.process is not None:
            self.process.detach()
        self.debugger.quit()

    def set_timer(self, timeout: int):
        """Arm a SIGALRM that raises SyscallTimeoutException after ``timeout`` seconds."""

        def handler(signum, frame):
            raise SyscallTimeoutException(self.process.pid)

        signal.signal(signal.SIGALRM, handler)
        signal.alarm(timeout)

    def reset_timer(self):
        """Cancel any pending alarm."""
        signal.alarm(0)

    def wait_syscall_with_timeout(self, timeout: int):
        """Wait for the process's next syscall event, at most ``timeout`` seconds.

        Raises:
            SyscallTimeoutException: if the alarm fires before the wait ends.
            Whatever ``waitSyscall()`` itself raises is propagated unchanged.
        """
        self.set_timer(timeout)
        try:
            self.process.waitSyscall()
        finally:
            # Always disarm: waitSyscall() reports process events by raising,
            # and a still-pending alarm would otherwise fire later in
            # unrelated code.
            self.reset_timer()

    def wait_until_stop_or_exit(self) -> Tuple[Optional[int], str]:
        """Step the process syscall by syscall until tracing ends.

        The loop ends when the debugger becomes falsy or when waiting raises
        anything other than the ptrace process events handled below -- that
        includes the 5-second SyscallTimeoutException.

        Returns:
            ``(exitcode, reason)``. ``exitcode`` is the last exit code seen
            from a ProcessExit event or derived from a received signal, or
            None if neither occurred. ``reason`` is the signal event's reason
            or the message of the exception that ended the loop, "" otherwise.
        """
        self.process.syscall()
        exitcode = None
        reason = ""

        # Formatting options are constant, so build them once rather than on
        # every loop iteration.
        options = FunctionCallOptions(
            write_types=False,
            write_argname=False,
            string_max_length=300,
            replace_socketcall=True,
            write_address=False,
            max_array_count=20,
        )

        while self.debugger:
            try:
                self.wait_syscall_with_timeout(5)
            except ProcessExit as event:
                if event.exitcode is not None:
                    exitcode = event.exitcode
                continue
            except ProcessSignal as event:
                # Forward the signal to the process and keep tracing.
                event.process.syscall(event.signum)
                exitcode = signal_to_exitcode(event.signum)
                reason = event.reason
                continue
            except (NewProcessEvent, ProcessExecution):
                continue
            except Exception as e:
                # Includes the timeout: record why we stopped and leave.
                reason = str(e)
                break

            # Advance the syscall state machine; the returned syscall object
            # is currently not inspected.
            self.process.syscall_state.event(options)
            self.process.syscall()

        self.reset_timer()
        return exitcode, reason
|
Loading…
Reference in New Issue