mirror of https://github.com/corca-ai/EVAL
Feature/terminal strace (#10)
* feat: exp * feat: watch syscall * feat: terminal tool with syscall tracer * refactor: move editor into tools * fix: errors * refactor: syscall and terminal * fix: USE_GPU option * fix: hard code playground dir * fix: toolsets * fix: terminal cwd playground * feat: wait syscall with timeout * fix: reset timer on wait_until_stop_or_exit ends * fix: handle in case exception is tuple feature/alpaca
parent
072e16b122
commit
1fdf63741f
@ -1,3 +0,0 @@
|
||||
from .patch import CodePatcher
|
||||
from .read import CodeReader
|
||||
from .write import CodeWriter
|
@ -0,0 +1,142 @@
|
||||
from core.tools.base import tool, BaseToolSet
|
||||
from logger import logger
|
||||
|
||||
from .patch import CodePatcher
|
||||
from .read import CodeReader
|
||||
from .write import CodeWriter
|
||||
|
||||
|
||||
class CodeEditor(BaseToolSet):
    """Tool set exposing read, summary, append, write, patch and delete tools.

    Every tool takes a single string argument and returns a string. Work is
    delegated to ``CodeReader``, ``CodeWriter`` and ``CodePatcher``; any
    exception raised while running a tool is converted to its message and
    returned as the tool output instead of propagating. Each call is logged
    at debug level together with its input and output.
    """

    @tool(
        name="CodeEditor.READ",
        description="Read and understand code. "
        "Input should be filename and line number group. ex. test.py|1-10 "
        "and the output will be code. ",
    )
    def read(self, inputs: str) -> str:
        """Return ``CodeReader.read(inputs)``, or the error text on failure."""
        try:
            output = CodeReader.read(inputs)
        except Exception as e:
            # The error message becomes the tool output.
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.READ, Input Commands: {inputs} "
            f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.SUMMARY",
        description="Summary code. "
        "Read the code structured into a tree. "
        "If you set specific line, it will show the code from the specific line. "
        "Input should be filename, depth, and specific line if you want. ex. test.py|2 or test.py|3|print('hello world') "
        "and the output will be list of (line number: code). ",
    )
    def summary(self, inputs: str) -> str:
        """Return ``CodeReader.summary(inputs)``, or the error text on failure."""
        try:
            output = CodeReader.summary(inputs)
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.SUMMARY, Input Commands: {inputs} "
            f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.APPEND",
        description="Append code to the existing file. "
        "If the code is completed, use the Terminal tool to execute it, if not, append the code through the this tool. "
        "Input should be filename and code to append. "
        "Input code must be the code that should be appended, NOT whole code. "
        "ex. test.py\nprint('hello world')\n "
        "and the output will be last 3 line.",
    )
    def append(self, inputs: str) -> str:
        """Append code via ``CodeWriter.append`` and report its last 3 lines.

        On failure the exception message is returned instead.
        """
        try:
            code = CodeWriter.append(inputs)
            output = (
                "Last 3 line was:\n"
                + "\n".join(code.split("\n")[-3:])
                + "\nYou can use CodeEditor.APPEND tool to append the code if it is not completed."
            )
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.APPEND, Input: {inputs} "
            f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.WRITE",
        description="Write code to create a new tool. "
        "If the code is completed, use the Terminal tool to execute it, if not, append the code through the CodeEditor.APPEND tool. "
        "Input should be filename and code. This file must be in playground folder. "
        "ex. test.py\nprint('hello world')\n "
        "and the output will be last 3 line.",
    )
    def write(self, inputs: str) -> str:
        """Write code via ``CodeWriter.write`` and report its last 3 lines.

        On failure the exception message is returned instead.
        """
        try:
            code = CodeWriter.write(inputs)
            output = (
                "Last 3 line was:\n"
                + "\n".join(code.split("\n")[-3:])
                + "\nYou can use CodeEditor.APPEND tool to append the code if it is not completed."
            )
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.WRITE, Input: {inputs} " f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.PATCH",
        description="Patch the code to correct the error if an error occurs or to improve it. "
        "Input is a list of patches. The patch is separated by {separator}. ".format(
            # Show the separator with its newlines escaped so it stays on one line.
            separator=CodePatcher.separator.replace("\n", "\\n")
        )
        + "Each patch has to be formatted like below.\n"
        # The trailing newline keeps the format line apart from the sentence
        # that follows it; without it the two literals run together.
        "<filepath>|<start_line>,<start_col>|<end_line>,<end_col>|<new_code>\n"
        "Code between start and end will be replaced with new_code. "
        "The output will be written/deleted bytes or error message. ",
    )
    def patch(self, patches: str) -> str:
        """Apply patches via ``CodePatcher.patch`` and report the two counts.

        ``CodePatcher.patch`` is expected to return a ``(written, deleted)``
        pair. On failure the exception message is returned instead.
        """
        try:
            w, d = CodePatcher.patch(patches)
            output = f"successfully wrote {w}, deleted {d}"
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.PATCH, Input Patch: {patches} "
            f"Output Answer: {output}"
        )
        return output

    @tool(
        name="CodeEditor.DELETE",
        description="Delete code in file for a new start. "
        "Input should be filename. "
        "ex. test.py "
        "Output will be success or error message.",
    )
    def delete(self, inputs: str) -> str:
        """Empty the file named by ``inputs``.

        Returns ``"success"``, or the exception message if the file cannot be
        opened for writing.
        """
        filename = inputs
        try:
            # Opening in "w" mode truncates the file (and creates it if it is
            # missing); no explicit write is needed.
            with open(filename, "w"):
                pass
            output = "success"
        except Exception as e:
            output = str(e)

        logger.debug(
            f"\nProcessed CodeEditor.DELETE, Input filename: {inputs} "
            f"Output Answer: {output}"
        )
        return output
|
@ -0,0 +1,67 @@
|
||||
import subprocess
|
||||
from typing import Dict, List
|
||||
|
||||
from tempfile import TemporaryFile
|
||||
|
||||
from env import settings
|
||||
from logger import logger
|
||||
from core.tools.base import tool, BaseToolSet, ToolScope, SessionGetter
|
||||
from core.tools.terminal.syscall import SyscallTracer
|
||||
|
||||
|
||||
class Terminal(BaseToolSet):
    """Tool set that runs shell commands in the playground directory."""

    def __init__(self):
        # Tracers keyed by a string; no method visible in this class fills it.
        self.sessions: Dict[str, List[SyscallTracer]] = {}

    @tool(
        name="Terminal",
        description="Executes commands in a terminal. "
        "If linux errno occurs, we have to solve the problem with the terminal. "
        "It can't execute interactive operations or blocking operations. "
        "Input should be valid commands, "
        "and the output will be any output from running that command.",
        scope=ToolScope.SESSION,
    )
    def execute(self, commands: str, get_session: SessionGetter) -> str:
        """Run ``commands`` through the shell and return the captured output.

        stdout and stderr are both redirected to one temporary file. A
        ``SyscallTracer`` is attached to the spawned process and this method
        waits until the tracer reports that the process stopped or exited,
        then reads back whatever was written so far.

        Args:
            commands: Shell command line, executed with ``shell=True`` in
                ``settings["PLAYGROUND_DIR"]``.
            get_session: Callable returning a 2-tuple; the values are not
                used here.

        Returns:
            The captured output, cut to 1000 characters plus ``"..."`` when
            longer. If anything raises, the exception message is returned
            instead.
        """
        # The result is unused, but the call and unpacking are kept as written.
        session, _ = get_session()

        try:
            with TemporaryFile() as fp:
                # NOTE(review): the command string goes to the shell unescaped;
                # that appears to be the purpose of this tool, so it is kept.
                process = subprocess.Popen(
                    commands,
                    shell=True,
                    cwd=settings["PLAYGROUND_DIR"],
                    stdout=fp,
                    stderr=fp,
                )

                tracer = SyscallTracer(process.pid)
                tracer.attach()
                exitcode, reason = tracer.wait_until_stop_or_exit()
                logger.debug(f"Stopped terminal execution: {exitcode} {reason}")

                fp.seek(0)
                # Commands may emit bytes that are not valid UTF-8. Replace
                # them rather than raise, which would discard all captured
                # output in favour of a decode error message.
                output = fp.read().decode(errors="replace")
        except Exception as e:
            output = str(e)

        if len(output) > 1000:
            output = output[:1000] + "..."

        logger.debug(
            f"\nProcessed Terminal, Input Commands: {commands} "
            f"Output Answer: {output}"
        )
        return output
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run a command that prints between sleeps and ends
    # with a long sleep, then print whatever the tool returned.
    import time

    demo_commands = "sleep 1; echo 1; sleep 2; echo 2; sleep 3; echo 3; sleep 10;"
    fake_session_getter = lambda: ("", None)

    captured = Terminal().execute(demo_commands, fake_session_getter)
    print(captured)

    time.sleep(10)  # see if timer has reset
|
@ -0,0 +1,103 @@
|
||||
from typing import Tuple, Optional
|
||||
import signal
|
||||
|
||||
from ptrace.debugger import (
|
||||
PtraceDebugger,
|
||||
PtraceProcess,
|
||||
ProcessExit,
|
||||
ProcessSignal,
|
||||
NewProcessEvent,
|
||||
ProcessExecution,
|
||||
)
|
||||
from ptrace.syscall import PtraceSyscall
|
||||
from ptrace.func_call import FunctionCallOptions
|
||||
from ptrace.tools import signal_to_exitcode
|
||||
|
||||
|
||||
class SyscallTimeoutException(Exception):
    """Signals that the deadline passed while waiting for a process's syscall."""

    def __init__(self, pid: int, *args) -> None:
        # The message names the pid; extra positional args follow it in ``args``.
        message = f"deadline exceeded while waiting syscall for {pid}"
        super().__init__(message, *args)
|
||||
|
||||
|
||||
class SyscallTracer:
    """Trace the syscalls of a single process through python-ptrace.

    Usage is ``attach()`` followed by ``wait_until_stop_or_exit()``. Timeouts
    are implemented with ``signal.alarm`` and a ``SIGALRM`` handler.
    NOTE(review): Python only allows installing signal handlers from the main
    thread, so this presumably must run there; confirm against callers.
    """

    def __init__(self, pid: int):
        self.debugger: PtraceDebugger = PtraceDebugger()
        self.pid: int = pid
        # Assigned by attach(); None until then.
        self.process: Optional[PtraceProcess] = None

    def is_waiting(self, syscall: PtraceSyscall) -> bool:
        """Return True when the syscall's name starts with ``"wait"``."""
        return syscall.name.startswith("wait")

    def attach(self):
        """Register ``self.pid`` with the debugger and keep the process handle."""
        # The second argument is False: the process is not attached yet.
        self.process = self.debugger.addProcess(self.pid, False)

    def detach(self):
        """Detach from the traced process and shut the debugger down."""
        self.process.detach()
        self.debugger.quit()

    def set_timer(self, timeout: int):
        """Arm a SIGALRM that raises ``SyscallTimeoutException`` after ``timeout`` seconds.

        Replaces whatever SIGALRM handler was installed before.
        """

        def handler(signum, frame):
            raise SyscallTimeoutException(self.process.pid)

        signal.signal(signal.SIGALRM, handler)
        signal.alarm(timeout)

    def reset_timer(self):
        """Cancel any pending alarm."""
        signal.alarm(0)

    def wait_syscall_with_timeout(self, timeout: int):
        """Wait for the next syscall event, for at most ``timeout`` seconds.

        Raises:
            SyscallTimeoutException: if the alarm fires first.
            Any event or error raised by ``waitSyscall()`` propagates.
        """
        self.set_timer(timeout)
        try:
            self.process.waitSyscall()
        finally:
            # Cancel the alarm on every exit path. waitSyscall() reports
            # process events by raising; if the alarm stayed armed it could
            # fire later inside the caller's exception handlers and escape
            # as an uncaught SyscallTimeoutException.
            self.reset_timer()

    def wait_until_stop_or_exit(self) -> Tuple[Optional[int], str]:
        """Step the process syscall by syscall until it exits or stalls.

        Each step waits at most 5 seconds for the next syscall event. The
        loop ends when the debugger becomes falsy or when any exception other
        than the handled ptrace events is raised, which includes the timeout.

        Returns:
            ``(exitcode, reason)``. ``exitcode`` is the last exit code seen
            from a ``ProcessExit`` or derived from a signal, or None if
            neither occurred. ``reason`` is the signal event's reason or the
            message of the exception that ended the loop, or ``""``.
        """
        self.process.syscall()
        exitcode = None
        reason = ""
        while True:
            if not self.debugger:
                break

            try:
                self.wait_syscall_with_timeout(5)
            except ProcessExit as event:
                if event.exitcode is not None:
                    exitcode = event.exitcode
                continue
            except ProcessSignal as event:
                # Forward the signal to the process and keep tracing.
                event.process.syscall(event.signum)
                exitcode = signal_to_exitcode(event.signum)
                reason = event.reason
                continue
            except NewProcessEvent:
                continue
            except ProcessExecution:
                continue
            except Exception as e:
                # Includes SyscallTimeoutException: record why and stop.
                reason = str(e)
                break

            syscall = self.process.syscall_state.event(
                FunctionCallOptions(
                    write_types=False,
                    write_argname=False,
                    string_max_length=300,
                    replace_socketcall=True,
                    write_address=False,
                    max_array_count=20,
                )
            )

            # Resume the process until its next syscall event.
            self.process.syscall()

            # Both checks only ``continue`` and nothing follows them in the
            # loop body, so they currently have no effect on the outcome.
            if syscall is None:
                continue

            if syscall.result:
                continue

        self.reset_timer()

        return exitcode, reason
|
Loading…
Reference in New Issue