This commit is contained in:
Beck LaBash 2023-04-13 21:08:28 -04:00
parent c52741524c
commit 8053a90b23
22 changed files with 277 additions and 34 deletions

10
2023-04-12_23-12-20.jsonl Normal file

File diff suppressed because one or more lines are too long

22
2023-04-12_23-22-43.jsonl Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"code": "\nclass Solution():\n\n def minReverseOperations(self, n: int, p: int, banned: List[int], k: int) -> List[int]:\n \"\\n You are given an integer n and an integer p in the range [0, n - 1]. Representing a 0-indexed array arr\\xa0of length n where all positions are set to 0's, except position p which is set to 1.\\n You are also given an integer array banned containing some positions from the array. For the ith position in banned, arr[banned[i]] = 0, and banned[i] != p.\\n You can perform multiple operations on arr. In an operation, you can choose a subarray with size k and reverse the subarray. However, the 1 in arr should never go to any of the positions in banned. In other words, after each operation arr[banned[i]] remains 0.\\n Return an array ans where for each i from [0, n - 1], ans[i] is the minimum number of reverse operations needed to bring the 1 to position i in arr, or -1 if it is impossible.\\n A subarray is a contiguous non-empty sequence of elements within an array.\\n The values of ans[i] are independent for all i's.\\n The reverse of an array is an array containing the values in reverse order.\\n \"\n from typing import List\n arr = ([0] * n)\n arr[p] = 1\n for i in banned:\n arr[i] = (- 1)\n\n def reverse_subarray(subarr):\n for i in range(len(subarr)):\n if (subarr[i] == (- 1)):\n return False\n subarr.reverse()\n return True\n ans = ([(- 1)] * n)\n for i in range(n):\n if (arr[i] == (- 1)):\n continue\n subarr = arr[max(0, ((i - k) + 1)):(i + 1)]\n while (len(subarr) < k):\n subarr.insert(0, 0)\n while (len(subarr) > k):\n subarr.pop(0)\n count = 0\n while (not reverse_subarray(subarr)):\n count += 1\n if (count > n):\n return ([(- 1)] * n)\n ans[i] = count\n return ans\n", "status": "Submission Timed-Out", "reward": false, "info": {"state": "STARTED"}}

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,48 @@
def timeout_handler(_, __):
    """Abort the current operation by raising ``TimeoutError``.

    The two positional arguments are accepted and ignored, which lets the
    function be installed as a callback that is invoked with two arguments
    (for example a ``signal.signal`` handler).

    Raises:
        TimeoutError: always.
    """
    raise TimeoutError
import os, json
def to_jsonl(dict_data, file_path):
    """Append ``dict_data`` to ``file_path`` as a single JSON line.

    The file is opened in append mode, so it is created if missing and
    existing records are preserved.

    Args:
        dict_data: JSON-serializable object to write as one record.
        file_path: path of the ``.jsonl`` file to append to.

    Raises:
        TypeError: if ``dict_data`` is not JSON-serializable.
        OSError: if the file cannot be opened or written.
    """
    with open(file_path, 'a', encoding='utf-8') as file:
        json_line = json.dumps(dict_data)
        # The file is in text mode, so "\n" is already translated to the
        # platform line ending; writing os.linesep here would yield "\r\r\n"
        # on Windows and corrupt the line structure.
        file.write(json_line + "\n")
from threading import Thread
class PropagatingThread(Thread):
    """Thread whose ``join`` re-raises the target's exception or returns its result.

    ``run`` stores the target's return value in ``ret`` and any exception it
    raised in ``exc``; ``join`` then surfaces that outcome to the joining
    thread.
    """

    # Class-level defaults so that join() is safe to call while the target is
    # still running (e.g. join(timeout) expiring) or before run() has started.
    # Without them, reading self.ret / self.exc raised AttributeError.
    exc = None
    ret = None

    def run(self):
        """Run the target, capturing its return value or exception."""
        self.exc = None
        try:
            if hasattr(self, '_Thread__target'):
                # Thread uses name mangling prior to Python 3.
                self.ret = self._Thread__target(*self._Thread__args, **self._Thread__kwargs)
            else:
                self.ret = self._target(*self._args, **self._kwargs)
        except BaseException as e:
            # Stored rather than raised so it can be re-raised from join().
            self.exc = e

    def join(self, timeout=None):
        """Wait for the thread, then surface the target's outcome.

        Args:
            timeout: seconds to wait, or ``None`` to wait until the thread ends.

        Returns:
            The target's return value, or ``None`` if the thread has not
            finished yet. Use ``is_alive()`` to tell a timeout apart from a
            target that returned ``None``.

        Raises:
            BaseException: whatever the target raised, re-raised here.
        """
        super(PropagatingThread, self).join(timeout)
        # Identity check: an exception type may define a falsy __bool__/__len__.
        if self.exc is not None:
            raise self.exc
        return self.ret
def function_with_timeout(func, args, timeout):
    """Call ``func(*args)`` in a worker thread, giving up after ``timeout`` seconds.

    Args:
        func: callable to run.
        args: positional arguments for ``func``, as a tuple or other iterable.
        timeout: maximum number of seconds to wait for ``func`` to finish.

    Returns:
        Whatever ``func(*args)`` returns.

    Raises:
        TimeoutError: if ``func`` has not finished within ``timeout`` seconds.
        BaseException: any exception raised by ``func``, re-raised in the caller.
    """
    # The outcome is captured in the closure so the timeout path never has to
    # read a result that does not exist yet.
    outcome = {}

    def wrapper():
        try:
            outcome['value'] = func(*args)
        except BaseException as exc:
            outcome['error'] = exc

    # Daemon: a worker that is still running after the timeout cannot be
    # stopped from here, and it must not keep the interpreter alive at exit.
    thread = Thread(target=wrapper, daemon=True)
    thread.start()
    thread.join(timeout)
    if thread.is_alive():
        raise TimeoutError()
    if 'error' in outcome:
        raise outcome['error']
    return outcome['value']
# Py tests
# if __name__ == "__main__":

View File

@ -9,16 +9,17 @@ from .leetcode_env.leetcode_env.utils import PySubmissionFormatter, RsSubmission
def executor_factory(lang: str, is_leet: bool = False) -> Executor:
if lang == "py" or lang == "python":
if is_leet:
print("Using LeetCode Python executor")
return LeetExecutor(ProgrammingLanguage.PYTHON3,
PyExecutor(),
PySubmissionFormatter())
PySubmissionFormatter)
else:
return PyExecutor()
elif lang == "rs" or lang == "rust":
if is_leet:
return LeetExecutor(ProgrammingLanguage.RUST,
RsExecutor(),
RsSubmissionFormatter())
RsSubmissionFormatter)
else:
return RsExecutor()
else:

View File

@ -1,23 +1,32 @@
from typing import List
from .executor_types import ExecuteResult, Executor
from .executor_utils import to_jsonl
from .leetcode_env.leetcode_env.utils import SubmissionFormatter
from .leetcode_env.leetcode_env.environment import LeetCodeEnv
from .leetcode_env.leetcode_env.leetcode_types import ProgrammingLanguage, LeetCodeSubmission
from .leetcode_env.leetcode_env.utils import id_from_slug
from datetime import datetime
class LeetExecutor(Executor):
def __init__(self, lang: ProgrammingLanguage, executor: Executor, formatter: SubmissionFormatter):
self.lang = lang
self.executor = executor
self.formatter = formatter
self.env = LeetCodeEnv()
self.name = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
def execute(self, func: str, tests: List[str], timeout: int = 5) -> ExecuteResult:
return self.executor.execute(func, tests, timeout)
def evaluate(self, name: str, func: str, test: str, timeout: int = 5) -> bool:
print(f'Timeout is {timeout} seconds')
leetcode_formatted_func = self.formatter.to_leetcode(func)
print('----------------- LEETCODE SUBMISSION ------------------')
print(leetcode_formatted_func)
print('--------------------------------------------------------')
submission = LeetCodeSubmission(
code = self.formatter.to_leetcode(func),
lang = self.lang,
@ -26,7 +35,23 @@ class LeetExecutor(Executor):
timeout = timeout
)
_, reward, _, _ = self.env.step(submission)
status, reward, _, info = self.env.step(submission)
print('----------------- LEETCODE EVALUATION ------------------')
print(status)
print(info)
print('--------------------------------------------------------')
to_jsonl({
'code': leetcode_formatted_func,
'status': status,
'reward': reward,
'info': info
},
f'{self.name}.jsonl'
)
return reward

View File

@ -2,7 +2,7 @@ import ast
import signal
import astunparse
from .executor_utils import timeout_handler
from .executor_utils import timeout_handler, function_with_timeout
from typing import List
from .executor_types import ExecuteResult, Executor
@ -21,12 +21,14 @@ class PyExecutor(Executor):
for i in range(num_tests):
try:
# Set the alarm
signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(timeout)
# signal.signal(signal.SIGALRM, timeout_handler)
# signal.alarm(timeout)
function_with_timeout(exec, (func_test_list[i], globals()), timeout)
# Run the test and disable the alarm
exec(func_test_list[i], globals())
signal.alarm(0)
# exec(func_test_list[i], globals())
# signal.alarm(0)
success_tests += [tests[i]]
except Exception:
@ -66,12 +68,13 @@ check({name})
"""
try:
# Set the alarm
signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(timeout)
# signal.signal(signal.SIGALRM, timeout_handler)
# signal.alarm(timeout)
function_with_timeout(exec, (code, globals()), timeout)
# Run the test and disable the alarm
exec(code, globals())
signal.alarm(0)
# exec(code, globals())
# signal.alarm(0)
return True
except Exception:
@ -92,11 +95,13 @@ def get_output(func: str, assert_statement: str, timeout: int = 5) -> str:
exec(func, globals())
# set the alarm
signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(timeout)
# signal.signal(signal.SIGALRM, timeout_handler)
# signal.alarm(timeout)
# Run the test and disable the alarm
output = eval(func_call)
signal.alarm(0)
output = function_with_timeout(eval, (func_call,), timeout)
# output = eval(func_call)
# signal.alarm(0)
return output
except TimeoutError:
return "TIMEOUT"

View File

@ -9,8 +9,7 @@ from tenacity import (
from typing import Union, List, Optional, Callable
openai.api_key = os.getenv("OPENAI_API_KEY")
# openai.api_key = os.getenv("OPENAI_API_KEY")
def generic_generate_func_impl(
func_sig: str,
@ -38,10 +37,21 @@ def generic_generate_func_impl(
if strategy == "reflexion":
message = f"previous implementation:\n{prev_func_impl}\n\nunit tests:\n{feedback}\n\nhint:\n{self_reflection}\n\n# improved implementation\n{func_sig}"
# func_bodies is a really bad name, as it can also be just 1 string
print('----------------------- SYSTEM MESSAGE -----------------------')
print(REFLEXION_CHAT_INSTRUCTION)
print('----------------------------------------------')
print(' ----------------------- USER MESSAGE -----------------------')
print(message, flush=True)
print('----------------------------------------------')
func_bodies = gpt_chat(model, REFLEXION_CHAT_INSTRUCTION,
message, num_comps=num_comps, temperature=temperature)
else:
print('----------------------- SYSTEM MESSAGE -----------------------')
print(SIMPLE_CHAT_INSTRUCTION)
print('----------------------------------------------')
print(' ----------------------- USER MESSAGE -----------------------')
print(func_sig, flush=True)
print('----------------------------------------------')
func_bodies = gpt_chat(model, SIMPLE_CHAT_INSTRUCTION if strategy ==
"simple" else REFLEXION_CHAT_INSTRUCTION, func_sig, num_comps=num_comps, temperature=temperature)
else:
@ -56,8 +66,15 @@ def generic_generate_func_impl(
if num_comps == 1:
assert isinstance(func_bodies, str)
print('--------------------- GENERATED FUNC BODY ---------------------')
print(func_sig + fix_body(func_bodies))
print('------------------------------------------')
return func_sig + fix_body(func_bodies)
else:
print('--------------------- GENERATED FUNC BODY ---------------------')
print([func_sig + fix_body(func_body) for func_body in func_bodies])
print('------------------------------------------')
return [func_sig + fix_body(func_body) for func_body in func_bodies]
@ -78,7 +95,12 @@ def generic_generate_internal_tests(
"""
if model == "gpt-4" or model == "gpt-3.5-turbo":
message = f'{TEST_GENERATION_FEW_SHOT}\n\nfunc signature:\n{func_sig}\nunit tests:'
print('----------------------- SYSTEM MESSAGE -----------------------')
print(TEST_GENERATION_CHAT_INSTRUCTION)
print('----------------------------------------------')
print(' ----------------------- USER MESSAGE -----------------------')
print(message, flush=True)
print('----------------------------------------------')
output = gpt_chat(
model, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
else:
@ -95,6 +117,9 @@ def generic_generate_internal_tests(
# cur_tests = ... # type: ignore
# cur_refinement_num += 1
print('--------------- GENERATED TESTS: ---------------')
print(valid_tests)
print('------------------------------------------------')
return sample_n_random(valid_tests, max_num_tests)
@ -107,11 +132,20 @@ def generic_generate_self_reflection(
SELF_REFLECTION_COMPLETION_INSTRUCTION: str,
) -> str:
if model == "gpt-4" or model == "gpt-3.5-turbo":
print('----------------------- SYSTEM MESSAGE -----------------------')
print(SELF_REFLECTION_CHAT_INSTRUCTION)
print('----------------------------------------------')
print(' ----------------------- USER MESSAGE -----------------------')
print(f'{func}\n\n{feedback}\n\nExplanation:', flush=True)
print('----------------------------------------------')
reflection = gpt_chat(
model, SELF_REFLECTION_CHAT_INSTRUCTION, f'{func}\n\n{feedback}\n\nExplanation:')
else:
reflection = gpt_completion(
model, f'{SELF_REFLECTION_COMPLETION_INSTRUCTION}\n{func}\n\n{feedback}\n\nExplanation:')
print('--------------- GENERATED SELF REFLECTION: ---------------')
print(reflection)
print('----------------------------------------------------------')
return reflection # type: ignore

View File

@ -3,11 +3,13 @@ from .generator_utils import generic_generate_func_impl, gpt_chat, gpt_completio
from typing import Optional, List, Union
import ast
import re
PY_SIMPLE_COMPLETION_INSTRUCTION = "# Write the body of this function only."
PY_REFLEXION_COMPLETION_INSTRUCTION = "You are PythonGPT. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Apply the changes below by writing the body of this function only.\n\n-----"
PY_SELF_REFLECTION_COMPLETION_INSTRUCTION = "You are PythonGPT. You will be given a function implementation and a series of unit tests. Your goal is to write a few sentences to explain why your implementation is wrong as indicated by the tests. You will need this as a hint when you try again later. Only provide the few sentence description in your answer, not the implementation.\n\n-----"
PY_SIMPLE_CHAT_INSTRUCTION = "You are PythonGPT. You will be given a function signature and docstring. You should fill in the following text of the missing function body. For example, the first line of the completion should have 4 spaces for the indendation so that it fits syntactically with the preceding signature."
PY_SIMPLE_CHAT_INSTRUCTION = "You are PythonGPT, an AI that only responds with python code, NOT ENGLISH. You will be given a function signature and its docstring by the user. Respond only in code with correct implementation of the function. Do not include provided the docstring in your response." # The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
PY_REFLEXION_CHAT_INSTRUCTION = "You are PythonGPT. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Apply the changes below by writing the body of this function only. You should fill in the following text of the missing function body. For example, the first line of the completion should have 4 spaces for the indendation so that it fits syntactically with the preceding signature."
PY_SELF_REFLECTION_CHAT_INSTRUCTION = "You are PythonGPT. You will be given a function implementation and a series of unit tests. Your goal is to write a few sentences to explain why your implementation is wrong as indicated by the tests. You will need this as a hint when you try again later. Only provide the few sentence description in your answer, not the implementation."
@ -62,7 +64,7 @@ class PyGenerator(Generator):
temperature: float = 0.0,
) -> Union[str, List[str]]:
x = generic_generate_func_impl(
func_sig=f'from typing import *\n{func_sig}',
func_sig=func_sig,
model=model,
strategy=strategy,
prev_func_impl=prev_func_impl,
@ -74,9 +76,8 @@ class PyGenerator(Generator):
SIMPLE_CHAT_INSTRUCTION=PY_SIMPLE_CHAT_INSTRUCTION,
REFLEXION_COMPLETION_INSTRUCTION=PY_REFLEXION_COMPLETION_INSTRUCTION,
SIMPLE_COMPLETION_INSTRUCTION=PY_SIMPLE_COMPLETION_INSTRUCTION,
fix_body=(lambda x: x) if strategy == "simple" else py_fix_indentation
fix_body=fix_turbo_response if strategy == "simple" else py_fix_indentation
)
print(x, flush=True)
return x
@ -98,7 +99,6 @@ class PyGenerator(Generator):
parse_tests=parse_tests,
is_syntax_valid=py_is_syntax_valid,
)
print(x, flush=True)
return x
@ -118,8 +118,36 @@ def handle_entire_body_indent(func_body: str) -> str:
res = "\n".join([" " + line for line in split])
return res
def fix_turbo_response(func_body: str) -> str:
    """Clean up a generated function body.

    Runs ``remove_unindented_signatures`` on the text first, then passes the
    result through ``fix_markdown``.
    """
    without_signatures = remove_unindented_signatures(func_body)
    return fix_markdown(without_signatures)
def fix_markdown(func_body: str) -> str:
    """Strip Markdown code-fence markers from ``func_body``.

    A fence that occupies its own line is removed together with its optional
    info string (the language tag in "```python"), so the tag is not left
    behind as a stray statement. Leading whitespace and the line break of a
    fence line are kept, so line count and indentation do not change. Any
    other triple backticks are simply deleted.

    Args:
        func_body: generated code that may contain Markdown fences.

    Returns:
        ``func_body`` with all triple-backtick markers removed.
    """
    # Pass 1: whole-line fences, with or without a language tag.
    without_fence_lines = re.sub(
        r"^([ \t]*)`{3}[\w+#.-]*[ \t]*$", r"\1", func_body, flags=re.MULTILINE
    )
    # Pass 2: any remaining inline triple backticks.
    return without_fence_lines.replace("```", "")
def remove_unindented_signatures(code: str) -> str:
    """Drop column-0 ``def name(`` lines and indent the text preceding them.

    Every line that starts at column 0 with ``def <name>(`` is removed. Lines
    before the first such line are prefixed with the indent string unless
    they are blank or already start with it. Lines after it are kept
    untouched.

    Args:
        code: text to clean, with lines separated by ``"\\n"``.

    Returns:
        The remaining lines, in their original order, joined with ``"\\n"``.
    """
    # NOTE(review): the indent literal below shows as a single space in this
    # view, while the prompts mention four-space indentation. Confirm it
    # against the checked-in file.
    signature = re.compile(r"^def\s+\w+\s*\(")
    kept = []
    seen_signature = False
    for raw in code.split("\n"):
        if signature.match(raw):
            # Signature lines are dropped wherever they occur.
            seen_signature = True
        elif seen_signature or raw.startswith(" ") or not raw.strip():
            kept.append(raw)
        else:
            kept.append(" " + raw)
    return "\n".join(kept)
def py_fix_indentation(func_body: str) -> str:
func_body = fix_turbo_response(func_body)
"""
3 cases:
1. good syntax
@ -127,6 +155,7 @@ def py_fix_indentation(func_body: str) -> str:
3. entire body not good
"""
def parse_indent_rec(f_body: str, cur_state: int) -> str:
f_body = fix_markdown(f_body)
if cur_state > 1:
return f_body
code = f'{DUMMY_FUNC_SIG}\n{f_body}\n{DUMMY_FUNC_CALL}'

View File

@ -108,7 +108,8 @@ pass@k: {args.pass_at_k}
pass_at_k=args.pass_at_k,
log_path=log_path,
verbose=args.verbose,
expansion_factor=args.expansion_factor
expansion_factor=args.expansion_factor,
is_leetcode=args.is_leetcode
)
print(f"Done! Check out the logs in `{log_path}`")

View File

@ -12,9 +12,10 @@ def run_reflexion(
max_iters: int,
pass_at_k: int,
log_path: str,
verbose: bool
verbose: bool,
is_leetcode: bool = False
) -> None:
exe = executor_factory(language)
exe = executor_factory(language, is_leet=is_leetcode)
gen = generator_factory(language)
print_v = make_printv(verbose)

View File

@ -45,9 +45,10 @@ def run_reflexion_ucs(
pass_at_k: int,
log_path: str,
verbose: bool,
expansion_factor: int
expansion_factor: int,
is_leetcode: bool = False
) -> None:
exe = executor_factory(language)
exe = executor_factory(language, is_leet=is_leetcode)
gen = generator_factory(language)
num_items = len(dataset)

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,3 @@
{"task_id": "minimum-reverse-operations", "prompt": "def minReverseOperations(n: int, p: int, banned: List[int], k: int) -> List[int]:\n \"\"\"\n You are given an integer n and an integer p in the range [0, n - 1]. Representing a 0-indexed array arr of length n where all positions are set to 0's, except position p which is set to 1.\n You are also given an integer array banned containing some positions from the array. For the ith position in banned, arr[banned[i]] = 0, and banned[i] != p.\n You can perform multiple operations on arr. In an operation, you can choose a subarray with size k and reverse the subarray. However, the 1 in arr should never go to any of the positions in banned. In other words, after each operation arr[banned[i]] remains 0.\n Return an array ans where for each i from [0, n - 1], ans[i] is the minimum number of reverse operations needed to bring the 1 to position i in arr, or -1 if it is impossible.\n A subarray is a contiguous non-empty sequence of elements within an array.\n The values of ans[i] are independent for all i's.\n The reverse of an array is an array containing the values in reverse order.\n \"\"\"\n", "entry_point": "minimum-reverse-operations", "cannonical_solution": "", "test": "", "solution": "def minReverseOperations(n: int, p: int, banned: List[int], k: int) -> List[int]:\n \"\"\"\n You are given an integer n and an integer p in the range [0, n - 1]. Representing a 0-indexed array arr of length n where all positions are set to 0's, except position p which is set to 1.\n You are also given an integer array banned containing some positions from the array. For the ith position in banned, arr[banned[i]] = 0, and banned[i] != p.\n You can perform multiple operations on arr. In an operation, you can choose a subarray with size k and reverse the subarray. However, the 1 in arr should never go to any of the positions in banned. 
In other words, after each operation arr[banned[i]] remains 0.\n Return an array ans where for each i from [0, n - 1], ans[i] is the minimum number of reverse operations needed to bring the 1 to position i in arr, or -1 if it is impossible.\n A subarray is a contiguous non-empty sequence of elements within an array.\n The values of ans[i] are independent for all i's.\n The reverse of an array is an array containing the values in reverse order.\n \"\"\"\n from typing import List\n\n ans = [-1] * n\n banned_set = set(banned)\n \n for i in range(n):\n if i == p or i in banned_set:\n continue\n \n distance = abs(i - p)\n if distance % k == 0:\n operations = distance // k\n valid = True\n \n for j in range(p, i, k if i > p else -k):\n if j in banned_set:\n valid = False\n break\n \n if valid:\n ans[i] = operations\n \n ans[p] = 0\n return ans", "is_solved": false}
{"task_id": "collect-coins-in-a-tree", "prompt": "def collectTheCoins(coins: List[int], edges: List[List[int]]) -> int:\n \"\"\"\n There exists an undirected and unrooted tree with n nodes indexed from 0 to n - 1. You are given an integer n and a 2D integer array edges of length n - 1, where edges[i] = [ai, bi] indicates that there is an edge between nodes ai and bi in the tree. You are also given an array coins of size n where coins[i] can be either 0 or 1, where 1 indicates the presence of a coin in the vertex i.\n Initially, you choose to start at any vertex in the tree. Then, you can perform the following operations any number of times:\n Collect all the coins that are at a distance of at most 2 from the current vertex, or\n Move to any adjacent vertex in the tree.\n Find the minimum number of edges you need to go through to collect all the coins and go back to the initial vertex.\n Note that if you pass an edge several times, you need to count it into the answer several times.\n \"\"\"\n", "entry_point": "collect-coins-in-a-tree", "cannonical_solution": "", "test": "", "solution": "def collectTheCoins(coins: List[int], edges: List[List[int]]) -> int:\n \"\"\"\n There exists an undirected and unrooted tree with n nodes indexed from 0 to n - 1. You are given an integer n and a 2D integer array edges of length n - 1, where edges[i] = [ai, bi] indicates that there is an edge between nodes ai and bi in the tree. You are also given an array coins of size n where coins[i] can be either 0 or 1, where 1 indicates the presence of a coin in the vertex i.\n Initially, you choose to start at any vertex in the tree. 
Then, you can perform the following operations any number of times:\n Collect all the coins that are at a distance of at most 2 from the current vertex, or\n Move to any adjacent vertex in the tree.\n Find the minimum number of edges you need to go through to collect all the coins and go back to the initial vertex.\n Note that if you pass an edge several times, you need to count it into the answer several times.\n \"\"\"\n from collections import defaultdict\n from typing import List\n\n def dfs(node: int, parent: int) -> int:\n total = 0\n for neighbor in graph[node]:\n if neighbor != parent:\n total += dfs(neighbor, node)\n if coins[node] == 1:\n total += 1\n return total\n\n graph = defaultdict(list)\n for edge in edges:\n graph[edge[0]].append(edge[1])\n graph[edge[1]].append(edge[0])\n\n max_coins = 0\n for node in range(len(coins)):\n if coins[node] == 1:\n max_coins = max(max_coins, dfs(node, -1))\n\n return 2 * (max_coins - 1)", "is_solved": false}
{"task_id": "minimum-time-to-visit-a-cell-in-a-grid", "prompt": "def minimumTime(grid: List[List[int]]) -> int:\n \"\"\"\n You are given a m x n matrix grid consisting of non-negative integers where grid[row][col] represents the minimum time required to be able to visit the cell (row, col), which means you can visit the cell (row, col) only when the time you visit it is greater than or equal to grid[row][col].\n You are standing in the top-left cell of the matrix in the 0th second, and you must move to any adjacent cell in the four directions: up, down, left, and right. Each move you make takes 1 second.\n Return the minimum time required in which you can visit the bottom-right cell of the matrix. If you cannot visit the bottom-right cell, then return -1.\n \"\"\"\n", "entry_point": "minimum-time-to-visit-a-cell-in-a-grid", "cannonical_solution": "", "test": "", "solution": "def minimumTime(grid: List[List[int]]) -> int:\n \"\"\"\n You are given a m x n matrix grid consisting of non-negative integers where grid[row][col] represents the minimum time required to be able to visit the cell (row, col), which means you can visit the cell (row, col) only when the time you visit it is greater than or equal to grid[row][col].\n You are standing in the top-left cell of the matrix in the 0th second, and you must move to any adjacent cell in the four directions: up, down, left, and right. Each move you make takes 1 second.\n Return the minimum time required in which you can visit the bottom-right cell of the matrix. 
If you cannot visit the bottom-right cell, then return -1.\n \"\"\"\n from heapq import heappush, heappop\n from typing import List\n\n m, n = len(grid), len(grid[0])\n visited = [[False for _ in range(n)] for _ in range(m)]\n directions = [(0, 1), (1, 0), (0, -1), (-1, 0)]\n queue = [(grid[0][0], 0, 0)]\n\n while queue:\n time, x, y = heappop(queue)\n if x == m - 1 and y == n - 1:\n return time\n for dx, dy in directions:\n nx, ny = x + dx, y + dy\n if 0 <= nx < m and 0 <= ny < n and not visited[nx][ny]:\n visited[nx][ny] = True\n heappush(queue, (max(time, grid[nx][ny]), nx, ny))\n\n return -1", "is_solved": false}

11
run_reflexion_py_leet.sh Executable file
View File

@ -0,0 +1,11 @@
# Launch main.py with the "reflexion" strategy on the leetcode-hard Python
# dataset, using the gpt-4 model with pass@1 and max_iters 5.
# --is_leetcode and --verbose are passed as bare flags.
python main.py \
--run_name "reflexion_leetcode_python3_gpt4" \
--root_dir "root" \
--dataset_path ./executors/leetcode_env/leetcode_dataset/data/humaneval/leetcode-hard-py-40-uncontaminated.jsonl \
--strategy "reflexion" \
--language "py" \
--model "gpt-4" \
--pass_at_k "1" \
--max_iters "5" \
--is_leetcode \
--verbose

View File

@ -1,5 +1,5 @@
python main.py \
--run_name "simple_leetcode_python3" \
--run_name "simple_leetcode_python3_gpt4" \
--root_dir "root" \
--dataset_path ./executors/leetcode_env/leetcode_dataset/data/humaneval/leetcode-hard-py-40-uncontaminated.jsonl \
--strategy "simple" \
@ -7,4 +7,5 @@ python main.py \
--model "gpt-4" \
--pass_at_k "1" \
--max_iters "1" \
--is_leetcode \
--verbose

View File

@ -13,9 +13,10 @@ def run_simple(
language: str,
pass_at_k: int,
log_path: str,
verbose: bool
verbose: bool,
is_leetcode: bool = False
) -> None:
exe = executor_factory(language)
exe = executor_factory(language, is_leet=is_leetcode)
gen = generator_factory(language)
print_v = make_printv(verbose)
@ -29,7 +30,7 @@ def run_simple(
while cur_pass < pass_at_k:
cur_func_impl = gen.func_impl(item["prompt"], model, "simple")
assert isinstance(cur_func_impl, str)
is_passing = exe.evaluate(item["entry_point"], cur_func_impl, item["test"], timeout=10)
is_passing = exe.evaluate(item["entry_point"], cur_func_impl, item["test"], timeout = 20 if is_leetcode else 10)
if is_passing:
is_solved = True
num_success += 1

View File

@ -11,9 +11,10 @@ def run_test_acc(
language: str,
pass_at_k: int,
log_path: str,
verbose: bool
verbose: bool,
is_leetcode: bool = False
) -> None:
exe = executor_factory(language)
exe = executor_factory(language, is_leet=is_leetcode)
gen = generator_factory(language)
print_v = make_printv(verbose)