mirror of
https://github.com/GammaTauAI/reflexion-human-eval
synced 2024-11-16 00:12:59 +00:00
Fixes
This commit is contained in:
parent
c52741524c
commit
8053a90b23
10
2023-04-12_23-12-20.jsonl
Normal file
10
2023-04-12_23-12-20.jsonl
Normal file
File diff suppressed because one or more lines are too long
22
2023-04-12_23-22-43.jsonl
Normal file
22
2023-04-12_23-22-43.jsonl
Normal file
File diff suppressed because one or more lines are too long
8
2023-04-12_23-36-32.jsonl
Normal file
8
2023-04-12_23-36-32.jsonl
Normal file
File diff suppressed because one or more lines are too long
2
2023-04-12_23-54-35.jsonl
Normal file
2
2023-04-12_23-54-35.jsonl
Normal file
File diff suppressed because one or more lines are too long
1
2023-04-13_00-00-57.jsonl
Normal file
1
2023-04-13_00-00-57.jsonl
Normal file
@ -0,0 +1 @@
|
||||
{"code": "\nclass Solution():\n\n def minReverseOperations(self, n: int, p: int, banned: List[int], k: int) -> List[int]:\n \"\\n You are given an integer n and an integer p in the range [0, n - 1]. Representing a 0-indexed array arr\\xa0of length n where all positions are set to 0's, except position p which is set to 1.\\n You are also given an integer array banned containing some positions from the array. For the ith position in banned, arr[banned[i]] = 0, and banned[i] != p.\\n You can perform multiple operations on arr. In an operation, you can choose a subarray with size k and reverse the subarray. However, the 1 in arr should never go to any of the positions in banned. In other words, after each operation arr[banned[i]] remains 0.\\n Return an array ans where for each i from [0, n - 1], ans[i] is the minimum number of reverse operations needed to bring the 1 to position i in arr, or -1 if it is impossible.\\n A subarray is a contiguous non-empty sequence of elements within an array.\\n The values of ans[i] are independent for all i's.\\n The reverse of an array is an array containing the values in reverse order.\\n \"\n from typing import List\n arr = ([0] * n)\n arr[p] = 1\n for i in banned:\n arr[i] = (- 1)\n\n def reverse_subarray(subarr):\n for i in range(len(subarr)):\n if (subarr[i] == (- 1)):\n return False\n subarr.reverse()\n return True\n ans = ([(- 1)] * n)\n for i in range(n):\n if (arr[i] == (- 1)):\n continue\n subarr = arr[max(0, ((i - k) + 1)):(i + 1)]\n while (len(subarr) < k):\n subarr.insert(0, 0)\n while (len(subarr) > k):\n subarr.pop(0)\n count = 0\n while (not reverse_subarray(subarr)):\n count += 1\n if (count > n):\n return ([(- 1)] * n)\n ans[i] = count\n return ans\n", "status": "Submission Timed-Out", "reward": false, "info": {"state": "STARTED"}}
|
3
2023-04-13_21-02-29.jsonl
Normal file
3
2023-04-13_21-02-29.jsonl
Normal file
File diff suppressed because one or more lines are too long
@ -1,6 +1,48 @@
|
||||
|
||||
def timeout_handler(_, __):
    """Abort the current operation by raising ``TimeoutError``.

    Both positional arguments are accepted and ignored, so the function can
    be installed wherever a two-argument callback is expected (the callers
    visible in this file register it with ``signal.signal``).
    """
    raise TimeoutError
|
||||
|
||||
import os, json
|
||||
def to_jsonl(dict_data, file_path):
    """Append ``dict_data`` to ``file_path`` as a single JSON line.

    Args:
        dict_data: JSON-serialisable object (passed to ``json.dumps``).
        file_path: Path of the ``.jsonl`` file; created if it does not exist,
            otherwise appended to.
    """
    with open(file_path, 'a', encoding='utf-8') as file:
        # Always terminate with "\n": the file is opened in text mode, which
        # already translates "\n" to the platform line ending. Writing
        # os.linesep here would produce "\r\r\n" on Windows.
        file.write(json.dumps(dict_data) + "\n")
|
||||
|
||||
from threading import Thread
|
||||
class PropagatingThread(Thread):
    """Thread whose ``join`` returns the target's result or re-raises its error.

    ``run`` stores the target's return value in ``self.ret`` and any raised
    exception in ``self.exc``; ``join`` then hands them back to the caller.
    """

    # Class-level defaults so ``join`` is safe even when ``run`` has not
    # finished (or not yet started), e.g. after ``join(timeout)`` expires.
    # Without them a timed-out join fails with AttributeError on ``self.ret``.
    exc = None
    ret = None

    def run(self):
        """Invoke the target, capturing its return value or exception."""
        self.exc = None
        self.ret = None
        try:
            if hasattr(self, '_Thread__target'):
                # Thread uses name mangling prior to Python 3.
                self.ret = self._Thread__target(*self._Thread__args, **self._Thread__kwargs)
            else:
                self.ret = self._target(*self._args, **self._kwargs)
        except BaseException as e:
            # Captured here and re-raised in the joining thread.
            self.exc = e

    def join(self, timeout=None):
        """Wait for the thread, then propagate its outcome.

        Args:
            timeout: Maximum number of seconds to wait, or ``None`` to wait
                until the thread finishes.

        Returns:
            The target's return value, or ``None`` if the wait timed out and
            the thread is still running (check ``is_alive()`` to tell apart).

        Raises:
            BaseException: Whatever the target raised, if it has finished
                with an exception.
        """
        super(PropagatingThread, self).join(timeout)
        if self.exc is not None:
            raise self.exc
        return self.ret
|
||||
|
||||
|
||||
def function_with_timeout(func, args, timeout):
    """Run ``func(*args)`` in a worker thread, waiting at most ``timeout`` seconds.

    Args:
        func: Callable to execute.
        args: Iterable of positional arguments unpacked into ``func``.
        timeout: Maximum number of seconds to wait for ``func`` to finish.

    Returns:
        The value returned by ``func(*args)``.

    Raises:
        TimeoutError: If the worker is still running once ``timeout`` expires.
            The worker is not stopped; it keeps running in the background.
    """
    result_container = []

    def wrapper():
        result_container.append(func(*args))

    # Daemon thread: a target that never returns (e.g. generated code stuck
    # in an infinite loop) must not keep the interpreter alive at exit after
    # the timeout has already been reported to the caller.
    thread = PropagatingThread(target=wrapper, daemon=True)
    thread.start()
    thread.join(timeout)

    if thread.is_alive():
        raise TimeoutError()
    return result_container[0]
|
||||
|
||||
# Py tests
|
||||
|
||||
# if __name__ == "__main__":
|
||||
|
@ -9,16 +9,17 @@ from .leetcode_env.leetcode_env.utils import PySubmissionFormatter, RsSubmission
|
||||
def executor_factory(lang: str, is_leet: bool = False) -> Executor:
|
||||
if lang == "py" or lang == "python":
|
||||
if is_leet:
|
||||
print("Using LeetCode Python executor")
|
||||
return LeetExecutor(ProgrammingLanguage.PYTHON3,
|
||||
PyExecutor(),
|
||||
PySubmissionFormatter())
|
||||
PySubmissionFormatter)
|
||||
else:
|
||||
return PyExecutor()
|
||||
elif lang == "rs" or lang == "rust":
|
||||
if is_leet:
|
||||
return LeetExecutor(ProgrammingLanguage.RUST,
|
||||
RsExecutor(),
|
||||
RsSubmissionFormatter())
|
||||
RsSubmissionFormatter)
|
||||
else:
|
||||
return RsExecutor()
|
||||
else:
|
||||
|
@ -1,23 +1,32 @@
|
||||
from typing import List
|
||||
|
||||
from .executor_types import ExecuteResult, Executor
|
||||
from .executor_utils import to_jsonl
|
||||
from .leetcode_env.leetcode_env.utils import SubmissionFormatter
|
||||
|
||||
from .leetcode_env.leetcode_env.environment import LeetCodeEnv
|
||||
from .leetcode_env.leetcode_env.leetcode_types import ProgrammingLanguage, LeetCodeSubmission
|
||||
from .leetcode_env.leetcode_env.utils import id_from_slug
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
class LeetExecutor(Executor):
|
||||
def __init__(self, lang: ProgrammingLanguage, executor: Executor, formatter: SubmissionFormatter):
|
||||
self.lang = lang
|
||||
self.executor = executor
|
||||
self.formatter = formatter
|
||||
self.env = LeetCodeEnv()
|
||||
self.name = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
|
||||
|
||||
def execute(self, func: str, tests: List[str], timeout: int = 5) -> ExecuteResult:
|
||||
return self.executor.execute(func, tests, timeout)
|
||||
|
||||
def evaluate(self, name: str, func: str, test: str, timeout: int = 5) -> bool:
|
||||
print(f'Timeout is {timeout} seconds')
|
||||
leetcode_formatted_func = self.formatter.to_leetcode(func)
|
||||
print('----------------- LEETCODE SUBMISSION ------------------')
|
||||
print(leetcode_formatted_func)
|
||||
print('--------------------------------------------------------')
|
||||
submission = LeetCodeSubmission(
|
||||
code = self.formatter.to_leetcode(func),
|
||||
lang = self.lang,
|
||||
@ -26,7 +35,23 @@ class LeetExecutor(Executor):
|
||||
timeout = timeout
|
||||
)
|
||||
|
||||
_, reward, _, _ = self.env.step(submission)
|
||||
status, reward, _, info = self.env.step(submission)
|
||||
|
||||
print('----------------- LEETCODE EVALUATION ------------------')
|
||||
print(status)
|
||||
print(info)
|
||||
print('--------------------------------------------------------')
|
||||
|
||||
to_jsonl({
|
||||
'code': leetcode_formatted_func,
|
||||
'status': status,
|
||||
'reward': reward,
|
||||
'info': info
|
||||
},
|
||||
f'{self.name}.jsonl'
|
||||
)
|
||||
|
||||
|
||||
|
||||
return reward
|
||||
|
||||
|
@ -2,7 +2,7 @@ import ast
|
||||
import signal
|
||||
import astunparse
|
||||
|
||||
from .executor_utils import timeout_handler
|
||||
from .executor_utils import timeout_handler, function_with_timeout
|
||||
|
||||
from typing import List
|
||||
from .executor_types import ExecuteResult, Executor
|
||||
@ -21,12 +21,14 @@ class PyExecutor(Executor):
|
||||
for i in range(num_tests):
|
||||
try:
|
||||
# Set the alarm
|
||||
signal.signal(signal.SIGALRM, timeout_handler)
|
||||
signal.alarm(timeout)
|
||||
# signal.signal(signal.SIGALRM, timeout_handler)
|
||||
# signal.alarm(timeout)
|
||||
|
||||
function_with_timeout(exec, (func_test_list[i], globals()), timeout)
|
||||
|
||||
# Run the test and disable the alarm
|
||||
exec(func_test_list[i], globals())
|
||||
signal.alarm(0)
|
||||
# exec(func_test_list[i], globals())
|
||||
# signal.alarm(0)
|
||||
|
||||
success_tests += [tests[i]]
|
||||
except Exception:
|
||||
@ -66,12 +68,13 @@ check({name})
|
||||
"""
|
||||
try:
|
||||
# Set the alarm
|
||||
signal.signal(signal.SIGALRM, timeout_handler)
|
||||
signal.alarm(timeout)
|
||||
# signal.signal(signal.SIGALRM, timeout_handler)
|
||||
# signal.alarm(timeout)
|
||||
function_with_timeout(exec, (code, globals()), timeout)
|
||||
|
||||
# Run the test and disable the alarm
|
||||
exec(code, globals())
|
||||
signal.alarm(0)
|
||||
# exec(code, globals())
|
||||
# signal.alarm(0)
|
||||
|
||||
return True
|
||||
except Exception:
|
||||
@ -92,11 +95,13 @@ def get_output(func: str, assert_statement: str, timeout: int = 5) -> str:
|
||||
exec(func, globals())
|
||||
|
||||
# set the alarm
|
||||
signal.signal(signal.SIGALRM, timeout_handler)
|
||||
signal.alarm(timeout)
|
||||
# signal.signal(signal.SIGALRM, timeout_handler)
|
||||
# signal.alarm(timeout)
|
||||
# Run the test and disable the alarm
|
||||
output = eval(func_call)
|
||||
signal.alarm(0)
|
||||
output = function_with_timeout(eval, (func_call,), timeout)
|
||||
# output = eval(func_call)
|
||||
# signal.alarm(0)
|
||||
|
||||
return output
|
||||
except TimeoutError:
|
||||
return "TIMEOUT"
|
||||
|
@ -9,8 +9,7 @@ from tenacity import (
|
||||
|
||||
from typing import Union, List, Optional, Callable
|
||||
|
||||
openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
# openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
def generic_generate_func_impl(
|
||||
func_sig: str,
|
||||
@ -38,10 +37,21 @@ def generic_generate_func_impl(
|
||||
if strategy == "reflexion":
|
||||
message = f"previous implementation:\n{prev_func_impl}\n\nunit tests:\n{feedback}\n\nhint:\n{self_reflection}\n\n# improved implementation\n{func_sig}"
|
||||
# func_bodies is a really bad name, as it can also be just 1 string
|
||||
print('----------------------- SYSTEM MESSAGE -----------------------')
|
||||
print(REFLEXION_CHAT_INSTRUCTION)
|
||||
print('----------------------------------------------')
|
||||
print(' ----------------------- USER MESSAGE -----------------------')
|
||||
print(message, flush=True)
|
||||
print('----------------------------------------------')
|
||||
func_bodies = gpt_chat(model, REFLEXION_CHAT_INSTRUCTION,
|
||||
message, num_comps=num_comps, temperature=temperature)
|
||||
else:
|
||||
print('----------------------- SYSTEM MESSAGE -----------------------')
|
||||
print(SIMPLE_CHAT_INSTRUCTION)
|
||||
print('----------------------------------------------')
|
||||
print(' ----------------------- USER MESSAGE -----------------------')
|
||||
print(func_sig, flush=True)
|
||||
print('----------------------------------------------')
|
||||
func_bodies = gpt_chat(model, SIMPLE_CHAT_INSTRUCTION if strategy ==
|
||||
"simple" else REFLEXION_CHAT_INSTRUCTION, func_sig, num_comps=num_comps, temperature=temperature)
|
||||
else:
|
||||
@ -56,8 +66,15 @@ def generic_generate_func_impl(
|
||||
|
||||
if num_comps == 1:
|
||||
assert isinstance(func_bodies, str)
|
||||
print('--------------------- GENERATED FUNC BODY ---------------------')
|
||||
print(func_sig + fix_body(func_bodies))
|
||||
print('------------------------------------------')
|
||||
return func_sig + fix_body(func_bodies)
|
||||
|
||||
else:
|
||||
print('--------------------- GENERATED FUNC BODY ---------------------')
|
||||
print([func_sig + fix_body(func_body) for func_body in func_bodies])
|
||||
print('------------------------------------------')
|
||||
return [func_sig + fix_body(func_body) for func_body in func_bodies]
|
||||
|
||||
|
||||
@ -78,7 +95,12 @@ def generic_generate_internal_tests(
|
||||
"""
|
||||
if model == "gpt-4" or model == "gpt-3.5-turbo":
|
||||
message = f'{TEST_GENERATION_FEW_SHOT}\n\nfunc signature:\n{func_sig}\nunit tests:'
|
||||
print('----------------------- SYSTEM MESSAGE -----------------------')
|
||||
print(TEST_GENERATION_CHAT_INSTRUCTION)
|
||||
print('----------------------------------------------')
|
||||
print(' ----------------------- USER MESSAGE -----------------------')
|
||||
print(message, flush=True)
|
||||
print('----------------------------------------------')
|
||||
output = gpt_chat(
|
||||
model, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
|
||||
else:
|
||||
@ -95,6 +117,9 @@ def generic_generate_internal_tests(
|
||||
# cur_tests = ... # type: ignore
|
||||
|
||||
# cur_refinement_num += 1
|
||||
print('--------------- GENERATED TESTS: ---------------')
|
||||
print(valid_tests)
|
||||
print('------------------------------------------------')
|
||||
|
||||
return sample_n_random(valid_tests, max_num_tests)
|
||||
|
||||
@ -107,11 +132,20 @@ def generic_generate_self_reflection(
|
||||
SELF_REFLECTION_COMPLETION_INSTRUCTION: str,
|
||||
) -> str:
|
||||
if model == "gpt-4" or model == "gpt-3.5-turbo":
|
||||
print('----------------------- SYSTEM MESSAGE -----------------------')
|
||||
print(SELF_REFLECTION_CHAT_INSTRUCTION)
|
||||
print('----------------------------------------------')
|
||||
print(' ----------------------- USER MESSAGE -----------------------')
|
||||
print(f'{func}\n\n{feedback}\n\nExplanation:', flush=True)
|
||||
print('----------------------------------------------')
|
||||
reflection = gpt_chat(
|
||||
model, SELF_REFLECTION_CHAT_INSTRUCTION, f'{func}\n\n{feedback}\n\nExplanation:')
|
||||
else:
|
||||
reflection = gpt_completion(
|
||||
model, f'{SELF_REFLECTION_COMPLETION_INSTRUCTION}\n{func}\n\n{feedback}\n\nExplanation:')
|
||||
print('--------------- GENERATED SELF REFLECTION: ---------------')
|
||||
print(reflection)
|
||||
print('----------------------------------------------------------')
|
||||
return reflection # type: ignore
|
||||
|
||||
|
||||
|
@ -3,11 +3,13 @@ from .generator_utils import generic_generate_func_impl, gpt_chat, gpt_completio
|
||||
|
||||
from typing import Optional, List, Union
|
||||
import ast
|
||||
import re
|
||||
|
||||
PY_SIMPLE_COMPLETION_INSTRUCTION = "# Write the body of this function only."
|
||||
PY_REFLEXION_COMPLETION_INSTRUCTION = "You are PythonGPT. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Apply the changes below by writing the body of this function only.\n\n-----"
|
||||
PY_SELF_REFLECTION_COMPLETION_INSTRUCTION = "You are PythonGPT. You will be given a function implementation and a series of unit tests. Your goal is to write a few sentences to explain why your implementation is wrong as indicated by the tests. You will need this as a hint when you try again later. Only provide the few sentence description in your answer, not the implementation.\n\n-----"
|
||||
PY_SIMPLE_CHAT_INSTRUCTION = "You are PythonGPT. You will be given a function signature and docstring. You should fill in the following text of the missing function body. For example, the first line of the completion should have 4 spaces for the indendation so that it fits syntactically with the preceding signature."
|
||||
|
||||
PY_SIMPLE_CHAT_INSTRUCTION = "You are PythonGPT, an AI that only responds with python code, NOT ENGLISH. You will be given a function signature and its docstring by the user. Respond only in code with correct implementation of the function. Do not include provided the docstring in your response." # The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
|
||||
PY_REFLEXION_CHAT_INSTRUCTION = "You are PythonGPT. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Apply the changes below by writing the body of this function only. You should fill in the following text of the missing function body. For example, the first line of the completion should have 4 spaces for the indendation so that it fits syntactically with the preceding signature."
|
||||
PY_SELF_REFLECTION_CHAT_INSTRUCTION = "You are PythonGPT. You will be given a function implementation and a series of unit tests. Your goal is to write a few sentences to explain why your implementation is wrong as indicated by the tests. You will need this as a hint when you try again later. Only provide the few sentence description in your answer, not the implementation."
|
||||
|
||||
@ -62,7 +64,7 @@ class PyGenerator(Generator):
|
||||
temperature: float = 0.0,
|
||||
) -> Union[str, List[str]]:
|
||||
x = generic_generate_func_impl(
|
||||
func_sig=f'from typing import *\n{func_sig}',
|
||||
func_sig=func_sig,
|
||||
model=model,
|
||||
strategy=strategy,
|
||||
prev_func_impl=prev_func_impl,
|
||||
@ -74,9 +76,8 @@ class PyGenerator(Generator):
|
||||
SIMPLE_CHAT_INSTRUCTION=PY_SIMPLE_CHAT_INSTRUCTION,
|
||||
REFLEXION_COMPLETION_INSTRUCTION=PY_REFLEXION_COMPLETION_INSTRUCTION,
|
||||
SIMPLE_COMPLETION_INSTRUCTION=PY_SIMPLE_COMPLETION_INSTRUCTION,
|
||||
fix_body=(lambda x: x) if strategy == "simple" else py_fix_indentation
|
||||
fix_body=fix_turbo_response if strategy == "simple" else py_fix_indentation
|
||||
)
|
||||
print(x, flush=True)
|
||||
return x
|
||||
|
||||
|
||||
@ -98,7 +99,6 @@ class PyGenerator(Generator):
|
||||
parse_tests=parse_tests,
|
||||
is_syntax_valid=py_is_syntax_valid,
|
||||
)
|
||||
print(x, flush=True)
|
||||
return x
|
||||
|
||||
|
||||
@ -118,8 +118,36 @@ def handle_entire_body_indent(func_body: str) -> str:
|
||||
res = "\n".join([" " + line for line in split])
|
||||
return res
|
||||
|
||||
def fix_turbo_response(func_body: str) -> str:
    """Clean a chat-model completion so it can be used as a function body.

    First passes the text through ``remove_unindented_signatures``, then
    through ``fix_markdown``, and returns the result.
    """
    without_signatures = remove_unindented_signatures(func_body)
    return fix_markdown(without_signatures)
|
||||
|
||||
def fix_markdown(func_body: str) -> str:
    """Strip Markdown code-fence markers from ``func_body``.

    Every run of three backticks is removed. When the fence is immediately
    followed by a language tag that runs to the end of the line (for example
    an opening fence tagged ``python``), the tag is removed too; otherwise a
    stray ``python`` token would be left behind in the generated code. Text
    after a fence that is not a bare tag is kept untouched, and newlines are
    preserved.

    Args:
        func_body: Raw model output that may contain fenced code blocks.

    Returns:
        The text with fence markers (and bare language tags) removed.
    """
    # The optional group only matches a tag that is alone on the rest of the
    # line; if anything else follows, just the three backticks are dropped.
    fence = r"`{3}(?:[\w+-]+[ \t]*\r?$)?"
    return re.sub(fence, "", func_body, flags=re.MULTILINE)
|
||||
|
||||
def remove_unindented_signatures(code: str) -> str:
    """Drop column-0 ``def`` lines and indent the text that precedes them.

    The input is scanned line by line:

    * a line that begins (at column 0) with a ``def name(`` signature is
      discarded;
    * every line after the first such signature is kept unchanged;
    * lines before any signature are kept, and the non-blank ones that do
      not already start with four spaces are prefixed with four spaces.

    Returns the surviving lines joined with newlines.
    """
    signature_pattern = r"^def\s+\w+\s*\("
    indent = "    "

    head = []
    tail = []
    seen_signature = False

    for raw_line in code.split("\n"):
        if re.match(signature_pattern, raw_line) is not None:
            # Signature lines themselves are never emitted.
            seen_signature = True
        elif seen_signature:
            tail.append(raw_line)
        elif raw_line.strip() and not raw_line.startswith(indent):
            head.append(indent + raw_line)
        else:
            head.append(raw_line)

    return "\n".join(head + tail)
|
||||
|
||||
|
||||
def py_fix_indentation(func_body: str) -> str:
|
||||
func_body = fix_turbo_response(func_body)
|
||||
"""
|
||||
3 cases:
|
||||
1. good syntax
|
||||
@ -127,6 +155,7 @@ def py_fix_indentation(func_body: str) -> str:
|
||||
3. entire body not good
|
||||
"""
|
||||
def parse_indent_rec(f_body: str, cur_state: int) -> str:
|
||||
f_body = fix_markdown(f_body)
|
||||
if cur_state > 1:
|
||||
return f_body
|
||||
code = f'{DUMMY_FUNC_SIG}\n{f_body}\n{DUMMY_FUNC_CALL}'
|
||||
|
3
main.py
3
main.py
@ -108,7 +108,8 @@ pass@k: {args.pass_at_k}
|
||||
pass_at_k=args.pass_at_k,
|
||||
log_path=log_path,
|
||||
verbose=args.verbose,
|
||||
expansion_factor=args.expansion_factor
|
||||
expansion_factor=args.expansion_factor,
|
||||
is_leetcode=args.is_leetcode
|
||||
)
|
||||
|
||||
print(f"Done! Check out the logs in `{log_path}`")
|
||||
|
@ -12,9 +12,10 @@ def run_reflexion(
|
||||
max_iters: int,
|
||||
pass_at_k: int,
|
||||
log_path: str,
|
||||
verbose: bool
|
||||
verbose: bool,
|
||||
is_leetcode: bool = False
|
||||
) -> None:
|
||||
exe = executor_factory(language)
|
||||
exe = executor_factory(language, is_leet=is_leetcode)
|
||||
gen = generator_factory(language)
|
||||
|
||||
print_v = make_printv(verbose)
|
||||
|
@ -45,9 +45,10 @@ def run_reflexion_ucs(
|
||||
pass_at_k: int,
|
||||
log_path: str,
|
||||
verbose: bool,
|
||||
expansion_factor: int
|
||||
expansion_factor: int,
|
||||
is_leetcode: bool = False
|
||||
) -> None:
|
||||
exe = executor_factory(language)
|
||||
exe = executor_factory(language, is_leet=is_leetcode)
|
||||
gen = generator_factory(language)
|
||||
|
||||
num_items = len(dataset)
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,3 @@
|
||||
{"task_id": "minimum-reverse-operations", "prompt": "def minReverseOperations(n: int, p: int, banned: List[int], k: int) -> List[int]:\n \"\"\"\n You are given an integer n and an integer p in the range [0, n - 1]. Representing a 0-indexed array arr of length n where all positions are set to 0's, except position p which is set to 1.\n You are also given an integer array banned containing some positions from the array. For the ith position in banned, arr[banned[i]] = 0, and banned[i] != p.\n You can perform multiple operations on arr. In an operation, you can choose a subarray with size k and reverse the subarray. However, the 1 in arr should never go to any of the positions in banned. In other words, after each operation arr[banned[i]] remains 0.\n Return an array ans where for each i from [0, n - 1], ans[i] is the minimum number of reverse operations needed to bring the 1 to position i in arr, or -1 if it is impossible.\n A subarray is a contiguous non-empty sequence of elements within an array.\n The values of ans[i] are independent for all i's.\n The reverse of an array is an array containing the values in reverse order.\n \"\"\"\n", "entry_point": "minimum-reverse-operations", "cannonical_solution": "", "test": "", "solution": "def minReverseOperations(n: int, p: int, banned: List[int], k: int) -> List[int]:\n \"\"\"\n You are given an integer n and an integer p in the range [0, n - 1]. Representing a 0-indexed array arr of length n where all positions are set to 0's, except position p which is set to 1.\n You are also given an integer array banned containing some positions from the array. For the ith position in banned, arr[banned[i]] = 0, and banned[i] != p.\n You can perform multiple operations on arr. In an operation, you can choose a subarray with size k and reverse the subarray. However, the 1 in arr should never go to any of the positions in banned. 
In other words, after each operation arr[banned[i]] remains 0.\n Return an array ans where for each i from [0, n - 1], ans[i] is the minimum number of reverse operations needed to bring the 1 to position i in arr, or -1 if it is impossible.\n A subarray is a contiguous non-empty sequence of elements within an array.\n The values of ans[i] are independent for all i's.\n The reverse of an array is an array containing the values in reverse order.\n \"\"\"\n from typing import List\n\n ans = [-1] * n\n banned_set = set(banned)\n \n for i in range(n):\n if i == p or i in banned_set:\n continue\n \n distance = abs(i - p)\n if distance % k == 0:\n operations = distance // k\n valid = True\n \n for j in range(p, i, k if i > p else -k):\n if j in banned_set:\n valid = False\n break\n \n if valid:\n ans[i] = operations\n \n ans[p] = 0\n return ans", "is_solved": false}
|
||||
{"task_id": "collect-coins-in-a-tree", "prompt": "def collectTheCoins(coins: List[int], edges: List[List[int]]) -> int:\n \"\"\"\n There exists an undirected and unrooted tree with n nodes indexed from 0 to n - 1. You are given an integer n and a 2D integer array edges of length n - 1, where edges[i] = [ai, bi] indicates that there is an edge between nodes ai and bi in the tree. You are also given an array coins of size n where coins[i] can be either 0 or 1, where 1 indicates the presence of a coin in the vertex i.\n Initially, you choose to start at any vertex in the tree. Then, you can perform the following operations any number of times:\n Collect all the coins that are at a distance of at most 2 from the current vertex, or\n Move to any adjacent vertex in the tree.\n Find the minimum number of edges you need to go through to collect all the coins and go back to the initial vertex.\n Note that if you pass an edge several times, you need to count it into the answer several times.\n \"\"\"\n", "entry_point": "collect-coins-in-a-tree", "cannonical_solution": "", "test": "", "solution": "def collectTheCoins(coins: List[int], edges: List[List[int]]) -> int:\n \"\"\"\n There exists an undirected and unrooted tree with n nodes indexed from 0 to n - 1. You are given an integer n and a 2D integer array edges of length n - 1, where edges[i] = [ai, bi] indicates that there is an edge between nodes ai and bi in the tree. You are also given an array coins of size n where coins[i] can be either 0 or 1, where 1 indicates the presence of a coin in the vertex i.\n Initially, you choose to start at any vertex in the tree. 
Then, you can perform the following operations any number of times:\n Collect all the coins that are at a distance of at most 2 from the current vertex, or\n Move to any adjacent vertex in the tree.\n Find the minimum number of edges you need to go through to collect all the coins and go back to the initial vertex.\n Note that if you pass an edge several times, you need to count it into the answer several times.\n \"\"\"\n from collections import defaultdict\n from typing import List\n\n def dfs(node: int, parent: int) -> int:\n total = 0\n for neighbor in graph[node]:\n if neighbor != parent:\n total += dfs(neighbor, node)\n if coins[node] == 1:\n total += 1\n return total\n\n graph = defaultdict(list)\n for edge in edges:\n graph[edge[0]].append(edge[1])\n graph[edge[1]].append(edge[0])\n\n max_coins = 0\n for node in range(len(coins)):\n if coins[node] == 1:\n max_coins = max(max_coins, dfs(node, -1))\n\n return 2 * (max_coins - 1)", "is_solved": false}
|
||||
{"task_id": "minimum-time-to-visit-a-cell-in-a-grid", "prompt": "def minimumTime(grid: List[List[int]]) -> int:\n \"\"\"\n You are given a m x n matrix grid consisting of non-negative integers where grid[row][col] represents the minimum time required to be able to visit the cell (row, col), which means you can visit the cell (row, col) only when the time you visit it is greater than or equal to grid[row][col].\n You are standing in the top-left cell of the matrix in the 0th second, and you must move to any adjacent cell in the four directions: up, down, left, and right. Each move you make takes 1 second.\n Return the minimum time required in which you can visit the bottom-right cell of the matrix. If you cannot visit the bottom-right cell, then return -1.\n \"\"\"\n", "entry_point": "minimum-time-to-visit-a-cell-in-a-grid", "cannonical_solution": "", "test": "", "solution": "def minimumTime(grid: List[List[int]]) -> int:\n \"\"\"\n You are given a m x n matrix grid consisting of non-negative integers where grid[row][col] represents the minimum time required to be able to visit the cell (row, col), which means you can visit the cell (row, col) only when the time you visit it is greater than or equal to grid[row][col].\n You are standing in the top-left cell of the matrix in the 0th second, and you must move to any adjacent cell in the four directions: up, down, left, and right. Each move you make takes 1 second.\n Return the minimum time required in which you can visit the bottom-right cell of the matrix. 
If you cannot visit the bottom-right cell, then return -1.\n \"\"\"\n from heapq import heappush, heappop\n from typing import List\n\n m, n = len(grid), len(grid[0])\n visited = [[False for _ in range(n)] for _ in range(m)]\n directions = [(0, 1), (1, 0), (0, -1), (-1, 0)]\n queue = [(grid[0][0], 0, 0)]\n\n while queue:\n time, x, y = heappop(queue)\n if x == m - 1 and y == n - 1:\n return time\n for dx, dy in directions:\n nx, ny = x + dx, y + dy\n if 0 <= nx < m and 0 <= ny < n and not visited[nx][ny]:\n visited[nx][ny] = True\n heappush(queue, (max(time, grid[nx][ny]), nx, ny))\n\n return -1", "is_solved": false}
|
11
run_reflexion_py_leet.sh
Executable file
11
run_reflexion_py_leet.sh
Executable file
@ -0,0 +1,11 @@
|
||||
python main.py \
|
||||
--run_name "reflexion_leetcode_python3_gpt4" \
|
||||
--root_dir "root" \
|
||||
--dataset_path ./executors/leetcode_env/leetcode_dataset/data/humaneval/leetcode-hard-py-40-uncontaminated.jsonl \
|
||||
--strategy "reflexion" \
|
||||
--language "py" \
|
||||
--model "gpt-4" \
|
||||
--pass_at_k "1" \
|
||||
--max_iters "5" \
|
||||
--is_leetcode \
|
||||
--verbose
|
@ -1,5 +1,5 @@
|
||||
python main.py \
|
||||
--run_name "simple_leetcode_python3" \
|
||||
--run_name "simple_leetcode_python3_gpt4" \
|
||||
--root_dir "root" \
|
||||
--dataset_path ./executors/leetcode_env/leetcode_dataset/data/humaneval/leetcode-hard-py-40-uncontaminated.jsonl \
|
||||
--strategy "simple" \
|
||||
@ -7,4 +7,5 @@ python main.py \
|
||||
--model "gpt-4" \
|
||||
--pass_at_k "1" \
|
||||
--max_iters "1" \
|
||||
--is_leetcode \
|
||||
--verbose
|
||||
|
@ -13,9 +13,10 @@ def run_simple(
|
||||
language: str,
|
||||
pass_at_k: int,
|
||||
log_path: str,
|
||||
verbose: bool
|
||||
verbose: bool,
|
||||
is_leetcode: bool = False
|
||||
) -> None:
|
||||
exe = executor_factory(language)
|
||||
exe = executor_factory(language, is_leet=is_leetcode)
|
||||
gen = generator_factory(language)
|
||||
|
||||
print_v = make_printv(verbose)
|
||||
@ -29,7 +30,7 @@ def run_simple(
|
||||
while cur_pass < pass_at_k:
|
||||
cur_func_impl = gen.func_impl(item["prompt"], model, "simple")
|
||||
assert isinstance(cur_func_impl, str)
|
||||
is_passing = exe.evaluate(item["entry_point"], cur_func_impl, item["test"], timeout=10)
|
||||
is_passing = exe.evaluate(item["entry_point"], cur_func_impl, item["test"], timeout = 20 if is_leetcode else 10)
|
||||
if is_passing:
|
||||
is_solved = True
|
||||
num_success += 1
|
||||
|
@ -11,9 +11,10 @@ def run_test_acc(
|
||||
language: str,
|
||||
pass_at_k: int,
|
||||
log_path: str,
|
||||
verbose: bool
|
||||
verbose: bool,
|
||||
is_leetcode: bool = False
|
||||
) -> None:
|
||||
exe = executor_factory(language)
|
||||
exe = executor_factory(language, is_leet=is_leetcode)
|
||||
gen = generator_factory(language)
|
||||
|
||||
print_v = make_printv(verbose)
|
||||
|
Loading…
Reference in New Issue
Block a user