bad logging fixed

This commit is contained in:
Noah Shinn 2023-03-28 23:01:45 -04:00
parent f7ad50a731
commit 700742c0b6
2 changed files with 23 additions and 5 deletions

View File

@ -92,8 +92,9 @@ def run_reflexion_ucs(
# if solved, exit early
if is_passing:
debug_print("solved at first attempt")
is_solved = True
num_success += 1
code = item["prompt"] + cur_func_impl
is_solved = evaluate(item["prompt"], code, item["test"])
num_success += 1 if is_solved else 0
break
reflection = self_reflection_generator(

View File

@ -18,6 +18,22 @@ def count_test_cases(test_str: str) -> int:
# naive heuristic: counts every occurrence of the substring "assert" (even inside comments or strings); good enough here
return test_str.count("assert")
# from executors.py_executor import py_evaluate
# def validate_py_results(log_path: str):
# if not log_path.endswith(".jsonl"):
# raise ValueError("Please provide a valid log file")
# data = read_jsonl(log_path)
# num_success = 0
# for i, item in enumerate(data[117:122]):
# is_passing = py_evaluate(item["entry_point"], item["solution"], item["test"])
# if is_passing:
# print(f"Test {i}: {green_text('PASS')}")
# num_success += 1
# else:
# print(f"Test {i}: {red_text('FAIL')}")
# print(f"Summary: {num_success}/{len(data)} tests passed")
# print(f"Acc: {round(num_success/len(data), 2)} tests passed")
def validate_py_results(log_path: str):
if not log_path.endswith(".jsonl"):
raise ValueError("Please provide a valid log file")
@ -25,8 +41,9 @@ def validate_py_results(log_path: str):
num_success = 0
for i, item in enumerate(data):
if item["is_solved"]:
func_impl = item["prompt"] + item["solution"]
code = f'{func_impl}\n\n{item["test"]}\n\ncheck({item["entry_point"]})'
# func_impl = item["prompt"] + item["solution"]
func_impl = item["solution"]
code = f'{item["prompt"]}{func_impl}\n\n{item["test"]}\n\ncheck({item["entry_point"]})'
num_tests = count_test_cases(item["test"])
try:
def handler(signum, frame):
@ -41,7 +58,7 @@ def validate_py_results(log_path: str):
print(f"Test {i}: {green_text_out}")
num_success += 1
except Exception:
red_text_out = red_text(f"failed!")
red_text_out = red_text(f"failed but should have passed!")
print(f"Test {i}: {red_text_out}")
else:
red_text_out = red_text(f"failed!")