|
|
|
@ -18,21 +18,6 @@ def count_test_cases(test_str: str) -> int:
|
|
|
|
|
# Naive heuristic: counts "assert" substrings, so it over-counts if the
# word appears in strings/comments — adequate for the generated test suites here.
|
|
|
|
|
return test_str.count("assert")
|
|
|
|
|
|
|
|
|
|
# from executors.py_executor import py_evaluate
|
|
|
|
|
# def validate_py_results(log_path: str):
|
|
|
|
|
# if not log_path.endswith(".jsonl"):
|
|
|
|
|
# raise ValueError("Please provide a valid log file")
|
|
|
|
|
# data = read_jsonl(log_path)
|
|
|
|
|
# num_success = 0
|
|
|
|
|
# for i, item in enumerate(data[117:122]):
|
|
|
|
|
# is_passing = py_evaluate(item["entry_point"], item["solution"], item["test"])
|
|
|
|
|
# if is_passing:
|
|
|
|
|
# print(f"Test {i}: {green_text('PASS')}")
|
|
|
|
|
# num_success += 1
|
|
|
|
|
# else:
|
|
|
|
|
# print(f"Test {i}: {red_text('FAIL')}")
|
|
|
|
|
# print(f"Summary: {num_success}/{len(data)} tests passed")
|
|
|
|
|
# print(f"Acc: {round(num_success/len(data), 2)} tests passed")
|
|
|
|
|
|
|
|
|
|
def validate_py_results(log_path: str):
|
|
|
|
|
if not log_path.endswith(".jsonl"):
|
|
|
|
@ -41,7 +26,6 @@ def validate_py_results(log_path: str):
|
|
|
|
|
num_success = 0
|
|
|
|
|
for i, item in enumerate(data):
|
|
|
|
|
if item["is_solved"]:
|
|
|
|
|
# func_impl = item["prompt"] + item["solution"]
|
|
|
|
|
func_impl = item["solution"]
|
|
|
|
|
code = f'{item["prompt"]}{func_impl}\n\n{item["test"]}\n\ncheck({item["entry_point"]})'
|
|
|
|
|
num_tests = count_test_cases(item["test"])
|
|
|
|
|