diff --git a/validate_py_results.py b/validate_py_results.py index e59bd2e..7be763a 100644 --- a/validate_py_results.py +++ b/validate_py_results.py @@ -18,21 +18,6 @@ def count_test_cases(test_str: str) -> int: # dumb way to do this but works return test_str.count("assert") -# from executors.py_executor import py_evaluate -# def validate_py_results(log_path: str): - # if not log_path.endswith(".jsonl"): - # raise ValueError("Please provide a valid log file") - # data = read_jsonl(log_path) - # num_success = 0 - # for i, item in enumerate(data[117:122]): - # is_passing = py_evaluate(item["entry_point"], item["solution"], item["test"]) - # if is_passing: - # print(f"Test {i}: {green_text('PASS')}") - # num_success += 1 - # else: - # print(f"Test {i}: {red_text('FAIL')}") - # print(f"Summary: {num_success}/{len(data)} tests passed") - # print(f"Acc: {round(num_success/len(data), 2)} tests passed") def validate_py_results(log_path: str): if not log_path.endswith(".jsonl"): @@ -41,7 +26,6 @@ def validate_py_results(log_path: str): num_success = 0 for i, item in enumerate(data): if item["is_solved"]: - # func_impl = item["prompt"] + item["solution"] func_impl = item["solution"] code = f'{item["prompt"]}{func_impl}\n\n{item["test"]}\n\ncheck({item["entry_point"]})' num_tests = count_test_cases(item["test"])