mirror of
https://github.com/GammaTauAI/reflexion-human-eval
synced 2024-11-16 00:12:59 +00:00
56 lines
1.5 KiB
Python
56 lines
1.5 KiB
Python
import os
|
|
import gzip
|
|
import json
|
|
import openai
|
|
import jsonlines
|
|
|
|
from typing import List
|
|
|
|
# Configure the openai client at import time with the key taken from the
# OPENAI_API_KEY environment variable (os.getenv yields None when it is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
|
|
|
|
def read_jsonl(path: str) -> List[dict]:
    """Load every record of a ``.jsonl`` file into a list.

    Args:
        path: Location of the file; it must exist and end in ``.jsonl``.

    Returns:
        The records yielded by the jsonlines reader, in file order.

    Raises:
        FileNotFoundError: If nothing exists at ``path``.
        ValueError: If ``path`` exists but does not end in ``.jsonl``.
    """
    # Existence is tested before the suffix, so a missing path with the
    # wrong extension is reported as FileNotFoundError.
    if not os.path.exists(path):
        raise FileNotFoundError(f"File `{path}` does not exist.")
    if not path.endswith(".jsonl"):
        raise ValueError(f"File `{path}` is not a jsonl file.")

    with jsonlines.open(path) as reader:
        return [record for record in reader]
|
|
|
|
|
|
def write_jsonl(path: str, data: List[dict], append: bool = False):
    """Write the given records to a file through a jsonlines writer.

    Args:
        path: Destination file.
        data: Records to write, in order.
        append: When true the file is opened in ``'a'`` mode so existing
            content is kept; otherwise it is opened in ``'w'`` mode.
    """
    file_mode = 'a' if append else 'w'
    with jsonlines.open(path, mode=file_mode) as writer:
        writer.write_all(data)
|
|
|
|
|
|
def read_jsonl_gz(path: str) -> List[dict]:
    """Load every record of a gzip-compressed JSON Lines file.

    Args:
        path: Location of the file; it must end in ``.jsonl.gz``.

    Returns:
        One decoded JSON value per non-blank line, in file order.

    Raises:
        ValueError: If ``path`` does not end in ``.jsonl.gz``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if not path.endswith(".jsonl.gz"):
        raise ValueError(f"File `{path}` is not a jsonl.gz file.")

    # Decode explicitly as UTF-8: without an encoding, text mode falls back
    # to the locale default, which corrupts or rejects non-ASCII records on
    # systems whose default is not UTF-8.
    with gzip.open(path, "rt", encoding="utf-8") as f:
        # Whitespace-only lines carry no record; skip them instead of
        # letting json.loads fail on an empty string.
        return [json.loads(line) for line in f if line.strip()]
|
|
|
|
|
|
def enumerate_resume(dataset, results_path):
    """Yield ``(index, item)`` pairs from ``dataset``, resuming past prior work.

    If ``results_path`` exists, the number of records it contains is taken
    as the count of items already processed, and that many leading items of
    ``dataset`` are skipped. If it does not exist, every item is yielded.
    Indices always refer to positions in the full ``dataset``.
    """
    already_done = 0
    if os.path.exists(results_path):
        # One record in the results file corresponds to one finished item.
        with jsonlines.open(results_path) as reader:
            already_done = sum(1 for _ in reader)

    for index, entry in enumerate(dataset):
        # Only items at or beyond the resume point are handed out.
        if index >= already_done:
            yield index, entry
|