fix: clean, print num rows

pull/1/head
Zach Nussbaum 2 years ago
parent dfee6963a1
commit 10db136a88

@@ -8,7 +8,7 @@ import pandas as pd
prompt_generation_dir = "raw_data_sanity_cleaned_delobotomized"
for file in glob.glob(os.path.join(prompt_generation_dir, "*.jsonl")):
-if "clean" in file:
+if "clean.jsonl" in file:
continue
data = []
print(file)
@@ -69,5 +69,5 @@ for file in glob.glob(os.path.join(prompt_generation_dir, "*.jsonl")):
print(f"Removed {prev_len - curr_len} rows")
clean_name = file.split(".jsonl")[0] + "_clean.jsonl"
-print(f"writing to {clean_name}")
+print(f"writing to {curr_len} rows to {clean_name}")
df.to_json(clean_name, orient="records", lines=True)
Loading…
Cancel
Save