2023-03-28 18:47:38 +00:00
|
|
|
import glob
|
|
|
|
import pickle
|
|
|
|
import numpy as np
|
|
|
|
from matplotlib import pyplot as plt
|
|
|
|
|
|
|
|
# Overlay one perplexity histogram per evaluation pickle on a single figure
# and write the result to figs/perplexity_hist.png.
plt.figure()

# glob returns paths in arbitrary order; sorting keeps the draw order (and
# therefore the legend order) reproducible between runs.
for fpath in sorted(glob.glob('./eval_data/*.pkl')):
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file -- only point this script at eval_data from a trusted source.
    with open(fpath, 'rb') as f:
        data = pickle.load(f)

    perplexities = data['perplexities']
    # Map NaN to the cap value. The replacement must be passed as `nan=`:
    # the second positional parameter of np.nan_to_num is `copy`, so a bare
    # positional 100 would leave NaNs replaced by the default 0.0.
    perplexities = np.nan_to_num(perplexities, nan=100.0)
    # nan_to_num turns +/-inf into very large finite values; clipping then
    # bounds everything to the [0, 100] range shown on the histogram.
    perplexities = np.clip(perplexities, 0, 100)

    # The legend label is chosen from the file path: paths containing
    # 'nomic' are labelled GPT4all-lora, everything else alpaca-lora.
    label = 'GPT4all-lora' if 'nomic' in fpath else 'alpaca-lora'

    # Semi-transparent bars so overlapping distributions stay visible.
    plt.hist(perplexities, label=label, alpha=.5)

plt.xlabel('Perplexity')
plt.ylabel('Frequency')
plt.legend()
plt.savefig('figs/perplexity_hist.png')
|
|
|
|
|