added eval code

eval
bstadt 2 years ago
parent 4e8e7e7300
commit 6d98aef302

@ -0,0 +1,17 @@
# model/tokenizer
# Eval config: base LLaMA-7B with the Alpaca LoRA adapter applied on top.
model_name: "zpn/llama-7b"
tokenizer_name: "zpn/llama-7b"
# Load and apply the LoRA adapter from lora_path.
lora: true
lora_path: "tloen/alpaca-lora-7b"
# Generation settings: near-zero temperature -> effectively greedy decoding.
max_new_tokens: 512
temperature: 0.001
# The prompt is the eval question itself: a deliberately broken snippet
# (comment claims "reversed", code prints the length) the model must debug.
prompt: |
#this code prints a string reversed
my_string = "hello how are you"
print(len(my_string))
My code above does not work. Can you help me?

@ -0,0 +1,14 @@
# model/tokenizer
# Eval config: fully fine-tuned Vicuna checkpoint (epoch 0), no LoRA adapter.
model_name: "nomic-ai/vicuna-full-multi-turn_epoch_0"
tokenizer_name: "zpn/llama-7b"
# NOTE(review): sibling configs carry an explicit `lora:` flag; this one
# only sets a "no-lora" sentinel path -- confirm the eval script treats a
# missing `lora:` key as false.
lora_path: "no-lora"
# Generation settings: near-zero temperature -> effectively greedy decoding.
max_new_tokens: 512
temperature: 0.001
# The prompt is the eval question itself: a deliberately broken snippet
# (comment claims "reversed", code prints the length) the model must debug.
prompt: |
#this code prints a string reversed
my_string = "hello how are you"
print(len(my_string))
My code above does not work. Can you help me?

@ -0,0 +1,15 @@
# model/tokenizer
# Eval config: base LLaMA-7B with the Vicuna multi-turn LoRA, epoch-2 checkpoint.
model_name: "zpn/llama-7b"
tokenizer_name: "zpn/llama-7b"
# Load and apply the LoRA adapter from lora_path.
lora: true
lora_path: "nomic-ai/vicuna-lora-multi-turn_epoch_2"
# Generation settings: near-zero temperature -> effectively greedy decoding.
max_new_tokens: 512
temperature: 0.001
# The prompt is the eval question itself: a deliberately broken snippet
# (comment claims "reversed", code prints the length) the model must debug.
prompt: |
#this code prints a string reversed
my_string = "hello how are you"
print(len(my_string))
My code above does not work. Can you help me?

@ -0,0 +1,15 @@
# model/tokenizer
# Eval config: base LLaMA-7B with the Vicuna multi-turn LoRA, epoch-3 checkpoint.
model_name: "zpn/llama-7b"
tokenizer_name: "zpn/llama-7b"
# Load and apply the LoRA adapter from lora_path.
lora: true
lora_path: "nomic-ai/vicuna-lora-multi-turn_epoch_3"
# Generation settings: near-zero temperature -> effectively greedy decoding.
max_new_tokens: 512
temperature: 0.001
# The prompt is the eval question itself: a deliberately broken snippet
# (comment claims "reversed", code prints the length) the model must debug.
prompt: |
#this code prints a string reversed
my_string = "hello how are you"
print(len(my_string))
My code above does not work. Can you help me?

@ -0,0 +1,22 @@
"""Plot overlaid perplexity histograms for every multi-turn eval pickle.

Reads ./eval_data/*multi*.pkl files (each a dict with a 'perplexities'
array), cleans the values, and saves a combined histogram to
figs/perplexity_hist.png.
"""
import glob
import pickle
import numpy as np
from matplotlib import pyplot as plt

plt.figure()
for fpath in glob.glob('./eval_data/*multi*.pkl'):
    # Filename convention: <prefix>__model-<name>__lora-<name>.pkl
    # NOTE(review): parts[2] raises IndexError for files without two '__'
    # separators -- assumed guaranteed by the eval script's naming; verify.
    parts = fpath.split('__')
    model_name = parts[1].replace('model-', '').replace('.pkl', '')
    lora_name = parts[2].replace('lora-', '').replace('.pkl', '')
    with open(fpath, 'rb') as f:
        data = pickle.load(f)
    perplexities = data['perplexities']
    # BUG FIX: np.nan_to_num's second positional parameter is `copy`, not
    # the NaN replacement, so the original `np.nan_to_num(perplexities, 100)`
    # silently mapped NaN -> 0.0 (the opposite tail of the histogram).
    # Use the `nan=` keyword so failed samples land at the 100 cap instead.
    perplexities = np.nan_to_num(perplexities, nan=100.0)
    # Cap values so extreme perplexities share the top bucket.
    perplexities = np.clip(perplexities, 0, 100)
    plt.hist(perplexities, label='{}-{}'.format(model_name, lora_name), alpha=.5)
plt.xlabel('Perplexity')
plt.ylabel('Frequency')
plt.legend()
plt.savefig('figs/perplexity_hist.png')

@ -8,6 +8,11 @@ from argparse import ArgumentParser
from peft import PeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer
'''
Evaluates perplexity on the outputs of:
https://github.com/yizhongw/self-instruct/blob/main/human_eval/user_oriented_instructions.jsonl
'''
def read_jsonl_file(file_path):
data = []
with open(file_path, 'r', encoding='utf-8') as file:
@ -47,7 +52,7 @@ def eval_example(model, tokenizer, example, config):
continuations = []
tokenized_continuations = []
trajectories = []
for i in range(3):
for i in range(1):
with torch.no_grad():
outputs = model.generate(input_ids=input['input_ids'],
max_new_tokens=config["max_new_tokens"],

Loading…
Cancel
Save