|
|
|
@ -79,7 +79,17 @@
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"source": [
|
|
|
|
|
"## Building an evaluation for the OpenAI Evals framework\n"
|
|
|
|
|
"## Building an evaluation for the OpenAI Evals framework\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"To start creating an eval, we need\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"1/ The test dataset in the JSONL format.\n",
|
|
|
|
|
"2/ The eval template to be used\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"### Creating the eval dataset\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"format\n",
|
|
|
|
|
"`\"input\": [{\"role\": \"system\", \"content\": \"<input prompt>\",\"name\":\"example-user\"}, \"ideal\": \"correct answer\"]`"
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"collapsed": false
|
|
|
|
@ -97,7 +107,14 @@
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"source": [
|
|
|
|
|
"## Running an evaluation"
|
|
|
|
|
"## Running an evaluation\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"we can run this eval using the oaieval CLI like this\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"pip install .\n",
|
|
|
|
|
"oaieval gpt-3.5-turbo <name of eval>\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"### Going through eval logs"
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"collapsed": false
|
|
|
|
|