diff --git a/examples/How_to_finetune_chat_models.ipynb b/examples/How_to_finetune_chat_models.ipynb new file mode 100644 index 00000000..c368790b --- /dev/null +++ b/examples/How_to_finetune_chat_models.ipynb @@ -0,0 +1,686 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7bcdad0e-b67c-4927-b00e-3a4d950cd8ce", + "metadata": {}, + "source": [ + "# How to fine-tune chat models\n", + "\n", + "This notebook provides a step-by-step guide for our new `gpt-3.5-turbo` fine-tuning. We'll perform entity extraction using the [RecipeNLG dataset](https://github.com/Glorf/recipenlg), which provides various recipes and a list of extracted generic ingredients for each. This is a common dataset for named entity recognition (NER) tasks.\n", + "\n", + "We will go through the following steps:\n", + "\n", + "1. **Setup:** Loading our dataset and filtering down to one domain to fine-tune on.\n", + "2. **Data preparation:** Preparing your data for fine-tuning by creating training and validation examples, and uploading them to the `Files` endpoint.\n", + "3. **Create the fine-tune:** Creating your fine-tuned model.\n", + "4. **Use model for inference:** Using your fine-tuned model for inference on new inputs.\n", + "\n", + "By the end of this notebook, you should be able to train, evaluate and deploy a fine-tuned `gpt-3.5-turbo` model.\n" + ] + }, + { + "cell_type": "markdown", + "id": "6f49cb10-f895-41f4-aa97-da606d0084d4", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "First we will import any required libraries and prepare our data.\n", + "\n", + "Fine-tuning works best when focused on a particular domain. It's important to make sure your dataset is both focused enough for the model to learn and general enough that unseen examples won't be missed. 
With this in mind, we have already extracted a subset of the RecipeNLG dataset that contains only documents from www.cookbooks.com.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32036e70", + "metadata": {}, + "outputs": [], + "source": [ + "# make sure to use the latest version of the openai python package\n", + "!pip install --upgrade openai " + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "6e1f4403-37e1-4115-a215-12fd7daa1eb6", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import openai\n", + "import os\n", + "import pandas as pd\n", + "import requests\n", + "from pprint import pprint\n", + "\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\", \"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f57ebc23-14b7-47f9-90b8-1d791ccfc9bc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | title | \n", + "ingredients | \n", + "directions | \n", + "link | \n", + "source | \n", + "NER | \n", + "
---|---|---|---|---|---|---|
0 | \n", + "No-Bake Nut Cookies | \n", + "[\"1 c. firmly packed brown sugar\", \"1/2 c. eva... | \n", + "[\"In a heavy 2-quart saucepan, mix brown sugar... | \n", + "www.cookbooks.com/Recipe-Details.aspx?id=44874 | \n", + "www.cookbooks.com | \n", + "[\"brown sugar\", \"milk\", \"vanilla\", \"nuts\", \"bu... | \n", + "
1 | \n", + "Jewell Ball'S Chicken | \n", + "[\"1 small jar chipped beef, cut up\", \"4 boned ... | \n", + "[\"Place chipped beef on bottom of baking dish.... | \n", + "www.cookbooks.com/Recipe-Details.aspx?id=699419 | \n", + "www.cookbooks.com | \n", + "[\"beef\", \"chicken breasts\", \"cream of mushroom... | \n", + "
2 | \n", + "Creamy Corn | \n", + "[\"2 (16 oz.) pkg. frozen corn\", \"1 (8 oz.) pkg... | \n", + "[\"In a slow cooker, combine all ingredients. C... | \n", + "www.cookbooks.com/Recipe-Details.aspx?id=10570 | \n", + "www.cookbooks.com | \n", + "[\"frozen corn\", \"cream cheese\", \"butter\", \"gar... | \n", + "
3 | \n", + "Chicken Funny | \n", + "[\"1 large whole chicken\", \"2 (10 1/2 oz.) cans... | \n", + "[\"Boil and debone chicken.\", \"Put bite size pi... | \n", + "www.cookbooks.com/Recipe-Details.aspx?id=897570 | \n", + "www.cookbooks.com | \n", + "[\"chicken\", \"chicken gravy\", \"cream of mushroo... | \n", + "
4 | \n", + "Reeses Cups(Candy) | \n", + "[\"1 c. peanut butter\", \"3/4 c. graham cracker ... | \n", + "[\"Combine first four ingredients and press in ... | \n", + "www.cookbooks.com/Recipe-Details.aspx?id=659239 | \n", + "www.cookbooks.com | \n", + "[\"peanut butter\", \"graham cracker crumbs\", \"bu... | \n", + "
\n", - " | title | \n", - "ingredients | \n", - "directions | \n", - "link | \n", - "source | \n", - "NER | \n", - "
---|---|---|---|---|---|---|
0 | \n", - "No-Bake Nut Cookies | \n", - "[\"1 c. firmly packed brown sugar\", \"1/2 c. eva... | \n", - "[\"In a heavy 2-quart saucepan, mix brown sugar... | \n", - "www.cookbooks.com/Recipe-Details.aspx?id=44874 | \n", - "www.cookbooks.com | \n", - "[\"brown sugar\", \"milk\", \"vanilla\", \"nuts\", \"bu... | \n", - "
1 | \n", - "Jewell Ball'S Chicken | \n", - "[\"1 small jar chipped beef, cut up\", \"4 boned ... | \n", - "[\"Place chipped beef on bottom of baking dish.... | \n", - "www.cookbooks.com/Recipe-Details.aspx?id=699419 | \n", - "www.cookbooks.com | \n", - "[\"beef\", \"chicken breasts\", \"cream of mushroom... | \n", - "
2 | \n", - "Creamy Corn | \n", - "[\"2 (16 oz.) pkg. frozen corn\", \"1 (8 oz.) pkg... | \n", - "[\"In a slow cooker, combine all ingredients. C... | \n", - "www.cookbooks.com/Recipe-Details.aspx?id=10570 | \n", - "www.cookbooks.com | \n", - "[\"frozen corn\", \"cream cheese\", \"butter\", \"gar... | \n", - "
3 | \n", - "Chicken Funny | \n", - "[\"1 large whole chicken\", \"2 (10 1/2 oz.) cans... | \n", - "[\"Boil and debone chicken.\", \"Put bite size pi... | \n", - "www.cookbooks.com/Recipe-Details.aspx?id=897570 | \n", - "www.cookbooks.com | \n", - "[\"chicken\", \"chicken gravy\", \"cream of mushroo... | \n", - "
4 | \n", - "Reeses Cups(Candy) | \n", - "[\"1 c. peanut butter\", \"3/4 c. graham cracker ... | \n", - "[\"Combine first four ingredients and press in ... | \n", - "www.cookbooks.com/Recipe-Details.aspx?id=659239 | \n", - "www.cookbooks.com | \n", - "[\"peanut butter\", \"graham cracker crumbs\", \"bu... | \n", - "
\n", - " | input | \n", - "actual | \n", - "prediction | \n", - "score | \n", - "
---|---|---|---|---|
0 | \n", - "Title: Pretzel Candy\\n\\nIngredients: [\"1 lb. w... | \n", - "[\"white chocolate\", \"pretzel sticks\", \"peanuts\"] | \n", - "[\"white chocolate\", \"pretzel sticks\", \"salted ... | \n", - "0.666667 | \n", - "
1 | \n", - "Title: Salmon Party Ball\\n\\nIngredients: [\"8 o... | \n", - "[\"cream cheese\", \"salmon\", \"lemon juice\", \"hor... | \n", - "[\"cream cheese\", \"salmon\", \"lemon juice\", \"hor... | \n", - "0.777778 | \n", - "
2 | \n", - "Title: Fancy Fried Green Tomatoes\\n\\nIngredien... | \n", - "[\"sour cream\", \"green onion\", \"salt\", \"eggs\", ... | \n", - "[\"sour cream\", \"green onion\", \"salt\", \"eggs\", ... | \n", - "0.800000 | \n", - "
3 | \n", - "Title: Potluck Potatoes\\n\\nIngredients: [\"1 (2... | \n", - "[\"frozen hash brown potatoes\", \"onions\", \"salt... | \n", - "[\"frozen hash brown potatoes\", \"onions\", \"salt... | \n", - "1.000000 | \n", - "
4 | \n", - "Title: Old-Fashioned Sweet-Sour Cole Slaw\\n\\nI... | \n", - "[\"shredded green cabbage\", \"salt\", \"sugar\", \"c... | \n", - "[\"shredded green cabbage\", \"salt\", \"sugar\", \"c... | \n", - "1.000000 | \n", - "
5 | \n", - "Title: Peanut Brittle\\n\\nIngredients: [\"3 c. w... | \n", - "[\"white sugar\", \"water\", \"butter\", \"soda\", \"wh... | \n", - "[\"white sugar\", \"water\", \"butter\", \"soda\", \"wh... | \n", - "0.857143 | \n", - "
6 | \n", - "Title: Chicken Inspiration\\n\\nIngredients: [\"1... | \n", - "[\"chicken breast\", \"fresh mushrooms\", \"Provolo... | \n", - "[\"chicken breast\", \"mushrooms\", \"Provolone che... | \n", - "0.636364 | \n", - "
7 | \n", - "Title: Down East Blueberry Cake\\n\\nIngredients... | \n", - "[\"butter\", \"sugar\", \"eggs\", \"sour milk\", \"blue... | \n", - "[\"butter\", \"sugar\", \"eggs\", \"sour milk\", \"blue... | \n", - "1.000000 | \n", - "
8 | \n", - "Title: Cranberry-Pecan Bars\\n\\nIngredients: [\"... | \n", - "[\"flour\", \"sugar\", \"salt\", \"margarine\", \"pecan... | \n", - "[\"flour\", \"sugar\", \"salt\", \"margarine\", \"pecan... | \n", - "0.833333 | \n", - "
9 | \n", - "Title: Stacked Twinkie Cake\\n\\nIngredients: [\"... | \n", - "[\"coconut\", \"Twinkies\", \"pineapple\", \"pecans\",... | \n", - "[\"coconut\", \"instant French vanilla pudding\", ... | \n", - "0.600000 | \n", - "
\n", - " | input | \n", - "actual | \n", - "prediction | \n", - "score | \n", - "
---|---|---|---|---|
0 | \n", - "Title: Pretzel Candy\\n\\nIngredients: [\"1 lb. w... | \n", - "[\"white chocolate\", \"pretzel sticks\", \"peanuts\"] | \n", - "[\"white chocolate\", \"pretzel sticks\", \"salted ... | \n", - "0.666667 | \n", - "
1 | \n", - "Title: Salmon Party Ball\\n\\nIngredients: [\"8 o... | \n", - "[\"cream cheese\", \"salmon\", \"lemon juice\", \"hor... | \n", - "[\"cream cheese\", \"salmon\", \"lemon juice\", \"hor... | \n", - "0.888889 | \n", - "
2 | \n", - "Title: Fancy Fried Green Tomatoes\\n\\nIngredien... | \n", - "[\"sour cream\", \"green onion\", \"salt\", \"eggs\", ... | \n", - "[\"sour cream\", \"green onion\", \"salt\", \"eggs\", ... | \n", - "0.800000 | \n", - "
3 | \n", - "Title: Potluck Potatoes\\n\\nIngredients: [\"1 (2... | \n", - "[\"frozen hash brown potatoes\", \"onions\", \"salt... | \n", - "[\"hash brown potatoes\", \"onions\", \"salt\", \"pep... | \n", - "0.900000 | \n", - "
4 | \n", - "Title: Old-Fashioned Sweet-Sour Cole Slaw\\n\\nI... | \n", - "[\"shredded green cabbage\", \"salt\", \"sugar\", \"c... | \n", - "[\"green cabbage\", \"salt\", \"sugar\", \"cider vine... | \n", - "0.800000 | \n", - "
5 | \n", - "Title: Peanut Brittle\\n\\nIngredients: [\"3 c. w... | \n", - "[\"white sugar\", \"water\", \"butter\", \"soda\", \"wh... | \n", - "[\"white sugar\", \"water\", \"butter\", \"soda\", \"wh... | \n", - "0.857143 | \n", - "
6 | \n", - "Title: Chicken Inspiration\\n\\nIngredients: [\"1... | \n", - "[\"chicken breast\", \"fresh mushrooms\", \"Provolo... | \n", - "[\"chicken breast\", \"fresh mushrooms\", \"Provolo... | \n", - "0.636364 | \n", - "
7 | \n", - "Title: Down East Blueberry Cake\\n\\nIngredients... | \n", - "[\"butter\", \"sugar\", \"eggs\", \"sour milk\", \"blue... | \n", - "[\"butter\", \"sugar\", \"eggs\", \"sour milk\", \"blue... | \n", - "1.000000 | \n", - "
8 | \n", - "Title: Cranberry-Pecan Bars\\n\\nIngredients: [\"... | \n", - "[\"flour\", \"sugar\", \"salt\", \"margarine\", \"pecan... | \n", - "[\"flour\", \"sugar\", \"salt\", \"margarine\", \"pecan... | \n", - "0.833333 | \n", - "
9 | \n", - "Title: Stacked Twinkie Cake\\n\\nIngredients: [\"... | \n", - "[\"coconut\", \"Twinkies\", \"pineapple\", \"pecans\",... | \n", - "[\"coconut\", \"instant French vanilla pudding\", ... | \n", - "0.800000 | \n", - "
\n", - " | input | \n", - "actual | \n", - "prediction | \n", - "score | \n", - "
---|---|---|---|---|
0 | \n", - "Title: Pretzel Candy\\n\\nIngredients: [\"1 lb. w... | \n", - "[\"white chocolate\", \"pretzel sticks\", \"peanuts\"] | \n", - "[\"white chocolate\", \"pretzel sticks\", \"peanuts\"] | \n", - "1.000000 | \n", - "
1 | \n", - "Title: Salmon Party Ball\\n\\nIngredients: [\"8 o... | \n", - "[\"cream cheese\", \"salmon\", \"lemon juice\", \"hor... | \n", - "[\"cream cheese\", \"salmon\", \"lemon juice\", \"hor... | \n", - "1.000000 | \n", - "
2 | \n", - "Title: Fancy Fried Green Tomatoes\\n\\nIngredien... | \n", - "[\"sour cream\", \"green onion\", \"salt\", \"eggs\", ... | \n", - "[\"sour cream\", \"green onion\", \"salt\", \"eggs\", ... | \n", - "0.900000 | \n", - "
3 | \n", - "Title: Potluck Potatoes\\n\\nIngredients: [\"1 (2... | \n", - "[\"frozen hash brown potatoes\", \"onions\", \"salt... | \n", - "[\"frozen hash brown potatoes\", \"onions\", \"salt... | \n", - "1.000000 | \n", - "
4 | \n", - "Title: Old-Fashioned Sweet-Sour Cole Slaw\\n\\nI... | \n", - "[\"shredded green cabbage\", \"salt\", \"sugar\", \"c... | \n", - "[\"green cabbage\", \"salt\", \"sugar\", \"cider vine... | \n", - "0.800000 | \n", - "
5 | \n", - "Title: Peanut Brittle\\n\\nIngredients: [\"3 c. w... | \n", - "[\"white sugar\", \"water\", \"butter\", \"soda\", \"wh... | \n", - "[\"white sugar\", \"water\", \"butter\", \"soda\", \"wh... | \n", - "1.000000 | \n", - "
6 | \n", - "Title: Chicken Inspiration\\n\\nIngredients: [\"1... | \n", - "[\"chicken breast\", \"fresh mushrooms\", \"Provolo... | \n", - "[\"boneless\", \"fresh mushrooms\", \"Provolone che... | \n", - "0.636364 | \n", - "
7 | \n", - "Title: Down East Blueberry Cake\\n\\nIngredients... | \n", - "[\"butter\", \"sugar\", \"eggs\", \"sour milk\", \"blue... | \n", - "[\"butter\", \"sugar\", \"eggs\", \"sour milk\", \"blue... | \n", - "1.000000 | \n", - "
8 | \n", - "Title: Cranberry-Pecan Bars\\n\\nIngredients: [\"... | \n", - "[\"flour\", \"sugar\", \"salt\", \"margarine\", \"pecan... | \n", - "[\"flour\", \"sugar\", \"salt\", \"margarine\", \"pecan... | \n", - "1.000000 | \n", - "
9 | \n", - "Title: Stacked Twinkie Cake\\n\\nIngredients: [\"... | \n", - "[\"coconut\", \"Twinkies\", \"pineapple\", \"pecans\",... | \n", - "[\"coconut\", \"instant French vanilla pudding\", ... | \n", - "1.000000 | \n", - "
\n", - " | model | \n", - "result | \n", - "
---|---|---|
0 | \n", - "gpt-3.5-turbo | \n", - "0.810288 | \n", - "
1 | \n", - "gpt-4 | \n", - "0.820128 | \n", - "
2 | \n", - "gpt-3.5-turbo-ft | \n", - "0.923313 | \n", - "