diff --git a/Fine_tune_Llama_2_in_Google_Colab.ipynb b/Fine_tune_Llama_2_in_Google_Colab.ipynb index 2c1cf93..74bdad8 100644 --- a/Fine_tune_Llama_2_in_Google_Colab.ipynb +++ b/Fine_tune_Llama_2_in_Google_Colab.ipynb @@ -6,7 +6,7 @@ "provenance": [], "machine_shape": "hm", "gpuType": "T4", - "authorship_tag": "ABX9TyOiM3qH7dGB3tSOPgaXFX6a", + "authorship_tag": "ABX9TyO7/3B03I6/nYxIak9u97IH", "include_colab_link": true }, "kernelspec": { @@ -7223,7 +7223,7 @@ "\n", "❤️ Created by [@maximelabonne](), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da).\n", "\n", - "This notebook runs on a T4 GPU with high RAM. (Last update: 28 Jul 2023)\n" + "This notebook runs on a T4 GPU with high RAM. (Last update: 30 Jul 2023)\n" ], "metadata": { "id": "OSHlAbqzDFDq" @@ -7231,7 +7231,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "id": "GLXwJqbjtPho" }, @@ -7261,14 +7261,14 @@ "metadata": { "id": "nAMzy_0FtaUZ" }, - "execution_count": 2, + "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# The model that you want to train from the Hugging Face hub\n", - "model_name = \"daryl149/llama-2-7b-chat-hf\"\n", + "model_name = \"NousResearch/llama-2-7b-chat-hf\"\n", "\n", "# The instruction dataset to use\n", "dataset_name = \"mlabonne/guanaco-llama2-1k\"\n", @@ -7335,7 +7335,7 @@ "max_grad_norm = 0.3\n", "\n", "# Initial learning rate (AdamW optimizer)\n", - "learning_rate = 2e-4\n", + "learning_rate = 1e-5\n", "\n", "# Weight decay to apply to all layers except bias/LayerNorm weights\n", "weight_decay = 0.001\n", @@ -7343,8 +7343,8 @@ "# Optimizer to use\n", "optim = \"paged_adamw_32bit\"\n", "\n", - "# Learning rate schedule (constant a bit better than cosine)\n", - "lr_scheduler_type = \"constant\"\n", + "# Learning rate schedule\n", + "lr_scheduler_type = \"cosine\"\n", "\n", "# Number of training steps (overrides num_train_epochs)\n", "max_steps = 
-1\n", @@ -7357,7 +7357,7 @@ "group_by_length = True\n", "\n", "# Save checkpoint every X updates steps\n", - "save_steps = 25\n", + "save_steps = 0\n", "\n", "# Log every X updates steps\n", "logging_steps = 25\n", @@ -7378,7 +7378,7 @@ "metadata": { "id": "ib_We3NLtj2E" }, - "execution_count": 3, + "execution_count": null, "outputs": [] }, { @@ -7664,7 +7664,7 @@ }, "outputId": "a29901b3-5257-45a7-983c-f3f5f4eb7b94" }, - "execution_count": 4, + "execution_count": null, "outputs": [ { "output_type": "display_data", @@ -7999,7 +7999,7 @@ "metadata": { "id": "crj9svNe4hU5" }, - "execution_count": 5, + "execution_count": null, "outputs": [] }, { @@ -8021,7 +8021,7 @@ }, "outputId": "757676f4-1d4c-4777-e25b-3c92d37c51d8" }, - "execution_count": 6, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -8098,7 +8098,7 @@ }, "outputId": "463dab09-f8e9-4d99-ac87-19df4a51dbb5" }, - "execution_count": 4, + "execution_count": null, "outputs": [ { "output_type": "display_data", @@ -8128,7 +8128,7 @@ "id": "x-xPb-_qB0dz", "colab": { "base_uri": "https://localhost:8080/", - "height": 373, + "height": 385, "referenced_widgets": [ "8d703173f8ea40f1ab04cb87ef1f083b", "c603d7b4c2d243ac9c5892cde3b0d2d5", @@ -8167,7 +8167,7 @@ }, "outputId": "97d27e69-4112-4e02-dfcc-83a96be1686f" }, - "execution_count": 5, + "execution_count": null, "outputs": [ { "output_type": "stream",