Created using Colaboratory

pull/17/head
Maxime Labonne 11 months ago
parent e975a2a94d
commit f29adf96b1

@@ -6,7 +6,7 @@
 "provenance": [],
 "machine_shape": "hm",
 "gpuType": "T4",
-"authorship_tag": "ABX9TyOiM3qH7dGB3tSOPgaXFX6a",
+"authorship_tag": "ABX9TyO7/3B03I6/nYxIak9u97IH",
 "include_colab_link": true
 },
 "kernelspec": {
@@ -7223,7 +7223,7 @@
 "\n",
 "❤️ Created by [@maximelabonne](), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da).\n",
 "\n",
-"This notebook runs on a T4 GPU with high RAM. (Last update: 28 Jul 2023)\n"
+"This notebook runs on a T4 GPU with high RAM. (Last update: 30 Jul 2023)\n"
 ],
 "metadata": {
 "id": "OSHlAbqzDFDq"
@@ -7231,7 +7231,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": null,
 "metadata": {
 "id": "GLXwJqbjtPho"
 },
@@ -7261,14 +7261,14 @@
 "metadata": {
 "id": "nAMzy_0FtaUZ"
 },
-"execution_count": 2,
+"execution_count": null,
 "outputs": []
 },
 {
 "cell_type": "code",
 "source": [
 "# The model that you want to train from the Hugging Face hub\n",
-"model_name = \"daryl149/llama-2-7b-chat-hf\"\n",
+"model_name = \"NousHermes/llama-2-7b-chat-hf\"\n",
 "\n",
 "# The instruction dataset to use\n",
 "dataset_name = \"mlabonne/guanaco-llama2-1k\"\n",
@@ -7335,7 +7335,7 @@
 "max_grad_norm = 0.3\n",
 "\n",
 "# Initial learning rate (AdamW optimizer)\n",
-"learning_rate = 2e-4\n",
+"learning_rate = 1e-5\n",
 "\n",
 "# Weight decay to apply to all layers except bias/LayerNorm weights\n",
 "weight_decay = 0.001\n",
@@ -7343,8 +7343,8 @@
 "# Optimizer to use\n",
 "optim = \"paged_adamw_32bit\"\n",
 "\n",
-"# Learning rate schedule (constant a bit better than cosine)\n",
-"lr_scheduler_type = \"constant\"\n",
+"# Learning rate schedule\n",
+"lr_scheduler_type = \"cosine\"\n",
 "\n",
 "# Number of training steps (overrides num_train_epochs)\n",
 "max_steps = -1\n",
@@ -7357,7 +7357,7 @@
 "group_by_length = True\n",
 "\n",
 "# Save checkpoint every X updates steps\n",
-"save_steps = 25\n",
+"save_steps = 0\n",
 "\n",
 "# Log every X updates steps\n",
 "logging_steps = 25\n",
@@ -7378,7 +7378,7 @@
 "metadata": {
 "id": "ib_We3NLtj2E"
 },
-"execution_count": 3,
+"execution_count": null,
 "outputs": []
 },
 {
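To see the hyperparameter changes above in one place (learning_rate 2e-4 → 1e-5, lr_scheduler_type "constant" → "cosine", save_steps 25 → 0), here is a minimal sketch of how these values typically feed into a `transformers.TrainingArguments` object; entries marked as assumed do not appear in this diff:

```python
# Sketch only (assumed wiring, not part of this commit): mapping the notebook's
# hyperparameters onto TrainingArguments.
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    output_dir="./results",          # assumed output directory
    num_train_epochs=1,              # assumed; not shown in this diff
    per_device_train_batch_size=4,   # assumed; not shown in this diff
    gradient_accumulation_steps=1,   # assumed; not shown in this diff
    optim="paged_adamw_32bit",       # unchanged in this commit
    save_steps=0,                    # changed from 25 in this commit
    logging_steps=25,                # unchanged
    learning_rate=1e-5,              # changed from 2e-4 in this commit
    weight_decay=0.001,              # unchanged
    max_grad_norm=0.3,               # unchanged
    max_steps=-1,                    # unchanged: train for num_train_epochs
    warmup_ratio=0.03,               # assumed; not shown in this diff
    group_by_length=True,            # unchanged
    lr_scheduler_type="cosine",      # changed from "constant" in this commit
)
```

Note that the comment edit in the diff tracks the value change: with `lr_scheduler_type = "cosine"`, the old remark that constant was "a bit better than cosine" no longer applies.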
@@ -7664,7 +7664,7 @@
 },
 "outputId": "a29901b3-5257-45a7-983c-f3f5f4eb7b94"
 },
-"execution_count": 4,
+"execution_count": null,
 "outputs": [
 {
 "output_type": "display_data",
@@ -7999,7 +7999,7 @@
 "metadata": {
 "id": "crj9svNe4hU5"
 },
-"execution_count": 5,
+"execution_count": null,
 "outputs": []
 },
 {
@@ -8021,7 +8021,7 @@
 },
 "outputId": "757676f4-1d4c-4777-e25b-3c92d37c51d8"
 },
-"execution_count": 6,
+"execution_count": null,
 "outputs": [
 {
 "output_type": "stream",
@@ -8098,7 +8098,7 @@
 },
 "outputId": "463dab09-f8e9-4d99-ac87-19df4a51dbb5"
 },
-"execution_count": 4,
+"execution_count": null,
 "outputs": [
 {
 "output_type": "display_data",
@@ -8128,7 +8128,7 @@
 "id": "x-xPb-_qB0dz",
 "colab": {
 "base_uri": "https://localhost:8080/",
-"height": 373,
+"height": 385,
 "referenced_widgets": [
 "8d703173f8ea40f1ab04cb87ef1f083b",
 "c603d7b4c2d243ac9c5892cde3b0d2d5",
@@ -8167,7 +8167,7 @@
 },
 "outputId": "97d27e69-4112-4e02-dfcc-83a96be1686f"
 },
-"execution_count": 5,
+"execution_count": null,
 "outputs": [
 {
 "output_type": "stream",
