Step | \n", "Training Loss | \n", "
---|---|
1 | \n", "1.350100 | \n", "
2 | \n", "2.015800 | \n", "
3 | \n", "1.048700 | \n", "
4 | \n", "1.287700 | \n", "
5 | \n", "1.451200 | \n", "
6 | \n", "1.659900 | \n", "
7 | \n", "1.472300 | \n", "
8 | \n", "1.326700 | \n", "
9 | \n", "1.140000 | \n", "
10 | \n", "1.395300 | \n", "
11 | \n", "1.776400 | \n", "
12 | \n", "1.169100 | \n", "
13 | \n", "1.434700 | \n", "
14 | \n", "1.550400 | \n", "
15 | \n", "1.440400 | \n", "
16 | \n", "1.352100 | \n", "
17 | \n", "1.062800 | \n", "
18 | \n", "1.173400 | \n", "
19 | \n", "1.385300 | \n", "
20 | \n", "1.433300 | \n", "
21 | \n", "1.787800 | \n", "
22 | \n", "1.600200 | \n", "
23 | \n", "1.067800 | \n", "
24 | \n", "1.679300 | \n", "
25 | \n", "1.209900 | \n", "
26 | \n", "1.305200 | \n", "
27 | \n", "1.465300 | \n", "
28 | \n", "1.781800 | \n", "
29 | \n", "1.152400 | \n", "
30 | \n", "1.434400 | \n", "
31 | \n", "1.399300 | \n", "
32 | \n", "1.796300 | \n", "
33 | \n", "1.674500 | \n", "
34 | \n", "1.567600 | \n", "
35 | \n", "1.830000 | \n", "
36 | \n", "1.720200 | \n", "
37 | \n", "1.335800 | \n", "
38 | \n", "1.333000 | \n", "
39 | \n", "2.044900 | \n", "
40 | \n", "1.832200 | \n", "
41 | \n", "1.533900 | \n", "
42 | \n", "1.259900 | \n", "
43 | \n", "1.372300 | \n", "
44 | \n", "1.551600 | \n", "
45 | \n", "2.002400 | \n", "
46 | \n", "1.956100 | \n", "
47 | \n", "2.441900 | \n", "
48 | \n", "2.289100 | \n", "
49 | \n", "1.544500 | \n", "
50 | \n", "2.040300 | \n", "
51 | \n", "1.103800 | \n", "
52 | \n", "1.630800 | \n", "
53 | \n", "1.437900 | \n", "
54 | \n", "1.820900 | \n", "
55 | \n", "1.080300 | \n", "
56 | \n", "1.029200 | \n", "
57 | \n", "0.999400 | \n", "
58 | \n", "0.795900 | \n", "
59 | \n", "1.331600 | \n", "
60 | \n", "1.099500 | \n", "
61 | \n", "1.199000 | \n", "
62 | \n", "1.146000 | \n", "
63 | \n", "1.129000 | \n", "
64 | \n", "1.109500 | \n", "
65 | \n", "1.207000 | \n", "
66 | \n", "1.360600 | \n", "
67 | \n", "1.879000 | \n", "
68 | \n", "1.317200 | \n", "
69 | \n", "1.033300 | \n", "
70 | \n", "1.153400 | \n", "
71 | \n", "1.112400 | \n", "
72 | \n", "1.218400 | \n", "
73 | \n", "1.134600 | \n", "
74 | \n", "1.053200 | \n", "
75 | \n", "1.008900 | \n", "
76 | \n", "1.077000 | \n", "
77 | \n", "1.245000 | \n", "
78 | \n", "1.395900 | \n", "
79 | \n", "1.488800 | \n", "
80 | \n", "1.382500 | \n", "
81 | \n", "1.442200 | \n", "
82 | \n", "1.028500 | \n", "
83 | \n", "1.208500 | \n", "
84 | \n", "1.780200 | \n", "
85 | \n", "1.679300 | \n", "
86 | \n", "1.276600 | \n", "
87 | \n", "1.374600 | \n", "
88 | \n", "1.490000 | \n", "
89 | \n", "1.567100 | \n", "
90 | \n", "1.435000 | \n", "
91 | \n", "1.329800 | \n", "
92 | \n", "1.387600 | \n", "
93 | \n", "0.971400 | \n", "
94 | \n", "1.293800 | \n", "
95 | \n", "1.585900 | \n", "
96 | \n", "1.431700 | \n", "
97 | \n", "1.948900 | \n", "
98 | \n", "1.630500 | \n", "
99 | \n", "1.839100 | \n", "
100 | \n", "1.740900 | \n", "
101 | \n", "0.717200 | \n", "
102 | \n", "0.958100 | \n", "
103 | \n", "1.625900 | \n", "
104 | \n", "1.150000 | \n", "
105 | \n", "0.999200 | \n", "
106 | \n", "1.253100 | \n", "
107 | \n", "1.007600 | \n", "
108 | \n", "1.049700 | \n", "
109 | \n", "1.265900 | \n", "
110 | \n", "1.251300 | \n", "
111 | \n", "1.109500 | \n", "
112 | \n", "1.652500 | \n", "
113 | \n", "1.238000 | \n", "
114 | \n", "1.521300 | \n", "
115 | \n", "1.002400 | \n", "
116 | \n", "0.982400 | \n", "
117 | \n", "1.389300 | \n", "
118 | \n", "1.114900 | \n", "
119 | \n", "1.093900 | \n", "
120 | \n", "1.254200 | \n", "
121 | \n", "1.132300 | \n", "
122 | \n", "0.925300 | \n", "
123 | \n", "1.292700 | \n", "
124 | \n", "1.317600 | \n", "
125 | \n", "1.080400 | \n", "
126 | \n", "0.918800 | \n", "
127 | \n", "1.203400 | \n", "
128 | \n", "1.098800 | \n", "
129 | \n", "1.360800 | \n", "
130 | \n", "1.256900 | \n", "
131 | \n", "1.392600 | \n", "
132 | \n", "1.167600 | \n", "
133 | \n", "1.134900 | \n", "
134 | \n", "1.423700 | \n", "
135 | \n", "1.111200 | \n", "
136 | \n", "1.081600 | \n", "
137 | \n", "1.806000 | \n", "
138 | \n", "1.238800 | \n", "
139 | \n", "1.306800 | \n", "
140 | \n", "1.421900 | \n", "
141 | \n", "1.467300 | \n", "
142 | \n", "1.245100 | \n", "
143 | \n", "1.594200 | \n", "
144 | \n", "1.426000 | \n", "
145 | \n", "1.393800 | \n", "
146 | \n", "1.894400 | \n", "
147 | \n", "1.331200 | \n", "
148 | \n", "1.519400 | \n", "
149 | \n", "1.926300 | \n", "
150 | \n", "1.293200 | \n", "
151 | \n", "1.135100 | \n", "
152 | \n", "1.066700 | \n", "
153 | \n", "0.856900 | \n", "
154 | \n", "1.021500 | \n", "
155 | \n", "0.808800 | \n", "
156 | \n", "0.936300 | \n", "
157 | \n", "0.979700 | \n", "
158 | \n", "1.100200 | \n", "
159 | \n", "1.091400 | \n", "
160 | \n", "0.918800 | \n", "
161 | \n", "1.370800 | \n", "
162 | \n", "1.380300 | \n", "
163 | \n", "0.965300 | \n", "
164 | \n", "1.142400 | \n", "
165 | \n", "1.436400 | \n", "
166 | \n", "0.970400 | \n", "
167 | \n", "0.872600 | \n", "
168 | \n", "1.662500 | \n", "
169 | \n", "1.623500 | \n", "
170 | \n", "1.481700 | \n", "
171 | \n", "0.822300 | \n", "
172 | \n", "1.605500 | \n", "
173 | \n", "1.769800 | \n", "
174 | \n", "1.320100 | \n", "
175 | \n", "0.969300 | \n", "
176 | \n", "0.798700 | \n", "
177 | \n", "1.233200 | \n", "
178 | \n", "1.168500 | \n", "
179 | \n", "1.251400 | \n", "
180 | \n", "1.221500 | \n", "
181 | \n", "1.491100 | \n", "
182 | \n", "1.010200 | \n", "
183 | \n", "1.375500 | \n", "
184 | \n", "1.722900 | \n", "
185 | \n", "1.179300 | \n", "
186 | \n", "1.474400 | \n", "
187 | \n", "1.968200 | \n", "
188 | \n", "1.297200 | \n", "
189 | \n", "1.564500 | \n", "
190 | \n", "1.480700 | \n", "
191 | \n", "1.464700 | \n", "
192 | \n", "1.901400 | \n", "
193 | \n", "1.620100 | \n", "
194 | \n", "1.509000 | \n", "
195 | \n", "1.587000 | \n", "
196 | \n", "1.510000 | \n", "
197 | \n", "1.773900 | \n", "
198 | \n", "1.473200 | \n", "
199 | \n", "1.660400 | \n", "
200 | \n", "1.832600 | \n", "
201 | \n", "1.021400 | \n", "
202 | \n", "1.120400 | \n", "
203 | \n", "1.030200 | \n", "
204 | \n", "1.167500 | \n", "
205 | \n", "0.853200 | \n", "
206 | \n", "0.927000 | \n", "
207 | \n", "1.157400 | \n", "
208 | \n", "1.071600 | \n", "
209 | \n", "1.195400 | \n", "
210 | \n", "1.155800 | \n", "
211 | \n", "1.502300 | \n", "
212 | \n", "1.091600 | \n", "
213 | \n", "1.225200 | \n", "
214 | \n", "1.148900 | \n", "
215 | \n", "1.238200 | \n", "
216 | \n", "1.600200 | \n", "
217 | \n", "1.203600 | \n", "
218 | \n", "1.266200 | \n", "
219 | \n", "0.970900 | \n", "
220 | \n", "1.451000 | \n", "
221 | \n", "1.281300 | \n", "
222 | \n", "0.952500 | \n", "
223 | \n", "1.313800 | \n", "
224 | \n", "0.915700 | \n", "
225 | \n", "1.040000 | \n", "
226 | \n", "1.493800 | \n", "
227 | \n", "1.186400 | \n", "
228 | \n", "1.278700 | \n", "
229 | \n", "1.061100 | \n", "
230 | \n", "1.209000 | \n", "
231 | \n", "0.881400 | \n", "
232 | \n", "1.659300 | \n", "
233 | \n", "1.135200 | \n", "
234 | \n", "1.497800 | \n", "
235 | \n", "1.557500 | \n", "
236 | \n", "0.849200 | \n", "
237 | \n", "1.329200 | \n", "
238 | \n", "1.147700 | \n", "
239 | \n", "1.764600 | \n", "
240 | \n", "1.740000 | \n", "
241 | \n", "2.043700 | \n", "
242 | \n", "1.675000 | \n", "
243 | \n", "1.809600 | \n", "
244 | \n", "1.721400 | \n", "
245 | \n", "2.343300 | \n", "
246 | \n", "1.830400 | \n", "
247 | \n", "1.754400 | \n", "
248 | \n", "1.741900 | \n", "
249 | \n", "2.011000 | \n", "
250 | \n", "1.741700 | \n", "
"
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"# %load_ext tensorboard\n",
"# %tensorboard --logdir results/runs"
],
"metadata": {
"id": "crj9svNe4hU5"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Ignore warnings\n",
"logging.set_verbosity(logging.CRITICAL)\n",
"\n",
"# Run text generation pipeline with our new model\n",
"prompt = \"What is a large language model?\"\n",
"pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n",
"result = pipe(f\"[INST] {prompt} [/INST]\")\n",
"print(result[0]['generated_text'])"
],
"metadata": {
"id": "frlSLPin4IJ4",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e5bf6b3a-f20e-49f7-e0b7-36f71ca207c1"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1270: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation )\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
" warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"[INST] What is a large language model? [/INST] A large language model is a type of artificial intelligence (AI) model that is trained on a large dataset of text to generate human-like language outputs. It is designed to be able to understand and generate text in a way that is similar to human language, and can be used for a wide range of applications such as chatbots, language translation, and text summarization.\n",
"\n",
"Large language models are typically trained using deep learning techniques, such as recurrent neural networks (RNNs) or transformer models, and are often based on pre-trained models such as BERT or RoBERTa. These models are trained on large datasets of text, such as books, articles, or websites, and are designed to learn the patterns and structures of language.\n",
"\n",
"Some examples of large language models include:\n",
"\n",
"* BERT (Bidirectional Encoder Representations from Transformers\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Free VRAM: drop references to the model, pipeline, and trainer, then garbage-collect\n",
"del model\n",
"del pipe\n",
"del trainer\n",
"import gc\n",
"gc.collect()\n",
"gc.collect()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mkQCviG0Zta-",
"outputId": "e7c4ab10-4039-4490-b7f0-6ea118bdd709"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"19965"
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
"# Reload model in FP16 and merge it with LoRA weights\n",
"base_model = AutoModelForCausalLM.from_pretrained(\n",
" model_name,\n",
" low_cpu_mem_usage=True,\n",
" return_dict=True,\n",
" torch_dtype=torch.float16,\n",
" device_map=device_map,\n",
")\n",
"model = PeftModel.from_pretrained(base_model, new_model)\n",
"model = model.merge_and_unload()\n",
"\n",
"# Reload tokenizer to save it\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"tokenizer.padding_side = \"right\""
],
"metadata": {
"id": "QQn30cRtAZ-P",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"referenced_widgets": [
"051d193cd87f47c1971fb87544e1e615",
"9d7247c119e642c5894f15ca6974ef3e",
"a79c22bb34ec4f698a00752b47a6f631",
"d95f3a3f26c6470d984542cdfd68bec1",
"343e11c62a59448eb43bbc0c31bf5f11",
"a153c96bd1fe4c48a41e9b9c7c00dd6e",
"84da055d24694320843e13ad37438792",
"e375632975904402baea46163e2eeca1",
"95501d0b5a22407288f008bf8cc69726",
"6aef866a6c474dfabb2140ded933c5aa",
"d66fa096d442423c9447cbfbdc1aad8d"
]
},
"outputId": "1c5ef3c4-d107-4c43-9bd6-0ca72903db0e"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "051d193cd87f47c1971fb87544e1e615"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"!huggingface-cli login\n",
"\n",
"model.push_to_hub(new_model, use_temp_dir=False)\n",
"tokenizer.push_to_hub(new_model, use_temp_dir=False)"
],
"metadata": {
"id": "x-xPb-_qB0dz",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 373,
"referenced_widgets": [
"c99aff4cfd664ae8a165a27bea0566c8",
"e4b64cab6b7b418c8a2575ee26839039",
"c3a4fedc73b3480089ef9d13381471ed",
"bf722f71c61b4285bcbbf32fd619b3a6",
"fd11a6148b704c5b9142c5e8de2d3b25",
"f0bcdaf940d14ad796fc7ac46c8e1e64",
"b6e821c974674f2290c354238d6c919c",
"eeba50e8242c4753bfc0ea48e03f9078",
"7a1f3340688d408092adade75f4baac4",
"8c887ca9b0eb44fdb8608bf36b5db5c5",
"e4698337e6b843afac706ab657ca6af9",
"1af01f1f1aac42b8bff46fe4df8a59ad",
"eee8731f316244eda5ff0765fd12bf85",
"f135278e410f4b708435bb80fb630bcf",
"2e6fc79bf5c149d6b0bc5c52e18debc7",
"a4b0debc025444a59abd6953b3512c0d",
"130120644beb48acbc038651459af43c",
"bf77e97593a349718bdb5fd9bfd28fe3",
"f7292741953e47699540ef8712fc0d8d",
"9434350b1b9c4060812feb9ecbf63278",
"b29647e268414329be56047e522e28b9",
"27bb18a199ca47108c7a61e9c443de36",
"33ebb868f3e846f6af1a1a2a8ad6a3cb",
"1f73f8b4d4da4e74adc135f2a2f6ee65",
"68da6e6e69c8419895bea2068760534e",
"6dc1a868e08c4c3b8315116d2c46573b",
"7a5d714c17374104bb6f5caaa5541c10",
"1b6c59a51359453c926bfcddb3d0f0ea",
"dac3669f18284161a58d52f26dffb761",
"a3511f489f6d47cc8d404ab6f367b29f",
"20670478612f4b1a8a5f23d71a2609a7",
"b463153ec04749e38540389efa2981f7",
"2bb3d36d248a48fba364f14d9e840306"
]
},
"outputId": "6ed9166c-5f92-4375-eca5-dbb247c0e13a"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
" _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n",
" _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
" _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n",
" _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
" _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n",
" \n",
" To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
"Token: \n",
"Add token as git credential? (Y/n) n\n",
"Token is valid (permission: write).\n",
"Your token has been saved to /root/.cache/huggingface/token\n",
"Login successful\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload 2 LFS files: 0%| | 0/2 [00:00, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "c99aff4cfd664ae8a165a27bea0566c8"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"pytorch_model-00001-of-00002.bin: 0%| | 0.00/9.98G [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "1af01f1f1aac42b8bff46fe4df8a59ad"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"pytorch_model-00002-of-00002.bin: 0%| | 0.00/3.50G [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "33ebb868f3e846f6af1a1a2a8ad6a3cb"
}
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/mlabonne/llama-2-7b-miniguanaco/commit/c81a32fd0b4d39e252326e639d63e75aa68c9a4a', commit_message='Upload tokenizer', commit_description='', oid='c81a32fd0b4d39e252326e639d63e75aa68c9a4a', pr_url=None, pr_revision=None, pr_num=None)"
]
},
"metadata": {},
"execution_count": 10
}
]
}
]
}