mirror of
https://github.com/mlabonne/llm-course.git
synced 2024-11-10 19:10:54 +00:00
1072 lines
47 KiB
Plaintext
1072 lines
47 KiB
Plaintext
|
{
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 0,
|
|||
|
"metadata": {
|
|||
|
"colab": {
|
|||
|
"provenance": [],
|
|||
|
"gpuType": "T4",
|
|||
|
"authorship_tag": "ABX9TyNC/p+CjyumuththfkLa9LG",
|
|||
|
"include_colab_link": true
|
|||
|
},
|
|||
|
"kernelspec": {
|
|||
|
"name": "python3",
|
|||
|
"display_name": "Python 3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"name": "python"
|
|||
|
},
|
|||
|
"accelerator": "GPU",
|
|||
|
"widgets": {
|
|||
|
"application/vnd.jupyter.widget-state+json": {
|
|||
|
"9fdc7832238743f384543674f57a135d": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "HBoxModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_dom_classes": [],
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "HBoxModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/controls",
|
|||
|
"_view_module_version": "1.5.0",
|
|||
|
"_view_name": "HBoxView",
|
|||
|
"box_style": "",
|
|||
|
"children": [
|
|||
|
"IPY_MODEL_e856dd2f68714377b76493d5f428043d",
|
|||
|
"IPY_MODEL_b45fac85d2034fcda9427c787124788d",
|
|||
|
"IPY_MODEL_8ae387bcc1a3478eb5da50db9449e7a0"
|
|||
|
],
|
|||
|
"layout": "IPY_MODEL_653f5250de60427cb870fe823937e6af"
|
|||
|
}
|
|||
|
},
|
|||
|
"e856dd2f68714377b76493d5f428043d": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "HTMLModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_dom_classes": [],
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "HTMLModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/controls",
|
|||
|
"_view_module_version": "1.5.0",
|
|||
|
"_view_name": "HTMLView",
|
|||
|
"description": "",
|
|||
|
"description_tooltip": null,
|
|||
|
"layout": "IPY_MODEL_06846d39e16d4c66bc4cc177666c959b",
|
|||
|
"placeholder": "",
|
|||
|
"style": "IPY_MODEL_0fcfa8e7621647abae79076a7aec2972",
|
|||
|
"value": "Upload 1 LFS files: 100%"
|
|||
|
}
|
|||
|
},
|
|||
|
"b45fac85d2034fcda9427c787124788d": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "FloatProgressModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_dom_classes": [],
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "FloatProgressModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/controls",
|
|||
|
"_view_module_version": "1.5.0",
|
|||
|
"_view_name": "ProgressView",
|
|||
|
"bar_style": "success",
|
|||
|
"description": "",
|
|||
|
"description_tooltip": null,
|
|||
|
"layout": "IPY_MODEL_811878ae336b4d0c9ec8237fc37bb999",
|
|||
|
"max": 1,
|
|||
|
"min": 0,
|
|||
|
"orientation": "horizontal",
|
|||
|
"style": "IPY_MODEL_0146ec198b8640b2aa30f8466e40597d",
|
|||
|
"value": 1
|
|||
|
}
|
|||
|
},
|
|||
|
"8ae387bcc1a3478eb5da50db9449e7a0": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "HTMLModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_dom_classes": [],
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "HTMLModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/controls",
|
|||
|
"_view_module_version": "1.5.0",
|
|||
|
"_view_name": "HTMLView",
|
|||
|
"description": "",
|
|||
|
"description_tooltip": null,
|
|||
|
"layout": "IPY_MODEL_769dd69f4f6f4af08d1b2d15522940ec",
|
|||
|
"placeholder": "",
|
|||
|
"style": "IPY_MODEL_f32db67266234c34aa77317776cbdc48",
|
|||
|
"value": " 1/1 [00:13<00:00, 13.70s/it]"
|
|||
|
}
|
|||
|
},
|
|||
|
"653f5250de60427cb870fe823937e6af": {
|
|||
|
"model_module": "@jupyter-widgets/base",
|
|||
|
"model_name": "LayoutModel",
|
|||
|
"model_module_version": "1.2.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/base",
|
|||
|
"_model_module_version": "1.2.0",
|
|||
|
"_model_name": "LayoutModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "LayoutView",
|
|||
|
"align_content": null,
|
|||
|
"align_items": null,
|
|||
|
"align_self": null,
|
|||
|
"border": null,
|
|||
|
"bottom": null,
|
|||
|
"display": null,
|
|||
|
"flex": null,
|
|||
|
"flex_flow": null,
|
|||
|
"grid_area": null,
|
|||
|
"grid_auto_columns": null,
|
|||
|
"grid_auto_flow": null,
|
|||
|
"grid_auto_rows": null,
|
|||
|
"grid_column": null,
|
|||
|
"grid_gap": null,
|
|||
|
"grid_row": null,
|
|||
|
"grid_template_areas": null,
|
|||
|
"grid_template_columns": null,
|
|||
|
"grid_template_rows": null,
|
|||
|
"height": null,
|
|||
|
"justify_content": null,
|
|||
|
"justify_items": null,
|
|||
|
"left": null,
|
|||
|
"margin": null,
|
|||
|
"max_height": null,
|
|||
|
"max_width": null,
|
|||
|
"min_height": null,
|
|||
|
"min_width": null,
|
|||
|
"object_fit": null,
|
|||
|
"object_position": null,
|
|||
|
"order": null,
|
|||
|
"overflow": null,
|
|||
|
"overflow_x": null,
|
|||
|
"overflow_y": null,
|
|||
|
"padding": null,
|
|||
|
"right": null,
|
|||
|
"top": null,
|
|||
|
"visibility": null,
|
|||
|
"width": null
|
|||
|
}
|
|||
|
},
|
|||
|
"06846d39e16d4c66bc4cc177666c959b": {
|
|||
|
"model_module": "@jupyter-widgets/base",
|
|||
|
"model_name": "LayoutModel",
|
|||
|
"model_module_version": "1.2.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/base",
|
|||
|
"_model_module_version": "1.2.0",
|
|||
|
"_model_name": "LayoutModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "LayoutView",
|
|||
|
"align_content": null,
|
|||
|
"align_items": null,
|
|||
|
"align_self": null,
|
|||
|
"border": null,
|
|||
|
"bottom": null,
|
|||
|
"display": null,
|
|||
|
"flex": null,
|
|||
|
"flex_flow": null,
|
|||
|
"grid_area": null,
|
|||
|
"grid_auto_columns": null,
|
|||
|
"grid_auto_flow": null,
|
|||
|
"grid_auto_rows": null,
|
|||
|
"grid_column": null,
|
|||
|
"grid_gap": null,
|
|||
|
"grid_row": null,
|
|||
|
"grid_template_areas": null,
|
|||
|
"grid_template_columns": null,
|
|||
|
"grid_template_rows": null,
|
|||
|
"height": null,
|
|||
|
"justify_content": null,
|
|||
|
"justify_items": null,
|
|||
|
"left": null,
|
|||
|
"margin": null,
|
|||
|
"max_height": null,
|
|||
|
"max_width": null,
|
|||
|
"min_height": null,
|
|||
|
"min_width": null,
|
|||
|
"object_fit": null,
|
|||
|
"object_position": null,
|
|||
|
"order": null,
|
|||
|
"overflow": null,
|
|||
|
"overflow_x": null,
|
|||
|
"overflow_y": null,
|
|||
|
"padding": null,
|
|||
|
"right": null,
|
|||
|
"top": null,
|
|||
|
"visibility": null,
|
|||
|
"width": null
|
|||
|
}
|
|||
|
},
|
|||
|
"0fcfa8e7621647abae79076a7aec2972": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "DescriptionStyleModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "DescriptionStyleModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "StyleView",
|
|||
|
"description_width": ""
|
|||
|
}
|
|||
|
},
|
|||
|
"811878ae336b4d0c9ec8237fc37bb999": {
|
|||
|
"model_module": "@jupyter-widgets/base",
|
|||
|
"model_name": "LayoutModel",
|
|||
|
"model_module_version": "1.2.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/base",
|
|||
|
"_model_module_version": "1.2.0",
|
|||
|
"_model_name": "LayoutModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "LayoutView",
|
|||
|
"align_content": null,
|
|||
|
"align_items": null,
|
|||
|
"align_self": null,
|
|||
|
"border": null,
|
|||
|
"bottom": null,
|
|||
|
"display": null,
|
|||
|
"flex": null,
|
|||
|
"flex_flow": null,
|
|||
|
"grid_area": null,
|
|||
|
"grid_auto_columns": null,
|
|||
|
"grid_auto_flow": null,
|
|||
|
"grid_auto_rows": null,
|
|||
|
"grid_column": null,
|
|||
|
"grid_gap": null,
|
|||
|
"grid_row": null,
|
|||
|
"grid_template_areas": null,
|
|||
|
"grid_template_columns": null,
|
|||
|
"grid_template_rows": null,
|
|||
|
"height": null,
|
|||
|
"justify_content": null,
|
|||
|
"justify_items": null,
|
|||
|
"left": null,
|
|||
|
"margin": null,
|
|||
|
"max_height": null,
|
|||
|
"max_width": null,
|
|||
|
"min_height": null,
|
|||
|
"min_width": null,
|
|||
|
"object_fit": null,
|
|||
|
"object_position": null,
|
|||
|
"order": null,
|
|||
|
"overflow": null,
|
|||
|
"overflow_x": null,
|
|||
|
"overflow_y": null,
|
|||
|
"padding": null,
|
|||
|
"right": null,
|
|||
|
"top": null,
|
|||
|
"visibility": null,
|
|||
|
"width": null
|
|||
|
}
|
|||
|
},
|
|||
|
"0146ec198b8640b2aa30f8466e40597d": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "ProgressStyleModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "ProgressStyleModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "StyleView",
|
|||
|
"bar_color": null,
|
|||
|
"description_width": ""
|
|||
|
}
|
|||
|
},
|
|||
|
"769dd69f4f6f4af08d1b2d15522940ec": {
|
|||
|
"model_module": "@jupyter-widgets/base",
|
|||
|
"model_name": "LayoutModel",
|
|||
|
"model_module_version": "1.2.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/base",
|
|||
|
"_model_module_version": "1.2.0",
|
|||
|
"_model_name": "LayoutModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "LayoutView",
|
|||
|
"align_content": null,
|
|||
|
"align_items": null,
|
|||
|
"align_self": null,
|
|||
|
"border": null,
|
|||
|
"bottom": null,
|
|||
|
"display": null,
|
|||
|
"flex": null,
|
|||
|
"flex_flow": null,
|
|||
|
"grid_area": null,
|
|||
|
"grid_auto_columns": null,
|
|||
|
"grid_auto_flow": null,
|
|||
|
"grid_auto_rows": null,
|
|||
|
"grid_column": null,
|
|||
|
"grid_gap": null,
|
|||
|
"grid_row": null,
|
|||
|
"grid_template_areas": null,
|
|||
|
"grid_template_columns": null,
|
|||
|
"grid_template_rows": null,
|
|||
|
"height": null,
|
|||
|
"justify_content": null,
|
|||
|
"justify_items": null,
|
|||
|
"left": null,
|
|||
|
"margin": null,
|
|||
|
"max_height": null,
|
|||
|
"max_width": null,
|
|||
|
"min_height": null,
|
|||
|
"min_width": null,
|
|||
|
"object_fit": null,
|
|||
|
"object_position": null,
|
|||
|
"order": null,
|
|||
|
"overflow": null,
|
|||
|
"overflow_x": null,
|
|||
|
"overflow_y": null,
|
|||
|
"padding": null,
|
|||
|
"right": null,
|
|||
|
"top": null,
|
|||
|
"visibility": null,
|
|||
|
"width": null
|
|||
|
}
|
|||
|
},
|
|||
|
"f32db67266234c34aa77317776cbdc48": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "DescriptionStyleModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "DescriptionStyleModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "StyleView",
|
|||
|
"description_width": ""
|
|||
|
}
|
|||
|
},
|
|||
|
"66a41698432f46de9eb325447917a389": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "HBoxModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_dom_classes": [],
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "HBoxModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/controls",
|
|||
|
"_view_module_version": "1.5.0",
|
|||
|
"_view_name": "HBoxView",
|
|||
|
"box_style": "",
|
|||
|
"children": [
|
|||
|
"IPY_MODEL_ca0783bbd2e1428d92ee78db13d0d64d",
|
|||
|
"IPY_MODEL_ebbb561fa4cb42e1be414b6462949fd0",
|
|||
|
"IPY_MODEL_d2ee702c234b4b0f903e92e09cb1c6dd"
|
|||
|
],
|
|||
|
"layout": "IPY_MODEL_8d25924e581e40a9a39a87d4d3d14221"
|
|||
|
}
|
|||
|
},
|
|||
|
"ca0783bbd2e1428d92ee78db13d0d64d": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "HTMLModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_dom_classes": [],
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "HTMLModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/controls",
|
|||
|
"_view_module_version": "1.5.0",
|
|||
|
"_view_name": "HTMLView",
|
|||
|
"description": "",
|
|||
|
"description_tooltip": null,
|
|||
|
"layout": "IPY_MODEL_2e97ccd3f59447db9892c3819252c57b",
|
|||
|
"placeholder": "",
|
|||
|
"style": "IPY_MODEL_1dbd60f5ee294c65b0bb9225d81bd8af",
|
|||
|
"value": "gptq_model-4bit-128g.bin: 100%"
|
|||
|
}
|
|||
|
},
|
|||
|
"ebbb561fa4cb42e1be414b6462949fd0": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "FloatProgressModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_dom_classes": [],
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "FloatProgressModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/controls",
|
|||
|
"_view_module_version": "1.5.0",
|
|||
|
"_view_name": "ProgressView",
|
|||
|
"bar_style": "success",
|
|||
|
"description": "",
|
|||
|
"description_tooltip": null,
|
|||
|
"layout": "IPY_MODEL_b0a9de27e4074778ab5a55a1f9d250cc",
|
|||
|
"max": 123495975,
|
|||
|
"min": 0,
|
|||
|
"orientation": "horizontal",
|
|||
|
"style": "IPY_MODEL_bc79604dbd3242bab577154cca421b83",
|
|||
|
"value": 123495975
|
|||
|
}
|
|||
|
},
|
|||
|
"d2ee702c234b4b0f903e92e09cb1c6dd": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "HTMLModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_dom_classes": [],
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "HTMLModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/controls",
|
|||
|
"_view_module_version": "1.5.0",
|
|||
|
"_view_name": "HTMLView",
|
|||
|
"description": "",
|
|||
|
"description_tooltip": null,
|
|||
|
"layout": "IPY_MODEL_8f3eab81268943a4935e04e502f95604",
|
|||
|
"placeholder": "",
|
|||
|
"style": "IPY_MODEL_6e4ed763e8c843dbbdbfd5ad0570d884",
|
|||
|
"value": " 123M/123M [00:13<00:00, 12.0MB/s]"
|
|||
|
}
|
|||
|
},
|
|||
|
"8d25924e581e40a9a39a87d4d3d14221": {
|
|||
|
"model_module": "@jupyter-widgets/base",
|
|||
|
"model_name": "LayoutModel",
|
|||
|
"model_module_version": "1.2.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/base",
|
|||
|
"_model_module_version": "1.2.0",
|
|||
|
"_model_name": "LayoutModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "LayoutView",
|
|||
|
"align_content": null,
|
|||
|
"align_items": null,
|
|||
|
"align_self": null,
|
|||
|
"border": null,
|
|||
|
"bottom": null,
|
|||
|
"display": null,
|
|||
|
"flex": null,
|
|||
|
"flex_flow": null,
|
|||
|
"grid_area": null,
|
|||
|
"grid_auto_columns": null,
|
|||
|
"grid_auto_flow": null,
|
|||
|
"grid_auto_rows": null,
|
|||
|
"grid_column": null,
|
|||
|
"grid_gap": null,
|
|||
|
"grid_row": null,
|
|||
|
"grid_template_areas": null,
|
|||
|
"grid_template_columns": null,
|
|||
|
"grid_template_rows": null,
|
|||
|
"height": null,
|
|||
|
"justify_content": null,
|
|||
|
"justify_items": null,
|
|||
|
"left": null,
|
|||
|
"margin": null,
|
|||
|
"max_height": null,
|
|||
|
"max_width": null,
|
|||
|
"min_height": null,
|
|||
|
"min_width": null,
|
|||
|
"object_fit": null,
|
|||
|
"object_position": null,
|
|||
|
"order": null,
|
|||
|
"overflow": null,
|
|||
|
"overflow_x": null,
|
|||
|
"overflow_y": null,
|
|||
|
"padding": null,
|
|||
|
"right": null,
|
|||
|
"top": null,
|
|||
|
"visibility": null,
|
|||
|
"width": null
|
|||
|
}
|
|||
|
},
|
|||
|
"2e97ccd3f59447db9892c3819252c57b": {
|
|||
|
"model_module": "@jupyter-widgets/base",
|
|||
|
"model_name": "LayoutModel",
|
|||
|
"model_module_version": "1.2.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/base",
|
|||
|
"_model_module_version": "1.2.0",
|
|||
|
"_model_name": "LayoutModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "LayoutView",
|
|||
|
"align_content": null,
|
|||
|
"align_items": null,
|
|||
|
"align_self": null,
|
|||
|
"border": null,
|
|||
|
"bottom": null,
|
|||
|
"display": null,
|
|||
|
"flex": null,
|
|||
|
"flex_flow": null,
|
|||
|
"grid_area": null,
|
|||
|
"grid_auto_columns": null,
|
|||
|
"grid_auto_flow": null,
|
|||
|
"grid_auto_rows": null,
|
|||
|
"grid_column": null,
|
|||
|
"grid_gap": null,
|
|||
|
"grid_row": null,
|
|||
|
"grid_template_areas": null,
|
|||
|
"grid_template_columns": null,
|
|||
|
"grid_template_rows": null,
|
|||
|
"height": null,
|
|||
|
"justify_content": null,
|
|||
|
"justify_items": null,
|
|||
|
"left": null,
|
|||
|
"margin": null,
|
|||
|
"max_height": null,
|
|||
|
"max_width": null,
|
|||
|
"min_height": null,
|
|||
|
"min_width": null,
|
|||
|
"object_fit": null,
|
|||
|
"object_position": null,
|
|||
|
"order": null,
|
|||
|
"overflow": null,
|
|||
|
"overflow_x": null,
|
|||
|
"overflow_y": null,
|
|||
|
"padding": null,
|
|||
|
"right": null,
|
|||
|
"top": null,
|
|||
|
"visibility": null,
|
|||
|
"width": null
|
|||
|
}
|
|||
|
},
|
|||
|
"1dbd60f5ee294c65b0bb9225d81bd8af": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "DescriptionStyleModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "DescriptionStyleModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "StyleView",
|
|||
|
"description_width": ""
|
|||
|
}
|
|||
|
},
|
|||
|
"b0a9de27e4074778ab5a55a1f9d250cc": {
|
|||
|
"model_module": "@jupyter-widgets/base",
|
|||
|
"model_name": "LayoutModel",
|
|||
|
"model_module_version": "1.2.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/base",
|
|||
|
"_model_module_version": "1.2.0",
|
|||
|
"_model_name": "LayoutModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "LayoutView",
|
|||
|
"align_content": null,
|
|||
|
"align_items": null,
|
|||
|
"align_self": null,
|
|||
|
"border": null,
|
|||
|
"bottom": null,
|
|||
|
"display": null,
|
|||
|
"flex": null,
|
|||
|
"flex_flow": null,
|
|||
|
"grid_area": null,
|
|||
|
"grid_auto_columns": null,
|
|||
|
"grid_auto_flow": null,
|
|||
|
"grid_auto_rows": null,
|
|||
|
"grid_column": null,
|
|||
|
"grid_gap": null,
|
|||
|
"grid_row": null,
|
|||
|
"grid_template_areas": null,
|
|||
|
"grid_template_columns": null,
|
|||
|
"grid_template_rows": null,
|
|||
|
"height": null,
|
|||
|
"justify_content": null,
|
|||
|
"justify_items": null,
|
|||
|
"left": null,
|
|||
|
"margin": null,
|
|||
|
"max_height": null,
|
|||
|
"max_width": null,
|
|||
|
"min_height": null,
|
|||
|
"min_width": null,
|
|||
|
"object_fit": null,
|
|||
|
"object_position": null,
|
|||
|
"order": null,
|
|||
|
"overflow": null,
|
|||
|
"overflow_x": null,
|
|||
|
"overflow_y": null,
|
|||
|
"padding": null,
|
|||
|
"right": null,
|
|||
|
"top": null,
|
|||
|
"visibility": null,
|
|||
|
"width": null
|
|||
|
}
|
|||
|
},
|
|||
|
"bc79604dbd3242bab577154cca421b83": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "ProgressStyleModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "ProgressStyleModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "StyleView",
|
|||
|
"bar_color": null,
|
|||
|
"description_width": ""
|
|||
|
}
|
|||
|
},
|
|||
|
"8f3eab81268943a4935e04e502f95604": {
|
|||
|
"model_module": "@jupyter-widgets/base",
|
|||
|
"model_name": "LayoutModel",
|
|||
|
"model_module_version": "1.2.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/base",
|
|||
|
"_model_module_version": "1.2.0",
|
|||
|
"_model_name": "LayoutModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "LayoutView",
|
|||
|
"align_content": null,
|
|||
|
"align_items": null,
|
|||
|
"align_self": null,
|
|||
|
"border": null,
|
|||
|
"bottom": null,
|
|||
|
"display": null,
|
|||
|
"flex": null,
|
|||
|
"flex_flow": null,
|
|||
|
"grid_area": null,
|
|||
|
"grid_auto_columns": null,
|
|||
|
"grid_auto_flow": null,
|
|||
|
"grid_auto_rows": null,
|
|||
|
"grid_column": null,
|
|||
|
"grid_gap": null,
|
|||
|
"grid_row": null,
|
|||
|
"grid_template_areas": null,
|
|||
|
"grid_template_columns": null,
|
|||
|
"grid_template_rows": null,
|
|||
|
"height": null,
|
|||
|
"justify_content": null,
|
|||
|
"justify_items": null,
|
|||
|
"left": null,
|
|||
|
"margin": null,
|
|||
|
"max_height": null,
|
|||
|
"max_width": null,
|
|||
|
"min_height": null,
|
|||
|
"min_width": null,
|
|||
|
"object_fit": null,
|
|||
|
"object_position": null,
|
|||
|
"order": null,
|
|||
|
"overflow": null,
|
|||
|
"overflow_x": null,
|
|||
|
"overflow_y": null,
|
|||
|
"padding": null,
|
|||
|
"right": null,
|
|||
|
"top": null,
|
|||
|
"visibility": null,
|
|||
|
"width": null
|
|||
|
}
|
|||
|
},
|
|||
|
"6e4ed763e8c843dbbdbfd5ad0570d884": {
|
|||
|
"model_module": "@jupyter-widgets/controls",
|
|||
|
"model_name": "DescriptionStyleModel",
|
|||
|
"model_module_version": "1.5.0",
|
|||
|
"state": {
|
|||
|
"_model_module": "@jupyter-widgets/controls",
|
|||
|
"_model_module_version": "1.5.0",
|
|||
|
"_model_name": "DescriptionStyleModel",
|
|||
|
"_view_count": null,
|
|||
|
"_view_module": "@jupyter-widgets/base",
|
|||
|
"_view_module_version": "1.2.0",
|
|||
|
"_view_name": "StyleView",
|
|||
|
"description_width": ""
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
},
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"id": "view-in-github",
|
|||
|
"colab_type": "text"
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/GPT2_GPTQ_4bit.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"source": [
|
|||
|
"# Create a 4-bit GPT-2 model using AutoGPTQ\n",
|
|||
|
"> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
|
|||
|
"\n",
|
|||
|
"❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
|
|||
|
"\n",
|
|||
|
"## Quantize model"
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"id": "yezrHxYvg_wR"
|
|||
|
}
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {
|
|||
|
"id": "BhufqqQAaz6e"
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"!BUILD_CUDA_EXT=0 pip install -q auto-gptq transformers huggingface_hub"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"import torch\n",
|
|||
|
"from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n",
|
|||
|
"from transformers import AutoTokenizer\n",
|
|||
|
"\n",
|
|||
|
"examples = [\n",
|
|||
|
" \"In the wake of the Federal Reserve's recent decision, market analysts predict a shift in the stock market dynamics, urging investors to reassess their portfolios.\",\n",
|
|||
|
" \"As quantum computing continues its rapid development, it promises to revolutionize fields such as cryptography and machine learning, posing a significant leap from classical computing.\",\n",
|
|||
|
" \"The recent elections have brought a seismic shift in the political landscape, with the newly elected government pledging to focus on healthcare and education reform.\",\n",
|
|||
|
" \"The Renaissance, a significant period in European history, was marked by a cultural rebirth and dramatic advances in art, science, and philosophical thought.\",\n",
|
|||
|
" \"With the rise of machine learning and AI, Python has emerged as a dominant language in programming due to its simplicity and powerful libraries such as TensorFlow and PyTorch.\",\n",
|
|||
|
" \"Jane Austen's 'Pride and Prejudice' continues to captivate readers with its intricate exploration of societal norms and the complexities of human relationships during the Regency era.\",\n",
|
|||
|
" \"Following an intense season, the Golden State Warriors have emerged as the NBA champions, underscoring their remarkable team play and strategic finesse.\",\n",
|
|||
|
" \"The latest Marvel film, 'Avengers: Infinity Gauntlet', has shattered box office records worldwide, reinforcing the global appeal of superhero narratives.\",\n",
|
|||
|
" \"The increasing instances of wildfires and erratic weather patterns underscore the urgent need to address climate change and implement sustainable environmental practices.\",\n",
|
|||
|
" \"In recent news, a breakthrough in the peace negotiations between the two countries has sparked hope for an end to the decade-long conflict.\",\n",
|
|||
|
"]\n",
|
|||
|
"\n",
|
|||
|
"# Define base model and output directory\n",
|
|||
|
"model_id = \"gpt2\"\n",
|
|||
|
"out_dir = model_id + \"-GPTQ\"\n",
|
|||
|
"\n",
|
|||
|
"# Load quantize config, model and tokenizer\n",
|
|||
|
"quantize_config = BaseQuantizeConfig(bits=4, group_size=128)\n",
|
|||
|
"model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)\n",
|
|||
|
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
|
|||
|
"\n",
|
|||
|
"# Determine device\n",
|
|||
|
"device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
|
|||
|
"\n",
|
|||
|
"# Tokenize examples\n",
|
|||
|
"examples_ids = [tokenizer(text, truncation=True) for text in examples]\n",
|
|||
|
"\n",
|
|||
|
"# Quantize\n",
|
|||
|
"model.quantize(\n",
|
|||
|
" examples_ids,\n",
|
|||
|
" use_triton=True,\n",
|
|||
|
" autotune_warmup_after_quantized=True,\n",
|
|||
|
" batch_size=1,\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"# Save model and tokenizer\n",
|
|||
|
"model.save_quantized(out_dir, use_safetensors=False)\n",
|
|||
|
"model.save_quantized(out_dir, use_safetensors=True)\n",
|
|||
|
"tokenizer.save_pretrained(out_dir)"
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"colab": {
|
|||
|
"base_uri": "https://localhost:8080/"
|
|||
|
},
|
|||
|
"id": "ETsG2iYrXaUg",
|
|||
|
"outputId": "322feb57-c4bf-48aa-d29b-b71738e3edf1"
|
|||
|
},
|
|||
|
"execution_count": null,
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"output_type": "stream",
|
|||
|
"name": "stderr",
|
|||
|
"text": [
|
|||
|
"WARNING:auto_gptq.modeling._utils:using autotune_warmup will move model to GPU, make sure you have enough VRAM to load the whole model.\n",
|
|||
|
"100%|██████████| 11/11 [03:16<00:00, 17.87s/it]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"output_type": "execute_result",
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"('gpt2-GPTQ/tokenizer_config.json',\n",
|
|||
|
" 'gpt2-GPTQ/special_tokens_map.json',\n",
|
|||
|
" 'gpt2-GPTQ/vocab.json',\n",
|
|||
|
" 'gpt2-GPTQ/merges.txt',\n",
|
|||
|
" 'gpt2-GPTQ/added_tokens.json',\n",
|
|||
|
" 'gpt2-GPTQ/tokenizer.json')"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"execution_count": 2
|
|||
|
}
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"# Reload model and tokenizer\n",
|
|||
|
"model = AutoGPTQForCausalLM.from_quantized(\n",
|
|||
|
" out_dir,\n",
|
|||
|
" use_triton=True,\n",
|
|||
|
" device=device,\n",
|
|||
|
" use_safetensors=True,\n",
|
|||
|
")\n",
|
|||
|
"tokenizer = AutoTokenizer.from_pretrained(out_dir)"
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"colab": {
|
|||
|
"base_uri": "https://localhost:8080/"
|
|||
|
},
|
|||
|
"id": "nktu1FsdZ9sd",
|
|||
|
"outputId": "8f0aaf4e-5fc5-42d1-eb33-220658edb8d0"
|
|||
|
},
|
|||
|
"execution_count": null,
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"output_type": "stream",
|
|||
|
"name": "stderr",
|
|||
|
"text": [
|
|||
|
"WARNING:accelerate.utils.modeling:The safetensors archive passed at gpt2-GPTQ/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.\n",
|
|||
|
"WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused attention module yet, will skip inject fused attention.\n",
|
|||
|
"WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused mlp module yet, will skip inject fused mlp.\n"
|
|||
|
]
|
|||
|
}
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"def generate_text(input_text):\n",
|
|||
|
" input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)\n",
|
|||
|
" attention_mask = torch.ones(input_ids.shape, dtype=torch.long).to(device)\n",
|
|||
|
"\n",
|
|||
|
" output = model.to(device).generate(\n",
|
|||
|
" inputs=input_ids,\n",
|
|||
|
" attention_mask=attention_mask,\n",
|
|||
|
" do_sample=True,\n",
|
|||
|
" max_length=50,\n",
|
|||
|
" top_k=50,\n",
|
|||
|
" pad_token_id=tokenizer.eos_token_id\n",
|
|||
|
" )\n",
|
|||
|
" output = tokenizer.decode(output[0], skip_special_tokens=True)\n",
|
|||
|
"\n",
|
|||
|
" return output\n",
|
|||
|
"\n",
|
|||
|
"# Generate text\n",
|
|||
|
"input_text = \"I have a dream\"\n",
|
|||
|
"generate_text(input_text)"
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"colab": {
|
|||
|
"base_uri": "https://localhost:8080/",
|
|||
|
"height": 36
|
|||
|
},
|
|||
|
"id": "KSIHpQ4XZ_7R",
|
|||
|
"outputId": "e6f5c8a5-e3bf-4e52-d239-6b6f190e5475"
|
|||
|
},
|
|||
|
"execution_count": null,
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"output_type": "execute_result",
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'I have a dream,,,,,,,,, at,--,,,,,,,,,,,,,,---,,,, ( (,//,,,,---'"
|
|||
|
],
|
|||
|
"application/vnd.google.colaboratory.intrinsic+json": {
|
|||
|
"type": "string"
|
|||
|
}
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"execution_count": 24
|
|||
|
}
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"source": [
|
|||
|
"## Save and load model using Hugging Face Hub"
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"id": "gV8hqGdYhLQH"
|
|||
|
}
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"from huggingface_hub import notebook_login\n",
|
|||
|
"from huggingface_hub import HfApi\n",
|
|||
|
"import locale\n",
|
|||
|
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
|
|||
|
"\n",
|
|||
|
"REPO_ID = \"insert your repo/model ID\" # example: \"mlabonne/gpt2-GPTQ-4bit\"\n",
|
|||
|
"\n",
|
|||
|
"notebook_login()\n",
|
|||
|
"api = HfApi()\n",
|
|||
|
"!git config --global credential.helper store\n",
|
|||
|
"\n",
|
|||
|
"api.upload_folder(\n",
|
|||
|
" folder_path=out_dir,\n",
|
|||
|
" repo_id=REPO_ID,\n",
|
|||
|
" repo_type=\"model\",\n",
|
|||
|
")"
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"colab": {
|
|||
|
"base_uri": "https://localhost:8080/",
|
|||
|
"height": 100,
|
|||
|
"referenced_widgets": [
|
|||
|
"9fdc7832238743f384543674f57a135d",
|
|||
|
"e856dd2f68714377b76493d5f428043d",
|
|||
|
"b45fac85d2034fcda9427c787124788d",
|
|||
|
"8ae387bcc1a3478eb5da50db9449e7a0",
|
|||
|
"653f5250de60427cb870fe823937e6af",
|
|||
|
"06846d39e16d4c66bc4cc177666c959b",
|
|||
|
"0fcfa8e7621647abae79076a7aec2972",
|
|||
|
"811878ae336b4d0c9ec8237fc37bb999",
|
|||
|
"0146ec198b8640b2aa30f8466e40597d",
|
|||
|
"769dd69f4f6f4af08d1b2d15522940ec",
|
|||
|
"f32db67266234c34aa77317776cbdc48",
|
|||
|
"66a41698432f46de9eb325447917a389",
|
|||
|
"ca0783bbd2e1428d92ee78db13d0d64d",
|
|||
|
"ebbb561fa4cb42e1be414b6462949fd0",
|
|||
|
"d2ee702c234b4b0f903e92e09cb1c6dd",
|
|||
|
"8d25924e581e40a9a39a87d4d3d14221",
|
|||
|
"2e97ccd3f59447db9892c3819252c57b",
|
|||
|
"1dbd60f5ee294c65b0bb9225d81bd8af",
|
|||
|
"b0a9de27e4074778ab5a55a1f9d250cc",
|
|||
|
"bc79604dbd3242bab577154cca421b83",
|
|||
|
"8f3eab81268943a4935e04e502f95604",
|
|||
|
"6e4ed763e8c843dbbdbfd5ad0570d884"
|
|||
|
]
|
|||
|
},
|
|||
|
"id": "OKTxY6jQaDMv",
|
|||
|
"outputId": "f786a135-ca33-486f-88b8-9795fdb8e713"
|
|||
|
},
|
|||
|
"execution_count": null,
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"output_type": "display_data",
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Upload 1 LFS files: 0%| | 0/1 [00:00<?, ?it/s]"
|
|||
|
],
|
|||
|
"application/vnd.jupyter.widget-view+json": {
|
|||
|
"version_major": 2,
|
|||
|
"version_minor": 0,
|
|||
|
"model_id": "9fdc7832238743f384543674f57a135d"
|
|||
|
}
|
|||
|
},
|
|||
|
"metadata": {}
|
|||
|
},
|
|||
|
{
|
|||
|
"output_type": "display_data",
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"gptq_model-4bit-128g.bin: 0%| | 0.00/123M [00:00<?, ?B/s]"
|
|||
|
],
|
|||
|
"application/vnd.jupyter.widget-view+json": {
|
|||
|
"version_major": 2,
|
|||
|
"version_minor": 0,
|
|||
|
"model_id": "66a41698432f46de9eb325447917a389"
|
|||
|
}
|
|||
|
},
|
|||
|
"metadata": {}
|
|||
|
},
|
|||
|
{
|
|||
|
"output_type": "execute_result",
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'https://huggingface.co/mlabonne/gpt2-GPTQ-4bit/tree/main/'"
|
|||
|
],
|
|||
|
"application/vnd.google.colaboratory.intrinsic+json": {
|
|||
|
"type": "string"
|
|||
|
}
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"execution_count": 12
|
|||
|
}
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n",
|
|||
|
"from transformers import AutoTokenizer\n",
|
|||
|
"\n",
|
|||
|
"model_id = REPO_ID\n",
|
|||
|
"quantize_config = BaseQuantizeConfig(bits=4, group_size=128)\n",
|
|||
|
"model = AutoGPTQForCausalLM.from_quantized(model_id, quantize_config=quantize_config)  # from_pretrained expects unquantized weights and discards the GPTQ tensors\n",
|
|||
|
"tokenizer = AutoTokenizer.from_pretrained(model_id)"
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"colab": {
|
|||
|
"base_uri": "https://localhost:8080/",
|
|||
|
"height": 146
|
|||
|
},
|
|||
|
"id": "5EUZJpdEfxNz",
|
|||
|
"outputId": "7b7c2b23-fccd-47c1-e40e-1b544550da99"
|
|||
|
},
|
|||
|
"execution_count": null,
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"output_type": "stream",
|
|||
|
"name": "stderr",
|
|||
|
"text": [
|
|||
|
"Some weights of the model checkpoint at mlabonne/gpt2-GPTQ-4bit were not used when initializing GPT2LMHeadModel: ['transformer.h.11.attn.c_proj.qweight', 'transformer.h.10.attn.c_proj.g_idx', 'transformer.h.4.attn.c_proj.g_idx', 'transformer.h.0.mlp.c_proj.qweight', 'transformer.h.3.attn.c_proj.scales', 'transformer.h.9.attn.c_proj.g_idx', 'transformer.h.0.mlp.c_fc.g_idx', 'transformer.h.9.mlp.c_fc.qweight', 'transformer.h.4.attn.c_proj.scales', 'transformer.h.9.mlp.c_fc.g_idx', 'transformer.h.10.attn.c_attn.qweight', 'transformer.h.4.mlp.c_proj.scales', 'transformer.h.9.mlp.c_proj.qzeros', 'transformer.h.9.attn.c_attn.scales', 'transformer.h.0.attn.c_proj.scales', 'transformer.h.4.mlp.c_fc.g_idx', 'transformer.h.9.mlp.c_fc.qzeros', 'transformer.h.2.mlp.c_proj.qweight', 'transformer.h.9.mlp.c_proj.qweight', 'transformer.h.3.mlp.c_fc.scales', 'transformer.h.8.attn.c_attn.qzeros', 'transformer.h.1.attn.c_attn.scales', 'transformer.h.1.attn.c_attn.qweight', 'transformer.h.3.mlp.c_proj.qzeros', 'transformer.h.0.attn.c_attn.qweight', 'transformer.h.8.attn.c_proj.qzeros', 'transformer.h.2.attn.c_attn.scales', 'transformer.h.7.mlp.c_fc.scales', 'transformer.h.5.mlp.c_fc.qzeros', 'transformer.h.4.mlp.c_fc.scales', 'transformer.h.6.attn.c_attn.qzeros', 'transformer.h.9.attn.c_attn.qzeros', 'transformer.h.1.mlp.c_fc.g_idx', 'transformer.h.8.attn.c_proj.scales', 'transformer.h.10.attn.c_proj.scales', 'transformer.h.2.mlp.c_proj.qzeros', 'transformer.h.6.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.scales', 'transformer.h.0.mlp.c_fc.qzeros', 'transformer.h.7.mlp.c_proj.g_idx', 'transformer.h.1.attn.c_proj.qweight', 'transformer.h.2.attn.c_proj.qzeros', 'transformer.h.0.attn.c_proj.g_idx', 'transformer.h.7.attn.c_attn.qweight', 'transformer.h.1.attn.c_attn.qzeros', 'transformer.h.6.attn.c_proj.qweight', 'transformer.h.3.mlp.c_fc.g_idx', 'transformer.h.0.attn.c_attn.scales', 'transformer.h.7.attn.c_proj.qzeros', 'transformer.h.5.attn.c_proj.qzeros', 
'transformer.h.1.mlp.c_fc.qweight', 'transformer.h.2.attn.c_proj.scales', 'transformer.h.4.attn.c_proj.qzeros', 'transformer.h.1.mlp.c_proj.g_idx', 'transformer.h.7.mlp.c_fc.qzeros', 'transformer.h.7.attn.c_attn.g_idx', 'transformer.h.1.attn.c_proj.scales', 'transformer.h.1.attn.c_attn.g_idx', 'transformer.h.8.attn.c_proj.qweight', 'transformer.h.8.attn.c_attn.scales', 'transformer.h.9.attn.c_proj.qweight', 'transformer.h.4.attn.c_attn.scales', 'transformer.h.5.attn.c_proj.scales', 'transformer.h.11.mlp.c_fc.scales', 'transformer.h.3.attn.c_attn.g_idx', 'transformer.h.11.attn.c_attn.qweight', 'transformer.h.5.attn.c_attn.g_idx', 'transformer.h.9.attn.c_attn.g_idx', 'transformer.h.3.attn.c_proj.qweight', 'transformer.h.4.attn.c_attn.qzeros', 'transformer.h.9.attn.c_proj.qzeros', 'transformer.h.9.mlp.c_proj.g_idx', 'transformer.h.0.attn.c_proj.qzeros', 'transformer.h.11.attn.c_attn.qzeros', 'transformer.h.2.mlp.c_fc.g_idx', 'transformer.h.11.attn.c_attn.g_idx', 'transformer.h.2.mlp.c_fc.qzeros', 'transformer.h.6.attn.c_attn.qweight', 'transformer.h.6.mlp.c_fc.g_idx', 'transformer.h.10.mlp.c_proj.qzeros', 'transformer.h.4.mlp.c_proj.qweight', 'transformer.h.5.attn.c_proj.qweight', 'transformer.h.8.mlp.c_proj.g_idx', 'transformer.h.10.attn.c_proj.qweight', 'transformer.h.3.mlp.c_proj.g_idx', 'transformer.h.7.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.g_idx', 'transformer.h.11.mlp.c_fc.qweight', 'transformer.h.7.attn.c_attn.scales', 'transformer.h.10.mlp.c_proj.qweight', 'transformer.h.10.mlp.c_fc.g_idx', 'transformer.h.3.mlp.c_fc.qzeros', 'transformer.h.5.mlp.c_proj.g_idx', 'transformer.h.1.mlp.c_proj.qweight', 'transformer.h.11.mlp.c_proj.g_idx', 'transformer.h.10.mlp.c_fc.qweight', 'transformer.h.3.attn.c_attn.qzeros', 'transformer.h.11.mlp.c_fc.g_idx', 'transformer.h.8.mlp.c_fc.qweight', 'transformer.h.3.attn.c_proj.qzeros', 'transformer.h.0.attn.c_attn.qzeros', 'transformer.h.10.attn.c_attn.scales', 'transformer.h.10.mlp.c_fc.qzeros', 
'transformer.h.5.mlp.c_fc.g_idx', 'transformer.h.8.mlp.c_proj.qweight', 'transformer.h.10.attn.c_attn.g_idx', 'transformer.h.5.mlp.c_fc.sca
|
|||
|
"- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
|||
|
"- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
|||
|
"Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at mlabonne/gpt2-GPTQ-4bit and are newly initialized: ['transformer.h.0.mlp.c_fc.weight', 'transformer.h.3.mlp.c_proj.weight', 'transformer.h.9.mlp.c_proj.weight', 'transformer.h.4.attn.c_attn.weight', 'transformer.h.11.mlp.c_proj.weight', 'transformer.h.3.attn.c_attn.weight', 'transformer.h.7.attn.c_attn.weight', 'transformer.h.11.attn.c_attn.weight', 'transformer.h.4.attn.c_proj.weight', 'transformer.h.5.attn.c_attn.weight', 'transformer.h.7.mlp.c_proj.weight', 'transformer.h.11.mlp.c_fc.weight', 'transformer.h.7.mlp.c_fc.weight', 'transformer.h.5.mlp.c_proj.weight', 'transformer.h.9.attn.c_proj.weight', 'transformer.h.7.attn.c_proj.weight', 'transformer.h.6.mlp.c_fc.weight', 'transformer.h.4.mlp.c_fc.weight', 'transformer.h.8.mlp.c_proj.weight', 'transformer.h.2.mlp.c_proj.weight', 'transformer.h.6.attn.c_attn.weight', 'transformer.h.8.mlp.c_fc.weight', 'transformer.h.0.mlp.c_proj.weight', 'transformer.h.2.attn.c_attn.weight', 'transformer.h.6.attn.c_proj.weight', 'transformer.h.2.attn.c_proj.weight', 'transformer.h.8.attn.c_attn.weight', 'transformer.h.5.attn.c_proj.weight', 'transformer.h.3.mlp.c_fc.weight', 'transformer.h.8.attn.c_proj.weight', 'transformer.h.2.mlp.c_fc.weight', 'transformer.h.5.mlp.c_fc.weight', 'transformer.h.11.attn.c_proj.weight', 'transformer.h.3.attn.c_proj.weight', 'transformer.h.1.attn.c_proj.weight', 'transformer.h.10.attn.c_attn.weight', 'transformer.h.4.mlp.c_proj.weight', 'transformer.h.10.attn.c_proj.weight', 'transformer.h.10.mlp.c_fc.weight', 'transformer.h.9.mlp.c_fc.weight', 'transformer.h.6.mlp.c_proj.weight', 'transformer.h.9.attn.c_attn.weight', 'transformer.h.1.attn.c_attn.weight', 'transformer.h.1.mlp.c_fc.weight', 'transformer.h.1.mlp.c_proj.weight', 'transformer.h.0.attn.c_attn.weight', 'transformer.h.10.mlp.c_proj.weight', 'transformer.h.0.attn.c_proj.weight']\n",
|
|||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"output_type": "execute_result",
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'I have a dream,,,,, and,,,, and,,,,,,,,,,).,,,,,,,,,,,,,,,,,,,,,,,'"
|
|||
|
],
|
|||
|
"application/vnd.google.colaboratory.intrinsic+json": {
|
|||
|
"type": "string"
|
|||
|
}
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"execution_count": 25
|
|||
|
}
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"input_text = \"I have a dream\"\n",
|
|||
|
"generate_text(input_text)"
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"id": "b_HOEjhUg6pG"
|
|||
|
},
|
|||
|
"execution_count": null,
|
|||
|
"outputs": []
|
|||
|
}
|
|||
|
]
|
|||
|
}
|