llm-course/GPT2_GPTQ_4bit.ipynb
2023-06-23 12:35:50 +01:00

1072 lines
47 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyNC/p+CjyumuththfkLa9LG",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"9fdc7832238743f384543674f57a135d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_e856dd2f68714377b76493d5f428043d",
"IPY_MODEL_b45fac85d2034fcda9427c787124788d",
"IPY_MODEL_8ae387bcc1a3478eb5da50db9449e7a0"
],
"layout": "IPY_MODEL_653f5250de60427cb870fe823937e6af"
}
},
"e856dd2f68714377b76493d5f428043d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_06846d39e16d4c66bc4cc177666c959b",
"placeholder": "",
"style": "IPY_MODEL_0fcfa8e7621647abae79076a7aec2972",
"value": "Upload 1 LFS files: 100%"
}
},
"b45fac85d2034fcda9427c787124788d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_811878ae336b4d0c9ec8237fc37bb999",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_0146ec198b8640b2aa30f8466e40597d",
"value": 1
}
},
"8ae387bcc1a3478eb5da50db9449e7a0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_769dd69f4f6f4af08d1b2d15522940ec",
"placeholder": "",
"style": "IPY_MODEL_f32db67266234c34aa77317776cbdc48",
"value": " 1/1 [00:13<00:00, 13.70s/it]"
}
},
"653f5250de60427cb870fe823937e6af": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"06846d39e16d4c66bc4cc177666c959b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0fcfa8e7621647abae79076a7aec2972": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"811878ae336b4d0c9ec8237fc37bb999": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0146ec198b8640b2aa30f8466e40597d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"769dd69f4f6f4af08d1b2d15522940ec": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f32db67266234c34aa77317776cbdc48": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"66a41698432f46de9eb325447917a389": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_ca0783bbd2e1428d92ee78db13d0d64d",
"IPY_MODEL_ebbb561fa4cb42e1be414b6462949fd0",
"IPY_MODEL_d2ee702c234b4b0f903e92e09cb1c6dd"
],
"layout": "IPY_MODEL_8d25924e581e40a9a39a87d4d3d14221"
}
},
"ca0783bbd2e1428d92ee78db13d0d64d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_2e97ccd3f59447db9892c3819252c57b",
"placeholder": "",
"style": "IPY_MODEL_1dbd60f5ee294c65b0bb9225d81bd8af",
"value": "gptq_model-4bit-128g.bin: 100%"
}
},
"ebbb561fa4cb42e1be414b6462949fd0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b0a9de27e4074778ab5a55a1f9d250cc",
"max": 123495975,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_bc79604dbd3242bab577154cca421b83",
"value": 123495975
}
},
"d2ee702c234b4b0f903e92e09cb1c6dd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8f3eab81268943a4935e04e502f95604",
"placeholder": "",
"style": "IPY_MODEL_6e4ed763e8c843dbbdbfd5ad0570d884",
"value": " 123M/123M [00:13<00:00, 12.0MB/s]"
}
},
"8d25924e581e40a9a39a87d4d3d14221": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2e97ccd3f59447db9892c3819252c57b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1dbd60f5ee294c65b0bb9225d81bd8af": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b0a9de27e4074778ab5a55a1f9d250cc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"bc79604dbd3242bab577154cca421b83": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"8f3eab81268943a4935e04e502f95604": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6e4ed763e8c843dbbdbfd5ad0570d884": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/GPT2_GPTQ_4bit.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Create a 4-bit GPT-2 model using AutoGPTQ\n",
"> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
"\n",
"❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
"\n",
"## Quantize model"
],
"metadata": {
"id": "yezrHxYvg_wR"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BhufqqQAaz6e"
},
"outputs": [],
"source": [
"!BUILD_CUDA_EXT=0 pip install -q auto-gptq transformers huggingface_hub"
]
},
{
"cell_type": "code",
"source": [
"import torch\n",
"from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n",
"from transformers import AutoTokenizer\n",
"\n",
"examples = [\n",
" \"In the wake of the Federal Reserve's recent decision, market analysts predict a shift in the stock market dynamics, urging investors to reassess their portfolios.\",\n",
" \"As quantum computing continues its rapid development, it promises to revolutionize fields such as cryptography and machine learning, posing a significant leap from classical computing.\",\n",
" \"The recent elections have brought a seismic shift in the political landscape, with the newly elected government pledging to focus on healthcare and education reform.\",\n",
" \"The Renaissance, a significant period in European history, was marked by a cultural rebirth and dramatic advances in art, science, and philosophical thought.\",\n",
" \"With the rise of machine learning and AI, Python has emerged as a dominant language in programming due to its simplicity and powerful libraries such as TensorFlow and PyTorch.\",\n",
" \"Jane Austen's 'Pride and Prejudice' continues to captivate readers with its intricate exploration of societal norms and the complexities of human relationships during the Regency era.\",\n",
" \"Following an intense season, the Golden State Warriors have emerged as the NBA champions, underscoring their remarkable team play and strategic finesse.\",\n",
" \"The latest Marvel film, 'Avengers: Infinity Gauntlet', has shattered box office records worldwide, reinforcing the global appeal of superhero narratives.\",\n",
" \"The increasing instances of wildfires and erratic weather patterns underscore the urgent need to address climate change and implement sustainable environmental practices.\",\n",
" \"In recent news, a breakthrough in the peace negotiations between the two countries has sparked hope for an end to the decade-long conflict.\",\n",
"]\n",
"\n",
"# Define base model and output directory\n",
"model_id = \"gpt2\"\n",
"out_dir = model_id + \"-GPTQ\"\n",
"\n",
"# Load quantize config, model and tokenizer\n",
"quantize_config = BaseQuantizeConfig(bits=4, group_size=128)\n",
"model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"\n",
"# Determine device\n",
"device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
"\n",
"# Tokenize examples\n",
"examples_ids = [tokenizer(text, truncation=True) for text in examples]\n",
"\n",
"# Quantize\n",
"model.quantize(\n",
" examples_ids,\n",
" use_triton=True,\n",
" autotune_warmup_after_quantized=True,\n",
" batch_size=1,\n",
")\n",
"\n",
"# Save model and tokenizer\n",
"model.save_quantized(out_dir, use_safetensors=False)\n",
"model.save_quantized(out_dir, use_safetensors=True)\n",
"tokenizer.save_pretrained(out_dir)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ETsG2iYrXaUg",
"outputId": "322feb57-c4bf-48aa-d29b-b71738e3edf1"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:auto_gptq.modeling._utils:using autotune_warmup will move model to GPU, make sure you have enough VRAM to load the whole model.\n",
"100%|██████████| 11/11 [03:16<00:00, 17.87s/it]\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"('gpt2-GPTQ/tokenizer_config.json',\n",
" 'gpt2-GPTQ/special_tokens_map.json',\n",
" 'gpt2-GPTQ/vocab.json',\n",
" 'gpt2-GPTQ/merges.txt',\n",
" 'gpt2-GPTQ/added_tokens.json',\n",
" 'gpt2-GPTQ/tokenizer.json')"
]
},
"metadata": {},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"source": [
"# Reload model and tokenizer\n",
"model = AutoGPTQForCausalLM.from_quantized(\n",
" out_dir,\n",
" use_triton=True,\n",
" device=device,\n",
" use_safetensors=True,\n",
")\n",
"tokenizer = AutoTokenizer.from_pretrained(out_dir)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nktu1FsdZ9sd",
"outputId": "8f0aaf4e-5fc5-42d1-eb33-220658edb8d0"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:accelerate.utils.modeling:The safetensors archive passed at gpt2-GPTQ/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.\n",
"WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused attention module yet, will skip inject fused attention.\n",
"WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused mlp module yet, will skip inject fused mlp.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"def generate_text(input_text):\n",
" input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)\n",
" attention_mask = torch.ones(input_ids.shape, dtype=torch.long).to(device)\n",
"\n",
" output = model.to(device).generate(\n",
" inputs=input_ids,\n",
" attention_mask=attention_mask,\n",
" do_sample=True,\n",
" max_length=50,\n",
" top_k=50,\n",
" pad_token_id=tokenizer.eos_token_id\n",
" )\n",
" output = tokenizer.decode(output[0], skip_special_tokens=True)\n",
"\n",
" return output\n",
"\n",
"# Generate text\n",
"input_text = \"I have a dream\"\n",
"generate_text(input_text)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"id": "KSIHpQ4XZ_7R",
"outputId": "e6f5c8a5-e3bf-4e52-d239-6b6f190e5475"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'I have a dream,,,,,,,,, at,--,,,,,,,,,,,,,,---,,,, ( (,//,,,,---'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "markdown",
"source": [
"## Save and load model using Hugging Face Hub"
],
"metadata": {
"id": "gV8hqGdYhLQH"
}
},
{
"cell_type": "code",
"source": [
"from huggingface_hub import notebook_login\n",
"from huggingface_hub import HfApi\n",
"import locale\n",
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
"\n",
"REPO_ID = \"insert your repo/model ID\" # example: \"mlabonne/gpt2-GPTQ-4bit\"\n",
"\n",
"notebook_login()\n",
"api = HfApi()\n",
"!git config --global credential.helper store\n",
"\n",
"api.upload_folder(\n",
" folder_path=out_dir,\n",
" repo_id=REPO_ID,\n",
" repo_type=\"model\",\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 100,
"referenced_widgets": [
"9fdc7832238743f384543674f57a135d",
"e856dd2f68714377b76493d5f428043d",
"b45fac85d2034fcda9427c787124788d",
"8ae387bcc1a3478eb5da50db9449e7a0",
"653f5250de60427cb870fe823937e6af",
"06846d39e16d4c66bc4cc177666c959b",
"0fcfa8e7621647abae79076a7aec2972",
"811878ae336b4d0c9ec8237fc37bb999",
"0146ec198b8640b2aa30f8466e40597d",
"769dd69f4f6f4af08d1b2d15522940ec",
"f32db67266234c34aa77317776cbdc48",
"66a41698432f46de9eb325447917a389",
"ca0783bbd2e1428d92ee78db13d0d64d",
"ebbb561fa4cb42e1be414b6462949fd0",
"d2ee702c234b4b0f903e92e09cb1c6dd",
"8d25924e581e40a9a39a87d4d3d14221",
"2e97ccd3f59447db9892c3819252c57b",
"1dbd60f5ee294c65b0bb9225d81bd8af",
"b0a9de27e4074778ab5a55a1f9d250cc",
"bc79604dbd3242bab577154cca421b83",
"8f3eab81268943a4935e04e502f95604",
"6e4ed763e8c843dbbdbfd5ad0570d884"
]
},
"id": "OKTxY6jQaDMv",
"outputId": "f786a135-ca33-486f-88b8-9795fdb8e713"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload 1 LFS files: 0%| | 0/1 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9fdc7832238743f384543674f57a135d"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"gptq_model-4bit-128g.bin: 0%| | 0.00/123M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "66a41698432f46de9eb325447917a389"
}
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'https://huggingface.co/mlabonne/gpt2-GPTQ-4bit/tree/main/'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"source": [
"from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n",
"from transformers import AutoTokenizer\n",
"\n",
"model_id = REPO_ID\n",
"quantize_config = BaseQuantizeConfig(bits=4, group_size=128)\n",
"model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 146
},
"id": "5EUZJpdEfxNz",
"outputId": "7b7c2b23-fccd-47c1-e40e-1b544550da99"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Some weights of the model checkpoint at mlabonne/gpt2-GPTQ-4bit were not used when initializing GPT2LMHeadModel: ['transformer.h.11.attn.c_proj.qweight', 'transformer.h.10.attn.c_proj.g_idx', 'transformer.h.4.attn.c_proj.g_idx', 'transformer.h.0.mlp.c_proj.qweight', 'transformer.h.3.attn.c_proj.scales', 'transformer.h.9.attn.c_proj.g_idx', 'transformer.h.0.mlp.c_fc.g_idx', 'transformer.h.9.mlp.c_fc.qweight', 'transformer.h.4.attn.c_proj.scales', 'transformer.h.9.mlp.c_fc.g_idx', 'transformer.h.10.attn.c_attn.qweight', 'transformer.h.4.mlp.c_proj.scales', 'transformer.h.9.mlp.c_proj.qzeros', 'transformer.h.9.attn.c_attn.scales', 'transformer.h.0.attn.c_proj.scales', 'transformer.h.4.mlp.c_fc.g_idx', 'transformer.h.9.mlp.c_fc.qzeros', 'transformer.h.2.mlp.c_proj.qweight', 'transformer.h.9.mlp.c_proj.qweight', 'transformer.h.3.mlp.c_fc.scales', 'transformer.h.8.attn.c_attn.qzeros', 'transformer.h.1.attn.c_attn.scales', 'transformer.h.1.attn.c_attn.qweight', 'transformer.h.3.mlp.c_proj.qzeros', 'transformer.h.0.attn.c_attn.qweight', 'transformer.h.8.attn.c_proj.qzeros', 'transformer.h.2.attn.c_attn.scales', 'transformer.h.7.mlp.c_fc.scales', 'transformer.h.5.mlp.c_fc.qzeros', 'transformer.h.4.mlp.c_fc.scales', 'transformer.h.6.attn.c_attn.qzeros', 'transformer.h.9.attn.c_attn.qzeros', 'transformer.h.1.mlp.c_fc.g_idx', 'transformer.h.8.attn.c_proj.scales', 'transformer.h.10.attn.c_proj.scales', 'transformer.h.2.mlp.c_proj.qzeros', 'transformer.h.6.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.scales', 'transformer.h.0.mlp.c_fc.qzeros', 'transformer.h.7.mlp.c_proj.g_idx', 'transformer.h.1.attn.c_proj.qweight', 'transformer.h.2.attn.c_proj.qzeros', 'transformer.h.0.attn.c_proj.g_idx', 'transformer.h.7.attn.c_attn.qweight', 'transformer.h.1.attn.c_attn.qzeros', 'transformer.h.6.attn.c_proj.qweight', 'transformer.h.3.mlp.c_fc.g_idx', 'transformer.h.0.attn.c_attn.scales', 'transformer.h.7.attn.c_proj.qzeros', 'transformer.h.5.attn.c_proj.qzeros', 
'transformer.h.1.mlp.c_fc.qweight', 'transformer.h.2.attn.c_proj.scales', 'transformer.h.4.attn.c_proj.qzeros', 'transformer.h.1.mlp.c_proj.g_idx', 'transformer.h.7.mlp.c_fc.qzeros', 'transformer.h.7.attn.c_attn.g_idx', 'transformer.h.1.attn.c_proj.scales', 'transformer.h.1.attn.c_attn.g_idx', 'transformer.h.8.attn.c_proj.qweight', 'transformer.h.8.attn.c_attn.scales', 'transformer.h.9.attn.c_proj.qweight', 'transformer.h.4.attn.c_attn.scales', 'transformer.h.5.attn.c_proj.scales', 'transformer.h.11.mlp.c_fc.scales', 'transformer.h.3.attn.c_attn.g_idx', 'transformer.h.11.attn.c_attn.qweight', 'transformer.h.5.attn.c_attn.g_idx', 'transformer.h.9.attn.c_attn.g_idx', 'transformer.h.3.attn.c_proj.qweight', 'transformer.h.4.attn.c_attn.qzeros', 'transformer.h.9.attn.c_proj.qzeros', 'transformer.h.9.mlp.c_proj.g_idx', 'transformer.h.0.attn.c_proj.qzeros', 'transformer.h.11.attn.c_attn.qzeros', 'transformer.h.2.mlp.c_fc.g_idx', 'transformer.h.11.attn.c_attn.g_idx', 'transformer.h.2.mlp.c_fc.qzeros', 'transformer.h.6.attn.c_attn.qweight', 'transformer.h.6.mlp.c_fc.g_idx', 'transformer.h.10.mlp.c_proj.qzeros', 'transformer.h.4.mlp.c_proj.qweight', 'transformer.h.5.attn.c_proj.qweight', 'transformer.h.8.mlp.c_proj.g_idx', 'transformer.h.10.attn.c_proj.qweight', 'transformer.h.3.mlp.c_proj.g_idx', 'transformer.h.7.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.g_idx', 'transformer.h.11.mlp.c_fc.qweight', 'transformer.h.7.attn.c_attn.scales', 'transformer.h.10.mlp.c_proj.qweight', 'transformer.h.10.mlp.c_fc.g_idx', 'transformer.h.3.mlp.c_fc.qzeros', 'transformer.h.5.mlp.c_proj.g_idx', 'transformer.h.1.mlp.c_proj.qweight', 'transformer.h.11.mlp.c_proj.g_idx', 'transformer.h.10.mlp.c_fc.qweight', 'transformer.h.3.attn.c_attn.qzeros', 'transformer.h.11.mlp.c_fc.g_idx', 'transformer.h.8.mlp.c_fc.qweight', 'transformer.h.3.attn.c_proj.qzeros', 'transformer.h.0.attn.c_attn.qzeros', 'transformer.h.10.attn.c_attn.scales', 'transformer.h.10.mlp.c_fc.qzeros', 
'transformer.h.5.mlp.c_fc.g_idx', 'transformer.h.8.mlp.c_proj.qweight', 'transformer.h.10.attn.c_attn.g_idx', 'transformer.h.5.mlp.c_fc.scales', 'transformer.h.6.mlp.c_proj.qzeros', 'transformer.h.9.mlp.c_proj.scales', 'transformer.h.0.mlp.c_proj.scales', 'transformer.h.0.mlp.c_proj.qzeros', 'transformer.h.2.attn.c_attn.qzeros', 'transformer.h.0.mlp.c_fc.qweight', 'transformer.h.1.attn.c_proj.g_idx', 'transformer.h.6.mlp.c_fc.scales', 'transformer.h.3.attn.c_attn.qweight', 'transformer.h.2.attn.c_attn.qweight', 'transformer.h.3.attn.c_attn.scales', 'transformer.h.9.mlp.c_fc.scales', 'transformer.h.11.mlp.c_proj.qweight', 'transformer.h.11.attn.c_proj.scales', 'transformer.h.10.attn.c_attn.qzeros', 'transformer.h.11.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.qzeros', 'transformer.h.5.attn.c_attn.qweight', 'transformer.h.3.mlp.c_proj.scales', 'transformer.h.5.mlp.c_proj.qzeros', 'transformer.h.6.attn.c_attn.g_idx', 'transformer.h.6.mlp.c_fc.qzeros', 'transformer.h.5.mlp.c_proj.qweight', 'transformer.h.6.attn.c_attn.scales', 'transformer.h.7.mlp.c_fc.qweight', 'transformer.h.2.attn.c_proj.qweight', 'transformer.h.2.mlp.c_fc.qweight', 'transformer.h.2.mlp.c_proj.scales', 'transformer.h.11.mlp.c_fc.qzeros', 'transformer.h.6.mlp.c_proj.qweight', 'transformer.h.8.attn.c_attn.qweight', 'transformer.h.8.mlp.c_proj.qzeros', 'transformer.h.7.attn.c_attn.qzeros', 'transformer.h.8.mlp.c_proj.scales', 'transformer.h.1.mlp.c_fc.scales', 'transformer.h.1.mlp.c_proj.scales', 'transformer.h.0.mlp.c_fc.scales', 'transformer.h.9.attn.c_proj.scales', 'transformer.h.7.mlp.c_fc.g_idx', 'transformer.h.1.attn.c_proj.qzeros', 'transformer.h.2.attn.c_proj.g_idx', 'transformer.h.6.mlp.c_fc.qweight', 'transformer.h.7.mlp.c_proj.qzeros', 'transformer.h.6.mlp.c_proj.scales', 'transformer.h.7.mlp.c_proj.scales', 'transformer.h.2.attn.c_attn.g_idx', 'transformer.h.11.mlp.c_proj.scales', 'transformer.h.10.mlp.c_proj.g_idx', 'transformer.h.11.attn.c_attn.scales', 
'transformer.h.1.mlp.c_fc.qzeros', 'transformer.h.6.attn.c_proj.scales', 'transformer.h.10.mlp.c_proj.scales', 'transformer.h.5.attn.c_attn.qzeros', 'transformer.h.5.mlp.c_proj.scales', 'transformer.h.0.attn.c_proj.qweight', 'transformer.h.4.mlp.c_proj.g_idx', 'transformer.h.6.attn.c_proj.qzeros', 'transformer.h.5.mlp.c_fc.qweight', 'transformer.h.2.mlp.c_fc.scales', 'transformer.h.10.mlp.c_fc.scales', 'transformer.h.5.attn.c_proj.g_idx', 'transformer.h.0.attn.c_attn.g_idx', 'transformer.h.3.attn.c_proj.g_idx', 'transformer.h.5.attn.c_attn.scales', 'transformer.h.1.mlp.c_proj.qzeros', 'transformer.h.4.attn.c_attn.g_idx', 'transformer.h.4.attn.c_proj.qweight', 'transformer.h.4.mlp.c_fc.qweight', 'transformer.h.7.mlp.c_proj.qweight', 'transformer.h.11.attn.c_proj.qzeros', 'transformer.h.0.mlp.c_proj.g_idx', 'transformer.h.9.attn.c_attn.qweight', 'transformer.h.11.mlp.c_proj.qzeros', 'transformer.h.7.attn.c_proj.scales', 'transformer.h.8.attn.c_attn.g_idx', 'transformer.h.8.attn.c_proj.g_idx', 'transformer.h.2.mlp.c_proj.g_idx', 'transformer.h.4.attn.c_attn.qweight', 'transformer.h.3.mlp.c_proj.qweight', 'transformer.h.6.mlp.c_proj.g_idx', 'transformer.h.4.mlp.c_fc.qzeros', 'transformer.h.3.mlp.c_fc.qweight', 'transformer.h.10.attn.c_proj.qzeros', 'transformer.h.4.mlp.c_proj.qzeros', 'transformer.h.7.attn.c_proj.qweight']\n",
"- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at mlabonne/gpt2-GPTQ-4bit and are newly initialized: ['transformer.h.0.mlp.c_fc.weight', 'transformer.h.3.mlp.c_proj.weight', 'transformer.h.9.mlp.c_proj.weight', 'transformer.h.4.attn.c_attn.weight', 'transformer.h.11.mlp.c_proj.weight', 'transformer.h.3.attn.c_attn.weight', 'transformer.h.7.attn.c_attn.weight', 'transformer.h.11.attn.c_attn.weight', 'transformer.h.4.attn.c_proj.weight', 'transformer.h.5.attn.c_attn.weight', 'transformer.h.7.mlp.c_proj.weight', 'transformer.h.11.mlp.c_fc.weight', 'transformer.h.7.mlp.c_fc.weight', 'transformer.h.5.mlp.c_proj.weight', 'transformer.h.9.attn.c_proj.weight', 'transformer.h.7.attn.c_proj.weight', 'transformer.h.6.mlp.c_fc.weight', 'transformer.h.4.mlp.c_fc.weight', 'transformer.h.8.mlp.c_proj.weight', 'transformer.h.2.mlp.c_proj.weight', 'transformer.h.6.attn.c_attn.weight', 'transformer.h.8.mlp.c_fc.weight', 'transformer.h.0.mlp.c_proj.weight', 'transformer.h.2.attn.c_attn.weight', 'transformer.h.6.attn.c_proj.weight', 'transformer.h.2.attn.c_proj.weight', 'transformer.h.8.attn.c_attn.weight', 'transformer.h.5.attn.c_proj.weight', 'transformer.h.3.mlp.c_fc.weight', 'transformer.h.8.attn.c_proj.weight', 'transformer.h.2.mlp.c_fc.weight', 'transformer.h.5.mlp.c_fc.weight', 'transformer.h.11.attn.c_proj.weight', 'transformer.h.3.attn.c_proj.weight', 'transformer.h.1.attn.c_proj.weight', 'transformer.h.10.attn.c_attn.weight', 'transformer.h.4.mlp.c_proj.weight', 'transformer.h.10.attn.c_proj.weight', 'transformer.h.10.mlp.c_fc.weight', 'transformer.h.9.mlp.c_fc.weight', 'transformer.h.6.mlp.c_proj.weight', 'transformer.h.9.attn.c_attn.weight', 'transformer.h.1.attn.c_attn.weight', 'transformer.h.1.mlp.c_fc.weight', 'transformer.h.1.mlp.c_proj.weight', 'transformer.h.0.attn.c_attn.weight', 'transformer.h.10.mlp.c_proj.weight', 'transformer.h.0.attn.c_proj.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'I have a dream,,,,, and,,,, and,,,,,,,,,,).,,,,,,,,,,,,,,,,,,,,,,,'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"source": [
"input_text = \"I have a dream\"\n",
"generate_text(input_text)"
],
"metadata": {
"id": "b_HOEjhUg6pG"
},
"execution_count": null,
"outputs": []
}
]
}