llm-course/GPT2_GPTQ_4bit.ipynb

1072 lines
47 KiB
Plaintext
Raw Normal View History

2023-06-23 11:35:50 +00:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyNC/p+CjyumuththfkLa9LG",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"9fdc7832238743f384543674f57a135d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_e856dd2f68714377b76493d5f428043d",
"IPY_MODEL_b45fac85d2034fcda9427c787124788d",
"IPY_MODEL_8ae387bcc1a3478eb5da50db9449e7a0"
],
"layout": "IPY_MODEL_653f5250de60427cb870fe823937e6af"
}
},
"e856dd2f68714377b76493d5f428043d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_06846d39e16d4c66bc4cc177666c959b",
"placeholder": "",
"style": "IPY_MODEL_0fcfa8e7621647abae79076a7aec2972",
"value": "Upload 1 LFS files: 100%"
}
},
"b45fac85d2034fcda9427c787124788d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_811878ae336b4d0c9ec8237fc37bb999",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_0146ec198b8640b2aa30f8466e40597d",
"value": 1
}
},
"8ae387bcc1a3478eb5da50db9449e7a0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_769dd69f4f6f4af08d1b2d15522940ec",
"placeholder": "",
"style": "IPY_MODEL_f32db67266234c34aa77317776cbdc48",
"value": " 1/1 [00:13<00:00, 13.70s/it]"
}
},
"653f5250de60427cb870fe823937e6af": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"06846d39e16d4c66bc4cc177666c959b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0fcfa8e7621647abae79076a7aec2972": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"811878ae336b4d0c9ec8237fc37bb999": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0146ec198b8640b2aa30f8466e40597d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"769dd69f4f6f4af08d1b2d15522940ec": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f32db67266234c34aa77317776cbdc48": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"66a41698432f46de9eb325447917a389": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_ca0783bbd2e1428d92ee78db13d0d64d",
"IPY_MODEL_ebbb561fa4cb42e1be414b6462949fd0",
"IPY_MODEL_d2ee702c234b4b0f903e92e09cb1c6dd"
],
"layout": "IPY_MODEL_8d25924e581e40a9a39a87d4d3d14221"
}
},
"ca0783bbd2e1428d92ee78db13d0d64d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_2e97ccd3f59447db9892c3819252c57b",
"placeholder": "",
"style": "IPY_MODEL_1dbd60f5ee294c65b0bb9225d81bd8af",
"value": "gptq_model-4bit-128g.bin: 100%"
}
},
"ebbb561fa4cb42e1be414b6462949fd0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b0a9de27e4074778ab5a55a1f9d250cc",
"max": 123495975,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_bc79604dbd3242bab577154cca421b83",
"value": 123495975
}
},
"d2ee702c234b4b0f903e92e09cb1c6dd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8f3eab81268943a4935e04e502f95604",
"placeholder": "",
"style": "IPY_MODEL_6e4ed763e8c843dbbdbfd5ad0570d884",
"value": " 123M/123M [00:13<00:00, 12.0MB/s]"
}
},
"8d25924e581e40a9a39a87d4d3d14221": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2e97ccd3f59447db9892c3819252c57b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1dbd60f5ee294c65b0bb9225d81bd8af": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b0a9de27e4074778ab5a55a1f9d250cc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"bc79604dbd3242bab577154cca421b83": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"8f3eab81268943a4935e04e502f95604": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6e4ed763e8c843dbbdbfd5ad0570d884": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/GPT2_GPTQ_4bit.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Create a 4-bit GPT-2 model using AutoGPTQ\n",
"> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
"\n",
"❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
"\n",
"## Quantize model"
],
"metadata": {
"id": "yezrHxYvg_wR"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BhufqqQAaz6e"
},
"outputs": [],
"source": [
"# BUILD_CUDA_EXT=0 presumably skips compiling auto-gptq's CUDA extension at\n",
"# install time (Triton kernels are used for inference below) - TODO confirm.\n",
"# NOTE(review): package versions are unpinned, so this cell may break as\n",
"# auto-gptq/transformers APIs evolve.\n",
"!BUILD_CUDA_EXT=0 pip install -q auto-gptq transformers huggingface_hub"
]
},
{
"cell_type": "code",
"source": [
"import torch\n",
"from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n",
"from transformers import AutoTokenizer\n",
"\n",
"# Calibration texts used by GPTQ to estimate quantization error\n",
"examples = [\n",
"    \"In the wake of the Federal Reserve's recent decision, market analysts predict a shift in the stock market dynamics, urging investors to reassess their portfolios.\",\n",
"    \"As quantum computing continues its rapid development, it promises to revolutionize fields such as cryptography and machine learning, posing a significant leap from classical computing.\",\n",
"    \"The recent elections have brought a seismic shift in the political landscape, with the newly elected government pledging to focus on healthcare and education reform.\",\n",
"    \"The Renaissance, a significant period in European history, was marked by a cultural rebirth and dramatic advances in art, science, and philosophical thought.\",\n",
"    \"With the rise of machine learning and AI, Python has emerged as a dominant language in programming due to its simplicity and powerful libraries such as TensorFlow and PyTorch.\",\n",
"    \"Jane Austen's 'Pride and Prejudice' continues to captivate readers with its intricate exploration of societal norms and the complexities of human relationships during the Regency era.\",\n",
"    \"Following an intense season, the Golden State Warriors have emerged as the NBA champions, underscoring their remarkable team play and strategic finesse.\",\n",
"    \"The latest Marvel film, 'Avengers: Infinity Gauntlet', has shattered box office records worldwide, reinforcing the global appeal of superhero narratives.\",\n",
"    \"The increasing instances of wildfires and erratic weather patterns underscore the urgent need to address climate change and implement sustainable environmental practices.\",\n",
"    \"In recent news, a breakthrough in the peace negotiations between the two countries has sparked hope for an end to the decade-long conflict.\",\n",
"]\n",
"\n",
"# Define base model and output directory\n",
"model_id = \"gpt2\"\n",
"out_dir = model_id + \"-GPTQ\"\n",
"\n",
"# Load quantize config, model and tokenizer\n",
"quantize_config = BaseQuantizeConfig(bits=4, group_size=128)\n",
"model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"\n",
"# Determine device\n",
"device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
"\n",
"# Tokenize examples (truncation falls back to the tokenizer's model_max_length)\n",
"examples_ids = [tokenizer(text, truncation=True) for text in examples]\n",
"\n",
"# Quantize (autotune warmup moves the model to GPU - see the VRAM warning)\n",
"model.quantize(\n",
"    examples_ids,\n",
"    use_triton=True,\n",
"    autotune_warmup_after_quantized=True,\n",
"    batch_size=1,\n",
")\n",
"\n",
"# Save both a .bin and a .safetensors checkpoint, plus the tokenizer,\n",
"# into the same output directory (reuse out_dir instead of rebuilding the path)\n",
"model.save_quantized(out_dir, use_safetensors=False)\n",
"model.save_quantized(out_dir, use_safetensors=True)\n",
"tokenizer.save_pretrained(out_dir)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ETsG2iYrXaUg",
"outputId": "322feb57-c4bf-48aa-d29b-b71738e3edf1"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:auto_gptq.modeling._utils:using autotune_warmup will move model to GPU, make sure you have enough VRAM to load the whole model.\n",
"100%|██████████| 11/11 [03:16<00:00, 17.87s/it]\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"('gpt2-GPTQ/tokenizer_config.json',\n",
" 'gpt2-GPTQ/special_tokens_map.json',\n",
" 'gpt2-GPTQ/vocab.json',\n",
" 'gpt2-GPTQ/merges.txt',\n",
" 'gpt2-GPTQ/added_tokens.json',\n",
" 'gpt2-GPTQ/tokenizer.json')"
]
},
"metadata": {},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"source": [
"# Reload model and tokenizer from the local quantized checkpoint.\n",
"# NOTE: the \"hasn't fused attention/mlp module yet\" warnings emitted here are\n",
"# expected for GPT-2 in AutoGPTQ - fused-module injection is simply skipped.\n",
"model = AutoGPTQForCausalLM.from_quantized(\n",
" out_dir,\n",
" use_triton=True,\n",
" device=device,\n",
" use_safetensors=True,\n",
")\n",
"tokenizer = AutoTokenizer.from_pretrained(out_dir)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nktu1FsdZ9sd",
"outputId": "8f0aaf4e-5fc5-42d1-eb33-220658edb8d0"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:accelerate.utils.modeling:The safetensors archive passed at gpt2-GPTQ/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.\n",
"WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused attention module yet, will skip inject fused attention.\n",
"WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused mlp module yet, will skip inject fused mlp.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"def generate_text(input_text):\n",
"    \"\"\"Generate up to 50 tokens of sampled text continuing `input_text`.\n",
"\n",
"    Relies on the globals `model`, `tokenizer` and `device` defined in earlier\n",
"    cells. Sampling (do_sample=True) is unseeded, so output varies per run.\n",
"    \"\"\"\n",
"    # The tokenizer already returns a matching attention_mask (all ones for a\n",
"    # single unpadded sequence) - no need to build one with torch.ones.\n",
"    inputs = tokenizer(input_text, return_tensors='pt').to(device)\n",
"\n",
"    # model.to(device) is kept: later cells reload the model on CPU and then\n",
"    # call generate_text, relying on this move.\n",
"    output = model.to(device).generate(\n",
"        inputs=inputs['input_ids'],\n",
"        attention_mask=inputs['attention_mask'],\n",
"        do_sample=True,\n",
"        max_length=50,\n",
"        top_k=50,\n",
"        pad_token_id=tokenizer.eos_token_id\n",
"    )\n",
"    # Decode the first (only) generated sequence back into a string\n",
"    return tokenizer.decode(output[0], skip_special_tokens=True)\n",
"\n",
"# Generate text\n",
"input_text = \"I have a dream\"\n",
"generate_text(input_text)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"id": "KSIHpQ4XZ_7R",
"outputId": "e6f5c8a5-e3bf-4e52-d239-6b6f190e5475"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'I have a dream,,,,,,,,, at,--,,,,,,,,,,,,,,---,,,, ( (,//,,,,---'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "markdown",
"source": [
"## Save and load model using Hugging Face Hub"
],
"metadata": {
"id": "gV8hqGdYhLQH"
}
},
{
"cell_type": "code",
"source": [
"from huggingface_hub import notebook_login\n",
"from huggingface_hub import HfApi\n",
"import locale\n",
"# NOTE(review): presumably a Colab workaround - getpreferredencoding() can\n",
"# return a non-UTF-8 value there and break subprocess calls. TODO confirm\n",
"# this monkey-patch is still needed.\n",
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
"\n",
"REPO_ID = \"insert your repo/model ID\" # example: \"mlabonne/gpt2-GPTQ-4bit\"\n",
"\n",
"# Authenticate with the Hub and store the git credential for LFS pushes\n",
"notebook_login()\n",
"api = HfApi()\n",
"!git config --global credential.helper store\n",
"\n",
"# Upload the entire quantized-model folder (weights + tokenizer files)\n",
"api.upload_folder(\n",
" folder_path=out_dir,\n",
" repo_id=REPO_ID,\n",
" repo_type=\"model\",\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 100,
"referenced_widgets": [
"9fdc7832238743f384543674f57a135d",
"e856dd2f68714377b76493d5f428043d",
"b45fac85d2034fcda9427c787124788d",
"8ae387bcc1a3478eb5da50db9449e7a0",
"653f5250de60427cb870fe823937e6af",
"06846d39e16d4c66bc4cc177666c959b",
"0fcfa8e7621647abae79076a7aec2972",
"811878ae336b4d0c9ec8237fc37bb999",
"0146ec198b8640b2aa30f8466e40597d",
"769dd69f4f6f4af08d1b2d15522940ec",
"f32db67266234c34aa77317776cbdc48",
"66a41698432f46de9eb325447917a389",
"ca0783bbd2e1428d92ee78db13d0d64d",
"ebbb561fa4cb42e1be414b6462949fd0",
"d2ee702c234b4b0f903e92e09cb1c6dd",
"8d25924e581e40a9a39a87d4d3d14221",
"2e97ccd3f59447db9892c3819252c57b",
"1dbd60f5ee294c65b0bb9225d81bd8af",
"b0a9de27e4074778ab5a55a1f9d250cc",
"bc79604dbd3242bab577154cca421b83",
"8f3eab81268943a4935e04e502f95604",
"6e4ed763e8c843dbbdbfd5ad0570d884"
]
},
"id": "OKTxY6jQaDMv",
"outputId": "f786a135-ca33-486f-88b8-9795fdb8e713"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload 1 LFS files: 0%| | 0/1 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9fdc7832238743f384543674f57a135d"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"gptq_model-4bit-128g.bin: 0%| | 0.00/123M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "66a41698432f46de9eb325447917a389"
}
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'https://huggingface.co/mlabonne/gpt2-GPTQ-4bit/tree/main/'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"source": [
"from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n",
"from transformers import AutoTokenizer\n",
"\n",
"model_id = REPO_ID\n",
"\n",
"# Load the *quantized* checkpoint from the Hub. Using from_pretrained() here\n",
"# rebuilds the full-precision architecture and silently discards every GPTQ\n",
"# tensor (qweight, qzeros, scales, g_idx) - exactly the \"weights were not\n",
"# used ... newly initialized\" warnings seen in the original run - leaving the\n",
"# model with random weights. from_quantized() loads the GPTQ tensors properly.\n",
"model = AutoGPTQForCausalLM.from_quantized(\n",
"    model_id,\n",
"    device=device,\n",
"    use_triton=True,\n",
"    use_safetensors=True,\n",
")\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 146
},
"id": "5EUZJpdEfxNz",
"outputId": "7b7c2b23-fccd-47c1-e40e-1b544550da99"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Some weights of the model checkpoint at mlabonne/gpt2-GPTQ-4bit were not used when initializing GPT2LMHeadModel: ['transformer.h.11.attn.c_proj.qweight', 'transformer.h.10.attn.c_proj.g_idx', 'transformer.h.4.attn.c_proj.g_idx', 'transformer.h.0.mlp.c_proj.qweight', 'transformer.h.3.attn.c_proj.scales', 'transformer.h.9.attn.c_proj.g_idx', 'transformer.h.0.mlp.c_fc.g_idx', 'transformer.h.9.mlp.c_fc.qweight', 'transformer.h.4.attn.c_proj.scales', 'transformer.h.9.mlp.c_fc.g_idx', 'transformer.h.10.attn.c_attn.qweight', 'transformer.h.4.mlp.c_proj.scales', 'transformer.h.9.mlp.c_proj.qzeros', 'transformer.h.9.attn.c_attn.scales', 'transformer.h.0.attn.c_proj.scales', 'transformer.h.4.mlp.c_fc.g_idx', 'transformer.h.9.mlp.c_fc.qzeros', 'transformer.h.2.mlp.c_proj.qweight', 'transformer.h.9.mlp.c_proj.qweight', 'transformer.h.3.mlp.c_fc.scales', 'transformer.h.8.attn.c_attn.qzeros', 'transformer.h.1.attn.c_attn.scales', 'transformer.h.1.attn.c_attn.qweight', 'transformer.h.3.mlp.c_proj.qzeros', 'transformer.h.0.attn.c_attn.qweight', 'transformer.h.8.attn.c_proj.qzeros', 'transformer.h.2.attn.c_attn.scales', 'transformer.h.7.mlp.c_fc.scales', 'transformer.h.5.mlp.c_fc.qzeros', 'transformer.h.4.mlp.c_fc.scales', 'transformer.h.6.attn.c_attn.qzeros', 'transformer.h.9.attn.c_attn.qzeros', 'transformer.h.1.mlp.c_fc.g_idx', 'transformer.h.8.attn.c_proj.scales', 'transformer.h.10.attn.c_proj.scales', 'transformer.h.2.mlp.c_proj.qzeros', 'transformer.h.6.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.scales', 'transformer.h.0.mlp.c_fc.qzeros', 'transformer.h.7.mlp.c_proj.g_idx', 'transformer.h.1.attn.c_proj.qweight', 'transformer.h.2.attn.c_proj.qzeros', 'transformer.h.0.attn.c_proj.g_idx', 'transformer.h.7.attn.c_attn.qweight', 'transformer.h.1.attn.c_attn.qzeros', 'transformer.h.6.attn.c_proj.qweight', 'transformer.h.3.mlp.c_fc.g_idx', 'transformer.h.0.attn.c_attn.scales', 'transformer.h.7.attn.c_proj.qzeros', 'transformer.h.5.attn.c_proj.qzeros', 
'transformer.h.1.mlp.c_fc.qweight', 'transformer.h.2.attn.c_proj.scales', 'transformer.h.4.attn.c_proj.qzeros', 'transformer.h.1.mlp.c_proj.g_idx', 'transformer.h.7.mlp.c_fc.qzeros', 'transformer.h.7.attn.c_attn.g_idx', 'transformer.h.1.attn.c_proj.scales', 'transformer.h.1.attn.c_attn.g_idx', 'transformer.h.8.attn.c_proj.qweight', 'transformer.h.8.attn.c_attn.scales', 'transformer.h.9.attn.c_proj.qweight', 'transformer.h.4.attn.c_attn.scales', 'transformer.h.5.attn.c_proj.scales', 'transformer.h.11.mlp.c_fc.scales', 'transformer.h.3.attn.c_attn.g_idx', 'transformer.h.11.attn.c_attn.qweight', 'transformer.h.5.attn.c_attn.g_idx', 'transformer.h.9.attn.c_attn.g_idx', 'transformer.h.3.attn.c_proj.qweight', 'transformer.h.4.attn.c_attn.qzeros', 'transformer.h.9.attn.c_proj.qzeros', 'transformer.h.9.mlp.c_proj.g_idx', 'transformer.h.0.attn.c_proj.qzeros', 'transformer.h.11.attn.c_attn.qzeros', 'transformer.h.2.mlp.c_fc.g_idx', 'transformer.h.11.attn.c_attn.g_idx', 'transformer.h.2.mlp.c_fc.qzeros', 'transformer.h.6.attn.c_attn.qweight', 'transformer.h.6.mlp.c_fc.g_idx', 'transformer.h.10.mlp.c_proj.qzeros', 'transformer.h.4.mlp.c_proj.qweight', 'transformer.h.5.attn.c_proj.qweight', 'transformer.h.8.mlp.c_proj.g_idx', 'transformer.h.10.attn.c_proj.qweight', 'transformer.h.3.mlp.c_proj.g_idx', 'transformer.h.7.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.g_idx', 'transformer.h.11.mlp.c_fc.qweight', 'transformer.h.7.attn.c_attn.scales', 'transformer.h.10.mlp.c_proj.qweight', 'transformer.h.10.mlp.c_fc.g_idx', 'transformer.h.3.mlp.c_fc.qzeros', 'transformer.h.5.mlp.c_proj.g_idx', 'transformer.h.1.mlp.c_proj.qweight', 'transformer.h.11.mlp.c_proj.g_idx', 'transformer.h.10.mlp.c_fc.qweight', 'transformer.h.3.attn.c_attn.qzeros', 'transformer.h.11.mlp.c_fc.g_idx', 'transformer.h.8.mlp.c_fc.qweight', 'transformer.h.3.attn.c_proj.qzeros', 'transformer.h.0.attn.c_attn.qzeros', 'transformer.h.10.attn.c_attn.scales', 'transformer.h.10.mlp.c_fc.qzeros', 
'transformer.h.5.mlp.c_fc.g_idx', 'transformer.h.8.mlp.c_proj.qweight', 'transformer.h.10.attn.c_attn.g_idx', 'transformer.h.5.mlp.c_fc.sca
"- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at mlabonne/gpt2-GPTQ-4bit and are newly initialized: ['transformer.h.0.mlp.c_fc.weight', 'transformer.h.3.mlp.c_proj.weight', 'transformer.h.9.mlp.c_proj.weight', 'transformer.h.4.attn.c_attn.weight', 'transformer.h.11.mlp.c_proj.weight', 'transformer.h.3.attn.c_attn.weight', 'transformer.h.7.attn.c_attn.weight', 'transformer.h.11.attn.c_attn.weight', 'transformer.h.4.attn.c_proj.weight', 'transformer.h.5.attn.c_attn.weight', 'transformer.h.7.mlp.c_proj.weight', 'transformer.h.11.mlp.c_fc.weight', 'transformer.h.7.mlp.c_fc.weight', 'transformer.h.5.mlp.c_proj.weight', 'transformer.h.9.attn.c_proj.weight', 'transformer.h.7.attn.c_proj.weight', 'transformer.h.6.mlp.c_fc.weight', 'transformer.h.4.mlp.c_fc.weight', 'transformer.h.8.mlp.c_proj.weight', 'transformer.h.2.mlp.c_proj.weight', 'transformer.h.6.attn.c_attn.weight', 'transformer.h.8.mlp.c_fc.weight', 'transformer.h.0.mlp.c_proj.weight', 'transformer.h.2.attn.c_attn.weight', 'transformer.h.6.attn.c_proj.weight', 'transformer.h.2.attn.c_proj.weight', 'transformer.h.8.attn.c_attn.weight', 'transformer.h.5.attn.c_proj.weight', 'transformer.h.3.mlp.c_fc.weight', 'transformer.h.8.attn.c_proj.weight', 'transformer.h.2.mlp.c_fc.weight', 'transformer.h.5.mlp.c_fc.weight', 'transformer.h.11.attn.c_proj.weight', 'transformer.h.3.attn.c_proj.weight', 'transformer.h.1.attn.c_proj.weight', 'transformer.h.10.attn.c_attn.weight', 'transformer.h.4.mlp.c_proj.weight', 'transformer.h.10.attn.c_proj.weight', 'transformer.h.10.mlp.c_fc.weight', 'transformer.h.9.mlp.c_fc.weight', 'transformer.h.6.mlp.c_proj.weight', 'transformer.h.9.attn.c_attn.weight', 'transformer.h.1.attn.c_attn.weight', 'transformer.h.1.mlp.c_fc.weight', 'transformer.h.1.mlp.c_proj.weight', 'transformer.h.0.attn.c_attn.weight', 'transformer.h.10.mlp.c_proj.weight', 'transformer.h.0.attn.c_proj.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'I have a dream,,,,, and,,,, and,,,,,,,,,,).,,,,,,,,,,,,,,,,,,,,,,,'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"source": [
"# Sanity-check generation with the model reloaded from the Hugging Face Hub\n",
"input_text = \"I have a dream\"\n",
"generate_text(input_text)"
],
"metadata": {
"id": "b_HOEjhUg6pG"
},
"execution_count": null,
"outputs": []
}
]
}