llm-course/GPT2_GPTQ_4bit.ipynb
2023-06-23 12:35:50 +01:00

1072 lines
47 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyNC/p+CjyumuththfkLa9LG",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"9fdc7832238743f384543674f57a135d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_e856dd2f68714377b76493d5f428043d",
"IPY_MODEL_b45fac85d2034fcda9427c787124788d",
"IPY_MODEL_8ae387bcc1a3478eb5da50db9449e7a0"
],
"layout": "IPY_MODEL_653f5250de60427cb870fe823937e6af"
}
},
"e856dd2f68714377b76493d5f428043d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_06846d39e16d4c66bc4cc177666c959b",
"placeholder": "",
"style": "IPY_MODEL_0fcfa8e7621647abae79076a7aec2972",
"value": "Upload 1 LFS files: 100%"
}
},
"b45fac85d2034fcda9427c787124788d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_811878ae336b4d0c9ec8237fc37bb999",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_0146ec198b8640b2aa30f8466e40597d",
"value": 1
}
},
"8ae387bcc1a3478eb5da50db9449e7a0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_769dd69f4f6f4af08d1b2d15522940ec",
"placeholder": "",
"style": "IPY_MODEL_f32db67266234c34aa77317776cbdc48",
"value": " 1/1 [00:13<00:00, 13.70s/it]"
}
},
"653f5250de60427cb870fe823937e6af": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"06846d39e16d4c66bc4cc177666c959b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0fcfa8e7621647abae79076a7aec2972": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"811878ae336b4d0c9ec8237fc37bb999": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0146ec198b8640b2aa30f8466e40597d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"769dd69f4f6f4af08d1b2d15522940ec": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f32db67266234c34aa77317776cbdc48": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"66a41698432f46de9eb325447917a389": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_ca0783bbd2e1428d92ee78db13d0d64d",
"IPY_MODEL_ebbb561fa4cb42e1be414b6462949fd0",
"IPY_MODEL_d2ee702c234b4b0f903e92e09cb1c6dd"
],
"layout": "IPY_MODEL_8d25924e581e40a9a39a87d4d3d14221"
}
},
"ca0783bbd2e1428d92ee78db13d0d64d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_2e97ccd3f59447db9892c3819252c57b",
"placeholder": "",
"style": "IPY_MODEL_1dbd60f5ee294c65b0bb9225d81bd8af",
"value": "gptq_model-4bit-128g.bin: 100%"
}
},
"ebbb561fa4cb42e1be414b6462949fd0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b0a9de27e4074778ab5a55a1f9d250cc",
"max": 123495975,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_bc79604dbd3242bab577154cca421b83",
"value": 123495975
}
},
"d2ee702c234b4b0f903e92e09cb1c6dd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8f3eab81268943a4935e04e502f95604",
"placeholder": "",
"style": "IPY_MODEL_6e4ed763e8c843dbbdbfd5ad0570d884",
"value": " 123M/123M [00:13<00:00, 12.0MB/s]"
}
},
"8d25924e581e40a9a39a87d4d3d14221": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2e97ccd3f59447db9892c3819252c57b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1dbd60f5ee294c65b0bb9225d81bd8af": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b0a9de27e4074778ab5a55a1f9d250cc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"bc79604dbd3242bab577154cca421b83": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"8f3eab81268943a4935e04e502f95604": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6e4ed763e8c843dbbdbfd5ad0570d884": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/GPT2_GPTQ_4bit.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Create a 4-bit GPT-2 model using AutoGPTQ\n",
"> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
"\n",
"❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
"\n",
"## Quantize model"
],
"metadata": {
"id": "yezrHxYvg_wR"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BhufqqQAaz6e"
},
"outputs": [],
"source": [
"!BUILD_CUDA_EXT=0 pip install -q auto-gptq transformers huggingface_hub"
]
},
{
"cell_type": "code",
"source": [
"import torch\n",
"from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n",
"from transformers import AutoTokenizer\n",
"\n",
"examples = [\n",
" \"In the wake of the Federal Reserve's recent decision, market analysts predict a shift in the stock market dynamics, urging investors to reassess their portfolios.\",\n",
" \"As quantum computing continues its rapid development, it promises to revolutionize fields such as cryptography and machine learning, posing a significant leap from classical computing.\",\n",
" \"The recent elections have brought a seismic shift in the political landscape, with the newly elected government pledging to focus on healthcare and education reform.\",\n",
" \"The Renaissance, a significant period in European history, was marked by a cultural rebirth and dramatic advances in art, science, and philosophical thought.\",\n",
" \"With the rise of machine learning and AI, Python has emerged as a dominant language in programming due to its simplicity and powerful libraries such as TensorFlow and PyTorch.\",\n",
" \"Jane Austen's 'Pride and Prejudice' continues to captivate readers with its intricate exploration of societal norms and the complexities of human relationships during the Regency era.\",\n",
" \"Following an intense season, the Golden State Warriors have emerged as the NBA champions, underscoring their remarkable team play and strategic finesse.\",\n",
" \"The latest Marvel film, 'Avengers: Infinity Gauntlet', has shattered box office records worldwide, reinforcing the global appeal of superhero narratives.\",\n",
" \"The increasing instances of wildfires and erratic weather patterns underscore the urgent need to address climate change and implement sustainable environmental practices.\",\n",
" \"In recent news, a breakthrough in the peace negotiations between the two countries has sparked hope for an end to the decade-long conflict.\",\n",
"]\n",
"\n",
"# Define base model and output directory\n",
"model_id = \"gpt2\"\n",
"out_dir = model_id + \"-GPTQ\"\n",
"\n",
"# Load quantize config, model and tokenizer\n",
"quantize_config = BaseQuantizeConfig(bits=4, group_size=128)\n",
"model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"\n",
"# Determine device\n",
"device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
"\n",
"# Tokenize examples\n",
"examples_ids = [tokenizer(text, truncation=True) for text in examples]\n",
"\n",
"# Quantize\n",
"model.quantize(\n",
" examples_ids,\n",
" use_triton=True,\n",
" autotune_warmup_after_quantized=True,\n",
" batch_size=1,\n",
")\n",
"\n",
"# Save model and tokenizer\n",
"model.save_quantized(out_dir, use_safetensors=False)\n",
"model.save_quantized(out_dir, use_safetensors=True)\n",
"tokenizer.save_pretrained(out_dir)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ETsG2iYrXaUg",
"outputId": "322feb57-c4bf-48aa-d29b-b71738e3edf1"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:auto_gptq.modeling._utils:using autotune_warmup will move model to GPU, make sure you have enough VRAM to load the whole model.\n",
"100%|██████████| 11/11 [03:16<00:00, 17.87s/it]\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"('gpt2-GPTQ/tokenizer_config.json',\n",
" 'gpt2-GPTQ/special_tokens_map.json',\n",
" 'gpt2-GPTQ/vocab.json',\n",
" 'gpt2-GPTQ/merges.txt',\n",
" 'gpt2-GPTQ/added_tokens.json',\n",
" 'gpt2-GPTQ/tokenizer.json')"
]
},
"metadata": {},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"source": [
"# Reload model and tokenizer\n",
"model = AutoGPTQForCausalLM.from_quantized(\n",
" out_dir,\n",
" use_triton=True,\n",
" device=device,\n",
" use_safetensors=True,\n",
")\n",
"tokenizer = AutoTokenizer.from_pretrained(out_dir)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nktu1FsdZ9sd",
"outputId": "8f0aaf4e-5fc5-42d1-eb33-220658edb8d0"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:accelerate.utils.modeling:The safetensors archive passed at gpt2-GPTQ/gptq_model-4bit-128g.safetensors does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.\n",
"WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused attention module yet, will skip inject fused attention.\n",
"WARNING:auto_gptq.modeling._base:GPT2GPTQForCausalLM hasn't fused mlp module yet, will skip inject fused mlp.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"def generate_text(input_text):\n",
" input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)\n",
" attention_mask = torch.ones(input_ids.shape, dtype=torch.long).to(device)\n",
"\n",
" output = model.to(device).generate(\n",
" inputs=input_ids,\n",
" attention_mask=attention_mask,\n",
" do_sample=True,\n",
" max_length=50,\n",
" top_k=50,\n",
" pad_token_id=tokenizer.eos_token_id\n",
" )\n",
" output = tokenizer.decode(output[0], skip_special_tokens=True)\n",
"\n",
" return output\n",
"\n",
"# Generate text\n",
"input_text = \"I have a dream\"\n",
"generate_text(input_text)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"id": "KSIHpQ4XZ_7R",
"outputId": "e6f5c8a5-e3bf-4e52-d239-6b6f190e5475"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'I have a dream,,,,,,,,, at,--,,,,,,,,,,,,,,---,,,, ( (,//,,,,---'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "markdown",
"source": [
"## Save and load model using Hugging Face Hub"
],
"metadata": {
"id": "gV8hqGdYhLQH"
}
},
{
"cell_type": "code",
"source": [
"from huggingface_hub import notebook_login\n",
"from huggingface_hub import HfApi\n",
"import locale\n",
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
"\n",
"REPO_ID = \"insert your repo/model ID\" # example: \"mlabonne/gpt2-GPTQ-4bit\"\n",
"\n",
"notebook_login()\n",
"api = HfApi()\n",
"!git config --global credential.helper store\n",
"\n",
"api.upload_folder(\n",
" folder_path=out_dir,\n",
" repo_id=REPO_ID,\n",
" repo_type=\"model\",\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 100,
"referenced_widgets": [
"9fdc7832238743f384543674f57a135d",
"e856dd2f68714377b76493d5f428043d",
"b45fac85d2034fcda9427c787124788d",
"8ae387bcc1a3478eb5da50db9449e7a0",
"653f5250de60427cb870fe823937e6af",
"06846d39e16d4c66bc4cc177666c959b",
"0fcfa8e7621647abae79076a7aec2972",
"811878ae336b4d0c9ec8237fc37bb999",
"0146ec198b8640b2aa30f8466e40597d",
"769dd69f4f6f4af08d1b2d15522940ec",
"f32db67266234c34aa77317776cbdc48",
"66a41698432f46de9eb325447917a389",
"ca0783bbd2e1428d92ee78db13d0d64d",
"ebbb561fa4cb42e1be414b6462949fd0",
"d2ee702c234b4b0f903e92e09cb1c6dd",
"8d25924e581e40a9a39a87d4d3d14221",
"2e97ccd3f59447db9892c3819252c57b",
"1dbd60f5ee294c65b0bb9225d81bd8af",
"b0a9de27e4074778ab5a55a1f9d250cc",
"bc79604dbd3242bab577154cca421b83",
"8f3eab81268943a4935e04e502f95604",
"6e4ed763e8c843dbbdbfd5ad0570d884"
]
},
"id": "OKTxY6jQaDMv",
"outputId": "f786a135-ca33-486f-88b8-9795fdb8e713"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload 1 LFS files: 0%| | 0/1 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9fdc7832238743f384543674f57a135d"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"gptq_model-4bit-128g.bin: 0%| | 0.00/123M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "66a41698432f46de9eb325447917a389"
}
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'https://huggingface.co/mlabonne/gpt2-GPTQ-4bit/tree/main/'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"source": [
"from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n",
"from transformers import AutoTokenizer\n",
"\n",
"model_id = REPO_ID\n",
"quantize_config = BaseQuantizeConfig(bits=4, group_size=128)\n",
"model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 146
},
"id": "5EUZJpdEfxNz",
"outputId": "7b7c2b23-fccd-47c1-e40e-1b544550da99"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Some weights of the model checkpoint at mlabonne/gpt2-GPTQ-4bit were not used when initializing GPT2LMHeadModel: ['transformer.h.11.attn.c_proj.qweight', 'transformer.h.10.attn.c_proj.g_idx', 'transformer.h.4.attn.c_proj.g_idx', 'transformer.h.0.mlp.c_proj.qweight', 'transformer.h.3.attn.c_proj.scales', 'transformer.h.9.attn.c_proj.g_idx', 'transformer.h.0.mlp.c_fc.g_idx', 'transformer.h.9.mlp.c_fc.qweight', 'transformer.h.4.attn.c_proj.scales', 'transformer.h.9.mlp.c_fc.g_idx', 'transformer.h.10.attn.c_attn.qweight', 'transformer.h.4.mlp.c_proj.scales', 'transformer.h.9.mlp.c_proj.qzeros', 'transformer.h.9.attn.c_attn.scales', 'transformer.h.0.attn.c_proj.scales', 'transformer.h.4.mlp.c_fc.g_idx', 'transformer.h.9.mlp.c_fc.qzeros', 'transformer.h.2.mlp.c_proj.qweight', 'transformer.h.9.mlp.c_proj.qweight', 'transformer.h.3.mlp.c_fc.scales', 'transformer.h.8.attn.c_attn.qzeros', 'transformer.h.1.attn.c_attn.scales', 'transformer.h.1.attn.c_attn.qweight', 'transformer.h.3.mlp.c_proj.qzeros', 'transformer.h.0.attn.c_attn.qweight', 'transformer.h.8.attn.c_proj.qzeros', 'transformer.h.2.attn.c_attn.scales', 'transformer.h.7.mlp.c_fc.scales', 'transformer.h.5.mlp.c_fc.qzeros', 'transformer.h.4.mlp.c_fc.scales', 'transformer.h.6.attn.c_attn.qzeros', 'transformer.h.9.attn.c_attn.qzeros', 'transformer.h.1.mlp.c_fc.g_idx', 'transformer.h.8.attn.c_proj.scales', 'transformer.h.10.attn.c_proj.scales', 'transformer.h.2.mlp.c_proj.qzeros', 'transformer.h.6.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.scales', 'transformer.h.0.mlp.c_fc.qzeros', 'transformer.h.7.mlp.c_proj.g_idx', 'transformer.h.1.attn.c_proj.qweight', 'transformer.h.2.attn.c_proj.qzeros', 'transformer.h.0.attn.c_proj.g_idx', 'transformer.h.7.attn.c_attn.qweight', 'transformer.h.1.attn.c_attn.qzeros', 'transformer.h.6.attn.c_proj.qweight', 'transformer.h.3.mlp.c_fc.g_idx', 'transformer.h.0.attn.c_attn.scales', 'transformer.h.7.attn.c_proj.qzeros', 'transformer.h.5.attn.c_proj.qzeros', 
'transformer.h.1.mlp.c_fc.qweight', 'transformer.h.2.attn.c_proj.scales', 'transformer.h.4.attn.c_proj.qzeros', 'transformer.h.1.mlp.c_proj.g_idx', 'transformer.h.7.mlp.c_fc.qzeros', 'transformer.h.7.attn.c_attn.g_idx', 'transformer.h.1.attn.c_proj.scales', 'transformer.h.1.attn.c_attn.g_idx', 'transformer.h.8.attn.c_proj.qweight', 'transformer.h.8.attn.c_attn.scales', 'transformer.h.9.attn.c_proj.qweight', 'transformer.h.4.attn.c_attn.scales', 'transformer.h.5.attn.c_proj.scales', 'transformer.h.11.mlp.c_fc.scales', 'transformer.h.3.attn.c_attn.g_idx', 'transformer.h.11.attn.c_attn.qweight', 'transformer.h.5.attn.c_attn.g_idx', 'transformer.h.9.attn.c_attn.g_idx', 'transformer.h.3.attn.c_proj.qweight', 'transformer.h.4.attn.c_attn.qzeros', 'transformer.h.9.attn.c_proj.qzeros', 'transformer.h.9.mlp.c_proj.g_idx', 'transformer.h.0.attn.c_proj.qzeros', 'transformer.h.11.attn.c_attn.qzeros', 'transformer.h.2.mlp.c_fc.g_idx', 'transformer.h.11.attn.c_attn.g_idx', 'transformer.h.2.mlp.c_fc.qzeros', 'transformer.h.6.attn.c_attn.qweight', 'transformer.h.6.mlp.c_fc.g_idx', 'transformer.h.10.mlp.c_proj.qzeros', 'transformer.h.4.mlp.c_proj.qweight', 'transformer.h.5.attn.c_proj.qweight', 'transformer.h.8.mlp.c_proj.g_idx', 'transformer.h.10.attn.c_proj.qweight', 'transformer.h.3.mlp.c_proj.g_idx', 'transformer.h.7.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.g_idx', 'transformer.h.11.mlp.c_fc.qweight', 'transformer.h.7.attn.c_attn.scales', 'transformer.h.10.mlp.c_proj.qweight', 'transformer.h.10.mlp.c_fc.g_idx', 'transformer.h.3.mlp.c_fc.qzeros', 'transformer.h.5.mlp.c_proj.g_idx', 'transformer.h.1.mlp.c_proj.qweight', 'transformer.h.11.mlp.c_proj.g_idx', 'transformer.h.10.mlp.c_fc.qweight', 'transformer.h.3.attn.c_attn.qzeros', 'transformer.h.11.mlp.c_fc.g_idx', 'transformer.h.8.mlp.c_fc.qweight', 'transformer.h.3.attn.c_proj.qzeros', 'transformer.h.0.attn.c_attn.qzeros', 'transformer.h.10.attn.c_attn.scales', 'transformer.h.10.mlp.c_fc.qzeros', 
'transformer.h.5.mlp.c_fc.g_idx', 'transformer.h.8.mlp.c_proj.qweight', 'transformer.h.10.attn.c_attn.g_idx', 'transformer.h.5.mlp.c_fc.scales', 'transformer.h.6.mlp.c_proj.qzeros', 'transformer.h.9.mlp.c_proj.scales', 'transformer.h.0.mlp.c_proj.scales', 'transformer.h.0.mlp.c_proj.qzeros', 'transformer.h.2.attn.c_attn.qzeros', 'transformer.h.0.mlp.c_fc.qweight', 'transformer.h.1.attn.c_proj.g_idx', 'transformer.h.6.mlp.c_fc.scales', 'transformer.h.3.attn.c_attn.qweight', 'transformer.h.2.attn.c_attn.qweight', 'transformer.h.3.attn.c_attn.scales', 'transformer.h.9.mlp.c_fc.scales', 'transformer.h.11.mlp.c_proj.qweight', 'transformer.h.11.attn.c_proj.scales', 'transformer.h.10.attn.c_attn.qzeros', 'transformer.h.11.attn.c_proj.g_idx', 'transformer.h.8.mlp.c_fc.qzeros', 'transformer.h.5.attn.c_attn.qweight', 'transformer.h.3.mlp.c_proj.scales', 'transformer.h.5.mlp.c_proj.qzeros', 'transformer.h.6.attn.c_attn.g_idx', 'transformer.h.6.mlp.c_fc.qzeros', 'transformer.h.5.mlp.c_proj.qweight', 'transformer.h.6.attn.c_attn.scales', 'transformer.h.7.mlp.c_fc.qweight', 'transformer.h.2.attn.c_proj.qweight', 'transformer.h.2.mlp.c_fc.qweight', 'transformer.h.2.mlp.c_proj.scales', 'transformer.h.11.mlp.c_fc.qzeros', 'transformer.h.6.mlp.c_proj.qweight', 'transformer.h.8.attn.c_attn.qweight', 'transformer.h.8.mlp.c_proj.qzeros', 'transformer.h.7.attn.c_attn.qzeros', 'transformer.h.8.mlp.c_proj.scales', 'transformer.h.1.mlp.c_fc.scales', 'transformer.h.1.mlp.c_proj.scales', 'transformer.h.0.mlp.c_fc.scales', 'transformer.h.9.attn.c_proj.scales', 'transformer.h.7.mlp.c_fc.g_idx', 'transformer.h.1.attn.c_proj.qzeros', 'transformer.h.2.attn.c_proj.g_idx', 'transformer.h.6.mlp.c_fc.qweight', 'transformer.h.7.mlp.c_proj.qzeros', 'transformer.h.6.mlp.c_proj.scales', 'transformer.h.7.mlp.c_proj.scales', 'transformer.h.2.attn.c_attn.g_idx', 'transformer.h.11.mlp.c_proj.scales', 'transformer.h.10.mlp.c_proj.g_idx', 'transformer.h.11.attn.c_attn.scales', 
'transformer.h.1.mlp.c_fc.qzeros', 'transformer.h.6.attn.c_proj.scales', 'transformer.h.10.mlp.c_proj.scales', 'transformer.h.5.attn.c_attn.qzeros', 'transformer.h.5.mlp.c_proj.scales', 'transformer.h.0.attn.c_proj.qweight', 'transformer.h.4.mlp.c_proj.g_idx', 'transformer.h.6.attn.c_proj.qzeros', 'transformer.h.5.mlp.c_fc.qweight', 'transformer.h.2.mlp.c_fc.scales', 'transformer.h.10.mlp.c_fc.scales', 'transformer.h.5.attn.c_proj.g_idx', 'transformer.h.0.attn.c_attn.g_idx', 'transformer.h.3.attn.c_proj.g_idx', 'transformer.h.5.attn.c_attn.scales', 'transformer.h.1.mlp.c_proj.qzeros', 'transformer.h.4.attn.c_attn.g_idx', 'transformer.h.4.attn.c_proj.qweight', 'transformer.h.4.mlp.c_fc.qweight', 'transformer.h.7.mlp.c_proj.qweight', 'transformer.h.11.attn.c_proj.qzeros', 'transformer.h.0.mlp.c_proj.g_idx', 'transformer.h.9.attn.c_attn.qweight', 'transformer.h.11.mlp.c_proj.qzeros', 'transformer.h.7.attn.c_proj.scales', 'transformer.h.8.attn.c_attn.g_idx', 'transformer.h.8.attn.c_proj.g_idx', 'transformer.h.2.mlp.c_proj.g_idx', 'transformer.h.4.attn.c_attn.qweight', 'transformer.h.3.mlp.c_proj.qweight', 'transformer.h.6.mlp.c_proj.g_idx', 'transformer.h.4.mlp.c_fc.qzeros', 'transformer.h.3.mlp.c_fc.qweight', 'transformer.h.10.attn.c_proj.qzeros', 'transformer.h.4.mlp.c_proj.qzeros', 'transformer.h.7.attn.c_proj.qweight']\n",
"- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at mlabonne/gpt2-GPTQ-4bit and are newly initialized: ['transformer.h.0.mlp.c_fc.weight', 'transformer.h.3.mlp.c_proj.weight', 'transformer.h.9.mlp.c_proj.weight', 'transformer.h.4.attn.c_attn.weight', 'transformer.h.11.mlp.c_proj.weight', 'transformer.h.3.attn.c_attn.weight', 'transformer.h.7.attn.c_attn.weight', 'transformer.h.11.attn.c_attn.weight', 'transformer.h.4.attn.c_proj.weight', 'transformer.h.5.attn.c_attn.weight', 'transformer.h.7.mlp.c_proj.weight', 'transformer.h.11.mlp.c_fc.weight', 'transformer.h.7.mlp.c_fc.weight', 'transformer.h.5.mlp.c_proj.weight', 'transformer.h.9.attn.c_proj.weight', 'transformer.h.7.attn.c_proj.weight', 'transformer.h.6.mlp.c_fc.weight', 'transformer.h.4.mlp.c_fc.weight', 'transformer.h.8.mlp.c_proj.weight', 'transformer.h.2.mlp.c_proj.weight', 'transformer.h.6.attn.c_attn.weight', 'transformer.h.8.mlp.c_fc.weight', 'transformer.h.0.mlp.c_proj.weight', 'transformer.h.2.attn.c_attn.weight', 'transformer.h.6.attn.c_proj.weight', 'transformer.h.2.attn.c_proj.weight', 'transformer.h.8.attn.c_attn.weight', 'transformer.h.5.attn.c_proj.weight', 'transformer.h.3.mlp.c_fc.weight', 'transformer.h.8.attn.c_proj.weight', 'transformer.h.2.mlp.c_fc.weight', 'transformer.h.5.mlp.c_fc.weight', 'transformer.h.11.attn.c_proj.weight', 'transformer.h.3.attn.c_proj.weight', 'transformer.h.1.attn.c_proj.weight', 'transformer.h.10.attn.c_attn.weight', 'transformer.h.4.mlp.c_proj.weight', 'transformer.h.10.attn.c_proj.weight', 'transformer.h.10.mlp.c_fc.weight', 'transformer.h.9.mlp.c_fc.weight', 'transformer.h.6.mlp.c_proj.weight', 'transformer.h.9.attn.c_attn.weight', 'transformer.h.1.attn.c_attn.weight', 'transformer.h.1.mlp.c_fc.weight', 'transformer.h.1.mlp.c_proj.weight', 'transformer.h.0.attn.c_attn.weight', 'transformer.h.10.mlp.c_proj.weight', 'transformer.h.0.attn.c_proj.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'I have a dream,,,,, and,,,, and,,,,,,,,,,).,,,,,,,,,,,,,,,,,,,,,,,'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"source": [
"input_text = \"I have a dream\"\n",
"generate_text(input_text)"
],
"metadata": {
"id": "b_HOEjhUg6pG"
},
"execution_count": null,
"outputs": []
}
]
}