From 1222e172ef7e66d11a48e113e3ec1e5742cca63c Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 28 Mar 2024 19:14:24 +0000
Subject: [PATCH] GitHub Actions and docker compose update

Squashed commit messages:

* update
* run update
* adding steps
* working as root
* run update
* update
* Add ARM Dockerfile
* Temporarily require peft<0.5.0, transformers<4.32.0 (#470)

  Peft 0.5 recently released and broke some compatibilities. This PR
  temporarily requires petals to use the previous stable version of peft
  while we work on 0.5.0 support.

* run update
* adding steps
* working as root
* petals inference: moving to my org first
* docker compose: move to our org
* format: adding hivemind back into the setup
* reformat ipynb
* now to test main version
* using cpu now working. The health server now needs to wait for the others
  to come up, but otherwise it is working.
* adding chat health
* adding new test
* run update
* Update run-tests-docker.yaml
* tpu starting
* running locally in cpu mode; now we have the basic directory structure for
  an env, still need to tag the items properly.
* versions
---
 .github/workflows/check-style.yaml       |  11 ++-
 .github/workflows/push-docker-image.yaml |  23 +++--
 .github/workflows/run-tests-docker.yaml  |  36 ++++++++
 .github/workflows/run-tests.yaml         |   8 +-
 README.md                                |  22 +++++
 docker-compose.yml                       | 111 +++++++++++++++++++++++
 envs/cpu/is1/env.txt                     |   2 +
 envs/gpu/is1/env.txt                     |   5 +
 envs/tpu/v3-0/env.txt                    |   4 +
 etc/petals-inference.service             |  11 +++
 examples/prompt-tuning-personachat.ipynb |  24 ++---
 examples/prompt-tuning-sst2.ipynb        |  33 ++++---
 get_peersl.sh                            |   1 +
 health.env                               |   3 +
 pyproject.toml                           |   2 +-
 run.sh                                   |   1 +
 run2.sh                                  |   1 +
 src/petals/__init__.py                   |  10 +-
 18 files changed, 258 insertions(+), 50 deletions(-)
 create mode 100644 .github/workflows/run-tests-docker.yaml
 create mode 100644 docker-compose.yml
 create mode 100644 envs/cpu/is1/env.txt
 create mode 100644 envs/gpu/is1/env.txt
 create mode 100644 envs/tpu/v3-0/env.txt
 create mode 100644 etc/petals-inference.service
 create mode 100644 get_peersl.sh
 create mode 100644 health.env
 create mode 100755 run.sh
 create mode 100644 run2.sh

diff --git a/.github/workflows/check-style.yaml b/.github/workflows/check-style.yaml
index 60ea42b..d878766 100644
--- a/.github/workflows/check-style.yaml
+++ b/.github/workflows/check-style.yaml
@@ -9,18 +9,19 @@ jobs:
   black:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: psf/black@stable
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/black@main
         with:
           options: "--check --diff"
           version: "22.3.0"
   isort:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v3
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/setup-python@main
         with:
          python-version: 3.8
-      - uses: isort/isort-action@master
+      - uses: meta-introspector/isort-action@main
         with:
           isortVersion: "5.10.1"
+
diff --git a/.github/workflows/push-docker-image.yaml b/.github/workflows/push-docker-image.yaml
index 345b8f2..88ae129 100644
--- a/.github/workflows/push-docker-image.yaml
+++ b/.github/workflows/push-docker-image.yaml
@@ -14,15 +14,15 @@
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main

       - name: Docker meta
         id: meta
-        uses: crazy-max/ghaction-docker-meta@v2
+        uses: meta-introspector/metadata-action@main
         with:
           # list of Docker images to use as base name for tags
           images: |
-            learningathome/petals
+            h4ckermike/petals
           # generate Docker tags based on the following events/attributes
           tags: |
             type=ref,event=branch
@@ -33,18 +33,29 @@
       - name: Set up Docker Buildx
         id: buildx
-        uses: docker/setup-buildx-action@v1
+        uses: meta-introspector/setup-buildx-action@main

       - name: Login to Docker Hub
         if: github.event_name != 'pull_request'
-        uses: docker/login-action@v1
+        uses: meta-introspector/login-action@main
         with:
           username: ${{ secrets.DOCKER_HUB_USERNAME }}
           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

+      - name: Free disk space on Ubuntu runner
+        uses: meta-introspector/free-disk-space@main
+        with:
+          # found in: https://github.com/docker/build-push-action/issues/968
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
       - name: Build and push
         id: docker_build
-        uses: docker/build-push-action@v2
+        uses: meta-introspector/build-push-action@main
         with:
           context: .
           push: ${{ github.event_name != 'pull_request' }}
diff --git a/.github/workflows/run-tests-docker.yaml b/.github/workflows/run-tests-docker.yaml
new file mode 100644
index 0000000..6f6f00f
--- /dev/null
+++ b/.github/workflows/run-tests-docker.yaml
@@ -0,0 +1,36 @@
+name: Tests in docker compose
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+
+jobs:
+  run-tests-in-compose:
+    # runs-on: ubuntu-latest
+    runs-on: self-hosted
+    timeout-minutes: 20
+    steps:
+      - name: Increase swap space
+        if: ${{ matrix.os == 'ubuntu' }}
+        uses: meta-introspector/set-swap-space@main
+        with:
+          swap-size-gb: 10
+      - name: Checkout
+        uses: meta-introspector/checkout@main
+
+      - name: Build the docker-compose stack
+        run: docker-compose -f docker-compose.yml up -d
+
+      - name: Check running containers
+        run: docker ps -a
+
+      - name: Check logs
+        run: docker logs health
+
+      - name: Build the docker-compose stack
+        run: docker-compose down
+
+
+
+
diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
index 05cebdd..2ba5831 100644
--- a/.github/workflows/run-tests.yaml
+++ b/.github/workflows/run-tests.yaml
@@ -24,17 +24,17 @@
     steps:
       - name: Increase swap space
         if: ${{ matrix.os == 'ubuntu' }}
-        uses: pierotofy/set-swap-space@master
+        uses: meta-introspector/set-swap-space@main
         with:
           swap-size-gb: 10
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main
       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: meta-introspector/setup-python@main
         with:
           python-version: ${{ matrix.python-version }}
       - name: Cache dependencies
-        uses: actions/cache@v3
+        uses: meta-introspector/cache@main
         with:
           path: ~/.cache/pip
           key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}
diff --git a/README.md b/README.md
index 1f410ef..f10374f 100644
--- a/README.md
+++ b/README.md
@@ -229,3 +229,25 @@ _arXiv preprint arXiv:2209.01188,_ 2022.

+
+
+# setup
+
+
+ 1623  sudo cp petals-inference.service /etc/systemd/system/
+ 1634  sudo systemctl daemon-reload
+ 1635  sudo systemctl status petals-inference.service -l
+ 1636  sudo systemctl restart petals-inference.service -l
+
+ 1639  sudo useradd petals
+ 1640  sudo mkdir /home/petals
+ 1641  sudo chown petals: /home/petals/
+ 1643  sudo cp -r ~/.venv/ /home/petals/venv
+ 1644  sudo rm -rf /home/petals/venv
+ 1658  sudo mv ~/.venv/ /home/petals/venv
+ 1659  sudo chown petals: /home/petals/
+
+1670  sudo systemctl status petals-inference.service -l
+ 1674  sudo systemctl restart petals-inference.service -l
+ 1675  sudo systemctl status petals-inference.service -l
+
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..d6fe009
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,111 @@
+#version: "3"
+
+services:
+
+  health:
+    restart: always
+    depends_on:
+      - backbone
+    image: h4ckermike/health.petals:main
+    ports:
+      - "8100:5000"
+    env_file: health.env
+    command: flask run --host=0.0.0.0 --port=5000
+
+  inference :
+    restart: always
+    depends_on:
+      - backbone
+    image: h4ckermike/inference.petals:main
+    ports:
+      - "8000:5000"
+    env_file: health.env
+    command: gunicorn app:app --bind 0.0.0.0:5000 --worker-class gthread --threads 100 --timeout 1000
+
+  tinyllamacpu:
+    image: h4ckermike/petals:main
+    depends_on:
+      - backbone
+    command: python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
+    ports:
+      - "31331:31331"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    restart: always
+
+  tinyllamagpu:
+    image: h4ckermike/petals:main
+    depends_on:
+      - backbone
+    ports:
+      - "31332:31332"
+    command: python -m petals.cli.run_server --port 31332 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    restart: always
+
+  tinyllamatpu:
+    image: h4ckermike/petals:main
+    depends_on:
+      - backbone
+    ports:
+      - "31333:31333"
+    command: python -m petals.cli.run_server --port 31333 --num_blocks=1 Maykeye/TinyLLama-v0 --initial_peers $INITIAL_PEERS --device=$DEVICE
+
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: 1
+    #           capabilities: [gpu]
+    restart: always
+
+  # beluga:
+  #   image: h4ckermike/petals:main
+  #   depends_on:
+  #     - backbone
+  #   deploy:
+  #     resources:
+  #       reservations:
+  #         devices:
+  #           - driver: nvidia
+  #             count: 1
+  #             capabilities: [gpu]
+  #   ports:
+  #     - "31330:31330"
+  #   restart: always
+
+  backbone:
+    image: h4ckermike/petals:main
+    command: python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path /cache/bootstrap1.id
+    volumes:
+      - petals-cache-backbone:/cache
+    network_mode: host
+    ipc: host
+    restart: unless-stopped
+    env_file: health.env
+
+  # # DEbug target
+  # debug_health:
+  #   #environment:
+
+  #   env_file: health.env
+  #   image: h4ckermike/health.petals:main
+  #   command: bash
+  #   stdin_open: true
+  #   tty: true
+
+
+volumes:
+  petals-cache-backbone:
diff --git a/envs/cpu/is1/env.txt b/envs/cpu/is1/env.txt
new file mode 100644
index 0000000..67a88f2
--- /dev/null
+++ b/envs/cpu/is1/env.txt
@@ -0,0 +1,2 @@
+INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
+DEVICE=cpu
diff --git a/envs/gpu/is1/env.txt b/envs/gpu/is1/env.txt
new file mode 100644
index 0000000..5cfce73
--- /dev/null
+++ b/envs/gpu/is1/env.txt
@@ -0,0 +1,5 @@
+INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
+#PJRT_DEVICE=TPU
+DEVICE=cuda
+#DEVICE=cpu
+#DEVICE=tpux
diff --git a/envs/tpu/v3-0/env.txt b/envs/tpu/v3-0/env.txt
new file mode 100644
index 0000000..c0c3c07
--- /dev/null
+++ b/envs/tpu/v3-0/env.txt
@@ -0,0 +1,4 @@
+INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
+#PJRT_DEVICE=TPU
+#DEVICE=xla
+DEVICE=cpu
\ No newline at end of file
diff --git a/etc/petals-inference.service b/etc/petals-inference.service
new file mode 100644
index 0000000..ae08b6f
--- /dev/null
+++ b/etc/petals-inference.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=Petals Inference
+
+[Service]
+User=petals
+Group=petals
+Environment=PJRT_DEVICE=TPU
+ExecStart=/home/petals/venv/bin/python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4
+
+[Install]
+WantedBy=multi-user.target
diff --git a/examples/prompt-tuning-personachat.ipynb b/examples/prompt-tuning-personachat.ipynb
index b9d1bf5..2f3f28a 100644
--- a/examples/prompt-tuning-personachat.ipynb
+++ b/examples/prompt-tuning-personachat.ipynb
@@ -85,10 +85,10 @@
     "# The latter fine-tunes separate prefixes for each transformer block,\n",
     "# so prompt-tuning will take more time but yield better results.\n",
     "# See this paper for details of how it works: https://arxiv.org/pdf/2110.07602.pdf\n",
-    "TUNING_MODE = 'ptune'\n",
+    "TUNING_MODE = \"ptune\"\n",
     "\n",
     "NUM_PREFIX_TOKENS = 16\n",
-    "DEVICE = 'cuda'\n",
+    "DEVICE = \"cuda\"\n",
     "BATCH_SIZE = 8\n",
     "LR = 1e-2\n",
     "WEIGHT_DECAY = 0.0\n",
@@ -113,12 +113,10 @@
    "outputs": [],
    "source": [
     "tokenizer = BloomTokenizerFast.from_pretrained(MODEL_NAME)\n",
-    "tokenizer.padding_side = 'right'\n",
+    "tokenizer.padding_side = \"right\"\n",
     "tokenizer.model_max_length = MODEL_MAX_LENGTH\n",
     "model = DistributedBloomForCausalLM.from_pretrained(\n",
-    "    MODEL_NAME,\n",
-    "    pre_seq_len=NUM_PREFIX_TOKENS, \n",
-    "    tuning_mode=TUNING_MODE\n",
+    "    MODEL_NAME, pre_seq_len=NUM_PREFIX_TOKENS, tuning_mode=TUNING_MODE\n",
     ").to(DEVICE)"
    ]
   },
@@ -150,17 +148,13 @@
     "\n",
     "\n",
     "def tokenize(examples):\n",
-    "    outputs = {\n",
-    "        \"input_ids\": tokenizer(examples[\"chunks\"], padding='max_length', truncation=True)[\"input_ids\"]\n",
-    "    }\n",
+    "    outputs = {\"input_ids\": tokenizer(examples[\"chunks\"], padding=\"max_length\", truncation=True)[\"input_ids\"]}\n",
     "    outputs[\"labels\"] = outputs[\"input_ids\"]\n",
     "    return outputs\n",
     "\n",
     "\n",
-    "tokenized_datasets = (\n",
-    "    dataset\n",
-    "    .map(chunking, batched=True, remove_columns=dataset[\"train\"].column_names)\n",
-    "    .map(tokenize, batched=True, remove_columns=[\"chunks\"])\n",
+    "tokenized_datasets = dataset.map(chunking, batched=True, remove_columns=dataset[\"train\"].column_names).map(\n",
+    "    tokenize, batched=True, remove_columns=[\"chunks\"]\n",
     ")\n",
     "\n",
     "\n",
@@ -241,7 +235,7 @@
     "        \"num_prefix_tokens\": NUM_PREFIX_TOKENS,\n",
     "        \"model_name\": MODEL_NAME,\n",
     "        \"seed\": SEED,\n",
-    "    }\n",
+    "    },\n",
     ")\n",
     "\n",
     "for batch in tqdm(train_dataloader):\n",
@@ -285,7 +279,7 @@
     "    user_phrase = input()\n",
     "    if len(user_phrase) == 0:\n",
     "        break\n",
-    "    inputs = tokenizer([f\"{user_phrase}\\n-----\\n\"], return_tensors='pt')['input_ids'].to(DEVICE)\n",
+    "    inputs = tokenizer([f\"{user_phrase}\\n-----\\n\"], return_tensors=\"pt\")[\"input_ids\"].to(DEVICE)\n",
     "    while True:\n",
     "        outputs = model.generate(\n",
     "            inputs,\n",
diff --git a/examples/prompt-tuning-sst2.ipynb b/examples/prompt-tuning-sst2.ipynb
index b6f2d8a..d4f4f64 100644
--- a/examples/prompt-tuning-sst2.ipynb
+++ b/examples/prompt-tuning-sst2.ipynb
@@ -98,10 +98,10 @@
     "# The latter fine-tunes separate prefixes for each transformer block,\n",
     "# so prompt-tuning will take more time but yield better results.\n",
     "# See this paper for details of how it works: https://arxiv.org/pdf/2110.07602.pdf\n",
-    "TUNING_MODE = 'ptune'\n",
+    "TUNING_MODE = \"ptune\"\n",
     "\n",
     "NUM_PREFIX_TOKENS = 8\n",
-    "DEVICE = 'cuda'\n",
+    "DEVICE = \"cuda\"\n",
     "BATCH_SIZE = 32\n",
     "LR = 1e-2\n",
     "WEIGHT_DECAY = 0.0\n",
@@ -130,14 +130,16 @@
    "outputs": [],
    "source": [
     "tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)\n",
-    "tokenizer.padding_side = 'right'\n",
+    "tokenizer.padding_side = \"right\"\n",
     "tokenizer.model_max_length = MODEL_MAX_LENGTH\n",
     "tokenizer.pad_token = tokenizer.unk_token\n",
-    "model = DistributedLlamaForSequenceClassification.from_pretrained(\n",
-    "    MODEL_NAME,\n",
-    "    pre_seq_len=NUM_PREFIX_TOKENS,\n",
-    "    tuning_mode=TUNING_MODE\n",
-    ").float().to(DEVICE)\n",
+    "model = (\n",
+    "    DistributedLlamaForSequenceClassification.from_pretrained(\n",
+    "        MODEL_NAME, pre_seq_len=NUM_PREFIX_TOKENS, tuning_mode=TUNING_MODE\n",
+    "    )\n",
+    "    .float()\n",
+    "    .to(DEVICE)\n",
+    ")\n",
     "model.config.pad_token_id = tokenizer.pad_token_id"
    ]
   },
@@ -160,12 +162,14 @@
    },
    "outputs": [],
    "source": [
-    "task = 'sst2'\n",
+    "task = \"sst2\"\n",
     "\n",
     "dataset = load_dataset(\"glue\", task)\n",
     "\n",
+    "\n",
     "def preprocess_function(examples):\n",
-    "    return tokenizer(examples[\"sentence\"], padding='max_length', truncation=True, return_token_type_ids=False)\n",
+    "    return tokenizer(examples[\"sentence\"], padding=\"max_length\", truncation=True, return_token_type_ids=False)\n",
+    "\n",
     "\n",
     "tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
     "tokenized_datasets = tokenized_datasets.remove_columns([\"sentence\", \"idx\", \"attention_mask\"])\n",
@@ -198,9 +202,10 @@
    },
    "outputs": [],
    "source": [
-    "metric = load_metric('glue', task)\n",
+    "metric = load_metric(\"glue\", task)\n",
     "\n",
-    "def eval_metrics(model, dataloader, device='cpu'):\n",
+    "\n",
+    "def eval_metrics(model, dataloader, device=\"cpu\"):\n",
     "    model.eval()\n",
     "    for batch in dataloader:\n",
     "        batch = {k: v.to(device) for k, v in batch.items()}\n",
@@ -294,7 +299,7 @@
     "        \"num_prefix_tokens\": NUM_PREFIX_TOKENS,\n",
     "        \"model_name\": MODEL_NAME,\n",
     "        \"seed\": SEED,\n",
-    "    }\n",
+    "    },\n",
     ")\n",
     "\n",
     "scaler = torch.cuda.amp.GradScaler()\n",
@@ -305,7 +310,7 @@
     "    batch = {k: v.to(DEVICE) for k, v in batch.items()}\n",
     "\n",
     "    with torch.autocast(device_type=DEVICE, dtype=torch.float16):\n",
-    "        outputs = model(**batch)\n",
+    "        outputs = model(**batch)\n",
     "    loss = outputs.loss\n",
     "    scaler.scale(loss).backward()\n",
     "\n",
diff --git a/get_peersl.sh b/get_peersl.sh
new file mode 100644
index 0000000..5dcd3ab
--- /dev/null
+++ b/get_peersl.sh
@@ -0,0 +1 @@
+docker logs petals-backbone-1 2>&1 |grep initial_peers |cut "-d " -f18- | sort -u > peers.txt
diff --git a/health.env b/health.env
new file mode 100644
index 0000000..20e15a4
--- /dev/null
+++ b/health.env
@@ -0,0 +1,3 @@
+INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
+PJRT_DEVICE=TPU
+DEVICE=xla
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index cfc991c..6f1475b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.black]
 line-length = 120
-required-version = "22.3.0"
+required-version = "24.3.0"
 
 [tool.isort]
 profile = "black"
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..15bff43
--- /dev/null
+++ b/run.sh
@@ -0,0 +1 @@
+PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10
diff --git a/run2.sh b/run2.sh
new file mode 100644
index 0000000..23c984d
--- /dev/null
+++ b/run2.sh
@@ -0,0 +1 @@
+docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id
diff --git a/src/petals/__init__.py b/src/petals/__init__.py
index f513f65..52e5af1 100644
--- a/src/petals/__init__.py
+++ b/src/petals/__init__.py
@@ -17,13 +17,13 @@ from petals.models import *
 from petals.utils import *
 from petals.utils.logging import initialize_logs as _initialize_logs
 
-__version__ = "2.2.0"
+__version__ = "2.3.0.dev2"
 
 
-if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
-    assert (
-        version.parse("4.32.0") <= version.parse(transformers.__version__) < version.parse("5.0.0")
-    ), "Please install a proper transformers version: pip install transformers>=4.32.0,<5.0.0"
+#if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
+#    assert (
+#        version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
+#    ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
 
 
 def _override_bfloat16_mode_default():