diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
index e9a018c..dcbcf9f 100644
--- a/.github/workflows/run-tests.yaml
+++ b/.github/workflows/run-tests.yaml
@@ -65,14 +65,6 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt
           pip install -r requirements-dev.txt
-      - name: Build bitsandbytes cpuonly
-        run: |
-          git clone https://github.com/TimDettmers/bitsandbytes.git
-          cd bitsandbytes
-          git checkout main
-          make cpuonly
-          pip install .
-          cd -
       - name: Test
         run: |
           export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
@@ -80,8 +72,8 @@ jobs:
           export REF_NAME=bigscience/bloom-560m
 
           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:12 \
-            --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 \
-            --throughput 1 --attn_cache_size 0.2GiB &> server1.log &
+            --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 --throughput 1 \
+            --torch_dtype float32 --compression NONE --attn_cache_size 0.2GiB &> server1.log &
           SERVER1_PID=$!
 
           sleep 5 # wait for the first server to initialize DHT
@@ -90,13 +82,13 @@ jobs:
           # ^-- server 1 multiaddr is determined by --identity and --host_maddrs
 
           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 12:22 \
-            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server2.log &
+            --initial_peers $INITIAL_PEERS --throughput 1 --torch_dtype float32 &> server2.log &
           SERVER2_PID=$!
 
           sleep 10 # wait for initial servers to declare blocks, then let server decide which blocks to serve
 
           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:6 \
-            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server3.log &
+            --initial_peers $INITIAL_PEERS --throughput 1 --torch_dtype float32 &> server3.log &
           SERVER3_PID=$!
 
           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 4:16 \
@@ -104,7 +96,7 @@ jobs:
           SERVER4_PID=$!
 
           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --num_blocks 3 \
-            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server5.log &
+            --initial_peers $INITIAL_PEERS --throughput 1 --torch_dtype float32 &> server5.log &
           SERVER5_PID=$!
 
           tail -n 100 -f server*.log &
diff --git a/cli/run_server.py b/cli/run_server.py
index ac7c9da..812e51c 100644
--- a/cli/run_server.py
+++ b/cli/run_server.py
@@ -86,7 +86,7 @@ def main():
     if args.pop("increase_file_limit"):
         increase_file_limit()
 
-    compression_type = args.pop("compression")
+    compression_type = args.pop("compression").upper()
     compression = getattr(CompressionType, compression_type)
 
     attn_cache_size = args.pop("attn_cache_size")
diff --git a/requirements.txt b/requirements.txt
index afc0290..53ae93e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 torch==1.12.0
+bitsandbytes==0.33.0
 accelerate==0.10.0
 huggingface-hub==0.7.0
+transformers==4.21.3
+https://github.com/learning-at-home/hivemind/archive/131f82c97ea67510d552bb7a68138ad27cbfa5d4.zip
 humanfriendly
-https://github.com/learning-at-home/hivemind/archive/20b3b3d5f225ed525515a5383a008a8f9fad8173.zip
-https://github.com/huggingface/transformers/archive/6589e510fa4e6c442059de2fab84752535de9b23.zip
\ No newline at end of file
diff --git a/src/server/server.py b/src/server/server.py
index efa1787..7b8cc9f 100644
--- a/src/server/server.py
+++ b/src/server/server.py
@@ -73,7 +73,8 @@ class Server(threading.Thread):
         logger.info(f"Serving {len(self.module_backends)} blocks:")
         for block_name, backend in self.module_backends.items():
             num_parameters = sum(p.numel() for p in backend.module.parameters() if p.requires_grad)
-            logger.info(f"{block_name}: {backend.module.__class__.__name__}, {num_parameters} parameters")
+            parameter_msg = f"{num_parameters} trainable parameters" if num_parameters else "frozen"
+            logger.info(f"{block_name}: {backend.module.__class__.__name__}, {parameter_msg}")
 
         if not self.dht.is_alive():
             self.dht.run_in_background(await_ready=True)
diff --git a/src/server/task_pool.py b/src/server/task_pool.py
index 2bf65c0..eec80bc 100644
--- a/src/server/task_pool.py
+++ b/src/server/task_pool.py
@@ -170,6 +170,3 @@ class PrioritizedTaskPool(TaskPoolBase):
         assert len(item) == 2
         self._priority.value = float(item[0])
         self._oldest_undispatched_timestamp.value = float(item[1])
-
-    def iterate_minibatches(self, *args, **kwargs) -> Generator[List[Task], None, None]:
-        raise NotImplementedError()