Mirror of https://github.com/bigscience-workshop/petals (synced 2024-11-16 06:12:50 +00:00)

Commit e92487e5d2 (parent 50535a8435): Update dependency versions (#71)

* update dependency versions
* install bitsandbytes cpuonly from pip
* remove deprecated API from task pool
* clearer startup logs

Co-authored-by: Tim Dettmers <dettmers@cs.washington.edu>
.github/workflows/run-tests.yaml (vendored, 18 lines changed)
@@ -65,14 +65,6 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt
           pip install -r requirements-dev.txt
-      - name: Build bitsandbytes cpuonly
-        run: |
-          git clone https://github.com/TimDettmers/bitsandbytes.git
-          cd bitsandbytes
-          git checkout main
-          make cpuonly
-          pip install .
-          cd -
       - name: Test
         run: |
           export HF_TAG=$(python -c "import os; print(os.environ.get('GITHUB_HEAD_REF') or os.environ.get('GITHUB_REF_NAME'))")
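With the source build step gone from CI, bitsandbytes is now installed from pip along with the other pinned dependencies (see the requirements.txt hunk below). A quick sanity check of the resolved version, as a sketch rather than anything this commit adds:

    # Sketch (not part of the commit): confirm the pinned bitsandbytes version
    # resolved after `pip install -r requirements.txt`.
    import importlib.metadata

    version = importlib.metadata.version("bitsandbytes")
    assert version == "0.33.0", f"unexpected bitsandbytes version: {version}"
    print(f"bitsandbytes {version} is installed")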
@@ -80,8 +72,8 @@ jobs:
           export REF_NAME=bigscience/bloom-560m

           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:12 \
-            --torch_dtype float32 --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 \
-            --throughput 1 --attn_cache_size 0.2GiB &> server1.log &
+            --identity tests/test.id --host_maddrs /ip4/127.0.0.1/tcp/31337 --throughput 1 \
+            --torch_dtype float32 --compression NONE --attn_cache_size 0.2GiB &> server1.log &
           SERVER1_PID=$!

           sleep 5 # wait for the first server to initialize DHT

@@ -90,13 +82,13 @@ jobs:
           # ^-- server 1 multiaddr is determined by --identity and --host_maddrs

           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 12:22 \
-            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server2.log &
+            --initial_peers $INITIAL_PEERS --throughput 1 --torch_dtype float32 &> server2.log &
           SERVER2_PID=$!

           sleep 10 # wait for initial servers to declare blocks, then let server decide which blocks to serve

           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 0:6 \
-            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server3.log &
+            --initial_peers $INITIAL_PEERS --throughput 1 --torch_dtype float32 &> server3.log &
           SERVER3_PID=$!

           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --block_indices 4:16 \

@@ -104,7 +96,7 @@ jobs:
           SERVER4_PID=$!

           python -m cli.run_server --converted_model_name_or_path $MODEL_NAME --num_blocks 3 \
-            --torch_dtype float32 --initial_peers $INITIAL_PEERS --throughput 1 &> server5.log &
+            --initial_peers $INITIAL_PEERS --throughput 1 --torch_dtype float32 &> server5.log &
           SERVER5_PID=$!

           tail -n 100 -f server*.log &
cli/run_server.py

@@ -86,7 +86,7 @@ def main():
     if args.pop("increase_file_limit"):
         increase_file_limit()

-    compression_type = args.pop("compression")
+    compression_type = args.pop("compression").upper()
     compression = getattr(CompressionType, compression_type)

     attn_cache_size = args.pop("attn_cache_size")
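The .upper() call makes the --compression flag case-insensitive: CompressionType members are uppercase, so a lowercase value such as "none" would make the getattr lookup fail with AttributeError. A minimal sketch of the fixed lookup, assuming CompressionType comes from hivemind's protobuf definitions:

    # Sketch, assuming CompressionType is hivemind's protobuf enum with
    # uppercase members such as NONE and FLOAT16.
    from hivemind.proto.runtime_pb2 import CompressionType

    user_value = "none"  # as a user might type it on the command line
    compression = getattr(CompressionType, user_value.upper())  # CompressionType.NONE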
requirements.txt

@@ -1,6 +1,7 @@
 torch==1.12.0
+bitsandbytes==0.33.0
 accelerate==0.10.0
 huggingface-hub==0.7.0
-transformers==4.21.3
-https://github.com/learning-at-home/hivemind/archive/131f82c97ea67510d552bb7a68138ad27cbfa5d4.zip
 humanfriendly
+https://github.com/learning-at-home/hivemind/archive/20b3b3d5f225ed525515a5383a008a8f9fad8173.zip
+https://github.com/huggingface/transformers/archive/6589e510fa4e6c442059de2fab84752535de9b23.zip
src/server/server.py

@@ -73,7 +73,8 @@ class Server(threading.Thread):
         logger.info(f"Serving {len(self.module_backends)} blocks:")
         for block_name, backend in self.module_backends.items():
             num_parameters = sum(p.numel() for p in backend.module.parameters() if p.requires_grad)
-            logger.info(f"{block_name}: {backend.module.__class__.__name__}, {num_parameters} parameters")
+            parameter_msg = f"{num_parameters} trainable parameters" if num_parameters else "frozen"
+            logger.info(f"{block_name}: {backend.module.__class__.__name__}, {parameter_msg}")

         if not self.dht.is_alive():
             self.dht.run_in_background(await_ready=True)
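The reworded log line avoids printing "0 parameters" for blocks whose weights are all frozen. A self-contained sketch of the same logic, using a stand-in nn.Module rather than a real served block:

    # Sketch with a stand-in module; in the server, backend.module is a model block.
    import torch.nn as nn

    block = nn.Linear(4, 4)
    for p in block.parameters():
        p.requires_grad = False  # freeze every weight

    num_parameters = sum(p.numel() for p in block.parameters() if p.requires_grad)
    parameter_msg = f"{num_parameters} trainable parameters" if num_parameters else "frozen"
    print(f"block: {block.__class__.__name__}, {parameter_msg}")  # block: Linear, frozen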
src/server/task_pool.py

@@ -170,6 +170,3 @@ class PrioritizedTaskPool(TaskPoolBase):
         assert len(item) == 2
         self._priority.value = float(item[0])
         self._oldest_undispatched_timestamp.value = float(item[1])
-
-    def iterate_minibatches(self, *args, **kwargs) -> Generator[List[Task], None, None]:
-        raise NotImplementedError()
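Per the commit message, this removed stub is the "deprecated API": the updated hivemind pin apparently no longer declares iterate_minibatches on the task pool base class, making the NotImplementedError override dead code. A quick check against the installed hivemind, assuming TaskPoolBase is still importable from hivemind.moe.server.task_pool:

    # Sketch; the import path is an assumption about the pinned hivemind layout.
    from hivemind.moe.server.task_pool import TaskPoolBase

    assert not hasattr(TaskPoolBase, "iterate_minibatches"), \
        "base class still declares iterate_minibatches; keep the override"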