# CI workflow: for every (model, OS, Python version) combination in the matrix,
# launch a small local swarm (one DHT bootstrap peer + four servers) on the
# runner, run the PyTest suite against it, then smoke-test the benchmark scripts.
name: Tests

on:
  push:
    branches: [ main ]
  pull_request:

jobs:
  run-tests:
    strategy:
      matrix:
        # Python versions are quoted so that e.g. '3.10' is not parsed as the float 3.1.
        include:
          - { model: 'bigscience/bloom-560m', os: 'ubuntu', python-version: '3.8' }
          - { model: 'bigscience/bloom-560m', os: 'ubuntu', python-version: '3.11' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'ubuntu', python-version: '3.8' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'ubuntu', python-version: '3.11' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'macos', python-version: '3.10' }
          - { model: 'Maykeye/TinyLLama-v0', os: 'macos', python-version: '3.11' }
          - { model: 'artek0chumak/TestMixtral', os: 'ubuntu', python-version: '3.8' }
          - { model: 'artek0chumak/TestMixtral', os: 'ubuntu', python-version: '3.11' }
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
    runs-on: ${{ matrix.os }}-latest
    timeout-minutes: 20
    steps:
      # NOTE(review): this action is referenced by a moving branch (@master);
      # consider pinning it to a release tag or commit SHA.
      - name: Increase swap space
        if: ${{ matrix.os == 'ubuntu' }}
        uses: pierotofy/set-swap-space@master
        with:
          swap-size-gb: 10
      - name: Checkout
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache dependencies
        uses: actions/cache@v3
        with:
          # pip keeps its cache in ~/.cache/pip on Linux and in
          # ~/Library/Caches/pip on macOS; list both so every matrix OS is covered.
          path: |
            ~/.cache/pip
            ~/Library/Caches/pip
          # The key includes the runner OS so that ubuntu and macos jobs using the
          # same Python version do not share (and overwrite) one cache entry.
          key: Key-v1-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .[dev]
      - name: Test
        run: |
          set -x  # Print executed commands
          export MODEL_NAME="${{ matrix.model }}"
          export REF_NAME="${{ matrix.model }}"
          export ADAPTER_NAME="${{ matrix.model == 'bigscience/bloom-560m' && 'artek0chumak/bloom-560m-safe-peft' || '' }}"

          # [Step 1] Set up a tiny test swarm (see https://github.com/bigscience-workshop/petals/wiki/Launch-your-own-swarm)

          python -m petals.cli.run_dht \
            --identity_path tests/bootstrap.id --host_maddrs /ip4/127.0.0.1/tcp/31337 &> bootstrap.log &
          BOOTSTRAP_PID=$!

          export INITIAL_PEERS=/ip4/127.0.0.1/tcp/31337/p2p/QmS9KwZptnVdB9FFV7uGgaTq4sEKBwcYeKZDfSpyKDUd1g
          # ^-- multiaddr in INITIAL_PEERS is determined by --identity_path and --host_maddrs

          until [ -s bootstrap.log ]; do sleep 5; done  # wait for DHT init

          export RUN_SERVER="python -m petals.cli.run_server $MODEL_NAME \
            --device cpu --torch_dtype float32 --initial_peers $INITIAL_PEERS"
          export TENSOR_PARALLEL_ARGS="${{ matrix.model == 'bigscience/bloom-560m' && '--tensor_parallel_devices cpu cpu' || '' }}"

          $RUN_SERVER --adapters $ADAPTER_NAME --num_blocks 5 --throughput 1 --mean_balance_check_period 10 &> server1.log &
          SERVER1_PID=$!
          # ^-- rebalancing test: this server chooses blocks 0:5, then sees a gap in the swarm and moves there

          sleep 10  # wait for the 1st server to choose blocks

          $RUN_SERVER --adapters $ADAPTER_NAME --block_indices 0:5 --throughput 1 --identity_path tests/server2.id &> server2.log &
          SERVER2_PID=$!

          $RUN_SERVER --adapters $ADAPTER_NAME --num_blocks 14 --throughput auto \
            --attn_cache_tokens 2048 --max_chunk_size_bytes 1024 &> server3.log &
          SERVER3_PID=$!
          # ^-- chunking test

          $RUN_SERVER $TENSOR_PARALLEL_ARGS --block_indices 0:2 --throughput auto &> server4.log &
          SERVER4_PID=$!
          # ^-- tensor parallelism test (not compatible with adapters yet)

          sleep 5  # wait for the log files to appear

          tail -n 100 -f bootstrap.log server*.log &
          LOGGER_PID=$!

          sleep 30  # wait for servers to eval throughput, download layers, and rebalance
          kill -0 $BOOTSTRAP_PID $SERVER1_PID $SERVER2_PID $SERVER3_PID $SERVER4_PID  # ensure all peers survived init

          # [Step 2] Run PyTest

          # Share disk cache between Petals servers, clients, and HF Transformers
          export TRANSFORMERS_CACHE=~/.cache/petals

          # Necessary for @pytest.mark.forked to work properly on macOS, see https://github.com/kevlened/pytest-parallel/issues/93
          export no_proxy=*
          export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES

          # Limit default ClientConfig.max_retries to see tracebacks instead of retrying indefinitely
          export PETALS_MAX_RETRIES=10

          pytest tests --durations=0 --durations-min=1.0 -v

          # [Step 3] Check if benchmarks work (their results here are meaningless since it's a tiny swarm of CPU servers)

          python benchmarks/benchmark_inference.py --model $MODEL_NAME --initial_peers $INITIAL_PEERS --torch_dtype float32 \
            --seq_len 3
          python benchmarks/benchmark_forward.py --model $MODEL_NAME --initial_peers $INITIAL_PEERS --torch_dtype float32 \
            --seq_len 3 --batch_size 3 --n_steps 1
          python benchmarks/benchmark_training.py --model $MODEL_NAME --initial_peers $INITIAL_PEERS --torch_dtype float32 \
            --seq_len 3 --batch_size 3 --pre_seq_len 1 --n_steps 1 --task cls
          python benchmarks/benchmark_training.py --model $MODEL_NAME --initial_peers $INITIAL_PEERS --torch_dtype float32 \
            --seq_len 3 --batch_size 3 --pre_seq_len 1 --n_steps 1 --task causal_lm

          # [Step 4] Clean up

          kill -s SIGINT $BOOTSTRAP_PID $SERVER1_PID $SERVER2_PID $SERVER3_PID $SERVER4_PID $LOGGER_PID