# Patch summary (commentary only; everything from the first "diff --git" on is the unmodified patch).
#
# Files touched and what the -/+ lines change:
#   cli/deploy_server.sh
#     - adds `export OMP_NUM_THREADS=16` before launching the server;
#     - replaces the `--device ${DEVICE}` argument with a `CUDA_VISIBLE_DEVICES=${DEVICE}`
#       environment prefix on the `python -m cli.run_server` command;
#     - adds `--torch_dtype float16` and `--cache_dir '/extra_disk_1/dbaranchuk/test-bloomd'`;
#     - switches `--compression` from UNIFORM_8BIT to BLOCKWISE_8BIT;
#     - the trailing log redirection stays commented out (its path changes to logs/${SERVER_ID_PATH}.log).
#   cli/run_local_servers.sh
#     - `sleep 5` -> `sleep 20` after starting `hivemind-dht` and before reading tmp.out;
#     - `sleep 10` -> `sleep 180` before `pkill -f hivemind-dht`.
#   cli/run_server.py
#     - `--num_handlers` default changes from 16 to 8.
#   requirements.txt
#     - the hivemind archive pin changes from commit 28261470... to 20b3b3d5...,
#       with an inline note "bnb_integration + dtype fix".
#
# NOTE(review): the --cache_dir value is an absolute, user-specific path; presumably it should be
#   a script parameter or environment variable — confirm before merging.
# NOTE(review): the ${...} expansions on the added command line are unquoted; values containing
#   whitespace would be word-split — verify against the callers of this script.
# NOTE(review): both sleep changes are fixed delays used for synchronisation; the right values
#   presumably depend on the machine — TODO confirm.
# NOTE(review): in this copy the hunk line breaks appear to be collapsed; verify the patch against
#   the original commit before trying to apply it.
diff --git a/cli/deploy_server.sh b/cli/deploy_server.sh index 1622026..10c6eb6 100644 --- a/cli/deploy_server.sh +++ b/cli/deploy_server.sh @@ -81,5 +81,6 @@ fi # ('UNIFORM_8BIT', 4), # ('BLOCKWISE_8BIT', 5)] -python -m cli.run_server --converted_model_name_or_path ${MODEL_NAME} --device ${DEVICE} --initial_peer ${INITIAL_PEER} \ - --block_indices ${BLOCK_IDS} --compression UNIFORM_8BIT --identity_path ${SERVER_ID_PATH} --host_maddrs ${HOST_MADDR} --load_in_8bit # &> ${SERVER_ID_PATH}.log +export OMP_NUM_THREADS=16 +CUDA_VISIBLE_DEVICES=${DEVICE} python -m cli.run_server --converted_model_name_or_path ${MODEL_NAME} --torch_dtype float16 --initial_peer ${INITIAL_PEER} --cache_dir '/extra_disk_1/dbaranchuk/test-bloomd' \ + --block_indices ${BLOCK_IDS} --compression BLOCKWISE_8BIT --identity_path ${SERVER_ID_PATH} --host_maddrs ${HOST_MADDR} --load_in_8bit #&> logs/${SERVER_ID_PATH}.log diff --git a/cli/run_local_servers.sh b/cli/run_local_servers.sh index d2e1a28..94b3ca5 100644 --- a/cli/run_local_servers.sh +++ b/cli/run_local_servers.sh @@ -49,7 +49,7 @@ fi ####################### hivemind-dht &> tmp.out & -sleep 5 +sleep 20 INITIAL_PEER=$(python -c "with open('tmp.out') as f: print(f.readlines()[1].split()[-1])" ) echo "Initial peer: ${INITIAL_PEER}" @@ -103,6 +103,6 @@ done # Kill initial peer # ##################### -sleep 10 +sleep 180 pkill -f hivemind-dht # TODO: kill only particular pids of hivemind-dht rm tmp.out \ No newline at end of file diff --git a/cli/run_server.py b/cli/run_server.py index f808b82..b72f8dd 100644 --- a/cli/run_server.py +++ b/cli/run_server.py @@ -27,7 +27,7 @@ def main(): parser.add_argument('--compression', type=str, default='NONE', required=False, help='Tensor compression communication') - parser.add_argument('--num_handlers', type=int, default=16, required=False, + parser.add_argument('--num_handlers', type=int, default=8, required=False, help='server will use this many processes to handle incoming requests') 
parser.add_argument('--min_batch_size', type=int, default=1, help='Minimum required batch size for all expert operations') diff --git a/requirements.txt b/requirements.txt index 4d16e55..f1fddc7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,5 @@ torch==1.12.0 accelerate==0.10.0 huggingface-hub==0.7.0 bitsandbytes-cuda113==0.26.0 -https://github.com/learning-at-home/hivemind/archive/28261470e44f2ae4157d08b563b4d2771f3a9549.zip +https://github.com/learning-at-home/hivemind/archive/20b3b3d5f225ed525515a5383a008a8f9fad8173.zip # bnb_integration + dtype fix https://github.com/huggingface/transformers/archive/6589e510fa4e6c442059de2fab84752535de9b23.zip