feat: multinode setup

This commit is contained in:
Zach Nussbaum 2023-04-05 02:53:04 +00:00
parent 98ceac34e1
commit 885b7f1a3a
3 changed files with 33 additions and 0 deletions

8
create_hostname.sh Normal file
View File

@ -0,0 +1,8 @@
#!/bin/bash
# Writes /job/hostfile listing this machine and one worker node, each with
# the same number of slots, then prints the hostfile path.
#
# Usage: create_hostname.sh WORKER_IP [N_GPUS]
#   WORKER_IP  address of the worker node, listed after localhost
#   N_GPUS     slots advertised per host (default: 8)
#
# NOTE(review): the "host slots=N" layout under /job/hostfile looks like a
# multi-node launcher hostfile -- confirm which tool consumes it.

# ${1:?} aborts with a usage message when the worker address is missing,
# instead of silently writing a hostfile entry with an empty host name.
export WORKER_IP="${1:?usage: create_hostname.sh WORKER_IP [N_GPUS]}"
N_GPUS="${2:-8}"

# create dir if doesn't exist
sudo mkdir -p /job

# Values are passed as printf arguments rather than spliced into the format
# string, so '%' or '\' in the input cannot alter the output. printf reuses
# the format for each host/slots pair, and every entry (including the last)
# is newline-terminated.
printf '%s slots=%s\n' localhost "$N_GPUS" "$WORKER_IP" "$N_GPUS" \
  | sudo tee /job/hostfile

echo /job/hostfile

19
head_node_setup.sh Normal file
View File

@ -0,0 +1,19 @@
#!/bin/sh
# Prepares the head node of a two-node job:
#   1. installs the NFS server and exports ./data_multiplus to the worker,
#   2. installs pdsh and configures it to use ssh,
#   3. makes sure an RSA key pair exists and is authorized for this user,
#   4. writes /job/hostfile listing localhost and the worker.
#
# Usage: head_node_setup.sh WORKER_IP [N_GPUS]
#   WORKER_IP  address of the worker node
#   N_GPUS     slots advertised per host (default: 8, the value that
#              create_hostname.sh hard-codes)
#
# Run from the directory that should contain data_multiplus: the export path
# is built from $PWD.

# ${1:?} aborts with a usage message when the worker address is missing.
WORKER_IP="${1:?usage: head_node_setup.sh WORKER_IP [N_GPUS]}"
N_GPUS="${2:-8}"

# --- NFS export -------------------------------------------------------------
# apt-get (not apt) is the interface intended for non-interactive use.
sudo apt-get install -y nfs-kernel-server
sudo mkdir -p ./data_multiplus
sudo chmod 777 ./data_multiplus

EXPORT_LINE="${PWD}/data_multiplus ${WORKER_IP}(rw,sync,no_subtree_check)"
# Append the entry only if it is not already present, and always terminate it
# with a newline; otherwise a re-run (or any later append) would be glued
# onto the same line and produce an invalid /etc/exports.
if ! grep -qxF -- "$EXPORT_LINE" /etc/exports 2>/dev/null; then
  printf '%s\n' "$EXPORT_LINE" | sudo tee -a /etc/exports
fi
sudo systemctl restart nfs-kernel-server

# --- pdsh -------------------------------------------------------------------
sudo apt-get install -y pdsh
# These exports only reach processes started from this script; they persist
# in the caller's shell only if the script is sourced.
export DSHPATH="$PATH"
export PDSH_RCMD_TYPE=ssh

# --- ssh key ----------------------------------------------------------------
mkdir -p "$HOME/.ssh"
chmod 700 "$HOME/.ssh"
# -f makes ssh-keygen non-interactive; skipping when the key already exists
# avoids the overwrite prompt and keeps any existing key intact.
if [ ! -f "$HOME/.ssh/id_rsa" ]; then
  ssh-keygen -t rsa -N '' -f "$HOME/.ssh/id_rsa"
fi
# Authorize the public key once; repeated runs must not pile up duplicates.
if ! grep -qxFf "$HOME/.ssh/id_rsa.pub" "$HOME/.ssh/authorized_keys" 2>/dev/null; then
  cat "$HOME/.ssh/id_rsa.pub" >> "$HOME/.ssh/authorized_keys"
fi

# --- hostfile ---------------------------------------------------------------
sudo mkdir -p /job
# Values go in as printf arguments, never as the format string; printf reuses
# the format for each host/slots pair, so every entry ends with a newline.
printf '%s slots=%s\n' localhost "$N_GPUS" "$WORKER_IP" "$N_GPUS" \
  | sudo tee /job/hostfile

6
worker_node_setup.sh Normal file
View File

@ -0,0 +1,6 @@
#!/bin/sh
# Prepares a worker node: installs the NFS client and mounts the head node's
# data_multiplus export onto ./data_multiplus.
#
# Usage: worker_node_setup.sh HEAD_IP
#   HEAD_IP  address of the head node exporting data_multiplus
#
# The remote path is built from this machine's $PWD, so the script must be
# run from the same absolute directory that head_node_setup.sh was run from
# on the head node.

# ${1:?} aborts with a usage message when the head address is missing,
# instead of attempting to mount ":/path".
HEAD_IP="${1:?usage: worker_node_setup.sh HEAD_IP}"

# apt-get (not apt) is the interface intended for non-interactive use.
sudo apt-get install -y nfs-common
sudo mkdir -p ./data_multiplus

# Quoted so a working directory containing spaces or glob characters is
# passed to mount as a single argument.
sudo mount "${HEAD_IP}:${PWD}/data_multiplus" ./data_multiplus