update
pull/572/head
Authored by Ubuntu 2 months ago; committed by mike dupont
parent d2fcbbc72e
commit dfcdaca2ac

@ -0,0 +1,85 @@
absl-py==2.1.0
accelerate==0.28.0
async-timeout==4.0.3
base58==2.1.1
bitsandbytes==0.41.1
cachetools==5.3.3
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
cloud-tpu-client==0.10
ConfigArgParse==1.7
cpufeature==0.2.1
cryptography==42.0.5
Dijkstar==2.6.0
filelock==3.13.3
fsspec==2024.3.1
google-api-core==1.34.1
google-api-python-client==1.8.0
google-auth==2.29.0
google-auth-httplib2==0.2.0
googleapis-common-protos==1.63.0
grpcio==1.62.1
grpcio-tools==1.62.1
-e git+https://github.com/learning-at-home/hivemind@c295cfb7f5b29721c7ed198dcf2b5861950a54b7#egg=hivemind&subdirectory=../../../../../../../time/2024/03/27/hivemind
httplib2==0.22.0
huggingface-hub==0.17.3
humanfriendly==10.0
idna==3.6
Jinja2==3.1.3
libtpu-nightly==0.1.dev20231130+default
MarkupSafe==2.1.5
mpmath==1.3.0
msgpack==1.0.8
multiaddr==0.0.9
netaddr==1.2.1
networkx==3.1
numpy==1.24.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.19.3
nvidia-nvjitlink-cu12==12.4.99
nvidia-nvtx-cu12==12.1.105
oauth2client==4.1.3
packaging==24.0
-e git+https://github.com/huggingface/peft@2665f80a1738b315dce42da8f1a9c7fe793aa9ca#egg=peft&subdirectory=../../../../../../../time/2024/03/27/peft
-e git+https://github.com/bigscience-workshop/petals@2c55dd4ed30f72fb322b640ad51e8def70520240#egg=petals&subdirectory=../../../../../../../time/2023/09/22/petals
prefetch-generator==1.0.3
protobuf==3.20.3
psutil==5.9.8
pyasn1==0.6.0
pyasn1_modules==0.4.0
pycparser==2.21
pydantic==1.10.14
pymultihash==0.8.2
pyparsing==3.1.2
PyYAML==6.0.1
regex==2023.12.25
requests==2.31.0
rsa==4.9
safetensors==0.4.2
scipy==1.10.1
sentencepiece==0.2.0
six==1.16.0
sortedcontainers==2.4.0
speedtest-cli==2.1.3
sympy==1.12
tensor-parallel==1.0.23
tokenizers==0.14.1
torch==2.2.2
torch-xla==2.2.0
tqdm==4.66.2
-e git+https://github.com/huggingface/transformers.git@b71f20a7c9f3716d30f6738501559acf863e2c5c#egg=transformers&subdirectory=../../../../../../../../time/2023/07/17/experiments/transformers
triton==2.2.0
typing_extensions==4.10.0
uritemplate==3.0.1
urllib3==2.2.1
uvloop==0.19.0
varint==1.0.2

@ -0,0 +1 @@
PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4 --initial_peers /ip4/10.128.0.41/tcp/8099/p2p/QmVA1BeK2UYaXPwmHQ5EZ2biyzyVWdnQj2zHcEVYxgiSDY
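
For reference, a minimal client-side sketch (not part of the commit) for querying the swarm started by the command above. The model name and initial-peer multiaddr are copied from that command and will differ per deployment; AutoDistributedModelForCausalLM is the standard Petals client entry point.

# Sketch: connect a client to the private swarm started by run_server above.
# INITIAL_PEERS and MODEL_NAME come from the command; the prompt is illustrative.
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

INITIAL_PEERS = ["/ip4/10.128.0.41/tcp/8099/p2p/QmVA1BeK2UYaXPwmHQ5EZ2biyzyVWdnQj2zHcEVYxgiSDY"]
MODEL_NAME = "petals-team/StableBeluga2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoDistributedModelForCausalLM.from_pretrained(MODEL_NAME, initial_peers=INITIAL_PEERS)

inputs = tokenizer("A TPU-backed Petals swarm can", return_tensors="pt")["input_ids"]
outputs = model.generate(inputs, max_new_tokens=8)
print(tokenizer.decode(outputs[0]))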

@ -0,0 +1 @@
docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id
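
As a connectivity check (again not part of the commit), a short sketch of joining the backbone DHT from another host using hivemind's DHT client. The bootstrap multiaddr below is a placeholder; replace it with the /p2p/... address the run_dht container prints on startup.

# Sketch: join the backbone DHT and print the multiaddrs this node advertises.
# BOOTSTRAP is a placeholder; copy the real address from the container logs.
import hivemind

BOOTSTRAP = ["/ip4/10.128.0.41/tcp/8099/p2p/QmVA1BeK2UYaXPwmHQ5EZ2biyzyVWdnQj2zHcEVYxgiSDY"]

dht = hivemind.DHT(initial_peers=BOOTSTRAP, start=True)
print("Visible multiaddrs:", dht.get_visible_maddrs())
dht.shutdown()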

@ -40,7 +40,7 @@ install_requires =
    transformers==4.38.2  # if you change this, please also change version assert in petals/__init__.py
    speedtest-cli==2.1.3
    pydantic>=1.10,<2.0  # 2.0 is incompatible with hivemind yet
    hivemind==1.1.10.post2
    # hivemind==1.1.10.post2
    tensor_parallel==1.0.23
    humanfriendly
    async-timeout>=4.0.2

@ -193,9 +193,9 @@ class TransformerBackend(ModuleBackend):
        # Explicitly free the GPU memory. This is not necessary at the time this code is written,
        # but may help to avoid future issues when the module is not garbage-collected for some reasons
        dummy = torch.tensor([])
        for p in self.module.parameters():
            p.data = dummy
        # dummy = torch.tensor([])
        # for p in self.module.parameters():
        #     p.data = dummy
def merge_inference_pools_inplace(backends: Dict[ExpertUID, TransformerBackend]):
