petals/tests/test_full_model.py

92 lines
4.2 KiB
Python
Raw Permalink Normal View History

import pytest
2022-07-01 00:38:38 +00:00
import torch
import transformers
2022-07-06 22:06:03 +00:00
from hivemind import get_logger, use_hivemind_log_handler
from test_utils import *
2022-07-01 00:38:38 +00:00
from src.bloom.model import BloomForCausalLM
2022-07-01 00:38:38 +00:00
from src.client.remote_model import DistributedBloomForCausalLM
use_hivemind_log_handler("in_root_logger")
logger = get_logger(__file__)
@pytest.mark.forked
def test_full_model_exact_match(atol_forward=1e-3, atol_inference=1e-3):
2022-07-01 00:38:38 +00:00
tokenizer = transformers.BloomTokenizerFast.from_pretrained(MODEL_NAME)
Miscellaneous fixes to automatic tests (#35) 1. __Reduce memory usage in in test_full_model__ - previously, loading the full model would consistently fail IF github is enforcing memory limit [example](https://github.com/bigscience-workshop/distributed-bloom/runs/7473920049?check_suite_focus=true) - the new version uses accelerate to save 2GB of peak memory, that was previously used when loading both reference model AND its state dict at the same time - only to load that state dict :) 2. __Safer delays when creating servers__ - run-tests will now wait for a few seconds after creating the first server - and before creating the second one, so as to make sure that the first server creates a DHT instance that subsequent servers can connect to. - also increased the wait time after creating servers by 30 seconds to make sure we load the model in time even when bumping into slow remotes on HF side 3. __Fix environment variables in CI to avoid build conflicts__ - the previous code was using a wrong environment variable that was always "main". The current one will correctly resolve branch name, both in main and on pull request. - For reference, below you can find sample environments when running CI in both cases: on pull request and on push to main. <details> <summary> Environment variables when building this branch (on pull request) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.12/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=98 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.5 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=3b457e8a14e5ecb0d65d6e4c0e9161f7756a8861 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/pull/35/merge RUNNER_OS=Linux GITHUB_REF_PROTECTED=false HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_cc9b46e4-56a1-40c5-ba08-5a91e21f0f95 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=8f0072e74f2847c0851e7ff9b5e4af7c GITHUB_EVENT_NAME=pull_request GITHUB_RUN_ID=2720198689 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_0aba811a-a04b-40a2-ba42-79efb2723e9e HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220717.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=35/merge STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle CHROME_BIN=/usr/bin/google-chrome GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.4/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:23653 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF=main CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF=fix-branch-name GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> <details> <summary> Environment variables when building in main (on push) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.11/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=53 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.4.2 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=49242d81006454d687ff3293c49f6bf234793627 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/heads/main RUNNER_OS=Linux GITHUB_REF_PROTECTED=true HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_7668f06a-99e1-4ed1-81e9-46d75fab3f33 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=3dadac48981b4a679a33224db89be1ed GITHUB_EVENT_NAME=push GITHUB_RUN_ID=2680158280 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220710.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=main STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom CHROME_BIN=/usr/bin/google-chrome ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.3/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:22000 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF= CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF= GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> Co-authored-by: Dmitry Baranchuk <dmitrybaranchuk@gmail.com>
2022-07-22 19:38:40 +00:00
model = DistributedBloomForCausalLM.from_pretrained(
MODEL_NAME, initial_peers=INITIAL_PEERS, low_cpu_mem_usage=True, torch_dtype=torch.float32
)
config = model.config
assert isinstance(model, DistributedBloomForCausalLM)
2022-07-01 00:38:38 +00:00
assert len(model.transformer.h) == model.config.n_layer
2022-07-06 22:04:47 +00:00
test_inputs = tokenizer("A cat sat on a mat", return_tensors="pt")["input_ids"]
2022-07-01 00:38:38 +00:00
Miscellaneous fixes to automatic tests (#35) 1. __Reduce memory usage in in test_full_model__ - previously, loading the full model would consistently fail IF github is enforcing memory limit [example](https://github.com/bigscience-workshop/distributed-bloom/runs/7473920049?check_suite_focus=true) - the new version uses accelerate to save 2GB of peak memory, that was previously used when loading both reference model AND its state dict at the same time - only to load that state dict :) 2. __Safer delays when creating servers__ - run-tests will now wait for a few seconds after creating the first server - and before creating the second one, so as to make sure that the first server creates a DHT instance that subsequent servers can connect to. - also increased the wait time after creating servers by 30 seconds to make sure we load the model in time even when bumping into slow remotes on HF side 3. __Fix environment variables in CI to avoid build conflicts__ - the previous code was using a wrong environment variable that was always "main". The current one will correctly resolve branch name, both in main and on pull request. - For reference, below you can find sample environments when running CI in both cases: on pull request and on push to main. <details> <summary> Environment variables when building this branch (on pull request) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.12/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=98 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.5 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=3b457e8a14e5ecb0d65d6e4c0e9161f7756a8861 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/pull/35/merge RUNNER_OS=Linux GITHUB_REF_PROTECTED=false HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_cc9b46e4-56a1-40c5-ba08-5a91e21f0f95 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=8f0072e74f2847c0851e7ff9b5e4af7c GITHUB_EVENT_NAME=pull_request GITHUB_RUN_ID=2720198689 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_0aba811a-a04b-40a2-ba42-79efb2723e9e HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220717.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=35/merge STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle CHROME_BIN=/usr/bin/google-chrome GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.4/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:23653 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF=main CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF=fix-branch-name GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> <details> <summary> Environment variables when building in main (on push) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.11/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=53 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.4.2 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=49242d81006454d687ff3293c49f6bf234793627 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/heads/main RUNNER_OS=Linux GITHUB_REF_PROTECTED=true HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_7668f06a-99e1-4ed1-81e9-46d75fab3f33 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=3dadac48981b4a679a33224db89be1ed GITHUB_EVENT_NAME=push GITHUB_RUN_ID=2680158280 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220710.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=main STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom CHROME_BIN=/usr/bin/google-chrome ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.3/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:22000 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF= CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF= GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> Co-authored-by: Dmitry Baranchuk <dmitrybaranchuk@gmail.com>
2022-07-22 19:38:40 +00:00
with torch.inference_mode():
parallel_outputs = model.forward(test_inputs).logits
assert torch.all(torch.isfinite(parallel_outputs))
logger.info("Forward outputs are finite")
2022-07-01 00:38:38 +00:00
embs = model.transformer.word_embeddings(test_inputs)
embs = model.transformer.word_embeddings_layernorm(embs)
recurrent_outputs = []
with model.transformer.h.inference_session(max_length=embs.shape[1]) as sess:
for t in range(embs.shape[1]):
recurrent_outputs.append(sess.step(embs[:, t : t + 1, :]))
recurrent_outputs = torch.cat(recurrent_outputs, dim=1)
recurrent_outputs = model.transformer.ln_f(recurrent_outputs)
Miscellaneous fixes to automatic tests (#35) 1. __Reduce memory usage in in test_full_model__ - previously, loading the full model would consistently fail IF github is enforcing memory limit [example](https://github.com/bigscience-workshop/distributed-bloom/runs/7473920049?check_suite_focus=true) - the new version uses accelerate to save 2GB of peak memory, that was previously used when loading both reference model AND its state dict at the same time - only to load that state dict :) 2. __Safer delays when creating servers__ - run-tests will now wait for a few seconds after creating the first server - and before creating the second one, so as to make sure that the first server creates a DHT instance that subsequent servers can connect to. - also increased the wait time after creating servers by 30 seconds to make sure we load the model in time even when bumping into slow remotes on HF side 3. __Fix environment variables in CI to avoid build conflicts__ - the previous code was using a wrong environment variable that was always "main". The current one will correctly resolve branch name, both in main and on pull request. - For reference, below you can find sample environments when running CI in both cases: on pull request and on push to main. <details> <summary> Environment variables when building this branch (on pull request) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.12/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=98 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.5 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=3b457e8a14e5ecb0d65d6e4c0e9161f7756a8861 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/pull/35/merge RUNNER_OS=Linux GITHUB_REF_PROTECTED=false HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_cc9b46e4-56a1-40c5-ba08-5a91e21f0f95 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=8f0072e74f2847c0851e7ff9b5e4af7c GITHUB_EVENT_NAME=pull_request GITHUB_RUN_ID=2720198689 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_0aba811a-a04b-40a2-ba42-79efb2723e9e HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220717.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=35/merge STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle CHROME_BIN=/usr/bin/google-chrome GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.4/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:23653 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF=main CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF=fix-branch-name GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> <details> <summary> Environment variables when building in main (on push) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.11/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=53 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.4.2 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=49242d81006454d687ff3293c49f6bf234793627 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/heads/main RUNNER_OS=Linux GITHUB_REF_PROTECTED=true HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_7668f06a-99e1-4ed1-81e9-46d75fab3f33 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=3dadac48981b4a679a33224db89be1ed GITHUB_EVENT_NAME=push GITHUB_RUN_ID=2680158280 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220710.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=main STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom CHROME_BIN=/usr/bin/google-chrome ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.3/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:22000 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF= CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF= GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> Co-authored-by: Dmitry Baranchuk <dmitrybaranchuk@gmail.com>
2022-07-22 19:38:40 +00:00
recurrent_outputs = model.lm_head(recurrent_outputs)
assert torch.allclose(recurrent_outputs, parallel_outputs, rtol=0, atol=atol_inference)
logger.info("Inference is consistent with forward")
Miscellaneous fixes to automatic tests (#35) 1. __Reduce memory usage in in test_full_model__ - previously, loading the full model would consistently fail IF github is enforcing memory limit [example](https://github.com/bigscience-workshop/distributed-bloom/runs/7473920049?check_suite_focus=true) - the new version uses accelerate to save 2GB of peak memory, that was previously used when loading both reference model AND its state dict at the same time - only to load that state dict :) 2. __Safer delays when creating servers__ - run-tests will now wait for a few seconds after creating the first server - and before creating the second one, so as to make sure that the first server creates a DHT instance that subsequent servers can connect to. - also increased the wait time after creating servers by 30 seconds to make sure we load the model in time even when bumping into slow remotes on HF side 3. __Fix environment variables in CI to avoid build conflicts__ - the previous code was using a wrong environment variable that was always "main". The current one will correctly resolve branch name, both in main and on pull request. - For reference, below you can find sample environments when running CI in both cases: on pull request and on push to main. <details> <summary> Environment variables when building this branch (on pull request) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.12/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=98 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.5 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=3b457e8a14e5ecb0d65d6e4c0e9161f7756a8861 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/pull/35/merge RUNNER_OS=Linux GITHUB_REF_PROTECTED=false HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_cc9b46e4-56a1-40c5-ba08-5a91e21f0f95 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=8f0072e74f2847c0851e7ff9b5e4af7c GITHUB_EVENT_NAME=pull_request GITHUB_RUN_ID=2720198689 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_0aba811a-a04b-40a2-ba42-79efb2723e9e HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220717.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=35/merge STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle CHROME_BIN=/usr/bin/google-chrome GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.4/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:23653 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF=main CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF=fix-branch-name GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> <details> <summary> Environment variables when building in main (on push) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.11/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=53 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.4.2 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=49242d81006454d687ff3293c49f6bf234793627 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/heads/main RUNNER_OS=Linux GITHUB_REF_PROTECTED=true HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_7668f06a-99e1-4ed1-81e9-46d75fab3f33 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=3dadac48981b4a679a33224db89be1ed GITHUB_EVENT_NAME=push GITHUB_RUN_ID=2680158280 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220710.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=main STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom CHROME_BIN=/usr/bin/google-chrome ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.3/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:22000 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF= CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF= GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> Co-authored-by: Dmitry Baranchuk <dmitrybaranchuk@gmail.com>
2022-07-22 19:38:40 +00:00
del model, embs, recurrent_outputs
if REF_NAME:
Miscellaneous fixes to automatic tests (#35) 1. __Reduce memory usage in in test_full_model__ - previously, loading the full model would consistently fail IF github is enforcing memory limit [example](https://github.com/bigscience-workshop/distributed-bloom/runs/7473920049?check_suite_focus=true) - the new version uses accelerate to save 2GB of peak memory, that was previously used when loading both reference model AND its state dict at the same time - only to load that state dict :) 2. __Safer delays when creating servers__ - run-tests will now wait for a few seconds after creating the first server - and before creating the second one, so as to make sure that the first server creates a DHT instance that subsequent servers can connect to. - also increased the wait time after creating servers by 30 seconds to make sure we load the model in time even when bumping into slow remotes on HF side 3. __Fix environment variables in CI to avoid build conflicts__ - the previous code was using a wrong environment variable that was always "main". The current one will correctly resolve branch name, both in main and on pull request. - For reference, below you can find sample environments when running CI in both cases: on pull request and on push to main. <details> <summary> Environment variables when building this branch (on pull request) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.12/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=98 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.5 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=3b457e8a14e5ecb0d65d6e4c0e9161f7756a8861 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/pull/35/merge RUNNER_OS=Linux GITHUB_REF_PROTECTED=false HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_cc9b46e4-56a1-40c5-ba08-5a91e21f0f95 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=8f0072e74f2847c0851e7ff9b5e4af7c GITHUB_EVENT_NAME=pull_request GITHUB_RUN_ID=2720198689 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_0aba811a-a04b-40a2-ba42-79efb2723e9e HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220717.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=35/merge STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle CHROME_BIN=/usr/bin/google-chrome GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.4/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:23653 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF=main CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF=fix-branch-name GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> <details> <summary> Environment variables when building in main (on push) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.11/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=53 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.4.2 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=49242d81006454d687ff3293c49f6bf234793627 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/heads/main RUNNER_OS=Linux GITHUB_REF_PROTECTED=true HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_7668f06a-99e1-4ed1-81e9-46d75fab3f33 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=3dadac48981b4a679a33224db89be1ed GITHUB_EVENT_NAME=push GITHUB_RUN_ID=2680158280 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220710.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=main STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom CHROME_BIN=/usr/bin/google-chrome ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.3/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:22000 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF= CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF= GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> Co-authored-by: Dmitry Baranchuk <dmitrybaranchuk@gmail.com>
2022-07-22 19:38:40 +00:00
ref_model = transformers.BloomForCausalLM.from_pretrained(
REF_NAME, low_cpu_mem_usage=True, torch_dtype=torch.float32
)
if config.vocab_size < ref_model.config.vocab_size:
ref_model.resize_token_embeddings(config.vocab_size)
logger.warning(f"Resized the reference model embeddings, new total = {ref_model.config.vocab_size}")
dummy_mask = torch.ones_like(test_inputs, dtype=torch.bool)
# note: this creates a dummy mask to make the test compatible with older transformer versions
# prior to https://github.com/huggingface/transformers/pull/17837
Miscellaneous fixes to automatic tests (#35) 1. __Reduce memory usage in in test_full_model__ - previously, loading the full model would consistently fail IF github is enforcing memory limit [example](https://github.com/bigscience-workshop/distributed-bloom/runs/7473920049?check_suite_focus=true) - the new version uses accelerate to save 2GB of peak memory, that was previously used when loading both reference model AND its state dict at the same time - only to load that state dict :) 2. __Safer delays when creating servers__ - run-tests will now wait for a few seconds after creating the first server - and before creating the second one, so as to make sure that the first server creates a DHT instance that subsequent servers can connect to. - also increased the wait time after creating servers by 30 seconds to make sure we load the model in time even when bumping into slow remotes on HF side 3. __Fix environment variables in CI to avoid build conflicts__ - the previous code was using a wrong environment variable that was always "main". The current one will correctly resolve branch name, both in main and on pull request. - For reference, below you can find sample environments when running CI in both cases: on pull request and on push to main. <details> <summary> Environment variables when building this branch (on pull request) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.12/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=98 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.5 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=3b457e8a14e5ecb0d65d6e4c0e9161f7756a8861 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/pull/35/merge RUNNER_OS=Linux GITHUB_REF_PROTECTED=false HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_cc9b46e4-56a1-40c5-ba08-5a91e21f0f95 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_0aba811a-a04b-40a2-ba42-79efb2723e9e GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=8f0072e74f2847c0851e7ff9b5e4af7c GITHUB_EVENT_NAME=pull_request GITHUB_RUN_ID=2720198689 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_0aba811a-a04b-40a2-ba42-79efb2723e9e HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220717.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=35/merge STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle CHROME_BIN=/usr/bin/google-chrome GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.4/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:23653 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF=main CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF=fix-branch-name GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> <details> <summary> Environment variables when building in main (on push) </summary> SELENIUM_JAR_PATH=/usr/share/java/selenium-server.jar GOROOT_1_17_X64=/opt/hostedtoolcache/go/1.17.11/x64 CONDA=/usr/share/miniconda GITHUB_WORKSPACE=/home/runner/work/distributed-bloom/distributed-bloom JAVA_HOME_11_X64=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_PATH=/home/runner/work/_temp/_runner_file_commands/add_path_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_ACTION=__run_2 JAVA_HOME=/usr/lib/jvm/temurin-11-jdk-amd64 GITHUB_RUN_NUMBER=53 RUNNER_NAME=GitHub Actions 3 GRADLE_HOME=/usr/share/gradle-7.4.2 XDG_CONFIG_HOME=/home/runner/.config DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 ANT_HOME=/usr/share/ant JAVA_HOME_8_X64=/usr/lib/jvm/temurin-8-jdk-amd64 HOMEBREW_PREFIX=/home/linuxbrew/.linuxbrew pythonLocation=/opt/hostedtoolcache/Python/3.9.13/x64 GITHUB_REF_TYPE=branch HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS=3650 BOOTSTRAP_HASKELL_NONINTERACTIVE=1 *** PIPX_BIN_DIR=/opt/pipx_bin DEPLOYMENT_BASEPATH=/opt/runner GITHUB_ACTIONS=true ANDROID_NDK_LATEST_HOME=/usr/local/lib/android/sdk/ndk/24.0.8215888 GITHUB_SHA=49242d81006454d687ff3293c49f6bf234793627 POWERSHELL_DISTRIBUTION_CHANNEL=GitHub-Actions-ubuntu20 DOTNET_MULTILEVEL_LOOKUP=0 GITHUB_REF=refs/heads/main RUNNER_OS=Linux GITHUB_REF_PROTECTED=true HOME=/home/runner GITHUB_API_URL=https://api.github.com/ LANG=C.UTF-8 BLOOM_TESTING_WRITE_TOKEN=*** RUNNER_TRACKING_ID=github_7668f06a-99e1-4ed1-81e9-46d75fab3f33 STATS_KEEPALIVE=false RUNNER_ARCH=X64 RUNNER_TEMP=/home/runner/work/_temp EDGEWEBDRIVER=/usr/local/share/edge_driver GITHUB_ENV=/home/runner/work/_temp/_runner_file_commands/set_env_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 GITHUB_EVENT_PATH=/home/runner/work/_temp/_github_workflow/event.json INVOCATION_ID=3dadac48981b4a679a33224db89be1ed GITHUB_EVENT_NAME=push GITHUB_RUN_ID=2680158280 JAVA_HOME_17_X64=/usr/lib/jvm/temurin-17-jdk-amd64 ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle GITHUB_STEP_SUMMARY=/home/runner/work/_temp/_runner_file_commands/step_summary_cd6c1ed2-0d0f-496d-b7a6-ffa476dcc144 HOMEBREW_NO_AUTO_UPDATE=1 GITHUB_ACTOR=justheuristic NVM_DIR=/home/runner/.nvm SGX_AESM_ADDR=1 GITHUB_RUN_ATTEMPT=1 ANDROID_HOME=/usr/local/lib/android/sdk GITHUB_GRAPHQL_URL=https://api.github.com/graphql ACCEPT_EULA=Y RUNNER_USER=runner USER=runner GITHUB_SERVER_URL=https://github.com/ HOMEBREW_CELLAR=/home/linuxbrew/.linuxbrew/Cellar PIPX_HOME=/opt/pipx GECKOWEBDRIVER=/usr/local/share/gecko_driver CHROMEWEBDRIVER=/usr/local/share/chrome_driver SHLVL=0 ANDROID_SDK_ROOT=/usr/local/lib/android/sdk VCPKG_INSTALLATION_ROOT=/usr/local/share/vcpkg HOMEBREW_REPOSITORY=/home/linuxbrew/.linuxbrew/Homebrew RUNNER_TOOL_CACHE=/opt/hostedtoolcache ImageVersion=20220710.1 DOTNET_NOLOGO=1 GITHUB_REF_NAME=main STATS_PFS=true GRAALVM_11_ROOT=/usr/local/graalvm/graalvm-ce-java11-22.1.0 GITHUB_JOB=convert-model LD_LIBRARY_PATH=/opt/hostedtoolcache/Python/3.9.13/x64/lib XDG_RUNTIME_DIR=/run/user/1001 AZURE_EXTENSION_DIR=/opt/az/azcliextensions PERFLOG_LOCATION_SETTING=RUNNER_PERFLOG GITHUB_REPOSITORY=bigscience-workshop/distributed-bloom CHROME_BIN=/usr/bin/google-chrome ANDROID_NDK_ROOT=/usr/local/lib/android/sdk/ndk-bundle GOROOT_1_18_X64=/opt/hostedtoolcache/go/1.18.3/x64 GITHUB_RETENTION_DAYS=90 JOURNAL_STREAM=8:22000 RUNNER_WORKSPACE=/home/runner/work/distributed-bloom LEIN_HOME=/usr/local/lib/lein LEIN_JAR=/usr/local/lib/lein/self-installs/leiningen-2.9.8-standalone.jar GITHUB_ACTION_REPOSITORY= PATH=/opt/hostedtoolcache/Python/3.9.13/x64/bin:/opt/hostedtoolcache/Python/3.9.13/x64:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin RUNNER_PERFLOG=/home/runner/perflog GITHUB_BASE_REF= CI=true SWIFT_PATH=/usr/share/swift/usr/bin ImageOS=ubuntu20 GITHUB_REPOSITORY_OWNER=bigscience-workshop GITHUB_HEAD_REF= GITHUB_ACTION_REF= GITHUB_WORKFLOW=Tests DEBIAN_FRONTEND=noninteractive AGENT_TOOLSDIRECTORY=/opt/hostedtoolcache GOROOT_1_16_X64=/opt/hostedtoolcache/go/1.16.15/x64 _=/usr/bin/env </details> Co-authored-by: Dmitry Baranchuk <dmitrybaranchuk@gmail.com>
2022-07-22 19:38:40 +00:00
ref_outputs = ref_model.forward(test_inputs, attention_mask=dummy_mask).logits.float()
assert torch.allclose(ref_outputs, parallel_outputs, rtol=0, atol=atol_forward)
logger.warning(f"Distributed forward is consistent with {type(ref_model)}.forward")
del ref_model, ref_outputs, dummy_mask
else:
logger.warning("Did not test exact match with local model: REF_NAME environment variable is not set")
assert False
@pytest.mark.forked
def test_greedy_generation(max_new_tokens=4):
tokenizer = transformers.BloomTokenizerFast.from_pretrained(MODEL_NAME)
model = DistributedBloomForCausalLM.from_pretrained(
MODEL_NAME, initial_peers=INITIAL_PEERS, low_cpu_mem_usage=True, torch_dtype=torch.float32
)
inputs = tokenizer("A cat sat on a mat", return_tensors="pt")["input_ids"]
remote_outputs = model.generate(
inputs,
max_new_tokens=max_new_tokens,
)
hf_outputs = BloomForCausalLM.greedy_search(model, input_ids=inputs, max_length=inputs.size(1) + max_new_tokens)
assert torch.allclose(remote_outputs, hf_outputs), "Greedy search are not identical to HF"
inputs_batch = tokenizer(["A cat sat on a mat", "A dog sat on a mat"], return_tensors="pt", padding=True)[
"input_ids"
]
remote_outputs_batch = model.generate(
inputs_batch,
max_new_tokens=max_new_tokens,
)
hf_outputs_batch = BloomForCausalLM.greedy_search(
model, input_ids=inputs_batch, max_length=inputs_batch.size(1) + max_new_tokens
)
assert torch.allclose(
remote_outputs_batch, hf_outputs_batch
), "Greedy search are not identical to HF in multibatch mode"