From 00d1704f07aa2df1db92935c14cf21d426cd8920 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 11 Jun 2026 13:40:42 -0700 Subject: [PATCH 1/2] Use ROCm 7.1.1 instead of 7.2 --- pyproject.toml | 4 +++- scripts/install-tuolumne-torchpypi.sh | 8 ++++---- scripts/scaffold-tuolumne-torchpypi.job | 18 +++++++++--------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 70eb0d5..434b6eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,9 @@ cuda = [ "mpi4py==4.1.1", ] rocm = [ - "torch==2.12.0+rocm7.2", + "torch==2.12.0+rocm7.1", + "torchaudio==2.11.0+rocm7.1", + "torchvision==0.27.0+rocm7.1", "mpi4py==4.1.1", ] rocmwci = [ diff --git a/scripts/install-tuolumne-torchpypi.sh b/scripts/install-tuolumne-torchpypi.sh index 91876c4..0b7c597 100644 --- a/scripts/install-tuolumne-torchpypi.sh +++ b/scripts/install-tuolumne-torchpypi.sh @@ -1,5 +1,5 @@ -ml load python/3.11.5 && python3 -m venv .venvs/scaffoldvenv-tuo-pypi && source .venvs/scaffoldvenv-tuo-pypi/bin/activate && pip install --upgrade pip -ml cce/21.0.1 cray-mpich/9.1.0 rocm/7.2.1 rccl/fast-env-slows-mpi -pip install -e .[rocm] --find-links https://download.pytorch.org/whl/torch/ --find-links https://download.pytorch.org/whl/triton-rocm/ 2>&1 | tee install.log +ml load python/3.13.2 && python3 -m venv .venvs/scaffoldvenv-tuo-pypi && source .venvs/scaffoldvenv-tuo-pypi/bin/activate && pip install --upgrade pip +ml cce/21.0.1 cray-mpich/9.1.0 rocm/7.1.1 rccl/fast-env-slows-mpi +pip install -e .[rocm] --find-links https://download.pytorch.org/whl/torch/ --find-links https://download.pytorch.org/whl/torchaudio/ --find-links https://download.pytorch.org/whl/torchvision/ --find-links https://download.pytorch.org/whl/triton-rocm/ 2>&1 | tee install.log # libmpi.so.12 does not exist => ls /opt/cray/pe/lib64/ | grep libmpi -patchelf --replace-needed libmpi.so.12 libmpi_gnu.so.12 
.venvs/scaffoldvenv-tuo-pypi/lib/python3.11/site-packages/mpi4py/MPI.mpich.cpython-311-x86_64-linux-gnu.so +patchelf --replace-needed libmpi.so.12 libmpi_gnu.so.12 .venvs/scaffoldvenv-tuo-pypi/lib/python3.13/site-packages/mpi4py/MPI.mpich.cpython-313-x86_64-linux-gnu.so diff --git a/scripts/scaffold-tuolumne-torchpypi.job b/scripts/scaffold-tuolumne-torchpypi.job index c78af96..556a7e6 100644 --- a/scripts/scaffold-tuolumne-torchpypi.job +++ b/scripts/scaffold-tuolumne-torchpypi.job @@ -7,20 +7,20 @@ # flux: -qpdebug # flux: -B flask -ml cce/21.0.1 cray-mpich/9.1.0 rocm/7.2.1 rccl/fast-env-slows-mpi +ml cce/21.0.1 cray-mpich/9.1.0 rocm/7.1.1 rccl/fast-env-slows-mpi . .venvs/scaffoldvenv-tuo-pypi/bin/activate -export NCCL_NET_PLUGIN=/collab/usr/global/tools/rccl/toss_4_x86_64_ib_cray/rocm-7.2.0/install/lib/librccl-net.so +export NCCL_NET_PLUGIN=/collab/usr/global/tools/rccl/toss_4_x86_64_ib_cray/rocm-7.1.1/install/lib/librccl-net.so # Disable direct convolution benchmarking (should speedup warmup by a significant amount, does the below three options together) -# export MIOPEN_DEBUG_CONV_DIRECT=0 -# Disable direct naive convolution benchmarking (naive_conv_ab_nonpacked_fwd_ndhwc_half_double_half.kd) -export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD=0 -# Disable naive_conv_ab_nonpacked_bwd_ndhwc_half_double_half.kd -export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD=0 -# Disable naive_conv_ab_nonpacked_wrw_ndhwc_half_double_half.kd -export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW=0 +export MIOPEN_DEBUG_CONV_DIRECT=0 +# # Disable direct naive convolution benchmarking (naive_conv_ab_nonpacked_fwd_ndhwc_half_double_half.kd) +# export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD=0 +# # Disable naive_conv_ab_nonpacked_bwd_ndhwc_half_double_half.kd +# export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD=0 +# # Disable naive_conv_ab_nonpacked_wrw_ndhwc_half_double_half.kd +# export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW=0 CONFIG_PATH="$(pwd)/ScaFFold/configs/benchmark_default.yml" 
FRACT_BASE_DIR="${FRACT_BASE_DIR:-$(pwd)/ScaFFold/fractals}" From 0028de9a5e4134f5db2b435775c4040fe8088d78 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 18 Jun 2026 10:12:50 -0700 Subject: [PATCH 2/2] Remove doubled '#' from comments in scaffold-tuolumne-torchpypi.job --- scripts/scaffold-tuolumne-torchpypi.job | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/scaffold-tuolumne-torchpypi.job b/scripts/scaffold-tuolumne-torchpypi.job index 556a7e6..cc3b061 100644 --- a/scripts/scaffold-tuolumne-torchpypi.job +++ b/scripts/scaffold-tuolumne-torchpypi.job @@ -15,11 +15,11 @@ export NCCL_NET_PLUGIN=/collab/usr/global/tools/rccl/toss_4_x86_64_ib_cray/rocm- # Disable direct convolution benchmarking (should speedup warmup by a significant amount, does the below three options together) export MIOPEN_DEBUG_CONV_DIRECT=0 -# # Disable direct naive convolution benchmarking (naive_conv_ab_nonpacked_fwd_ndhwc_half_double_half.kd) +# Disable direct naive convolution benchmarking (naive_conv_ab_nonpacked_fwd_ndhwc_half_double_half.kd) # export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD=0 -# # Disable naive_conv_ab_nonpacked_bwd_ndhwc_half_double_half.kd +# Disable naive_conv_ab_nonpacked_bwd_ndhwc_half_double_half.kd # export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD=0 -# # Disable naive_conv_ab_nonpacked_wrw_ndhwc_half_double_half.kd +# Disable naive_conv_ab_nonpacked_wrw_ndhwc_half_double_half.kd # export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW=0 CONFIG_PATH="$(pwd)/ScaFFold/configs/benchmark_default.yml"