diff --git a/pyproject.toml b/pyproject.toml index 70eb0d5..434b6eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,9 @@ cuda = [ "mpi4py==4.1.1", ] rocm = [ - "torch==2.12.0+rocm7.2", + "torch==2.12.0+rocm7.1", + "torchaudio==2.11.0+rocm7.1", + "torchvision==0.27.0+rocm7.1", "mpi4py==4.1.1", ] rocmwci = [ diff --git a/scripts/install-tuolumne-torchpypi.sh b/scripts/install-tuolumne-torchpypi.sh index 91876c4..0b7c597 100644 --- a/scripts/install-tuolumne-torchpypi.sh +++ b/scripts/install-tuolumne-torchpypi.sh @@ -1,5 +1,5 @@ -ml load python/3.11.5 && python3 -m venv .venvs/scaffoldvenv-tuo-pypi && source .venvs/scaffoldvenv-tuo-pypi/bin/activate && pip install --upgrade pip -ml cce/21.0.1 cray-mpich/9.1.0 rocm/7.2.1 rccl/fast-env-slows-mpi -pip install -e .[rocm] --find-links https://download.pytorch.org/whl/torch/ --find-links https://download.pytorch.org/whl/triton-rocm/ 2>&1 | tee install.log +ml load python/3.13.2 && python3 -m venv .venvs/scaffoldvenv-tuo-pypi && source .venvs/scaffoldvenv-tuo-pypi/bin/activate && pip install --upgrade pip +ml cce/21.0.1 cray-mpich/9.1.0 rocm/7.1.1 rccl/fast-env-slows-mpi +pip install -e .[rocm] --find-links https://download.pytorch.org/whl/torch/ --find-links https://download.pytorch.org/whl/torchaudio/ --find-links https://download.pytorch.org/whl/torchvision/ --find-links https://download.pytorch.org/whl/triton-rocm/ 2>&1 | tee install.log # libmpi.so.12 does not exist => ls /opt/cray/pe/lib64/ | grep libmpi -patchelf --replace-needed libmpi.so.12 libmpi_gnu.so.12 .venvs/scaffoldvenv-tuo-pypi/lib/python3.11/site-packages/mpi4py/MPI.mpich.cpython-311-x86_64-linux-gnu.so +patchelf --replace-needed libmpi.so.12 libmpi_gnu.so.12 .venvs/scaffoldvenv-tuo-pypi/lib/python3.13/site-packages/mpi4py/MPI.mpich.cpython-313-x86_64-linux-gnu.so diff --git a/scripts/scaffold-tuolumne-torchpypi.job b/scripts/scaffold-tuolumne-torchpypi.job index c78af96..cc3b061 100644 --- a/scripts/scaffold-tuolumne-torchpypi.job +++ b/scripts/scaffold-tuolumne-torchpypi.job @@ -7,20 +7,20 @@ # flux: -qpdebug # flux: -B flask -ml cce/21.0.1 cray-mpich/9.1.0 rocm/7.2.1 rccl/fast-env-slows-mpi +ml cce/21.0.1 cray-mpich/9.1.0 rocm/7.1.1 rccl/fast-env-slows-mpi . .venvs/scaffoldvenv-tuo-pypi/bin/activate -export NCCL_NET_PLUGIN=/collab/usr/global/tools/rccl/toss_4_x86_64_ib_cray/rocm-7.2.0/install/lib/librccl-net.so +export NCCL_NET_PLUGIN=/collab/usr/global/tools/rccl/toss_4_x86_64_ib_cray/rocm-7.1.1/install/lib/librccl-net.so # Disable direct convolution benchmarking (should speedup warmup by a significant amount, does the below three options together) -# export MIOPEN_DEBUG_CONV_DIRECT=0 +export MIOPEN_DEBUG_CONV_DIRECT=0 # Disable direct naive convolution benchmarking (naive_conv_ab_nonpacked_fwd_ndhwc_half_double_half.kd) -export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD=0 +# export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD=0 # Disable naive_conv_ab_nonpacked_bwd_ndhwc_half_double_half.kd -export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD=0 +# export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD=0 # Disable naive_conv_ab_nonpacked_wrw_ndhwc_half_double_half.kd -export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW=0 +# export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW=0 CONFIG_PATH="$(pwd)/ScaFFold/configs/benchmark_default.yml" FRACT_BASE_DIR="${FRACT_BASE_DIR:-$(pwd)/ScaFFold/fractals}"