diff --git a/scripts/scaffold-tuolumne.job b/scripts/scaffold-tuolumne.job
index bbbba33..a22d8c6 100644
--- a/scripts/scaffold-tuolumne.job
+++ b/scripts/scaffold-tuolumne.job
@@ -13,7 +13,8 @@ ml cce/21.0.0 cray-mpich/9.1.0 rocm/7.1.1 rccl/fast-env-slows-mpi
 
 # (1) Avoid libmagma error
 # (2) Removing libmpi may cause segfault on mpi4py import
-export LD_PRELOAD="/opt/rocm-7.1.1/llvm/lib/libomp.so /opt/cray/pe/mpich/9.1.0/ofi/gnu/11.2/lib/libmpi_gnu.so.12"
+# (3-5) undefined symbol: cblas_gemm_f16f16f32
+export LD_PRELOAD="/opt/rocm-7.1.1/llvm/lib/libomp.so /opt/cray/pe/mpich/9.1.0/ofi/gnu/11.2/lib/libmpi_gnu.so.12 /opt/intel/oneapi/mkl/2024.2/lib/libmkl_core.so.2 /opt/intel/oneapi/mkl/2024.2/lib/libmkl_gnu_thread.so.2 /opt/intel/oneapi/mkl/2024.2/lib/libmkl_intel_lp64.so.2"
 
 # Disable direct convolution benchmarking (should speedup warmup by a significant amount, does the below three options together)
 # export MIOPEN_DEBUG_CONV_DIRECT=0