Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ if (MFC_ALL)
endif()

# Validate CMAKE_BUILD_TYPE to catch typos (CMake is case-sensitive).
set(_VALID_BUILD_TYPES "Debug" "Release" "RelDebug" "")
set(_VALID_BUILD_TYPES "Debug" "Release" "RelDebug" "Fast" "")
if (NOT CMAKE_BUILD_TYPE IN_LIST _VALID_BUILD_TYPES)
message(FATAL_ERROR "Unknown CMAKE_BUILD_TYPE '${CMAKE_BUILD_TYPE}'. Valid: Debug, RelDebug, Release")
message(FATAL_ERROR "Unknown CMAKE_BUILD_TYPE '${CMAKE_BUILD_TYPE}'. Valid: Debug, RelDebug, Release, Fast")
endif()

# RelDebug: a lighter debug mode for CI. Compiler-specific blocks below add the
Expand All @@ -51,6 +51,13 @@ set(CMAKE_C_FLAGS_RELDEBUG "-g" CACHE STRING "")
set(CMAKE_CXX_FLAGS_RELDEBUG "-g" CACHE STRING "")
set(CMAKE_Fortran_FLAGS_RELDEBUG "-g" CACHE STRING "")

# Fast: fast-iteration dev builds (e.g. GPU print-debugging). Deliberately matches
# none of the Release-only (IPO, -march=native) or Debug/RelDebug-only (MFC_DEBUG,
# -gpu=debug) conditional blocks below, so it inherits none of them - just a light -O1.
set(CMAKE_C_FLAGS_FAST "-O1" CACHE STRING "")
set(CMAKE_CXX_FLAGS_FAST "-O1" CACHE STRING "")
set(CMAKE_Fortran_FLAGS_FAST "-O1" CACHE STRING "")

if (MFC_SINGLE_PRECISION)
add_compile_definitions(MFC_SINGLE_PRECISION)
else()
Expand Down Expand Up @@ -330,6 +337,14 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelDebug")
add_compile_definitions(MFC_DEBUG)
endif()

# Fast: light optimization for dev iteration. When CMAKE_BUILD_TYPE is "Fast",
# -O1 is added to the compile options of Fortran sources only (the
# COMPILE_LANGUAGE generator expression filters out C/C++ sources).
# NOTE(review): the CMAKE_{C,CXX,Fortran}_FLAGS_FAST cache variables set earlier
# in this file already default to "-O1", so they are not empty placeholders and
# Fortran sources receive -O1 from both places - confirm the duplication is
# intended.
# This block does not define MFC_DEBUG; the Debug/RelDebug block above does.
if (CMAKE_BUILD_TYPE STREQUAL "Fast")
add_compile_options($<$<COMPILE_LANGUAGE:Fortran>:-O1>)
endif()



# HANDLE_SOURCES: Given a target (herein <target>):
Expand Down Expand Up @@ -676,8 +691,15 @@ exit 0
target_compile_options(${a_target} PRIVATE -fopenmp)
target_link_options(${a_target} PRIVATE -fopenmp)
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -O3 -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription)
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -flto-partitions=${MFC_BUILD_JOBS})
if(CMAKE_BUILD_TYPE STREQUAL "Fast")
# Fast dev-iteration: -O1 + -fno-lto eliminates whole-program device LTO.
# (-fopenmp-target-jit is not yet supported by AMD flang as of 23.x.)
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -O1 -fno-lto -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription)
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -fno-lto)
else()
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -O3 -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription)
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -flto-partitions=${MFC_BUILD_JOBS})
endif()
endif()
endif()

Expand Down Expand Up @@ -746,9 +768,14 @@ exit 0
find_library(HIP_LIB amdhip64
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
find_library(HIPFORT_AMDGCN_LIB hipfort-amdgcn
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
target_include_directories(${a_target} PRIVATE
"$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn")
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" "$ENV{OLCF_AFAR_ROOT}/lib/llvm/lib" REQUIRED)
if(EXISTS "$ENV{OLCF_AFAR_ROOT}/lib/llvm/include/hipfort/amdgcn")
target_include_directories(${a_target} PRIVATE
"$ENV{OLCF_AFAR_ROOT}/lib/llvm/include/hipfort/amdgcn")
else()
target_include_directories(${a_target} PRIVATE
"$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn")
endif()
target_link_libraries(${a_target} PRIVATE
${HIP_LIB} ${HIPFORT_AMDGCN_LIB})

Expand Down
57 changes: 54 additions & 3 deletions toolchain/mfc/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,53 @@
_MAKE_PROGRESS_RE = re.compile(r"^\[\s*(\d+)%\]\s+(.*)$")


def _cmake_build_type() -> str:
    """Return the CMAKE_BUILD_TYPE name selected by the CLI build-mode flags.

    The flags are consulted in a fixed precedence order (debug, then reldebug,
    then fast_build); the first one that is truthy wins and later flags are not
    queried. "Release" is returned when none of them is set.
    """
    # (build type, positional arguments forwarded to ARG), highest priority first.
    # "fast_build" is looked up with the extra False argument the other flags
    # do not get - presumably a default for when the flag is absent.
    precedence = (
        ("Debug", ("debug",)),
        ("RelDebug", ("reldebug",)),
        ("Fast", ("fast_build", False)),
    )
    for build_type, lookup in precedence:
        if ARG(*lookup):
            return build_type
    return "Release"


def _apply_fast_build_gpu_arch() -> None:
    """Under --fast-build on an NVHPC GPU build, restrict device codegen to a
    single compute capability (the node's GPU), overriding the multi-arch
    MFC_CUDA_CC that the module files set. CMake reads $ENV{MFC_CUDA_CC}.

    Cray/AMD GPU builds don't use MFC_CUDA_CC (they are already single-arch via
    craype-accel/--offload-arch), so this only acts when MFC_CUDA_CC is set.

    Resolution order for the new MFC_CUDA_CC value:
      1. MFC_FAST_ARCH from the environment, used verbatim when non-empty.
      2. The first compute capability reported by nvidia-smi, with the dot
         removed (e.g. "9.0" -> "90").

    Side effects:
        Mutates os.environ["MFC_CUDA_CC"] in the current process.

    Raises:
        MFCException: if no explicit arch is provided and nvidia-smi yields no
            line that looks like a compute capability.
    """
    if not ARG("fast_build", False) or ARG("gpu") == gpuConfigOptions.NONE.value:
        return
    if not os.environ.get("MFC_CUDA_CC"):  # not an NVHPC node; nothing to do
        return

    override = os.environ.get("MFC_FAST_ARCH")  # escape hatch for login nodes
    if override:
        os.environ["MFC_CUDA_CC"] = override
        return

    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader"],
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
        reported = [ln.strip() for ln in result.stdout.splitlines()]
    except (OSError, subprocess.SubprocessError):
        # nvidia-smi missing, not executable, or timed out: treat as "no GPU".
        reported = []

    # Only accept lines shaped like "<major>.<minor>". check=False means a
    # failing nvidia-smi is not an exception, and whatever it printed on stdout
    # (a diagnostic message, "[N/A]", ...) must not end up in MFC_CUDA_CC.
    caps = [cap.replace(".", "") for cap in reported if re.fullmatch(r"\d+\.\d+", cap)]

    if not caps:
        raise MFCException(
            "--fast-build: could not detect a local GPU compute capability "
            "(no GPU visible via nvidia-smi). Run on a GPU node, or set "
            "MFC_FAST_ARCH=<cc> (e.g. MFC_FAST_ARCH=90)."
        )

    # With several GPUs visible, the first reported device decides the arch.
    os.environ["MFC_CUDA_CC"] = caps[0]


def _run_build_with_progress(command: typing.List[str], target_name: str, streaming: bool = False) -> subprocess.CompletedProcess:
"""
Run a build command with a progress bar that parses ninja output.
Expand Down Expand Up @@ -367,6 +414,10 @@ def is_buildable(self) -> bool:
def configure(self, case: Case):
if ARG("debug") and ARG("reldebug"):
raise MFCException("--debug and --reldebug are mutually exclusive.")
if ARG("fast_build", False) and (ARG("debug") or ARG("reldebug")):
raise MFCException("--fast-build is mutually exclusive with --debug/--reldebug.")

_apply_fast_build_gpu_arch()

build_dirpath = self.get_staging_dirpath(case)
cmake_dirpath = self.get_cmake_dirpath()
Expand All @@ -386,9 +437,9 @@ def configure(self, case: Case):
# build the configured targets. This is mostly useful for debugging.
# See: https://cmake.org/cmake/help/latest/variable/CMAKE_EXPORT_COMPILE_COMMANDS.html.
"-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
# Set build type (Debug, RelDebug, or Release).
# Set build type (Debug, RelDebug, Fast, or Release).
# See: https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html
f"-DCMAKE_BUILD_TYPE={'Debug' if ARG('debug') else 'RelDebug' if ARG('reldebug') else 'Release'}",
f"-DCMAKE_BUILD_TYPE={_cmake_build_type()}",
# Used by FIND_PACKAGE (/FindXXX) to search for packages, with the
# second highest level of priority, still letting users manually
# specify <PackageName>_ROOT, which has precedence over CMAKE_PREFIX_PATH.
Expand Down Expand Up @@ -468,7 +519,7 @@ def build(self, case: input.MFCInputFile):
"--parallel",
ARG("jobs"),
"--config",
"Debug" if ARG("debug") else "RelDebug" if ARG("reldebug") else "Release",
_cmake_build_type(),
]

verbosity = ARG("verbose")
Expand Down
2 changes: 1 addition & 1 deletion toolchain/mfc/lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .printer import cons
from .state import MFCConfig

MFC_LOCK_CURRENT_VERSION: int = 8
MFC_LOCK_CURRENT_VERSION: int = 9


@dataclasses.dataclass
Expand Down
1 change: 1 addition & 0 deletions toolchain/mfc/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class MFCConfig:
gpu: str = gpuConfigOptions.NONE.value
debug: bool = False
reldebug: bool = False
fast_build: bool = False
gcov: bool = False
unified: bool = False
single: bool = False
Expand Down
Loading