-
Notifications
You must be signed in to change notification settings - Fork 31
CK JIT integration #582
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
CK JIT integration #582
Changes from all commits
4b39dab
98b6797
0d44485
a83550b
53b297b
dfc21ae
15d1879
5a10461
acc7b5a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,7 +6,7 @@ set(CMAKE_CXX_STANDARD 17) | |
| project(ck_fused_attn LANGUAGES HIP CXX) | ||
|
|
||
|
|
||
| set(AITER_MHA_INSTALL_PREFIX "transformer_engine" CACHE STRING "aiter mha shared lib install prefix in TE") | ||
| set(AITER_MHA_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/transformer_engine/lib") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor: this replaces the previous |
||
|
|
||
| #Corresponding runtime check is in nvte_get_fused_attn_backend() | ||
| list(FIND CMAKE_HIP_ARCHITECTURES "gfx1250" _gfx1250_idx) | ||
|
|
@@ -67,22 +67,56 @@ else() | |
| message(WARNING "Python interpreter not found; skipping AITER API validation.") | ||
| endif() | ||
|
|
||
| if(DEFINED AITER_MHA_PATH) | ||
| message(STATUS "[AITER-BUILD] Using AITER_MHA_PATH=${AITER_MHA_PATH}") | ||
| # use pre-built te_libmha_fwd.so te_libmha_bwd.so | ||
| set(__AITER_MHA_PATH ${AITER_MHA_PATH}) | ||
| set(__AITER_CACHE_DIR "") | ||
| set(__AITER_MHA_PATH "") | ||
| set(__QOLA_INCLUDE_DIR "") | ||
| if(NOT "$ENV{NVTE_CK_JIT}" STREQUAL "0") | ||
| set(__USE_CK_JIT TRUE) | ||
| else() | ||
| set(__AITER_MHA_PATH "") | ||
| set(__USE_CK_JIT FALSE) | ||
| endif() | ||
| if(DEFINED ENV{AITER_MHA_PATH}) | ||
| message(STATUS "[AITER-BUILD] Using AITER_MHA_PATH=$ENV{AITER_MHA_PATH}") | ||
| # use pre-built libraries and includes from a location specified by the user | ||
| set(__AITER_CACHE_DIR $ENV{AITER_MHA_PATH}) | ||
| elseif(NOT __USE_CK_JIT) #disable for CK_JIT for now | ||
| # use pre-built cache | ||
| include("${CMAKE_CURRENT_LIST_DIR}/aiter_prebuilt.cmake") | ||
| get_prebuilt_aiter(__AITER_MHA_PATH) | ||
| get_prebuilt_aiter(__AITER_CACHE_DIR) | ||
| elseif(DEFINED ENV{NVTE_AITER_PREBUILT_BASE_URL}) | ||
| message(WARNING "[AITER-BUILD] NVTE_AITER_PREBUILT_BASE_URL is set but will be ignored because CK_JIT is enabled.") | ||
| endif() | ||
|
Comment on lines
+73
to
+88
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two concerns about the default-on behavior:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto on point 2, I think we should still use the pre-built cache if available as first priority.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using of pre-built AITER cache was disabled for now because the idea of CK_JIT is reduce building time so the cache is not needed. While in general we may still have benefits of cache, enabling of it with CK_JIT requires at minimum uploading of CK_JIT built AITER libs and to avoid conflict with other (non CK_JIT) branches different AITER commit is needed for CK_JIT. So till this logistic task is completed and for wider testing, CK_JIT was made mutually exclusive with AITER pre-built cache.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I meant more so the idea that if there's a pre-built cache already, we don't need to use CK JIT at all -- not that we need to cache CK JIT. If we explicitly use CK JIT, then absolutely no need to worry about the prebuilt cache, but it's the case where we don't specify build strategy that I'm concerned about. In that case, I think we still need to be able to use a pre-built cache by default if available. What do you think? |
||
|
|
||
| if(__AITER_MHA_PATH STREQUAL "") | ||
| # If not available, fallback: Build from source via QoLA | ||
| list(JOIN CMAKE_HIP_ARCHITECTURES ";" GPU_ARCHS_STR) | ||
| message(STATUS "[AITER-BUILD] Building AITER kernels for ${GPU_ARCHS_STR} via QoLA.") | ||
| set(__QOLA_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../3rdparty/QoLA") | ||
| if(__AITER_CACHE_DIR STREQUAL "") | ||
| # If not available or not requested, build from source via QoLA | ||
| list(JOIN CMAKE_HIP_ARCHITECTURES ";" GPU_ARCHS_STR) | ||
| message(STATUS "[AITER-BUILD] Building AITER kernels for ${GPU_ARCHS_STR} via QoLA.") | ||
| set(__QOLA_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../3rdparty/QoLA") | ||
| set(__QOLA_MANIFEST "${CMAKE_CURRENT_LIST_DIR}/qola_manifest.toml") | ||
| if(__USE_CK_JIT) | ||
| message(STATUS "[AITER-BUILD] CK_JIT is enabled; will build CK kernels via CK_JIT.") | ||
| set(__CK_JIT_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/ck_jit") | ||
| set(__QOLA_BUILD_DIR "${__CK_JIT_BUILD_DIR}/qola") #Need it under ck_jit to clean on full build | ||
| if(DEFINED ENV{NVTE_CK_JIT_DIR}) | ||
| set(__CK_JIT_SOURCE_DIR $ENV{NVTE_CK_JIT_DIR}) | ||
| else() | ||
| set(__CK_JIT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../3rdparty/ck_jit") | ||
| endif() | ||
| execute_process( | ||
| COMMAND ${Python_EXECUTABLE} "${__CK_JIT_SOURCE_DIR}/ck_jit_build.py" full | ||
| --with-qola | ||
| --qola-dir ${__QOLA_DIR} | ||
| --qola-manifest ${__QOLA_MANIFEST} | ||
| --qola-output "${__QOLA_BUILD_DIR}" | ||
| --gpu-archs "${GPU_ARCHS_STR}" | ||
| --aiter-dir ${__AITER_SOURCE_DIR} | ||
| --tmp-dir "${__CK_JIT_BUILD_DIR}" | ||
| --install-dir ${AITER_MHA_INSTALL_DIR} | ||
| --jit-name "te_ck_jit" | ||
| RESULT_VARIABLE QOLA_BUILD_RESULT | ||
| ) | ||
| else() | ||
| set(__QOLA_BUILD_DIR "${__QOLA_DIR}/build") | ||
| set(__QOLA_MANIFEST "${CMAKE_CURRENT_LIST_DIR}/qola_manifest.toml") | ||
| execute_process( | ||
| COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${__QOLA_DIR}:$ENV{PYTHONPATH}" | ||
| ${Python_EXECUTABLE} -m qola.cli build | ||
|
|
@@ -92,22 +126,29 @@ else() | |
| --arch "${GPU_ARCHS_STR}" | ||
| RESULT_VARIABLE QOLA_BUILD_RESULT | ||
| ) | ||
| if(NOT QOLA_BUILD_RESULT EQUAL 0) | ||
| message(FATAL_ERROR "[AITER-BUILD] QoLA build failed.") | ||
| endif() | ||
| endif() | ||
| if(NOT QOLA_BUILD_RESULT EQUAL 0) | ||
| message(FATAL_ERROR "[AITER-BUILD] QoLA build failed.") | ||
| endif() | ||
|
|
||
| if(__USE_CK_JIT) | ||
| set(__AITER_MHA_PATH ${AITER_MHA_INSTALL_DIR}) | ||
| set(__QOLA_INCLUDE_DIR "${__QOLA_BUILD_DIR}/include") | ||
| else() | ||
| # Copy the final .so libs and exported public headers into the aiter | ||
| # prebuilt cache so downstream consumers see a self-contained tree. | ||
| get_default_aiter_cache_dir(__QOLA_CACHE_DIR) | ||
| set(__QOLA_CACHE_LIB "${__QOLA_CACHE_DIR}/lib") | ||
| get_default_aiter_cache_dir(__AITER_CACHE_DIR) | ||
| set(__QOLA_CACHE_LIB "${__AITER_CACHE_DIR}/lib") | ||
| file(MAKE_DIRECTORY ${__QOLA_CACHE_LIB}) | ||
| file(GLOB __QOLA_BUILT_LIBS "${__QOLA_BUILD_DIR}/lib/*.so") | ||
| file(COPY ${__QOLA_BUILT_LIBS} DESTINATION ${__QOLA_CACHE_LIB}) | ||
| file(COPY "${__QOLA_BUILD_DIR}/include" DESTINATION "${__QOLA_CACHE_DIR}") | ||
| file(COPY "${__QOLA_BUILD_DIR}/include" DESTINATION "${__AITER_CACHE_DIR}") | ||
| set(__AITER_MHA_PATH "${__QOLA_CACHE_LIB}") | ||
| else() | ||
| message(STATUS "[AITER-BUILD] Using pre-built AITER from ${__AITER_MHA_PATH}") | ||
| set(__QOLA_INCLUDE_DIR "${__AITER_CACHE_DIR}/include") | ||
| endif() | ||
| else() | ||
| set(__AITER_MHA_PATH "${__AITER_CACHE_DIR}/lib") | ||
| set(__QOLA_INCLUDE_DIR "${__AITER_CACHE_DIR}/include") | ||
| endif() | ||
|
|
||
| set(ck_fused_attn_SOURCES) | ||
|
|
@@ -129,7 +170,6 @@ list(APPEND CK_FUSED_ATTN_COMPILE_OPTIONS | |
| # Public QoLA headers ship alongside the .so libs in ${__AITER_MHA_PATH}/../include | ||
| # (emitted by qola.cli build, or copied from the QoLA build dir above for the | ||
| # source-build path). | ||
| set(__QOLA_INCLUDE_DIR "${__AITER_MHA_PATH}/../include") | ||
| if(NOT EXISTS "${__QOLA_INCLUDE_DIR}/qola_config.h") | ||
| message(FATAL_ERROR "Could not find QoLA public headers at ${__QOLA_INCLUDE_DIR}.") | ||
| endif() | ||
|
|
@@ -146,5 +186,7 @@ target_link_libraries(ck_fused_attn PUBLIC ${ck_fused_attn_LINKER_LIBS}) | |
| target_compile_options(ck_fused_attn PRIVATE ${CK_FUSED_ATTN_COMPILE_OPTIONS}) | ||
| set_target_properties(ck_fused_attn PROPERTIES INSTALL_RPATH "$ORIGIN") | ||
|
|
||
| install(FILES ${__AITER_MHA_PATH}/te_libmha_fwd.so ${__AITER_MHA_PATH}/te_libmha_bwd.so DESTINATION ${CMAKE_INSTALL_PREFIX}/${AITER_MHA_INSTALL_PREFIX}/lib) | ||
| install(TARGETS ck_fused_attn DESTINATION ${CMAKE_INSTALL_PREFIX}/${AITER_MHA_INSTALL_PREFIX}/lib) | ||
| if (NOT "${__AITER_MHA_PATH}" STREQUAL "${AITER_MHA_INSTALL_DIR}") | ||
| install(FILES ${__AITER_MHA_PATH}/te_libmha_fwd.so ${__AITER_MHA_PATH}/te_libmha_bwd.so DESTINATION ${AITER_MHA_INSTALL_DIR}) | ||
| endif() | ||
| install(TARGETS ck_fused_attn DESTINATION ${AITER_MHA_INSTALL_DIR}) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This hunk silently drops the
NVTE_AOTRITON_PATH->-DAOTRITON_PATH=…andNVTE_CK_FUSED_ATTN_PATH->-DAITER_MHA_PATH=…translations.Downstream the CMake files now read
$ENV{AOTRITON_PATH}and$ENV{AITER_MHA_PATH}directly (seeaotriton/CMakeLists.txt:11andck_fused_attn/CMakeLists.txt:78), so users have to rename their env vars fromNVTE_AOTRITON_PATH/NVTE_CK_FUSED_ATTN_PATHto the bare names. Anyone with existing scripts setting theNVTE_*form will get a silent "ignored" failure — the build will quietly fall back to the default behavior instead of using their prebuilt path.PR is marked as breaking, but the description doesn't call this rename out. Two reasonable options:
NVTE_*aliases here as a thin shim (a couple of lines) so old usage still works; orThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this an intentional part of this change? Seems unrelated?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removing of NVTE_AOTRITON_PATH is just a reflection of earlier changes in CMakeLists.txt to use ENV{AOTRITON_PATH}, so NVTE_AOTRITON_PATH is actually unused.
It is not directly related but just aligned to corresponding NVTE_CK_FUSED_ATTN_PATH. The removal of latter in this PR has few reasons:
It is, however, can be discussed separately, not as a part of this PR