From 7dc42ddaed3007585d56591a7a4cbd26950f2b6b Mon Sep 17 00:00:00 2001 From: stardriftfx Date: Mon, 16 Mar 2026 12:46:47 -0700 Subject: [PATCH 1/4] - updated nvcc compile flag to compile on local machine, if target not user-specified, documentation - wiki page on GPU Arch levels and their differences --- CMakeLists.txt | 23 ++++++++++++------ docs/Developer/GPUArchLevels.md | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 7 deletions(-) create mode 100644 docs/Developer/GPUArchLevels.md diff --git a/CMakeLists.txt b/CMakeLists.txt index fb0f43972..739e1ac8e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,12 +31,13 @@ endif() # to your desired architecture: \ # #cmake -D ENABLE_CUDA=YES -D TARGET_ARCH=70 .. -# +# #"YES" / GPU choice only available if CUDA library is installed and the GPU is CUDA capable. -#If no TARGET_ARCH is passed in then it will default to 37 which is the kepler architecture +#If TARGET_ARCH is not user-specified then it will default to native. ############################################################################################ if(NOT DEFINED TARGET_ARCH) - set(TARGET_ARCH 37) +# If target not specified by user, use local machine's GPU architecture + set(TARGET_ARCH "native") endif() #CONDITIONAL FLAG to turn on the validation mode @@ -50,13 +51,21 @@ if(ENABLE_CUDA) message("\n----Generating Makefile for Graphitti GPU version----") project(Graphitti LANGUAGES CXX CUDA C) #Verify CUDA package is present - find_Package(CUDA REQUIRED) + find_package(CUDA REQUIRED) #Set the USE_GPU preprocessor macro so that GPU code will be compiled. 
add_compile_definitions(USE_GPU) -#Specify the CUDA architecture / gencode that will be targeted - ### Set gencode and architecture variables to the correct values for your specific NVIDIA hardware + +# Specify the CUDA architecture / gencode that will be targeted +# Set gencode and architecture variables to the correct values for your specific NVIDIA hardware set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH}) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_${TARGET_ARCH},code=sm_${TARGET_ARCH}) +# Sets real and visual architecture switch +# sm_${TARGET_ARCH} = Real architecture +# compute_${TARGET_ARCH} = Virtual architecture +# (embs PTX code e.g. Parallel Thread Execution. +# If on a newer GPU, then the CUDA driver compiles a working binary) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} + -gencode=arch=compute_${TARGET_ARCH}, + code=[sm_${TARGET_ARCH}, compute_${TARGET_ARCH}]) message(STATUS "Using CUDA architecture: ${TARGET_ARCH}") else() diff --git a/docs/Developer/GPUArchLevels.md b/docs/Developer/GPUArchLevels.md new file mode 100644 index 000000000..240df4d4e --- /dev/null +++ b/docs/Developer/GPUArchLevels.md @@ -0,0 +1,41 @@ +# GPU Architecture Levels +Originally, Graphitti used CUDA Compute Capability (CC 3.5). This later changed to use a flexible architecture model, such that Graphitti can now be compiled for modern hardware (such as Volta or Lovelace). This allows the simulator to leverage modern GPU features while maintaining backwards compatibility for older cards by using conditional compilation. + +# Supported Architectures + +| Architecture | Compute Capability | Project Build Compatibility | +| :--- | :--- | :--- | +| **Kepler** | 3.5 / 3.7 | **Baseline**: Minimum version for backwards compatibility. | +| **Volta** | 7.0 | **Target**: Primary architecture for high-performance server runs. | +| **Ampere** | 8.0 / 8.6 | **Development**: Common for modern local development. 
| +| **Ada Lovelace**| 8.9 | **Current**: Latest generation available in the lab. | + +## Compute Capability +* Binary Compatibility (cubin): Strictly backwards compatible. A binary for raiju (3.7) runs on ghidorah (8.9), but not vice versa. +- Backwards Compatibility: Any code compiled on a older architecture will work on newer ones. + +- Forwards Compatibility (PTX): Any code compiled for a specific architecture will require that or a newer one to run. + - Parallel Thread Execution (PTX): PTX is included in lab builds. + +# Specifying Target Architecture + +By default, if not user-specified, TARGET_ARCH to set to `"native"` +which auto-detects and utilizes local hardware. + +Legacy Support: -DTARGET_ARCH=35 + +Otachi Server: -DTARGET_ARCH=70 + +# Performance Notes +## Lab Servers Reference Table +Info retrieved from `nvidia-smi` (in terminal). + +| Lab Server | GPU Model | Architecture | Compute Capability | Recommended `TARGET_ARCH` | +| :--- | :--- | :--- | :--- | :--- | +| **raiju** | Tesla K80 | **Kepler** | 3.7 | `37` | +| **otachi** | Tesla V100-PCIE-16GB | **Volta** | 7.0 | `70` | +| **ghidorah** | RTX 4500 Ada Generation | **Ada Lovelace** | 8.9 | `89` | + +Note: Per project guidelines, conditional compilation adds structural complexity (code cruft). We only implement architecture-specific paths if they produce a measurable benefit. + +- Example: Run a 5-10 minute simulation and then measure performance with `nvidia-smi` and compare it to baseline results. 
From ed0faa7aebe5dd9605f66f235a00faa4110652c0 Mon Sep 17 00:00:00 2001 From: Star Wong Date: Tue, 17 Mar 2026 15:05:52 -0700 Subject: [PATCH 2/4] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- docs/Developer/GPUArchLevels.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Developer/GPUArchLevels.md b/docs/Developer/GPUArchLevels.md index 240df4d4e..ddf952e8a 100644 --- a/docs/Developer/GPUArchLevels.md +++ b/docs/Developer/GPUArchLevels.md @@ -12,7 +12,7 @@ Originally, Graphitti used CUDA Compute Capability (CC 3.5). This later changed ## Compute Capability * Binary Compatibility (cubin): Strictly backwards compatible. A binary for raiju (3.7) runs on ghidorah (8.9), but not vice versa. -- Backwards Compatibility: Any code compiled on a older architecture will work on newer ones. +- Backwards Compatibility: Any code compiled on an older architecture will work on newer ones. - Forwards Compatibility (PTX): Any code compiled for a specific architecture will require that or a newer one to run. - Parallel Thread Execution (PTX): PTX is included in lab builds. 
From 0ff39e9a58f1ed39cafecd4154d3ca96fc364988 Mon Sep 17 00:00:00 2001 From: Star Wong Date: Tue, 17 Mar 2026 15:11:03 -0700 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- CMakeLists.txt | 11 +++++------ docs/Developer/GPUArchLevels.md | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 739e1ac8e..3363c8d04 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,16 +57,15 @@ if(ENABLE_CUDA) # Specify the CUDA architecture / gencode that will be targeted # Set gencode and architecture variables to the correct values for your specific NVIDIA hardware - set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH}) -# Sets real and visual architecture switch +# Sets real and virtual architecture switch # sm_${TARGET_ARCH} = Real architecture # compute_${TARGET_ARCH} = Virtual architecture -# (embs PTX code e.g. Parallel Thread Execution. +# (embeds PTX code e.g. Parallel Thread Execution. # If on a newer GPU, then the CUDA driver compiles a working binary) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} - -gencode=arch=compute_${TARGET_ARCH}, - code=[sm_${TARGET_ARCH}, compute_${TARGET_ARCH}]) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} + "-gencode=arch=compute_${TARGET_ARCH},code=sm_${TARGET_ARCH},compute_${TARGET_ARCH}") message(STATUS "Using CUDA architecture: ${TARGET_ARCH}") + else() message("\n----Generating Makefile for Graphitti CPU version----") diff --git a/docs/Developer/GPUArchLevels.md b/docs/Developer/GPUArchLevels.md index ddf952e8a..abfbd5f7b 100644 --- a/docs/Developer/GPUArchLevels.md +++ b/docs/Developer/GPUArchLevels.md @@ -19,7 +19,7 @@ Originally, Graphitti used CUDA Compute Capability (CC 3.5). 
This later changed # Specifying Target Architecture -By default, if not user-specified, TARGET_ARCH to set to `"native"` +By default, if not user-specified, `TARGET_ARCH` is set to `"native"` which auto-detects and utilizes local hardware. Legacy Support: -DTARGET_ARCH=35 From 9747fce8756e8f86f3aaa45a0cf965245da61feb Mon Sep 17 00:00:00 2001 From: stardriftfx Date: Wed, 18 Mar 2026 13:18:10 -0700 Subject: [PATCH 4/4] cuda enabled (not sure if this didn't get sent) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 739e1ac8e..81ba91080 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.12) #CONDITIONAL FLAG(to run simulation on CPU or GPU) # #For GPU: -#set(ENABLE_CUDA YES) +set(ENABLE_CUDA YES) # #For CPU: #set(ENABLE_CUDA NO)