From 2cbe46e7ef9599eb8c74eedc5f88923862c4bd35 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Wed, 22 Apr 2026 15:50:06 -0700 Subject: [PATCH 1/3] Allow overriding srt-slurm repo/ref at the launcher level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds optional SRT_SLURM_REPO / SRT_SLURM_REF env vars that the GB200 launcher reads to choose a non-default srt-slurm fork/branch. Defaults are unchanged — production runs that don't set these still clone NVIDIA/srt-slurm@sa-submission-q2-2026 (dynamo-vllm, dynamo-trt+kimik2.5) or ishandhanani/srt-slurm@sa-submission-q1-2026 (everything else) as before. Also exports LM_EVAL_WORKSPACE=$GITHUB_WORKSPACE alongside the existing INFMAX_WORKSPACE. The generalized lm-eval support in NVIDIA/srt-slurm#41 reads LM_EVAL_WORKSPACE; the legacy sa-submission branches read INFMAX_WORKSPACE. Exporting both keeps a single launcher working with either version. Plumbs srt-slurm-repo and srt-slurm-ref as optional inputs through e2e-tests.yml -> benchmark-multinode-tmpl.yml so workflow_dispatch runs can target a specific srt-slurm fork/branch for validation without touching production defaults. 
--- .../workflows/benchmark-multinode-tmpl.yml | 15 ++++++++++ .github/workflows/e2e-tests.yml | 24 +++++++++++++++ runners/launch_gb200-nv.sh | 30 ++++++++++++------- 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml index 13ee99618..353fe98a4 100644 --- a/.github/workflows/benchmark-multinode-tmpl.yml +++ b/.github/workflows/benchmark-multinode-tmpl.yml @@ -95,6 +95,16 @@ on: description: "Git ref (branch/sha) to checkout" required: false type: string + srt-slurm-repo: + description: "Override srt-slurm clone URL (leave empty to use launcher default)" + required: false + type: string + default: "" + srt-slurm-ref: + description: "Override srt-slurm git ref (branch/sha; leave empty to use launcher default)" + required: false + type: string + default: "" env: RANDOM_RANGE_RATIO: 0.8 @@ -126,6 +136,11 @@ env: DECODE_EP: ${{ inputs.decode-ep }} DECODE_DP_ATTN: ${{ inputs.decode-dp-attn }} + # Optional override for which srt-slurm repo/ref the launcher clones. + # Leave empty to use the launcher's built-in defaults per framework. 
+ SRT_SLURM_REPO: ${{ inputs.srt-slurm-repo }} + SRT_SLURM_REF: ${{ inputs.srt-slurm-ref }} + permissions: contents: read diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index 19a60b9ea..78ad78724 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -16,6 +16,16 @@ on: description: "Ref (branch/sha) to checkout for generating configs" required: false type: string + srt-slurm-repo: + description: "Override srt-slurm clone URL (leave empty to use launcher default)" + required: false + type: string + default: "" + srt-slurm-ref: + description: "Override srt-slurm git ref (branch/sha; leave empty to use launcher default)" + required: false + type: string + default: "" workflow_call: inputs: generate-cli-command: @@ -30,6 +40,16 @@ on: description: "Ref (branch/sha) to checkout for generating configs" required: false type: string + srt-slurm-repo: + description: "Override srt-slurm clone URL (leave empty to use launcher default)" + required: false + type: string + default: "" + srt-slurm-ref: + description: "Override srt-slurm git ref (branch/sha; leave empty to use launcher default)" + required: false + type: string + default: "" jobs: get-jobs: @@ -102,6 +122,8 @@ jobs: decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} run-eval: false ref: ${{ inputs.ref }} + srt-slurm-repo: ${{ inputs.srt-slurm-repo }} + srt-slurm-ref: ${{ inputs.srt-slurm-ref }} test-sweep-multi-node-evals: needs: get-jobs @@ -143,6 +165,8 @@ jobs: eval-only: true eval-conc: ${{ matrix.config.eval-conc }} ref: ${{ inputs.ref }} + srt-slurm-repo: ${{ inputs.srt-slurm-repo }} + srt-slurm-ref: ${{ inputs.srt-slurm-ref }} test-sweep-single-node: needs: get-jobs diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh index b746e4a24..b37b78d64 100755 --- a/runners/launch_gb200-nv.sh +++ b/runners/launch_gb200-nv.sh @@ -134,20 +134,27 @@ if [ -d "$SRT_REPO_DIR" ]; then rm -rf "$SRT_REPO_DIR" fi 
+# Allow SRT_SLURM_REPO / SRT_SLURM_REF to override the default clone source +# (useful for testing WIP branches like the generalized lm-eval-main). if [[ $FRAMEWORK == "dynamo-vllm" ]]; then - git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" - cd "$SRT_REPO_DIR" - git checkout sa-submission-q2-2026 + DEFAULT_SRT_REPO="https://github.com/NVIDIA/srt-slurm.git" + DEFAULT_SRT_REF="sa-submission-q2-2026" elif [[ $FRAMEWORK == "dynamo-trt" && $MODEL_PREFIX == "kimik2.5" ]]; then - git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" - cd "$SRT_REPO_DIR" - git checkout sa-submission-q2-2026 + DEFAULT_SRT_REPO="https://github.com/NVIDIA/srt-slurm.git" + DEFAULT_SRT_REF="sa-submission-q2-2026" else - git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" - cd "$SRT_REPO_DIR" - git checkout sa-submission-q1-2026 + DEFAULT_SRT_REPO="https://github.com/ishandhanani/srt-slurm.git" + DEFAULT_SRT_REF="sa-submission-q1-2026" fi +SRT_SLURM_REPO="${SRT_SLURM_REPO:-$DEFAULT_SRT_REPO}" +SRT_SLURM_REF="${SRT_SLURM_REF:-$DEFAULT_SRT_REF}" + +echo "Cloning ${SRT_SLURM_REPO} @ ${SRT_SLURM_REF}" +git clone "$SRT_SLURM_REPO" "$SRT_REPO_DIR" +cd "$SRT_REPO_DIR" +git checkout "$SRT_SLURM_REF" + echo "Installing srtctl..." curl -LsSf https://astral.sh/uv/install.sh | sh source $HOME/.local/bin/env @@ -197,7 +204,10 @@ cat srtslurm.yaml echo "Running make setup..." make setup ARCH=aarch64 -# Export eval-related env vars for srt-slurm post-benchmark eval +# Export eval-related env vars for srt-slurm post-benchmark eval. +# LM_EVAL_WORKSPACE is what the generalized srt-slurm reads; INFMAX_WORKSPACE +# is kept for compatibility with older srt-slurm branches (sa-submission-*). +export LM_EVAL_WORKSPACE="$GITHUB_WORKSPACE" export INFMAX_WORKSPACE="$GITHUB_WORKSPACE" echo "Submitting job with srtctl..." 
From 2bcdb2ac5e4ef03930d90ac581402d568be02d6c Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Wed, 22 Apr 2026 16:28:01 -0700 Subject: [PATCH 2/3] Add minimal srt-slurm validation config One-entry sweep config used by the manual workflow_dispatch that validates NVIDIA/srt-slurm#41 on GB200. Mirrors the cheapest entry from dsr1-fp8-gb200-dynamo-trt (8k1k stp, eval-conc=63, 1P/3D) so the end-to-end eval path is exercised without running the full gb200 sweep. Not referenced by any automated workflow; picked up only when passed explicitly via --config-files. --- .github/configs/srt-slurm-validation.yaml | 31 +++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/configs/srt-slurm-validation.yaml diff --git a/.github/configs/srt-slurm-validation.yaml b/.github/configs/srt-slurm-validation.yaml new file mode 100644 index 000000000..20d7de752 --- /dev/null +++ b/.github/configs/srt-slurm-validation.yaml @@ -0,0 +1,31 @@ +# Minimal config for validating NVIDIA/srt-slurm#41 end-to-end on GB200. +# Referenced explicitly via --config-files on a one-off workflow_dispatch; +# NOT picked up by normal sweeps (they run against nvidia-master.yaml). +# Mirrors the cheapest entry from dsr1-fp8-gb200-dynamo-trt in nvidia-master.yaml. 
+dsr1-fp8-gb200-dynamo-trt: + image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 + model: deepseek-ai/DeepSeek-R1-0528 + model-prefix: dsr1 + runner: gb200 + precision: fp8 + framework: dynamo-trt + multinode: true + disagg: true + seq-len-configs: + - isl: 8192 + osl: 1024 + search-space: + - conc-list: [63] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false From e117c076d62965b9c38b22431ddbf5439d3bde40 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Wed, 22 Apr 2026 19:13:44 -0700 Subject: [PATCH 3/3] Switch srt-slurm-validation to dsr1-fp4-trt The fp8 variant mapped to /mnt/numa1/.../deepseek-r1-0528/, which is only visible from SLURM compute nodes and not from the gb200 GH runners, so the new preflight validation in NVIDIA/srt-slurm@lm-eval-main (not present in sa-submission-q2-2026) rejected the submission on the runner side before sbatch. The dsr1-fp4 path in the launcher maps to /mnt/lustre01/models/deepseek-r1-0528-fp4-v2/, which is on the shared lustre mount accessible from both the runner and compute nodes. Recipe ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml is the smallest 8k1k gb200 trtllm fp4 recipe (1P+8D=9 nodes, single variant, nginx-sqsh container alias matches the launcher's srtslurm.yaml). 
--- .github/configs/srt-slurm-validation.yaml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/configs/srt-slurm-validation.yaml b/.github/configs/srt-slurm-validation.yaml index 20d7de752..6396b9758 100644 --- a/.github/configs/srt-slurm-validation.yaml +++ b/.github/configs/srt-slurm-validation.yaml @@ -1,13 +1,16 @@ # Minimal config for validating NVIDIA/srt-slurm#41 end-to-end on GB200. # Referenced explicitly via --config-files on a one-off workflow_dispatch; # NOT picked up by normal sweeps (they run against nvidia-master.yaml). -# Mirrors the cheapest entry from dsr1-fp8-gb200-dynamo-trt in nvidia-master.yaml. -dsr1-fp8-gb200-dynamo-trt: +# Uses dsr1-fp4-gb200-dynamo-trt, which the launcher maps to +# /mnt/lustre01/models/deepseek-r1-0528-fp4-v2/ (present on both the GH +# runner and compute nodes, so preflight passes). Smallest 8k1k trtllm +# recipe at 1P+8D nodes (no zip_override, nginx-sqsh alias matches launcher). +dsr1-fp4-gb200-dynamo-trt: image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 - model: deepseek-ai/DeepSeek-R1-0528 + model: nvidia/DeepSeek-R1-0528-NVFP4-v2 model-prefix: dsr1 runner: gb200 - precision: fp8 + precision: fp4 framework: dynamo-trt multinode: true disagg: true @@ -15,17 +18,17 @@ dsr1-fp8-gb200-dynamo-trt: - isl: 8192 osl: 1024 search-space: - - conc-list: [63] + - conc-list: [24] prefill: num-worker: 1 - tp: 8 - ep: 8 + tp: 4 + ep: 4 dp-attn: true additional-settings: - # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml - - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml" + # https://github.com/NVIDIA/srt-slurm/blob/main/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" decode: - num-worker: 3 + num-worker: 4 tp: 8 ep: 8 dp-attn: false