From 2cbe46e7ef9599eb8c74eedc5f88923862c4bd35 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Wed, 22 Apr 2026 15:50:06 -0700
Subject: [PATCH 1/3] Allow overriding srt-slurm repo/ref at the launcher level
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds optional SRT_SLURM_REPO / SRT_SLURM_REF env vars that the GB200
launcher reads to choose a non-default srt-slurm fork/branch. Defaults
are unchanged — production runs that don't set these still clone
NVIDIA/srt-slurm@sa-submission-q2-2026 (dynamo-vllm, dynamo-trt+kimik2.5)
or ishandhanani/srt-slurm@sa-submission-q1-2026 (everything else) as
before.

Also exports LM_EVAL_WORKSPACE=$GITHUB_WORKSPACE alongside the existing
INFMAX_WORKSPACE. The generalized lm-eval support in NVIDIA/srt-slurm#41
reads LM_EVAL_WORKSPACE; the legacy sa-submission branches read
INFMAX_WORKSPACE. Exporting both keeps a single launcher working with
either version.

Plumbs srt-slurm-repo and srt-slurm-ref as optional inputs through
e2e-tests.yml -> benchmark-multinode-tmpl.yml so workflow_dispatch runs
can target a specific srt-slurm fork/branch for validation without
touching production defaults.
---
 .../workflows/benchmark-multinode-tmpl.yml    | 15 ++++++++++
 .github/workflows/e2e-tests.yml               | 24 +++++++++++++++
 runners/launch_gb200-nv.sh                    | 30 ++++++++++++-------
 3 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml
index 13ee99618..353fe98a4 100644
--- a/.github/workflows/benchmark-multinode-tmpl.yml
+++ b/.github/workflows/benchmark-multinode-tmpl.yml
@@ -95,6 +95,16 @@ on:
         description: "Git ref (branch/sha) to checkout"
         required: false
         type: string
+      srt-slurm-repo:
+        description: "Override srt-slurm clone URL (leave empty to use launcher default)"
+        required: false
+        type: string
+        default: ""
+      srt-slurm-ref:
+        description: "Override srt-slurm git ref (branch/sha; leave empty to use launcher default)"
+        required: false
+        type: string
+        default: ""
 
 env:
   RANDOM_RANGE_RATIO: 0.8
@@ -126,6 +136,11 @@ env:
   DECODE_EP: ${{ inputs.decode-ep }}
   DECODE_DP_ATTN: ${{ inputs.decode-dp-attn }}
 
+  # Optional override for which srt-slurm repo/ref the launcher clones.
+  # Leave empty to use the launcher's built-in defaults per framework.
+  SRT_SLURM_REPO: ${{ inputs.srt-slurm-repo }}
+  SRT_SLURM_REF: ${{ inputs.srt-slurm-ref }}
+
 permissions:
   contents: read
 
diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
index 19a60b9ea..78ad78724 100644
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -16,6 +16,16 @@ on:
                 description: "Ref (branch/sha) to checkout for generating configs"
                 required: false
                 type: string
+            srt-slurm-repo:
+                description: "Override srt-slurm clone URL (leave empty to use launcher default)"
+                required: false
+                type: string
+                default: ""
+            srt-slurm-ref:
+                description: "Override srt-slurm git ref (branch/sha; leave empty to use launcher default)"
+                required: false
+                type: string
+                default: ""
     workflow_call:
         inputs:
             generate-cli-command:
@@ -30,6 +40,16 @@ on:
                 description: "Ref (branch/sha) to checkout for generating configs"
                 required: false
                 type: string
+            srt-slurm-repo:
+                description: "Override srt-slurm clone URL (leave empty to use launcher default)"
+                required: false
+                type: string
+                default: ""
+            srt-slurm-ref:
+                description: "Override srt-slurm git ref (branch/sha; leave empty to use launcher default)"
+                required: false
+                type: string
+                default: ""
 
 jobs:
     get-jobs:
@@ -102,6 +122,8 @@ jobs:
             decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
             run-eval: false
             ref: ${{ inputs.ref }}
+            srt-slurm-repo: ${{ inputs.srt-slurm-repo }}
+            srt-slurm-ref: ${{ inputs.srt-slurm-ref }}
 
     test-sweep-multi-node-evals:
         needs: get-jobs
@@ -143,6 +165,8 @@ jobs:
             eval-only: true
             eval-conc: ${{ matrix.config.eval-conc }}
             ref: ${{ inputs.ref }}
+            srt-slurm-repo: ${{ inputs.srt-slurm-repo }}
+            srt-slurm-ref: ${{ inputs.srt-slurm-ref }}
 
     test-sweep-single-node:
         needs: get-jobs
diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh
index b746e4a24..b37b78d64 100755
--- a/runners/launch_gb200-nv.sh
+++ b/runners/launch_gb200-nv.sh
@@ -134,20 +134,27 @@ if [ -d "$SRT_REPO_DIR" ]; then
     rm -rf "$SRT_REPO_DIR"
 fi
 
+# Allow SRT_SLURM_REPO / SRT_SLURM_REF to override the default clone source
+# (useful for testing WIP branches like the generalized lm-eval-main).
 if [[ $FRAMEWORK == "dynamo-vllm" ]]; then
-    git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
-    cd "$SRT_REPO_DIR"
-    git checkout sa-submission-q2-2026
+    DEFAULT_SRT_REPO="https://github.com/NVIDIA/srt-slurm.git"
+    DEFAULT_SRT_REF="sa-submission-q2-2026"
 elif [[ $FRAMEWORK == "dynamo-trt" && $MODEL_PREFIX == "kimik2.5" ]]; then
-    git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
-    cd "$SRT_REPO_DIR"
-    git checkout sa-submission-q2-2026
+    DEFAULT_SRT_REPO="https://github.com/NVIDIA/srt-slurm.git"
+    DEFAULT_SRT_REF="sa-submission-q2-2026"
 else
-    git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR"
-    cd "$SRT_REPO_DIR"
-    git checkout sa-submission-q1-2026
+    DEFAULT_SRT_REPO="https://github.com/ishandhanani/srt-slurm.git"
+    DEFAULT_SRT_REF="sa-submission-q1-2026"
 fi
 
+SRT_SLURM_REPO="${SRT_SLURM_REPO:-$DEFAULT_SRT_REPO}"
+SRT_SLURM_REF="${SRT_SLURM_REF:-$DEFAULT_SRT_REF}"
+
+echo "Cloning ${SRT_SLURM_REPO} @ ${SRT_SLURM_REF}"
+git clone "$SRT_SLURM_REPO" "$SRT_REPO_DIR"
+cd "$SRT_REPO_DIR"
+git checkout "$SRT_SLURM_REF"
+
 echo "Installing srtctl..."
 curl -LsSf https://astral.sh/uv/install.sh | sh
 source $HOME/.local/bin/env
@@ -197,7 +204,10 @@ cat srtslurm.yaml
 echo "Running make setup..."
 make setup ARCH=aarch64
 
-# Export eval-related env vars for srt-slurm post-benchmark eval
+# Export eval-related env vars for srt-slurm post-benchmark eval.
+# LM_EVAL_WORKSPACE is what the generalized srt-slurm reads; INFMAX_WORKSPACE
+# is kept for compatibility with older srt-slurm branches (sa-submission-*).
+export LM_EVAL_WORKSPACE="$GITHUB_WORKSPACE"
 export INFMAX_WORKSPACE="$GITHUB_WORKSPACE"
 
 echo "Submitting job with srtctl..."

From 2bcdb2ac5e4ef03930d90ac581402d568be02d6c Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Wed, 22 Apr 2026 16:28:01 -0700
Subject: [PATCH 2/3] Add minimal srt-slurm validation config

One-entry sweep config used by the manual workflow_dispatch that
validates NVIDIA/srt-slurm#41 on GB200. Mirrors the cheapest entry
from dsr1-fp8-gb200-dynamo-trt (8k1k stp, eval-conc=63, 1P/3D workers) so
the end-to-end eval path is exercised without running the full
gb200 sweep.

Not referenced by any automated workflow; picked up only when passed
explicitly via --config-files.
---
 .github/configs/srt-slurm-validation.yaml | 31 +++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 .github/configs/srt-slurm-validation.yaml

diff --git a/.github/configs/srt-slurm-validation.yaml b/.github/configs/srt-slurm-validation.yaml
new file mode 100644
index 000000000..20d7de752
--- /dev/null
+++ b/.github/configs/srt-slurm-validation.yaml
@@ -0,0 +1,31 @@
+# Minimal config for validating NVIDIA/srt-slurm#41 end-to-end on GB200.
+# Referenced explicitly via --config-files on a one-off workflow_dispatch;
+# NOT picked up by normal sweeps (they run against nvidia-master.yaml).
+# Mirrors the cheapest entry from dsr1-fp8-gb200-dynamo-trt in nvidia-master.yaml.
+dsr1-fp8-gb200-dynamo-trt:
+  image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
+  model: deepseek-ai/DeepSeek-R1-0528
+  model-prefix: dsr1
+  runner: gb200
+  precision: fp8
+  framework: dynamo-trt
+  multinode: true
+  disagg: true
+  seq-len-configs:
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - conc-list: [63]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml"
+      decode:
+        num-worker: 3
+        tp: 8
+        ep: 8
+        dp-attn: false

From e117c076d62965b9c38b22431ddbf5439d3bde40 Mon Sep 17 00:00:00 2001
From: Oseltamivir <bryansg2013@gmail.com>
Date: Wed, 22 Apr 2026 19:13:44 -0700
Subject: [PATCH 3/3] Switch srt-slurm-validation to dsr1-fp4-trt

The fp8 variant mapped to /mnt/numa1/.../deepseek-r1-0528/, which is only
visible from SLURM compute nodes and not from the gb200 GH runners, so the
new preflight validation in NVIDIA/srt-slurm@lm-eval-main (not present in
sa-submission-q2-2026) rejected the submission on the runner side before
sbatch. The dsr1-fp4 path in the launcher maps to
/mnt/lustre01/models/deepseek-r1-0528-fp4-v2/, which is on the shared
lustre mount accessible from both the runner and compute nodes.

Recipe ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml is the smallest 8k1k gb200
trtllm fp4 recipe (1P+8D=9 nodes, single variant, nginx-sqsh container
alias matches the launcher's srtslurm.yaml).
---
 .github/configs/srt-slurm-validation.yaml | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/.github/configs/srt-slurm-validation.yaml b/.github/configs/srt-slurm-validation.yaml
index 20d7de752..6396b9758 100644
--- a/.github/configs/srt-slurm-validation.yaml
+++ b/.github/configs/srt-slurm-validation.yaml
@@ -1,13 +1,16 @@
 # Minimal config for validating NVIDIA/srt-slurm#41 end-to-end on GB200.
 # Referenced explicitly via --config-files on a one-off workflow_dispatch;
 # NOT picked up by normal sweeps (they run against nvidia-master.yaml).
-# Mirrors the cheapest entry from dsr1-fp8-gb200-dynamo-trt in nvidia-master.yaml.
-dsr1-fp8-gb200-dynamo-trt:
+# Uses dsr1-fp4-gb200-dynamo-trt, which the launcher maps to
+# /mnt/lustre01/models/deepseek-r1-0528-fp4-v2/ (present on both the GH
+# runner and compute nodes, so preflight passes). Smallest 8k1k trtllm
+# recipe at 1P+8D nodes (no zip_override, nginx-sqsh alias matches launcher).
+dsr1-fp4-gb200-dynamo-trt:
   image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
-  model: deepseek-ai/DeepSeek-R1-0528
+  model: nvidia/DeepSeek-R1-0528-NVFP4-v2
   model-prefix: dsr1
   runner: gb200
-  precision: fp8
+  precision: fp4
   framework: dynamo-trt
   multinode: true
   disagg: true
@@ -15,17 +18,17 @@ dsr1-fp8-gb200-dynamo-trt:
   - isl: 8192
     osl: 1024
     search-space:
-    - conc-list: [63]
+    - conc-list: [24]
       prefill:
         num-worker: 1
-        tp: 8
-        ep: 8
+        tp: 4
+        ep: 4
         dp-attn: true
         additional-settings:
-        # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml
-        - "CONFIG_FILE=recipes/trtllm/gb200-fp8/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0_63.yaml"
+        # https://github.com/NVIDIA/srt-slurm/blob/main/recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml
+        - "CONFIG_FILE=recipes/trtllm/gb200-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml"
       decode:
-        num-worker: 3
+        num-worker: 4
         tp: 8
         ep: 8
         dp-attn: false