diff --git a/examples/profiling-configs/rocprofv3_multi_gpu.json b/examples/profiling-configs/rocprofv3_multi_gpu.json index f8e16f89..2b2b250d 100644 --- a/examples/profiling-configs/rocprofv3_multi_gpu.json +++ b/examples/profiling-configs/rocprofv3_multi_gpu.json @@ -12,7 +12,7 @@ "nodes": 1, "gpus_per_node": 4, "time": "02:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": false }, diff --git a/examples/profiling-configs/rocprofv3_multi_node.json b/examples/profiling-configs/rocprofv3_multi_node.json index 935f969a..ad87c814 100644 --- a/examples/profiling-configs/rocprofv3_multi_node.json +++ b/examples/profiling-configs/rocprofv3_multi_node.json @@ -12,7 +12,7 @@ "nodes": 2, "gpus_per_node": 4, "time": "04:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true }, diff --git a/examples/slurm-configs/README.md b/examples/slurm-configs/README.md index 6bc69264..8b3afe9d 100644 --- a/examples/slurm-configs/README.md +++ b/examples/slurm-configs/README.md @@ -379,7 +379,7 @@ madengine uses intelligent multi-layer configuration merging: "nodelist": "node01,node02", // Optional: restrict job to these nodes (skips node health preflight) "gpus_per_node": 8, // GPUs per node "time": "24:00:00", // Wall time (HH:MM:SS) - "output_dir": "./slurm_output", // Local output directory + "output_dir": "./slurm_results", // Local output directory "results_dir": "/shared/results", // Shared results collection "shared_workspace": "/shared/workspace", // Shared workspace (NFS/Lustre) "exclusive": true, // Exclusive node access @@ -584,10 +584,10 @@ squeue -u $USER scontrol show job # View output logs (real-time) -tail -f slurm_output/madengine-*__*.out +tail -f slurm_results/madengine-*__*.out # View error logs -tail -f slurm_output/madengine-*__*.err +tail -f slurm_results/madengine-*__*.err # Cancel job if needed scancel @@ -600,7 +600,7 @@ scancel - Check SLURM partition exists: `sinfo` - Verify GPU 
resources available: `sinfo -o "%P %.5a %.10l %.6D %.6t %N %G"` - Check SLURM account/QoS settings -- Review job script: `slurm_output/madengine_*.sh` +- Review job script: `slurm_results/madengine_*.sh` ### Out of Memory Errors @@ -715,8 +715,8 @@ MODEL_DIR=models/my-model madengine run \ watch squeue -u $USER # 6. Check logs when complete -ls -lh slurm_output/ -tail -f slurm_output/madengine-*__*.out +ls -lh slurm_results/ +tail -f slurm_results/madengine-*__*.out ``` ### vLLM Inference Workflow @@ -739,7 +739,7 @@ MODEL_DIR=models/llama2-70b madengine run \ --manifest-file build_manifest.json # 5. Monitor for OOM errors -tail -f slurm_output/madengine-*__*.err | grep -i "memory" +tail -f slurm_results/madengine-*__*.err | grep -i "memory" # 6. If OOM occurs, adjust config and rebuild # Edit your config file to set VLLM_KV_CACHE_SIZE to 0.6 or 0.7 diff --git a/examples/slurm-configs/basic/01-single-node-single-gpu.json b/examples/slurm-configs/basic/01-single-node-single-gpu.json index 52324c1a..c0877717 100644 --- a/examples/slurm-configs/basic/01-single-node-single-gpu.json +++ b/examples/slurm-configs/basic/01-single-node-single-gpu.json @@ -12,7 +12,7 @@ "nodes": 1, "gpus_per_node": 1, "time": "01:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": false }, diff --git a/examples/slurm-configs/basic/02-single-node-multi-gpu.json b/examples/slurm-configs/basic/02-single-node-multi-gpu.json index f72fd0e2..a0e5b6ae 100644 --- a/examples/slurm-configs/basic/02-single-node-multi-gpu.json +++ b/examples/slurm-configs/basic/02-single-node-multi-gpu.json @@ -12,7 +12,7 @@ "nodes": 1, "gpus_per_node": 8, "time": "12:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true }, diff --git a/examples/slurm-configs/basic/03-multi-node-basic-nodelist.json b/examples/slurm-configs/basic/03-multi-node-basic-nodelist.json index b8a7ea0a..8a0323ad 100644 --- 
a/examples/slurm-configs/basic/03-multi-node-basic-nodelist.json +++ b/examples/slurm-configs/basic/03-multi-node-basic-nodelist.json @@ -10,7 +10,7 @@ "nodelist": "node01,node02", "gpus_per_node": 8, "time": "24:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true, "network_interface": "eth0" }, diff --git a/examples/slurm-configs/basic/03-multi-node-basic.json b/examples/slurm-configs/basic/03-multi-node-basic.json index 9c1f2de0..006890a7 100644 --- a/examples/slurm-configs/basic/03-multi-node-basic.json +++ b/examples/slurm-configs/basic/03-multi-node-basic.json @@ -12,7 +12,7 @@ "nodes": 2, "gpus_per_node": 8, "time": "24:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true, "network_interface": "eth0" }, diff --git a/examples/slurm-configs/basic/04-multi-node-advanced.json b/examples/slurm-configs/basic/04-multi-node-advanced.json index bf30fb0b..1708f078 100644 --- a/examples/slurm-configs/basic/04-multi-node-advanced.json +++ b/examples/slurm-configs/basic/04-multi-node-advanced.json @@ -12,7 +12,7 @@ "nodes": 4, "gpus_per_node": 8, "time": "48:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "results_dir": "/shared/results", "shared_workspace": "/shared/workspace", "exclusive": true, diff --git a/examples/slurm-configs/basic/05-vllm-single-node.json b/examples/slurm-configs/basic/05-vllm-single-node.json index a632bc0d..7d77c4df 100644 --- a/examples/slurm-configs/basic/05-vllm-single-node.json +++ b/examples/slurm-configs/basic/05-vllm-single-node.json @@ -12,7 +12,7 @@ "nodes": 1, "gpus_per_node": 4, "time": "02:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true }, diff --git a/examples/slurm-configs/basic/06-vllm-multi-node.json b/examples/slurm-configs/basic/06-vllm-multi-node.json index b76d34b9..d51262db 100644 --- a/examples/slurm-configs/basic/06-vllm-multi-node.json +++ 
b/examples/slurm-configs/basic/06-vllm-multi-node.json @@ -19,7 +19,7 @@ "nodes": 2, "gpus_per_node": 4, "time": "00:45:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true, "_comment_node_check": "Preflight GPU health check (helps avoid OOM from stale processes)", diff --git a/examples/slurm-configs/basic/07-sglang-single-node.json b/examples/slurm-configs/basic/07-sglang-single-node.json index ad82947a..8aaae928 100644 --- a/examples/slurm-configs/basic/07-sglang-single-node.json +++ b/examples/slurm-configs/basic/07-sglang-single-node.json @@ -12,7 +12,7 @@ "nodes": 1, "gpus_per_node": 4, "time": "02:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true }, diff --git a/examples/slurm-configs/basic/08-sglang-multi-node.json b/examples/slurm-configs/basic/08-sglang-multi-node.json index 7519b513..8485b93c 100644 --- a/examples/slurm-configs/basic/08-sglang-multi-node.json +++ b/examples/slurm-configs/basic/08-sglang-multi-node.json @@ -12,7 +12,7 @@ "nodes": 2, "gpus_per_node": 4, "time": "04:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true }, diff --git a/examples/slurm-configs/basic/cluster-amd-rccl.json b/examples/slurm-configs/basic/cluster-amd-rccl.json index e15c0a6f..e70f8721 100644 --- a/examples/slurm-configs/basic/cluster-amd-rccl.json +++ b/examples/slurm-configs/basic/cluster-amd-rccl.json @@ -19,7 +19,7 @@ "nodes": 1, "gpus_per_node": 8, "time": "12:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true }, diff --git a/examples/slurm-configs/basic/sglang-disagg-custom-split.json b/examples/slurm-configs/basic/sglang-disagg-custom-split.json index 291a5938..f38bcf64 100644 --- a/examples/slurm-configs/basic/sglang-disagg-custom-split.json +++ b/examples/slurm-configs/basic/sglang-disagg-custom-split.json @@ -20,7 +20,7 @@ "nodes": 7, "gpus_per_node": 8, "time": "04:00:00", - "output_dir": 
"./slurm_output", + "output_dir": "./slurm_results", "exclusive": true }, diff --git a/examples/slurm-configs/basic/sglang-disagg-multi-node.json b/examples/slurm-configs/basic/sglang-disagg-multi-node.json index 0c5ec00d..7dfbae19 100644 --- a/examples/slurm-configs/basic/sglang-disagg-multi-node.json +++ b/examples/slurm-configs/basic/sglang-disagg-multi-node.json @@ -19,7 +19,7 @@ "nodes": 5, "gpus_per_node": 8, "time": "04:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true }, diff --git a/src/madengine/cli/utils.py b/src/madengine/cli/utils.py index a63d3658..214b966c 100644 --- a/src/madengine/cli/utils.py +++ b/src/madengine/cli/utils.py @@ -352,7 +352,6 @@ def display_performance_table(perf_csv_path: str = "perf.csv", session_start_row console.print("[yellow]⚠️ Performance CSV is empty[/yellow]") return - # Get session_start_row to mark current runs (don't filter, just mark) total_rows = len(df) # Try parameter first, then fall back to marker file @@ -378,7 +377,7 @@ def display_performance_table(perf_csv_path: str = "perf.csv", session_start_row perf_table.add_column("Index", justify="right", style="dim") perf_table.add_column("Model", style="cyan") perf_table.add_column("Topology", justify="center", style="blue") - perf_table.add_column("Launcher", justify="center", style="magenta") # Distributed launcher + perf_table.add_column("Launcher", justify="center", style="magenta") perf_table.add_column("Deployment", justify="center", style="cyan") perf_table.add_column("GPU Arch", style="yellow") perf_table.add_column("Performance", justify="right", style="green") @@ -388,12 +387,15 @@ def display_performance_table(perf_csv_path: str = "perf.csv", session_start_row perf_table.add_column("Data Name", style="magenta") perf_table.add_column("Data Provider", style="magenta") - # Helper function to format duration + # Helper function to format duration (accepts float seconds or "Xs" string) def format_duration(duration): - 
if pd.isna(duration) or duration == "": + if pd.isna(duration) or duration == "" or duration is None: return "N/A" try: - dur = float(duration) + if isinstance(duration, str) and duration.strip().endswith("s"): + dur = float(duration.strip()[:-1]) + else: + dur = float(duration) if dur < 1: return f"{dur*1000:.0f}ms" elif dur < 60: @@ -401,7 +403,7 @@ def format_duration(duration): else: return f"{dur/60:.1f}m" except (ValueError, TypeError): - return "N/A" + return str(duration) if duration else "N/A" # Helper function to format performance def format_performance(perf): diff --git a/src/madengine/core/constants.py b/src/madengine/core/constants.py index c98980a8..8ca10483 100644 --- a/src/madengine/core/constants.py +++ b/src/madengine/core/constants.py @@ -94,42 +94,66 @@ def _load_credentials(): CREDS = _load_credentials() +# Default value used for NAS_NODES when neither env nor creds provide it. +_DEFAULT_NAS_NODES = [ + { + "NAME": "DEFAULT", + "HOST": "localhost", + "PORT": 22, + "USERNAME": "username", + "PASSWORD": "password", + } +] + +# Default value used for MAD_AWS_S3 when neither env nor creds provide it. +_DEFAULT_MAD_AWS_S3 = {"USERNAME": None, "PASSWORD": None} + +# Default value used for MAD_MINIO when neither env nor creds provide it. +_DEFAULT_MAD_MINIO = { + "USERNAME": None, + "PASSWORD": None, + "MINIO_ENDPOINT": "http://localhost:9000", + "AWS_ENDPOINT_URL_S3": "http://localhost:9000", +} + +# Default value used for PUBLIC_GITHUB_ROCM_KEY when neither env nor creds provide it. +_DEFAULT_PUBLIC_GITHUB_ROCM_KEY = {"username": None, "token": None} + + +def _get_env_or_creds_or_default(env_key: str, creds_key: str, default): + """Load config from env (JSON), creds file, or default. + + Priority: 1) environment variable (parsed as JSON), 2) credential file, 3) default. + Invalid JSON in env falls back to default with logging. + + Args: + env_key: Environment variable name (e.g. "NAS_NODES"). + creds_key: Key in CREDS dict (e.g. "NAS_NODES"). 
+ default: Default value if env unset and creds missing or env JSON invalid. + + Returns: + Loaded value (same type as default, or from env/creds). + """ + if env_key not in os.environ: + _log_config_info(f"{env_key} environment variable is not set.") + if creds_key in CREDS: + _log_config_info(f"{creds_key} loaded from credentials file.") + return CREDS[creds_key] + _log_config_info(f"{creds_key} is using default values.") + return default + _log_config_info(f"{env_key} is loaded from env variables.") + try: + return json.loads(os.environ[env_key]) + except json.JSONDecodeError as e: + _log_config_info( + f"Error parsing {env_key} environment variable: {e}, using defaults" + ) + return default + def _get_nas_nodes(): """Initialize NAS_NODES configuration.""" - if "NAS_NODES" not in os.environ: - _log_config_info("NAS_NODES environment variable is not set.") - if "NAS_NODES" in CREDS: - _log_config_info("NAS_NODES loaded from credentials file.") - return CREDS["NAS_NODES"] - else: - _log_config_info("NAS_NODES is using default values.") - return [ - { - "NAME": "DEFAULT", - "HOST": "localhost", - "PORT": 22, - "USERNAME": "username", - "PASSWORD": "password", - } - ] - else: - _log_config_info("NAS_NODES is loaded from env variables.") - try: - return json.loads(os.environ["NAS_NODES"]) - except json.JSONDecodeError as e: - _log_config_info( - f"Error parsing NAS_NODES environment variable: {e}, using defaults" - ) - return [ - { - "NAME": "DEFAULT", - "HOST": "localhost", - "PORT": 22, - "USERNAME": "username", - "PASSWORD": "password", - } - ] + return _get_env_or_creds_or_default("NAS_NODES", "NAS_NODES", _DEFAULT_NAS_NODES) NAS_NODES = _get_nas_nodes() @@ -137,94 +161,36 @@ def _get_nas_nodes(): def _get_mad_aws_s3(): """Initialize MAD_AWS_S3 configuration.""" - if "MAD_AWS_S3" not in os.environ: - _log_config_info("MAD_AWS_S3 environment variable is not set.") - if "MAD_AWS_S3" in CREDS: - _log_config_info("MAD_AWS_S3 loaded from credentials file.") - return 
CREDS["MAD_AWS_S3"] - else: - _log_config_info("MAD_AWS_S3 is using default values.") - return { - "USERNAME": None, - "PASSWORD": None, - } - else: - _log_config_info("MAD_AWS_S3 is loaded from env variables.") - try: - return json.loads(os.environ["MAD_AWS_S3"]) - except json.JSONDecodeError as e: - _log_config_info( - f"Error parsing MAD_AWS_S3 environment variable: {e}, using defaults" - ) - return { - "USERNAME": None, - "PASSWORD": None, - } + return _get_env_or_creds_or_default("MAD_AWS_S3", "MAD_AWS_S3", _DEFAULT_MAD_AWS_S3) MAD_AWS_S3 = _get_mad_aws_s3() -# Check the MAD_MINIO environment variable which is a dict. def _get_mad_minio(): - """Initialize MAD_MINIO configuration.""" - if "MAD_MINIO" not in os.environ: - _log_config_info("MAD_MINIO environment variable is not set.") - if "MAD_MINIO" in CREDS: - _log_config_info("MAD_MINIO loaded from credentials file.") - return CREDS["MAD_MINIO"] - else: - _log_config_info("MAD_MINIO is using default values.") - return { - "USERNAME": None, - "PASSWORD": None, - "MINIO_ENDPOINT": "http://localhost:9000", - "AWS_ENDPOINT_URL_S3": "http://localhost:9000", - } - else: - _log_config_info("MAD_MINIO is loaded from env variables.") - try: - return json.loads(os.environ["MAD_MINIO"]) - except json.JSONDecodeError as e: - _log_config_info( - f"Error parsing MAD_MINIO environment variable: {e}, using defaults" - ) - return { - "USERNAME": None, - "PASSWORD": None, - "MINIO_ENDPOINT": "http://localhost:9000", - "AWS_ENDPOINT_URL_S3": "http://localhost:9000", - } + """Initialize MAD_MINIO configuration (dict with USERNAME, PASSWORD, MINIO_ENDPOINT, etc.).""" + return _get_env_or_creds_or_default("MAD_MINIO", "MAD_MINIO", _DEFAULT_MAD_MINIO) MAD_MINIO = _get_mad_minio() def _get_public_github_rocm_key(): - """Initialize PUBLIC_GITHUB_ROCM_KEY configuration.""" - if "PUBLIC_GITHUB_ROCM_KEY" not in os.environ: - _log_config_info("PUBLIC_GITHUB_ROCM_KEY environment variable is not set.") - if "PUBLIC_GITHUB_ROCM_KEY" in 
CREDS: - _log_config_info("PUBLIC_GITHUB_ROCM_KEY loaded from credentials file.") - return CREDS["PUBLIC_GITHUB_ROCM_KEY"] - else: - _log_config_info("PUBLIC_GITHUB_ROCM_KEY is using default values.") - return { - "username": None, - "token": None, - } - else: - _log_config_info("PUBLIC_GITHUB_ROCM_KEY is loaded from env variables.") - try: - return json.loads(os.environ["PUBLIC_GITHUB_ROCM_KEY"]) - except json.JSONDecodeError as e: - _log_config_info( - f"Error parsing PUBLIC_GITHUB_ROCM_KEY environment variable: {e}, using defaults" - ) - return { - "username": None, - "token": None, - } + """Initialize PUBLIC_GITHUB_ROCM_KEY configuration. + + Returned dict always has keys 'username' and 'token' (public API). + Credential files may use 'password' for the token; that is normalized to 'token'. + """ + raw = _get_env_or_creds_or_default( + "PUBLIC_GITHUB_ROCM_KEY", + "PUBLIC_GITHUB_ROCM_KEY", + _DEFAULT_PUBLIC_GITHUB_ROCM_KEY, + ) + if not isinstance(raw, dict): + return dict(_DEFAULT_PUBLIC_GITHUB_ROCM_KEY) + # Normalize so public API always has username + token (accept creds that use "password") + token = raw.get("token") or raw.get("password") + return {"username": raw.get("username"), "token": token} PUBLIC_GITHUB_ROCM_KEY = _get_public_github_rocm_key() diff --git a/src/madengine/core/context.py b/src/madengine/core/context.py index e1d93b61..836ab81e 100644 --- a/src/madengine/core/context.py +++ b/src/madengine/core/context.py @@ -17,6 +17,7 @@ import collections.abc import os import re +import sys import typing # third-party modules @@ -397,8 +398,11 @@ def get_gpu_vendor(self) -> str: for amd_smi_path in amd_smi_paths: if os.path.exists(amd_smi_path): try: + # Debug: log to stderr so SLURM node .err captures where we are if killed + print(f"[DEBUG] get_gpu_vendor: trying amd-smi at {amd_smi_path}", file=sys.stderr, flush=True) # Verify amd-smi actually works (180s timeout for slow GPU initialization) result = self.console.sh(f"{amd_smi_path} list > 
/dev/null 2>&1 && echo 'AMD' || echo ''", timeout=180) + print(f"[DEBUG] get_gpu_vendor: amd-smi returned", file=sys.stderr, flush=True) if result and result.strip() == "AMD": return "AMD" except Exception as e: @@ -408,7 +412,9 @@ def get_gpu_vendor(self) -> str: rocm_smi_path = os.path.join(self._rocm_path, "bin", "rocm-smi") if os.path.exists(rocm_smi_path): try: + print(f"[DEBUG] get_gpu_vendor: trying rocm-smi at {rocm_smi_path}", file=sys.stderr, flush=True) result = self.console.sh(f"{rocm_smi_path} --showid > /dev/null 2>&1 && echo 'AMD' || echo ''", timeout=180) + print(f"[DEBUG] get_gpu_vendor: rocm-smi returned", file=sys.stderr, flush=True) if result and result.strip() == "AMD": return "AMD" except Exception as e: diff --git a/src/madengine/deployment/base.py b/src/madengine/deployment/base.py index 33a338a9..52bbe02f 100644 --- a/src/madengine/deployment/base.py +++ b/src/madengine/deployment/base.py @@ -14,11 +14,27 @@ from dataclasses import dataclass, field from enum import Enum from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional +from jinja2 import Environment, FileSystemLoader from rich.console import Console +def create_jinja_env(template_dir: Path) -> Environment: + """Create a Jinja2 Environment with common filters for deployment templates. + + Args: + template_dir: Path to the template directory (e.g. deployment/templates/slurm). + + Returns: + Jinja2 Environment with dirname and basename filters registered. 
Expected format (from training scripts): + performance: <value> <metric> + node_id: <node_id> + local_gpus: <count> + test_duration: <seconds>s
+ """ + import re + + perf_pattern = r"performance:\s*([\d.]+)\s+(\S+)" + match = re.search(perf_pattern, log_content) + if not match: + return None + + value = float(match.group(1)) + metric = match.group(2) + + node_id_pattern = r"node_id:\s*(\d+)" + node_match = re.search(node_id_pattern, log_content) + node_id = int(node_match.group(1)) if node_match else None + + local_gpus_pattern = r"local_gpus:\s*(\d+)" + gpus_match = re.search(local_gpus_pattern, log_content) + local_gpus = int(gpus_match.group(1)) if gpus_match else 1 + + duration_pattern = r"test_duration:\s*([\d.]+)s" + duration_match = re.search(duration_pattern, log_content) + if duration_match: + duration = f"{duration_match.group(1)}s" + else: + # Also match container output: "Test Duration: 12.34 seconds" + alt_duration_pattern = r"Test Duration:\s*([\d.]+)\s*seconds" + alt_match = re.search(alt_duration_pattern, log_content, re.IGNORECASE) + duration = f"{alt_match.group(1)}s" if alt_match else "N/A" + + gpu_arch_pattern = r"(?:🔹\s*)?Name\s*:\s*(gfx\w+)" + gpu_arch_match = re.search(gpu_arch_pattern, log_content) + gpu_arch = gpu_arch_match.group(1) if gpu_arch_match else "N/A" + + return { + "model": model_name, + "performance": value, + "metric": metric, + "node_id": node_id, + "local_gpus": local_gpus, + "duration": duration, + "test_duration": duration, + "gpu_architecture": gpu_arch, + "data_name": "N/A", + "data_provider": "N/A", + } + + def _determine_aggregation_method(self, metric_name: str) -> str: + """ + Determine how to aggregate a metric based on its name/type. 
+ + Returns: + "sum", "average", or "max" + """ + metric_lower = metric_name.lower() + if any( + keyword in metric_lower + for keyword in [ + "throughput", + "samples_per_second", + "tokens_per_second", + "images_per_second", + "requests_per_second", + "qps", + "bandwidth", + "ops_per_second", + "samples/sec", + "tokens/sec", + ] + ): + return "sum" + if any( + keyword in metric_lower + for keyword in [ + "latency", + "time", + "duration", + "milliseconds", + "seconds", + "ttft", + "tpot", + "response_time", + "accuracy", + "precision", + "recall", + "f1", + "loss", + ] + ): + return "average" + if any( + keyword in metric_lower + for keyword in ["memory", "bytes", "ram", "vram", "gb", "mb"] + ): + return "max" + self.console.print( + f"[yellow]⚠ Unknown metric type '{metric_name}', using sum aggregation[/yellow]" + ) + return "sum" + + def _aggregate_node_metrics( + self, + per_node_metrics: List[Dict[str, Any]], + nnodes: int, + launcher_type: str, + ) -> Optional[Dict[str, Any]]: + """ + Aggregate per-node metrics into a single job-level record. + + Throughput metrics are summed; latency/accuracy averaged; memory max. + Used by both SLURM and Kubernetes for multi-node result aggregation. 
+ """ + import statistics + + if not per_node_metrics: + return None + + first_metric = per_node_metrics[0] + metric_name = first_metric["metric"] + aggregation_method = self._determine_aggregation_method(metric_name) + + if aggregation_method == "sum": + aggregated_value = sum(m["performance"] for m in per_node_metrics) + method_desc = "sum_across_nodes" + elif aggregation_method == "average": + aggregated_value = statistics.mean(m["performance"] for m in per_node_metrics) + method_desc = "average_across_nodes" + elif aggregation_method == "max": + aggregated_value = max(m["performance"] for m in per_node_metrics) + method_desc = "max_across_nodes" + else: + aggregated_value = sum(m["performance"] for m in per_node_metrics) + method_desc = "sum_across_nodes (default)" + + perfs = [m["performance"] for m in per_node_metrics] + if len(perfs) > 1: + statistics_dict = { + "mean": statistics.mean(perfs), + "std_dev": statistics.stdev(perfs), + "min": min(perfs), + "max": max(perfs), + "coefficient_variation": ( + statistics.stdev(perfs) / statistics.mean(perfs) + if statistics.mean(perfs) > 0 + else 0 + ), + } + else: + statistics_dict = { + "mean": perfs[0], + "std_dev": 0, + "min": perfs[0], + "max": perfs[0], + "coefficient_variation": 0, + } + + gpu_arch = "N/A" + for m in per_node_metrics: + if m.get("gpu_architecture") and m["gpu_architecture"] != "N/A": + gpu_arch = m["gpu_architecture"] + break + + durations = [ + m.get("duration", m.get("test_duration", "N/A")) + for m in per_node_metrics + if m.get("duration", "N/A") != "N/A" or m.get("test_duration", "N/A") != "N/A" + ] + if durations: + duration_values = [] + for d in durations: + if isinstance(d, str) and d.endswith("s"): + try: + duration_values.append(float(d[:-1])) + except ValueError: + pass + duration = f"{max(duration_values):.2f}s" if duration_values else "N/A" + else: + duration = "N/A" + + total_gpus = sum(m.get("local_gpus", 1) for m in per_node_metrics) + gpus_per_node = 
per_node_metrics[0].get("local_gpus", 1) if per_node_metrics else 1 + + aggregated_record = { + "model": first_metric["model"], + "n_gpus": total_gpus, + "nnodes": nnodes, + "gpus_per_node": gpus_per_node, + "performance": aggregated_value, + "metric": metric_name, + "status": "SUCCESS", + "topology": f"{nnodes}N×{gpus_per_node}G", + "launcher": launcher_type or "N/A", + "deployment_type": getattr(self, "DEPLOYMENT_TYPE", "base"), + "gpu_architecture": gpu_arch, + "test_duration": duration, + "data_name": first_metric.get("data_name", "N/A"), + "data_provider": first_metric.get("data_provider", "N/A"), + "aggregation_method": method_desc, + "nodes_contributing": len(per_node_metrics), + "per_node_mean": statistics_dict["mean"], + "per_node_std_dev": statistics_dict["std_dev"], + "per_node_cv": statistics_dict["coefficient_variation"], + } + return aggregated_record + + def _ensure_perf_csv_exists(self) -> None: + """Ensure perf.csv exists with standard header (for appending aggregated rows).""" + perf_csv_path = Path("perf.csv") + if perf_csv_path.exists(): + return + standard_header = ( + "model,n_gpus,nnodes,gpus_per_node,training_precision,pipeline,args,tags," + "docker_file,base_docker,docker_sha,docker_image,git_commit,machine_name," + "deployment_type,launcher,gpu_architecture,performance,metric,relative_change," + "status,build_duration,test_duration,dataname,data_provider_type,data_size," + "data_download_duration,build_number,additional_docker_run_options" + ) + perf_csv_path.write_text(standard_header + "\n", encoding="utf-8") + + def _write_to_perf_csv(self, perf_data: Dict[str, Any]) -> None: + """ + Append one row to perf.csv. Uses existing header if file exists for column order. 
+ """ + import csv + + perf_csv_path = Path("perf.csv") + default_headers = [ + "model", + "n_gpus", + "nnodes", + "gpus_per_node", + "training_precision", + "pipeline", + "args", + "tags", + "docker_file", + "base_docker", + "docker_sha", + "docker_image", + "git_commit", + "machine_name", + "deployment_type", + "launcher", + "gpu_architecture", + "performance", + "metric", + "relative_change", + "status", + "build_duration", + "test_duration", + "dataname", + "data_provider_type", + "data_size", + "data_download_duration", + "build_number", + "additional_docker_run_options", + ] + file_exists = perf_csv_path.exists() + existing_header = None + if file_exists: + with open( + perf_csv_path, "r", newline="", encoding="utf-8", errors="replace" + ) as rf: + reader = csv.reader(rf) + existing_header = next(reader, None) + headers = existing_header if existing_header else default_headers + if file_exists and existing_header: + row_by_name = {k: perf_data.get(k, "") for k in headers} + row_to_write = [str(row_by_name.get(h, "")) for h in headers] + else: + row_to_write = perf_data + + with open(perf_csv_path, "a", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=headers, extrasaction="ignore") + if not file_exists: + writer.writeheader() + if file_exists and existing_header: + csv.writer(f).writerow(row_to_write) + else: + writer.writerow(row_to_write) + diff --git a/src/madengine/deployment/common.py b/src/madengine/deployment/common.py new file mode 100644 index 00000000..a122818c --- /dev/null +++ b/src/madengine/deployment/common.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +""" +Shared deployment utilities used by both SLURM and Kubernetes deployments. + +Provides launcher normalization, ROCm profiling checks, and multi-node +profiling configuration so logic is not duplicated across deployment modules. + +Copyright (c) Advanced Micro Devices, Inc. All rights reserved. 
+""" + +import subprocess +from typing import Any, Dict, List, Optional + +# Valid distributed launchers (used by normalize_launcher) +VALID_LAUNCHERS = [ + "torchrun", + "torchtitan", + "deepspeed", + "megatron-lm", + "vllm", + "sglang", + "sglang-disagg" +] + + +def normalize_launcher(launcher_type: Optional[str], deployment_type: str) -> str: + """ + Normalize launcher field based on deployment type and launcher value. + + Logic: + - If launcher is in VALID_LAUNCHERS: keep as-is + - If launcher is None/empty/invalid: + * local → "docker" (runs in Docker container) + * slurm → "docker" (typically uses containers on compute nodes) + * kubernetes → "native" (pod itself is the container) + + Args: + launcher_type: Raw launcher type from config (may be None) + deployment_type: "local", "slurm", or "kubernetes" + + Returns: + Normalized launcher string + """ + if launcher_type and launcher_type in VALID_LAUNCHERS: + return launcher_type + if deployment_type == "local": + return "docker" + if deployment_type == "slurm": + return "docker" + if deployment_type == "kubernetes": + return "native" + return "docker" + + +def is_rocprofv3_available() -> bool: + """ + Check if rocprofv3 is available on the system. + + rocprofv3 is required for multi-node profiling with MPI support. + It's part of rocprofiler-sdk package in ROCm >= 6.4.1. + + Returns: + True if rocprofv3 is available and executable, False otherwise + """ + try: + result = subprocess.run( + ["rocprofv3", "--help"], + capture_output=True, + text=True, + timeout=5 + ) + return result.returncode == 0 + except (FileNotFoundError, subprocess.TimeoutExpired, OSError): + return False + + +def configure_multi_node_profiling( + nnodes: int, + tools_config: List[Dict], + logger: Any +) -> Dict[str, Any]: + """ + Configure profiling for multi-node runs with rocprofv3 support. 
+ + Industry best practice for multi-node profiling: + - Profile ALL nodes to detect stragglers, load imbalances, and communication bottlenecks + - Use rocprofv3 (MPI-aware) for distributed profiling + - Collect per-node outputs for detailed analysis + + Logic: + 1. Single node (nnodes == 1): Use existing tool behavior + 2. Multi-node (nnodes > 1): + a. Check if rocprofv3 is available + b. If available: Enable per-node profiling, upgrade "rocprof" to "rocprofv3" + c. If not available: Log warning and skip profiling + + Args: + nnodes: Number of nodes in the deployment + tools_config: List of tool configurations from user + logger: Logger instance for messages + + Returns: + Dictionary with profiling configuration: + - enabled: bool + - mode: str - "single_node", "multi_node", or "multi_node_unsupported" + - tools: List[Dict] + - per_node_collection: bool + """ + if nnodes == 1: + return { + "enabled": True, + "mode": "single_node", + "tools": tools_config, + "per_node_collection": False + } + + if not is_rocprofv3_available(): + logger.warning( + "╔════════════════════════════════════════════════════════════════════════════╗\n" + "║ Multi-Node Profiling Requirements Not Met ║\n" + "╠════════════════════════════════════════════════════════════════════════════╣\n" + "║ Multi-node profiling requires rocprofv3 (MPI-aware profiling support). ║\n" + "║ ║\n" + "║ Current Status: rocprofv3 NOT FOUND on system ║\n" + "║ ║\n" + "║ Profiling will be SKIPPED for this multi-node run. 
║\n" + "║ ║\n" + "║ To enable multi-node profiling: ║\n" + "║ • Install rocprofiler-sdk package (ROCm >= 6.4.1) ║\n" + "║ • Command: apt install rocprofiler-sdk ║\n" + "║ • Or upgrade to ROCm 6.4.1 or later ║\n" + "║ ║\n" + "║ Note: Single-node profiling uses rocprof (no rocprofv3 required) ║\n" + "╚════════════════════════════════════════════════════════════════════════════╝" + ) + return { + "enabled": False, + "mode": "multi_node_unsupported", + "tools": [], + "per_node_collection": False + } + + logger.info(f"✓ Multi-node profiling enabled for {nnodes} nodes (rocprofv3 detected)") + + upgraded_tools: List[Dict] = [] + for tool in tools_config: + if not isinstance(tool, dict): + upgraded_tools.append(tool) + continue + tool_name = tool.get("name") + if tool_name == "rocprof": + logger.info( + " → Upgrading 'rocprof' to 'rocprofv3' for multi-node MPI compatibility" + ) + upgraded_tool = tool.copy() + upgraded_tool["name"] = "rocprofv3" + upgraded_tools.append(upgraded_tool) + else: + upgraded_tools.append(tool) + + if upgraded_tools: + tool_names = [ + t.get("name") if isinstance(t, dict) else str(t) for t in upgraded_tools + ] + logger.info(f" → Multi-node profiling tools: {', '.join(filter(None, tool_names))}") + if "rccl_trace" in tool_names: + logger.info(" → ✓ rccl_trace enabled (critical for multi-node communication profiling)") + + return { + "enabled": True, + "mode": "multi_node", + "tools": upgraded_tools, + "per_node_collection": True, + "profiler": "rocprofv3", + "wrapper_mode": "launcher" + } diff --git a/src/madengine/deployment/config_loader.py b/src/madengine/deployment/config_loader.py index 5afbe7b7..fdbf3c94 100644 --- a/src/madengine/deployment/config_loader.py +++ b/src/madengine/deployment/config_loader.py @@ -13,10 +13,27 @@ import json import os from pathlib import Path -from typing import Dict, Any, Optional +from typing import Any, Callable, Dict, Optional from copy import deepcopy +def apply_deployment_config(config: Any, load_fn: 
Callable[[Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]: + """Apply deployment defaults via a loader and set config.additional_context. + + Used by SLURM and Kubernetes deployment classes before calling super().__init__(config). + + Args: + config: DeploymentConfig (or any object with additional_context attribute). + load_fn: ConfigLoader.load_slurm_config or ConfigLoader.load_k8s_config. + + Returns: + The merged full configuration dict. + """ + full_config = load_fn(config.additional_context or {}) + config.additional_context = full_config + return full_config + + class ConfigLoader: """Smart configuration loader with preset support.""" diff --git a/src/madengine/deployment/kubernetes.py b/src/madengine/deployment/kubernetes.py index 29c9874e..d84f4384 100644 --- a/src/madengine/deployment/kubernetes.py +++ b/src/madengine/deployment/kubernetes.py @@ -33,14 +33,22 @@ except ImportError: YAML_AVAILABLE = False -from jinja2 import Environment, FileSystemLoader, Template - -from .base import BaseDeployment, DeploymentConfig, DeploymentResult, DeploymentStatus -from .config_loader import ConfigLoader +from jinja2 import Template + +from .base import BaseDeployment, DeploymentConfig, DeploymentResult, DeploymentStatus, create_jinja_env +from .common import ( + VALID_LAUNCHERS, + configure_multi_node_profiling, + is_rocprofv3_available, + normalize_launcher, +) +from .config_loader import ConfigLoader, apply_deployment_config from madengine.core.dataprovider import Data from madengine.core.context import Context from madengine.core.errors import ConfigurationError, create_error_context from madengine.utils.gpu_config import resolve_runtime_gpus +from madengine.utils.path_utils import get_madengine_root, scripts_base_dir_from +from madengine.utils.run_details import flatten_tags_in_place, get_build_number, get_pipeline try: from madengine.reporting.update_perf_csv import update_perf_csv @@ -50,184 +58,6 @@ REPORTING_AVAILABLE = False -# Valid distributed launchers 
-VALID_LAUNCHERS = [ - "torchrun", - "torchtitan", - "deepspeed", - "megatron-lm", - "vllm", - "sglang", - "sglang-disagg" -] - - -def normalize_launcher(launcher_type: Optional[str], deployment_type: str) -> str: - """ - Normalize launcher field based on deployment type and launcher value. - - Logic: - - If launcher is in VALID_LAUNCHERS: keep as-is - - If launcher is None/empty/invalid: - * local → "docker" (runs in Docker container) - * slurm → "docker" (typically uses containers on compute nodes) - * kubernetes → "native" (pod itself is the container) - - Args: - launcher_type: Raw launcher type from config (may be None) - deployment_type: "local", "slurm", or "kubernetes" - - Returns: - Normalized launcher string - """ - # If launcher is valid, keep it - if launcher_type and launcher_type in VALID_LAUNCHERS: - return launcher_type - - # Otherwise, default based on deployment type - if deployment_type == "local": - return "docker" - elif deployment_type == "slurm": - return "docker" - elif deployment_type == "kubernetes": - return "native" - else: - # Fallback for unknown deployment types - return "docker" - - -def is_rocprofv3_available() -> bool: - """ - Check if rocprofv3 is available on the system. - - rocprofv3 is required for multi-node profiling with MPI support. - It's part of rocprofiler-sdk package in ROCm >= 6.4.1. - - Returns: - True if rocprofv3 is available and executable, False otherwise - """ - try: - # Note: rocprofv3 doesn't support --version, use --help instead - result = subprocess.run( - ["rocprofv3", "--help"], - capture_output=True, - text=True, - timeout=5 - ) - return result.returncode == 0 - except (FileNotFoundError, subprocess.TimeoutExpired, OSError): - return False - - -def configure_multi_node_profiling( - nnodes: int, - tools_config: List[Dict], - logger -) -> Dict[str, Any]: - """ - Configure profiling for multi-node runs with rocprofv3 support. 
- - Industry best practice for multi-node profiling: - - Profile ALL nodes to detect stragglers, load imbalances, and communication bottlenecks - - Use rocprofv3 (MPI-aware) for distributed profiling - - Collect per-node outputs for detailed analysis - - Logic: - 1. Single node (nnodes == 1): Use existing tool behavior - 2. Multi-node (nnodes > 1): - a. Check if rocprofv3 is available - b. If available: Enable per-node profiling, upgrade "rocprof" to "rocprofv3" - c. If not available: Log warning and skip profiling - - Args: - nnodes: Number of nodes in the deployment - tools_config: List of tool configurations from user - logger: Logger instance for messages - - Returns: - Dictionary with profiling configuration: - - enabled: bool - Whether profiling is enabled - - mode: str - "single_node", "multi_node", or "multi_node_unsupported" - - tools: List[Dict] - Processed tool configurations - - per_node_collection: bool - Whether to collect from all nodes - """ - if nnodes == 1: - # Single node - existing behavior works fine - return { - "enabled": True, - "mode": "single_node", - "tools": tools_config, - "per_node_collection": False - } - - # Multi-node case - check rocprofv3 availability - if not is_rocprofv3_available(): - logger.warning( - "╔════════════════════════════════════════════════════════════════════════════╗\n" - "║ Multi-Node Profiling Requirements Not Met ║\n" - "╠════════════════════════════════════════════════════════════════════════════╣\n" - "║ Multi-node profiling requires rocprofv3 (MPI-aware profiling support). ║\n" - "║ ║\n" - "║ Current Status: rocprofv3 NOT FOUND on system ║\n" - "║ ║\n" - "║ Profiling will be SKIPPED for this multi-node run. 
║\n" - "║ ║\n" - "║ To enable multi-node profiling: ║\n" - "║ • Install rocprofiler-sdk package (ROCm >= 6.4.1) ║\n" - "║ • Command: apt install rocprofiler-sdk ║\n" - "║ • Or upgrade to ROCm 6.4.1 or later ║\n" - "║ ║\n" - "║ Note: Single-node profiling uses rocprof (no rocprofv3 required) ║\n" - "╚════════════════════════════════════════════════════════════════════════════╝" - ) - return { - "enabled": False, - "mode": "multi_node_unsupported", - "tools": [], - "per_node_collection": False - } - - # rocprofv3 is available - enable full multi-node profiling - logger.info(f"✓ Multi-node profiling enabled for {nnodes} nodes (rocprofv3 detected)") - - # Upgrade "rocprof" tools to "rocprofv3" for multi-node compatibility - upgraded_tools = [] - rocprof_upgraded = False - - for tool in tools_config: - tool_name = tool.get("name") - - if tool_name == "rocprof": - # Upgrade to rocprofv3 for multi-node MPI support - logger.info( - f" → Upgrading 'rocprof' to 'rocprofv3' for multi-node MPI compatibility" - ) - upgraded_tool = tool.copy() - upgraded_tool["name"] = "rocprofv3" - upgraded_tools.append(upgraded_tool) - rocprof_upgraded = True - else: - upgraded_tools.append(tool) - - # Log profiling tools being used - if upgraded_tools: - tool_names = [t.get("name") for t in upgraded_tools] - logger.info(f" → Multi-node profiling tools: {', '.join(tool_names)}") - - # Highlight RCCL trace if present (critical for multi-node communication) - if "rccl_trace" in tool_names: - logger.info(" → ✓ rccl_trace enabled (critical for multi-node communication profiling)") - - return { - "enabled": True, - "mode": "multi_node", - "tools": upgraded_tools, - "per_node_collection": True, - "profiler": "rocprofv3", - "wrapper_mode": "launcher" - } - - class KubernetesDeployment(BaseDeployment): """ Kubernetes cluster deployment using Python client library. 
@@ -271,11 +101,7 @@ def __init__(self, config: DeploymentConfig): "Install with: pip install pyyaml" ) - # Apply intelligent defaults using ConfigLoader - # This merges built-in presets with user configuration - full_config = ConfigLoader.load_k8s_config(config.additional_context) - config.additional_context = full_config - + apply_deployment_config(config, ConfigLoader.load_k8s_config) super().__init__(config) # Parse K8s configuration (now with defaults applied) @@ -288,11 +114,8 @@ def __init__(self, config: DeploymentConfig): # Setup Jinja2 template environment template_dir = Path(__file__).parent / "templates" / "kubernetes" - self.jinja_env = Environment(loader=FileSystemLoader(str(template_dir))) - - # Register custom Jinja2 filters - self.jinja_env.filters['dirname'] = lambda path: str(Path(path).parent) - + self.jinja_env = create_jinja_env(template_dir) + # Initialize data provider (will be used if models need data) self.data = None self.context_for_data = None @@ -454,7 +277,7 @@ def _add_tool_scripts(self, pre_scripts: List[Dict], post_scripts: List[Dict]) - return # Load tools.json to get pre/post script definitions - tools_json_path = Path(__file__).parent.parent / "scripts" / "common" / "tools.json" + tools_json_path = get_madengine_root() / "scripts" / "common" / "tools.json" if not tools_json_path.exists(): return @@ -492,7 +315,7 @@ def _load_common_scripts(self, script_list: List[Dict]) -> Dict[str, str]: """ import os script_contents = {} - madengine_root = Path(__file__).parent.parent # Go up to madengine/ directory + madengine_root = get_madengine_root() for script_config in script_list: script_path = script_config.get("path", "") @@ -784,7 +607,7 @@ def _prepare_template_context( data_provider_script = k8s_tools_config["data_providers"][provider_type] # Load K8s data provider script content - k8s_script_path = Path(__file__).parent.parent / data_provider_script["script"] + k8s_script_path = get_madengine_root() / 
data_provider_script["script"] if k8s_script_path.exists(): with open(k8s_script_path, "r") as f: data_provider_script_content = f.read() @@ -2855,7 +2678,9 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]: }) # 2. Parse NODE-LOCAL performance from log - perf_data = self._parse_node_performance(log, model_info, build_info) + perf_data = self._parse_performance_from_log( + log, model_info.get("name", "") + ) # Get pod exit status pod_status = pod.status.phase @@ -2967,21 +2792,51 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]: ) if aggregated_record: - # Write ONE aggregated row to perf.csv (CRITICAL for database) - self._write_to_perf_csv(aggregated_record) - + # Full reporting pipeline: perf_entry at project root, then update_* (same as local/SLURM) + self._ensure_perf_csv_exists() + run_details_dict = self._build_perf_entry_from_aggregated( + aggregated_record, model_info, build_info, deployment_id + ) + perf_entry_path = Path("perf_entry.json") + with open(perf_entry_path, "w", encoding="utf-8") as f: + json.dump(run_details_dict, f, indent=2) + if run_details_dict.get("status") == "SUCCESS": + update_perf_csv(perf_csv="perf.csv", single_result=str(perf_entry_path)) + else: + update_perf_csv(perf_csv="perf.csv", exception_result=str(perf_entry_path)) + scripts_path = model_info.get("scripts", "") + scripts_base_dir = scripts_base_dir_from(scripts_path) + try: + if run_details_dict.get("status") == "SUCCESS": + num_entries = update_perf_super_json( + single_result=str(perf_entry_path), + perf_super_json="perf_super.json", + scripts_base_dir=scripts_base_dir, + ) + else: + num_entries = update_perf_super_json( + exception_result=str(perf_entry_path), + perf_super_json="perf_super.json", + scripts_base_dir=scripts_base_dir, + ) + update_perf_super_csv( + perf_super_json="perf_super.json", + perf_super_csv="perf_super.csv", + num_entries=num_entries, + ) + except Exception as e: + self.console.print(f"[yellow]⚠ Could not update 
perf_super: {e}[/yellow]") results["successful_runs"].append({ "model": model_info.get("name"), "perf_data": aggregated_record, - "nodes": results["nodes"], # Include per-node details - "per_node_metrics": per_node_metrics # For detailed analysis + "nodes": results["nodes"], + "per_node_metrics": per_node_metrics }) - self.console.print( f"[green]✓ Aggregated performance from {len(per_node_metrics)} nodes[/green]" ) self.console.print( - f"[green]✓ Updated local perf.csv[/green]" + f"[green]✓ Updated perf_entry.json, perf.csv, perf_super.* (Docker-compatible)[/green]" ) else: # No performance from log: try multiple_results CSV (same contract as local Docker) @@ -3012,7 +2867,7 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]: model_name=model_info.get("name", ""), ) scripts_path = model_info.get("scripts", "") - scripts_base_dir = os.path.dirname(scripts_path) if scripts_path else None + scripts_base_dir = scripts_base_dir_from(scripts_path) num_entries = update_perf_super_json( perf_super_json="perf_super.json", multiple_results=str(resolved_csv_path), @@ -3483,48 +3338,44 @@ def _create_failure_record(self, model_info: Dict, build_info: Dict, pod_name: s # Model configuration "training_precision": model_info.get("training_precision", ""), - "pipeline": os.environ.get("pipeline", ""), + "pipeline": get_pipeline(), "args": model_info.get("args", ""), "tags": model_info.get("tags", ""), - + # Build information "docker_file": build_info.get("dockerfile", ""), "base_docker": build_info.get("base_docker", ""), "docker_sha": build_info.get("docker_sha", ""), "docker_image": build_info.get("docker_image", ""), - + # Runtime information "git_commit": "", "machine_name": pod_name, "deployment_type": "kubernetes", "launcher": launcher, "gpu_architecture": "", - + # Performance metrics - FAILED "performance": "0", "metric": error_msg, # Store error message in metric field "relative_change": "", "status": "FAILURE", # Use "FAILURE" to match CSV schema - + # 
Timing "build_duration": build_info.get("build_duration", ""), "test_duration": "", - + # Data information "dataname": model_info.get("data", ""), "data_provider_type": "", "data_size": "", "data_download_duration": "", - + # Build tracking - "build_number": os.environ.get("BUILD_NUMBER", "0"), + "build_number": get_build_number(), "additional_docker_run_options": model_info.get("additional_docker_run_options", ""), } - - # Flatten tags if they are in list format - if isinstance(result["tags"], list): - result["tags"] = ",".join(str(item) for item in result["tags"]) - + flatten_tags_in_place(result) return result # Standard perf.csv header (must match container_runner.ensure_perf_csv_exists) @@ -3543,6 +3394,67 @@ def _ensure_perf_csv_exists(self) -> None: perf_csv_path.write_text(self._PERF_CSV_HEADER + "\n", encoding="utf-8") self.console.print("[dim]Created perf.csv with standard header[/dim]") + def _build_perf_entry_from_aggregated( + self, + aggregated_record: Dict[str, Any], + model_info: Dict[str, Any], + build_info: Dict[str, Any], + deployment_id: str, + ) -> Dict[str, Any]: + """Build full run_details dict from aggregated record for perf_entry and update_* pipeline.""" + from madengine.utils.config_parser import ConfigParser + + deployment_config = self.manifest.get("deployment_config", {}) + distributed_config = deployment_config.get("distributed", {}) + nnodes = distributed_config.get("nnodes", 1) + nproc_per_node = distributed_config.get("nproc_per_node") + if nproc_per_node is None: + nproc_per_node = int(model_info.get("n_gpus", 1)) + launcher = normalize_launcher(distributed_config.get("launcher"), "kubernetes") + test_duration = aggregated_record.get("test_duration") or aggregated_record.get("duration", "") + run_details = { + "model": model_info.get("name", aggregated_record.get("model", "")), + "n_gpus": str(aggregated_record.get("n_gpus", nnodes * nproc_per_node)), + "nnodes": str(aggregated_record.get("nnodes", nnodes)), + "gpus_per_node": 
str(aggregated_record.get("gpus_per_node", nproc_per_node)), + "training_precision": model_info.get("training_precision", ""), + "pipeline": get_pipeline(), + "args": model_info.get("args", ""), + "tags": model_info.get("tags", ""), + "docker_file": build_info.get("dockerfile", ""), + "base_docker": build_info.get("base_docker", ""), + "docker_sha": build_info.get("docker_sha", ""), + "docker_image": build_info.get("docker_image", ""), + "git_commit": "", + "machine_name": deployment_id, + "deployment_type": "kubernetes", + "launcher": launcher, + "gpu_architecture": aggregated_record.get("gpu_architecture", ""), + "performance": str(aggregated_record.get("performance", "")), + "metric": aggregated_record.get("metric", ""), + "relative_change": "", + "status": aggregated_record.get("status", "SUCCESS"), + "build_duration": build_info.get("build_duration", ""), + "test_duration": test_duration, + "dataname": aggregated_record.get("data_name", model_info.get("data", "")), + "data_provider_type": aggregated_record.get("data_provider", ""), + "data_size": "", + "data_download_duration": "", + "build_number": get_build_number(), + "additional_docker_run_options": model_info.get("additional_docker_run_options", ""), + } + flatten_tags_in_place(run_details) + try: + scripts_path = model_info.get("scripts", "") + scripts_base_dir = scripts_base_dir_from(scripts_path) + config_parser = ConfigParser(scripts_base_dir=scripts_base_dir) + run_details["configs"] = config_parser.parse_and_load( + model_info.get("args", ""), scripts_path + ) + except Exception: + run_details["configs"] = None + return run_details + def _build_common_info_dict( self, model_info: Dict, @@ -3571,7 +3483,7 @@ def _build_common_info_dict( "nnodes": nnodes_str, "gpus_per_node": gpus_per_node, "training_precision": model_info.get("training_precision", ""), - "pipeline": os.environ.get("pipeline", ""), + "pipeline": get_pipeline(), "args": model_info.get("args", ""), "tags": model_info.get("tags", ""), 
"docker_file": build_info.get("dockerfile", ""), @@ -3590,11 +3502,10 @@ def _build_common_info_dict( "data_provider_type": "", "data_size": "", "data_download_duration": "", - "build_number": os.environ.get("BUILD_NUMBER", "0"), + "build_number": get_build_number(), "additional_docker_run_options": model_info.get("additional_docker_run_options", ""), } - if isinstance(result["tags"], list): - result["tags"] = ",".join(str(t) for t in result["tags"]) + flatten_tags_in_place(result) return result def _create_multiple_result_row_record( @@ -3625,7 +3536,7 @@ def _create_multiple_result_row_record( "nnodes": str(nnodes), "gpus_per_node": str(nproc_per_node), "training_precision": model_info.get("training_precision", ""), - "pipeline": os.environ.get("pipeline", ""), + "pipeline": get_pipeline(), "args": model_info.get("args", ""), "tags": model_info.get("tags", ""), "docker_file": build_info.get("dockerfile", ""), @@ -3647,82 +3558,12 @@ def _create_multiple_result_row_record( "data_provider_type": "", "data_size": "", "data_download_duration": "", - "build_number": os.environ.get("BUILD_NUMBER", "0"), + "build_number": get_build_number(), "additional_docker_run_options": model_info.get("additional_docker_run_options", ""), } - if isinstance(result["tags"], list): - result["tags"] = ",".join(str(t) for t in result["tags"]) + flatten_tags_in_place(result) return result - def _parse_node_performance( - self, - log_content: str, - model_info: Dict, - build_info: Dict - ) -> Optional[Dict]: - """ - Parse node-local performance from log. 
- - Expected format in log (from updated run scripts): - performance: - node_id: - local_gpus: - - Args: - log_content: Pod log content - model_info: Model information dict - build_info: Build information dict - - Returns: - Dict with node performance data, or None if parsing failed - """ - import re - - perf_data = None - - # Parse performance line - perf_pattern = r"performance:\s*([\d.]+)\s+(\S+)" - match = re.search(perf_pattern, log_content) - - if match: - value = float(match.group(1)) - metric = match.group(2) - - # Try to extract node_id for validation - node_id_pattern = r"node_id:\s*(\d+)" - node_match = re.search(node_id_pattern, log_content) - node_id = int(node_match.group(1)) if node_match else None - - # Try to extract local_gpus - local_gpus_pattern = r"local_gpus:\s*(\d+)" - gpus_match = re.search(local_gpus_pattern, log_content) - local_gpus = int(gpus_match.group(1)) if gpus_match else 1 - - # Extract duration if available - duration_pattern = r"test_duration:\s*([\d.]+)s" - duration_match = re.search(duration_pattern, log_content) - duration = f"{duration_match.group(1)}s" if duration_match else "N/A" - - # Extract GPU architecture from rocEnvTool runtime detection - # Look for pattern: 🔹 Name : gfx942 or Name : gfx942 - gpu_arch_pattern = r"(?:🔹\s*)?Name\s*:\s*(gfx\w+)" - gpu_arch_match = re.search(gpu_arch_pattern, log_content) - gpu_arch = gpu_arch_match.group(1) if gpu_arch_match else "N/A" - - perf_data = { - "model": model_info.get("name"), - "performance": value, - "metric": metric, - "node_id": node_id, - "local_gpus": local_gpus, - "duration": duration, - "gpu_architecture": gpu_arch, - "data_name": "N/A", - "data_provider": "N/A" - } - - return perf_data - def _parse_multiple_results_from_artifacts( self, results_dir: Path, @@ -3968,247 +3809,6 @@ def _resolve_multiple_results_csv( return merged_path return csv_paths[0] - def _determine_aggregation_method(self, metric_name: str) -> str: - """ - Determine how to aggregate a metric based 
on its name/type. - - Args: - metric_name: Name of the performance metric - - Returns: - "sum", "average", "max", or "unknown" - """ - metric_lower = metric_name.lower() - - # Throughput metrics - SUM - if any(keyword in metric_lower for keyword in [ - "throughput", "samples_per_second", "tokens_per_second", - "images_per_second", "requests_per_second", "qps", - "bandwidth", "ops_per_second", "samples/sec", "tokens/sec" - ]): - return "sum" - - # Latency metrics - AVERAGE - elif any(keyword in metric_lower for keyword in [ - "latency", "time", "duration", "milliseconds", "seconds", - "ttft", "tpot", "response_time" - ]): - return "average" - - # Accuracy metrics - AVERAGE - elif any(keyword in metric_lower for keyword in [ - "accuracy", "precision", "recall", "f1", "loss" - ]): - return "average" - - # Memory metrics - MAX - elif any(keyword in metric_lower for keyword in [ - "memory", "bytes", "ram", "vram", "gb", "mb" - ]): - return "max" - - else: - # Unknown - default to sum for throughput-like metrics (conservative) - self.console.print(f"[yellow]⚠ Unknown metric type '{metric_name}', using sum aggregation[/yellow]") - return "sum" - - def _aggregate_node_metrics( - self, - per_node_metrics: List[Dict], - nnodes: int, - launcher_type: str - ) -> Optional[Dict]: - """ - Aggregate per-node metrics into single job-level metric. - - Aggregation Strategy: - - Throughput (samples/sec, tokens/sec, images/sec): SUM - - Latency (ms, seconds): AVERAGE - - Accuracy (%, ratio): AVERAGE or LAST - - Memory (bytes, GB): MAX or SUM - - Args: - per_node_metrics: List of performance dicts from each node - nnodes: Number of nodes - launcher_type: Type of launcher (torchrun, deepspeed, etc.) 
- - Returns: - Dict with aggregated performance data for perf.csv - """ - import statistics - - if not per_node_metrics: - return None - - # Get metric type from first node - first_metric = per_node_metrics[0] - metric_name = first_metric["metric"] - - # Determine aggregation strategy based on metric type - aggregation_method = self._determine_aggregation_method(metric_name) - - if aggregation_method == "sum": - # Sum throughput metrics - aggregated_value = sum(m["performance"] for m in per_node_metrics) - method_desc = "sum_across_nodes" - elif aggregation_method == "average": - # Average latency/accuracy metrics - aggregated_value = statistics.mean(m["performance"] for m in per_node_metrics) - method_desc = "average_across_nodes" - elif aggregation_method == "max": - # Max for memory usage - aggregated_value = max(m["performance"] for m in per_node_metrics) - method_desc = "max_across_nodes" - else: - # Unknown - conservative sum - aggregated_value = sum(m["performance"] for m in per_node_metrics) - method_desc = "sum_across_nodes (default)" - - # Compute statistics for validation - perfs = [m["performance"] for m in per_node_metrics] - if len(perfs) > 1: - statistics_dict = { - "mean": statistics.mean(perfs), - "std_dev": statistics.stdev(perfs), - "min": min(perfs), - "max": max(perfs), - "coefficient_variation": statistics.stdev(perfs) / statistics.mean(perfs) if statistics.mean(perfs) > 0 else 0 - } - else: - statistics_dict = { - "mean": perfs[0], - "std_dev": 0, - "min": perfs[0], - "max": perfs[0], - "coefficient_variation": 0 - } - - # Get GPU architecture from any successful node - gpu_arch = "N/A" - for m in per_node_metrics: - if m.get("gpu_architecture") and m["gpu_architecture"] != "N/A": - gpu_arch = m["gpu_architecture"] - break - - # Get duration (use max across nodes - slowest determines job time) - durations = [m.get("duration", "N/A") for m in per_node_metrics if m.get("duration") != "N/A"] - if durations: - # Extract numeric value and find max 
- duration_values = [] - for d in durations: - if isinstance(d, str) and d.endswith("s"): - try: - duration_values.append(float(d[:-1])) - except ValueError: - pass - duration = f"{max(duration_values):.2f}s" if duration_values else "N/A" - else: - duration = "N/A" - - # Get total GPUs - total_gpus = sum(m.get("local_gpus", 1) for m in per_node_metrics) - gpus_per_node = per_node_metrics[0].get("local_gpus", 1) if per_node_metrics else 1 - - # Build aggregated record (matches perf.csv schema) - aggregated_record = { - "model": first_metric["model"], - "performance": aggregated_value, - "metric": metric_name, - "status": "SUCCESS", - "topology": f"{nnodes}N×{gpus_per_node}G", - "nnodes": nnodes, - "launcher": launcher_type or "N/A", - "deployment_type": "kubernetes", - "gpu_architecture": gpu_arch, - "test_duration": duration, # FIXED: Must match CSV header name - "data_name": first_metric.get("data_name", "N/A"), - "data_provider": first_metric.get("data_provider", "N/A"), - - # NEW: Aggregation metadata (for results_summary.json) - "aggregation_method": method_desc, - "nodes_contributing": len(per_node_metrics), - "per_node_mean": statistics_dict["mean"], - "per_node_std_dev": statistics_dict["std_dev"], - "per_node_cv": statistics_dict["coefficient_variation"] - } - - return aggregated_record - - def _write_to_perf_csv(self, perf_data: Dict): - """ - Write performance data to local perf.csv file. - - Uses the same format as local execution for consistency. - Matches the schema from container_runner.py's create_run_details_dict(). - - When appending to an existing file, uses the file's existing header so that - column order matches (e.g. MAD-private uses a different schema with model/performance - in different positions). Values are mapped by column name so model, performance, - metric, status etc. always land in the correct columns. 
- """ - import csv - from pathlib import Path - - perf_csv_path = Path("perf.csv") - - # CSV headers for NEW files (madengine standard order) - default_headers = [ - "model", - "n_gpus", - "nnodes", - "gpus_per_node", - "training_precision", - "pipeline", - "args", - "tags", - "docker_file", - "base_docker", - "docker_sha", - "docker_image", - "git_commit", - "machine_name", - "deployment_type", - "launcher", - "gpu_architecture", - "performance", - "metric", - "relative_change", - "status", - "build_duration", - "test_duration", - "dataname", - "data_provider_type", - "data_size", - "data_download_duration", - "build_number", - "additional_docker_run_options", - ] - - file_exists = perf_csv_path.exists() - existing_header = None - if file_exists: - # Read existing header so we write in the same column order (fixes MAD-private - # and other repos that use a different perf.csv schema with more/different columns) - with open(perf_csv_path, "r", newline="", encoding="utf-8", errors="replace") as rf: - reader = csv.reader(rf) - existing_header = next(reader, None) - headers = existing_header if existing_header else default_headers - if file_exists and existing_header: - # Build row by name so model/performance/metric/status go to correct columns - row_by_name = {k: perf_data.get(k, "") for k in headers} - row_to_write = [str(row_by_name.get(h, "")) for h in headers] - else: - row_to_write = perf_data - - with open(perf_csv_path, "a", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=headers, extrasaction="ignore") - if not file_exists: - writer.writeheader() - if file_exists and existing_header: - csv.writer(f).writerow(row_to_write) - else: - writer.writerow(row_to_write) - def cleanup(self, deployment_id: str) -> bool: """Delete Job, ConfigMap, Service and associated pods.""" success = True diff --git a/src/madengine/deployment/presets/slurm/defaults.json b/src/madengine/deployment/presets/slurm/defaults.json index cf4ba978..aa98b06f 100644 
--- a/src/madengine/deployment/presets/slurm/defaults.json +++ b/src/madengine/deployment/presets/slurm/defaults.json @@ -13,7 +13,7 @@ "nodes": 1, "gpus_per_node": 8, "time": "24:00:00", - "output_dir": "./slurm_output", + "output_dir": "./slurm_results", "exclusive": true, "modules": [] }, diff --git a/src/madengine/deployment/slurm.py b/src/madengine/deployment/slurm.py index 8a4a1f7b..7849e911 100644 --- a/src/madengine/deployment/slurm.py +++ b/src/madengine/deployment/slurm.py @@ -14,193 +14,16 @@ import os import subprocess from pathlib import Path -from typing import Any, Dict +from typing import Any, Dict, List, Optional -from jinja2 import Environment, FileSystemLoader - -from .base import BaseDeployment, DeploymentConfig, DeploymentResult, DeploymentStatus -from .config_loader import ConfigLoader +from .base import BaseDeployment, DeploymentConfig, DeploymentResult, DeploymentStatus, create_jinja_env +from .common import configure_multi_node_profiling, normalize_launcher +from .config_loader import ConfigLoader, apply_deployment_config from .slurm_node_selector import SlurmNodeSelector from madengine.utils.gpu_config import resolve_runtime_gpus -from typing import Optional - - -# Valid distributed launchers -VALID_LAUNCHERS = [ - "torchrun", - "torchtitan", - "deepspeed", - "megatron-lm", - "vllm", - "sglang", - "sglang-disagg" -] - - -def normalize_launcher(launcher_type: Optional[str], deployment_type: str) -> str: - """ - Normalize launcher field based on deployment type and launcher value. 
- - Logic: - - If launcher is in VALID_LAUNCHERS: keep as-is - - If launcher is None/empty/invalid: - * local → "docker" (runs in Docker container) - * slurm → "docker" (typically uses containers on compute nodes) - * kubernetes → "native" (pod itself is the container) - - Args: - launcher_type: Raw launcher type from config (may be None) - deployment_type: "local", "slurm", or "kubernetes" - - Returns: - Normalized launcher string - """ - # If launcher is valid, keep it - if launcher_type and launcher_type in VALID_LAUNCHERS: - return launcher_type - - # Otherwise, default based on deployment type - if deployment_type == "local": - return "docker" - elif deployment_type == "slurm": - return "docker" - elif deployment_type == "kubernetes": - return "native" - else: - # Fallback for unknown deployment types - return "docker" - - -def is_rocprofv3_available() -> bool: - """ - Check if rocprofv3 is available on the system. - - rocprofv3 is required for multi-node profiling with MPI support. - It's part of rocprofiler-sdk package in ROCm >= 6.4.1. - - Returns: - True if rocprofv3 is available and executable, False otherwise - """ - try: - # Note: rocprofv3 doesn't support --version, use --help instead - result = subprocess.run( - ["rocprofv3", "--help"], - capture_output=True, - text=True, - timeout=5 - ) - return result.returncode == 0 - except (FileNotFoundError, subprocess.TimeoutExpired, OSError): - return False - - -def configure_multi_node_profiling( - nnodes: int, - tools_config: list, - logger -) -> Dict[str, Any]: - """ - Configure profiling for multi-node SLURM runs with rocprofv3 support. - - Industry best practice for multi-node profiling: - - Profile ALL nodes to detect stragglers, load imbalances, and communication bottlenecks - - Use rocprofv3 (MPI-aware) for distributed profiling - - Collect per-node outputs for detailed analysis - - Logic: - 1. Single node (nnodes == 1): Use existing tool behavior - 2. Multi-node (nnodes > 1): - a. 
Check if rocprofv3 is available - b. If available: Enable per-node profiling, upgrade "rocprof" to "rocprofv3" - c. If not available: Log warning and skip profiling - - Args: - nnodes: Number of nodes in the SLURM deployment - tools_config: List of tool configurations from user - logger: Logger instance for messages - - Returns: - Dictionary with profiling configuration: - - enabled: bool - Whether profiling is enabled - - mode: str - "single_node", "multi_node", or "multi_node_unsupported" - - tools: list - Processed tool configurations - - per_node_collection: bool - Whether to collect from all nodes - """ - if nnodes == 1: - # Single node - existing behavior works fine - return { - "enabled": True, - "mode": "single_node", - "tools": tools_config, - "per_node_collection": False - } - - # Multi-node case - check rocprofv3 availability - if not is_rocprofv3_available(): - logger.warning( - "╔════════════════════════════════════════════════════════════════════════════╗\n" - "║ Multi-Node Profiling Requirements Not Met ║\n" - "╠════════════════════════════════════════════════════════════════════════════╣\n" - "║ Multi-node profiling requires rocprofv3 (MPI-aware profiling support). ║\n" - "║ ║\n" - "║ Current Status: rocprofv3 NOT FOUND on system ║\n" - "║ ║\n" - "║ Profiling will be SKIPPED for this multi-node run. 
║\n" - "║ ║\n" - "║ To enable multi-node profiling: ║\n" - "║ • Install rocprofiler-sdk package (ROCm >= 6.4.1) ║\n" - "║ • Command: apt install rocprofiler-sdk ║\n" - "║ • Or upgrade to ROCm 6.4.1 or later ║\n" - "║ ║\n" - "║ Note: Single-node profiling uses rocprof (no rocprofv3 required) ║\n" - "╚════════════════════════════════════════════════════════════════════════════╝" - ) - return { - "enabled": False, - "mode": "multi_node_unsupported", - "tools": [], - "per_node_collection": False - } - - # rocprofv3 is available - enable full multi-node profiling - logger.info(f"✓ Multi-node profiling enabled for {nnodes} nodes (rocprofv3 detected)") - - # Upgrade "rocprof" tools to "rocprofv3" for multi-node compatibility - upgraded_tools = [] - rocprof_upgraded = False - - for tool in tools_config: - tool_name = tool.get("name") if isinstance(tool, dict) else None - - if tool_name == "rocprof": - # Upgrade to rocprofv3 for multi-node MPI support - logger.info( - f" → Upgrading 'rocprof' to 'rocprofv3' for multi-node MPI compatibility" - ) - upgraded_tool = tool.copy() if isinstance(tool, dict) else {"name": "rocprofv3"} - upgraded_tool["name"] = "rocprofv3" - upgraded_tools.append(upgraded_tool) - rocprof_upgraded = True - else: - upgraded_tools.append(tool) - - # Log profiling tools being used - if upgraded_tools: - tool_names = [t.get("name") if isinstance(t, dict) else str(t) for t in upgraded_tools] - logger.info(f" → Multi-node profiling tools: {', '.join(filter(None, tool_names))}") - - # Highlight RCCL trace if present (critical for multi-node communication) - if "rccl_trace" in tool_names: - logger.info(" → ✓ rccl_trace enabled (critical for multi-node communication profiling)") - - return { - "enabled": True, - "mode": "multi_node", - "tools": upgraded_tools, - "per_node_collection": True, - "profiler": "rocprofv3", - "wrapper_mode": "launcher" - } +from madengine.utils.run_details import get_build_number, get_pipeline +from madengine.utils.path_utils import 
scripts_base_dir_from +import json class SlurmDeployment(BaseDeployment): @@ -232,11 +55,7 @@ def __init__(self, config: DeploymentConfig): Args: config: Deployment configuration """ - # Apply intelligent defaults using ConfigLoader - # This merges built-in presets with user configuration - full_config = ConfigLoader.load_slurm_config(config.additional_context) - config.additional_context = full_config - + apply_deployment_config(config, ConfigLoader.load_slurm_config) super().__init__(config) # Parse SLURM configuration (now with defaults applied) @@ -248,15 +67,11 @@ def __init__(self, config: DeploymentConfig): self.nodes = self.slurm_config.get("nodes", 1) self.gpus_per_node = self.slurm_config.get("gpus_per_node", 8) self.time_limit = self.slurm_config.get("time", "24:00:00") - self.output_dir = Path(self.slurm_config.get("output_dir", "./slurm_output")) + self.output_dir = Path(self.slurm_config.get("output_dir", "./slurm_results")) # Setup Jinja2 template engine template_dir = Path(__file__).parent / "templates" / "slurm" - self.jinja_env = Environment(loader=FileSystemLoader(str(template_dir))) - - # Register custom Jinja2 filters - self.jinja_env.filters['dirname'] = lambda path: str(Path(path).parent) - self.jinja_env.filters['basename'] = lambda path: str(Path(path).name) + self.jinja_env = create_jinja_env(template_dir) # Generated script path self.script_path = None @@ -487,6 +302,7 @@ def debug(self, msg): "timeout": self.config.timeout, "live_output": self.config.additional_context.get("live_output", False), "tags": " ".join(model_info.get("tags", [])), + "multiple_results": model_info.get("multiple_results"), "credential_file": "credential.json" if Path("credential.json").exists() else None, @@ -852,42 +668,69 @@ def deploy(self) -> DeploymentResult: ) # ==================== PREFLIGHT NODE SELECTION ==================== - # For multi-node jobs with Ray/vLLM, check for clean nodes first - # to avoid OOM errors from stale processes + # For single- and 
multi-node jobs, check for clean nodes and exclude bad ones. + # Single-node: we still run the check so bad nodes (e.g. Docker broken) get excluded; + # we never gate submission for nodes==1 so behavior stays backward compatible. + # Health-check srun invocations create SLURM jobs; we cancel them after preflight. enable_preflight = self.slurm_config.get("enable_node_check", True) auto_cleanup = self.slurm_config.get("auto_cleanup_nodes", False) - - if enable_preflight and self.nodes > 1 and not self.slurm_config.get("nodelist"): + allow_submit_without_clean = self.slurm_config.get("allow_submit_without_clean_nodes", False) + clean_nodes: List[str] = [] + health_check_job_name: Optional[str] = None + + if enable_preflight and self.nodes >= 1 and not self.slurm_config.get("nodelist"): try: selector = SlurmNodeSelector( console=self.console, auto_cleanup=auto_cleanup, verbose=self.slurm_config.get("verbose_node_check", False), ) - - # Select clean nodes and get updated exclude list clean_nodes, updated_exclude = selector.select_nodes( partition=self.partition, nodes_needed=self.nodes, exclude=self.slurm_config.get("exclude"), constraint=self.slurm_config.get("constraint"), ) - - # Update exclude list if dirty nodes found + health_check_job_name = getattr(selector, "_health_check_job_name", None) + + # Update exclude list if we found dirty/unreachable/unknown nodes if updated_exclude and updated_exclude != self.slurm_config.get("exclude", ""): self.console.print( f"[dim]Updated exclude list for sbatch: {updated_exclude}[/dim]\n" ) - # Re-generate script with updated exclude list self.slurm_config["exclude"] = updated_exclude - self.prepare() # Re-generate sbatch script - + self.prepare() + + # Gate: do not submit if not enough clean nodes (multi-node only; single-node always allowed) + if ( + self.nodes > 1 + and not allow_submit_without_clean + and len(clean_nodes) < self.nodes + ): + SlurmNodeSelector.cancel_health_check_jobs(health_check_job_name, self.console) + 
return DeploymentResult( + status=DeploymentStatus.FAILED, + deployment_id="", + message=( + f"Not enough clean nodes: need {self.nodes}, found {len(clean_nodes)}. " + "Set slurm.allow_submit_without_clean_nodes=true to submit anyway." + ), + ) + + # When we have enough clean nodes, pin the job to them via nodelist + if len(clean_nodes) >= self.nodes: + nodelist_str = ",".join(clean_nodes[: self.nodes]) + self.slurm_config["nodelist"] = nodelist_str + self.console.print(f"[dim]Using nodelist: {nodelist_str}[/dim]\n") + self.prepare() except Exception as e: - # Don't fail deployment if preflight fails self.console.print( f"[yellow]⚠ Node health check failed: {e}[/yellow]" ) self.console.print("[dim]Continuing with job submission[/dim]\n") + finally: + # Always cancel health-check jobs so they do not stay in the queue + SlurmNodeSelector.cancel_health_check_jobs(health_check_job_name, self.console) # ==================== END PREFLIGHT ==================== try: @@ -1004,7 +847,7 @@ def _stream_job_output(self, job_id: str, final: bool = False): self._output_positions = {} # Find output file - output_dir = self.slurm_config.get("output_dir", "./slurm_output") + output_dir = str(self.output_dir) output_pattern = f"{output_dir}/madengine-*_{job_id}_*.out" try: @@ -1045,7 +888,7 @@ def _stream_job_output(self, job_id: str, final: bool = False): def _show_log_summary(self, job_id: str, success: bool = True): """Show a summary with pointers to log files instead of streaming verbose output.""" - output_dir = self.slurm_config.get("output_dir", "./slurm_output") + output_dir = str(self.output_dir) try: import glob @@ -1143,23 +986,128 @@ def _check_job_completion(self, job_id: str) -> DeploymentResult: message=f"Job {job_id} completed (status unavailable)", ) + def _build_perf_entry_from_aggregated( + self, + aggregated_record: Dict[str, Any], + model_info: Dict[str, Any], + build_info: Dict[str, Any], + deployment_id: str, + ) -> Dict[str, Any]: + """ + Build a full 
run-details dict (same shape as container_runner create_run_details_dict) + from an aggregated multi-node record or single-node parsed record, for use with + update_perf_csv and update_perf_super_*. + """ + from madengine.reporting.update_perf_csv import flatten_tags + from madengine.utils.config_parser import ConfigParser + + launcher_type = self.distributed_config.get("launcher", "torchrun") + launcher = normalize_launcher(launcher_type, "slurm") + + run_details = { + "model": model_info.get("name", aggregated_record.get("model", "")), + "n_gpus": str(aggregated_record.get("n_gpus", self.nodes * self.gpus_per_node)), + "nnodes": str(aggregated_record.get("nnodes", self.nodes)), + "gpus_per_node": str(aggregated_record.get("gpus_per_node", self.gpus_per_node)), + "training_precision": model_info.get("training_precision", ""), + "pipeline": get_pipeline(), + "args": model_info.get("args", ""), + "tags": model_info.get("tags", ""), + "docker_file": build_info.get("dockerfile", ""), + "base_docker": build_info.get("base_docker", ""), + "docker_sha": build_info.get("docker_sha", ""), + "docker_image": build_info.get("docker_image", ""), + "git_commit": "", + "machine_name": deployment_id, + "deployment_type": self.DEPLOYMENT_TYPE, + "launcher": launcher, + "gpu_architecture": aggregated_record.get("gpu_architecture", ""), + "performance": str(aggregated_record.get("performance", "")), + "metric": aggregated_record.get("metric", ""), + "relative_change": "", + "status": aggregated_record.get("status", "SUCCESS"), + "build_duration": build_info.get("build_duration", ""), + "test_duration": aggregated_record.get("test_duration", ""), + "dataname": aggregated_record.get("data_name", ""), + "data_provider_type": aggregated_record.get("data_provider", ""), + "data_size": "", + "data_download_duration": "", + "build_number": get_build_number(), + "additional_docker_run_options": model_info.get("additional_docker_run_options", ""), + } + flatten_tags(run_details) + + # 
Configs for perf_super (optional) + try: + scripts_path = model_info.get("scripts", "") + scripts_base_dir = scripts_base_dir_from(scripts_path) + config_parser = ConfigParser(scripts_base_dir=scripts_base_dir) + run_details["configs"] = config_parser.parse_and_load( + model_info.get("args", ""), scripts_path + ) + except Exception: + run_details["configs"] = None + + return run_details + + def _build_common_info_dict( + self, + model_info: Dict[str, Any], + build_info: Dict[str, Any], + deployment_id: str, + gpu_architecture: str = "", + ) -> Dict[str, Any]: + """Build common_info dict for update_perf_csv/update_perf_super (multiple_results path).""" + from madengine.reporting.update_perf_csv import flatten_tags + + launcher_type = self.distributed_config.get("launcher", "torchrun") + launcher = normalize_launcher(launcher_type, "slurm") + total_gpus = self.nodes * self.gpus_per_node + result = { + "n_gpus": str(total_gpus), + "nnodes": str(self.nodes), + "gpus_per_node": str(self.gpus_per_node), + "training_precision": model_info.get("training_precision", ""), + "pipeline": get_pipeline(), + "args": model_info.get("args", ""), + "tags": model_info.get("tags", ""), + "docker_file": build_info.get("dockerfile", ""), + "base_docker": build_info.get("base_docker", ""), + "docker_sha": build_info.get("docker_sha", ""), + "docker_image": build_info.get("docker_image", ""), + "git_commit": "", + "machine_name": deployment_id, + "deployment_type": self.DEPLOYMENT_TYPE, + "launcher": launcher, + "gpu_architecture": gpu_architecture, + "relative_change": "", + "build_duration": build_info.get("build_duration", ""), + "test_duration": "", + "dataname": model_info.get("data", ""), + "data_provider_type": "", + "data_size": "", + "data_download_duration": "", + "build_number": get_build_number(), + "additional_docker_run_options": model_info.get("additional_docker_run_options", ""), + } + flatten_tags(result) + return result + def collect_results(self, deployment_id: str) -> 
Dict[str, Any]: """Collect performance results from SLURM output files. - - NOTE: Current implementation works with single-node jobs where perf.csv - is written to shared storage. For multi-node jobs with per-node metrics, - this would need enhancement to: - 1. Read all node output files (madengine-*_jobid_noderank.out) - 2. Parse per-node metrics from each file - 3. Aggregate using _aggregate_node_metrics() (similar to kubernetes.py) - 4. Write aggregated result to perf.csv - - Args: - deployment_id: SLURM job ID + + Option (1): slurm_results holds only collected inputs; login node builds one run + record and runs the same reporting pipeline as local (perf_entry -> update_perf_csv + / update_perf_super_*) so project root has a single cumulative perf for both local + and distributed runs. """ - # Get session_start_row from config (passed from orchestrator) + from madengine.reporting.update_perf_csv import update_perf_csv + from madengine.reporting.update_perf_super import ( + update_perf_super_csv, + update_perf_super_json, + ) + session_start_row = self.config.additional_context.get("session_start_row") - results = { "job_id": deployment_id, "nodes": self.nodes, @@ -1168,86 +1116,368 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]: "logs": [], "successful_runs": [], "failed_runs": [], - "session_start_row": session_start_row, # Track for downstream filtering + "session_start_row": session_start_row, } - try: - # Find output files - output_pattern = f"madengine-*_{deployment_id}_*.out" - output_files = list(self.output_dir.glob(output_pattern)) - - results["logs"] = [str(f) for f in output_files] - - # Find performance CSV files - # Strategy 1: Check results_dir if configured - if self.slurm_config.get("results_dir"): - results_dir = Path(self.slurm_config["results_dir"]) - perf_pattern = f"perf_{deployment_id}_*.csv" - perf_files = list(results_dir.glob(perf_pattern)) - results["perf_files"] = [str(f) for f in perf_files] - - # Strategy 2: Check 
shared workspace (NFS) for perf.csv - # When using shared storage, perf.csv is written directly to workspace - if not results["perf_files"]: + model_keys = list(self.manifest.get("built_models") or {}) + model_key = model_keys[0] if model_keys else None + # Use logical model name for job_dir so it matches the task script (which uses model_info["name"]). + # built_models is keyed by image name; value has "name" = logical model name. + built_models_dict = self.manifest.get("built_models") or {} + model_info_for_path = built_models_dict.get(model_key, {}) if model_key else {} + model_name_for_path = model_info_for_path.get("name", model_key or "unknown") + model_name = model_key or "unknown" # image key for build_info / model_info_for_entry lookups + + build_info = {} + built_images = self.manifest.get("built_images") or {} + if built_images: + # First image or one keyed by model key (image name) + if model_name in built_images: + build_info = built_images[model_name] + else: + build_info = next(iter(built_images.values()), {}) + + job_dir = self.output_dir / model_name_for_path / deployment_id + job_dir.mkdir(parents=True, exist_ok=True) + + # Gather log content per node: from job_dir/node_N/ (new) or flat output_dir .out files + per_node_log_contents: List[tuple] = [] + flat_out_files = sorted(self.output_dir.glob(f"madengine-*_{deployment_id}_*.out")) + # Multi-node: only use explicit node logs (_node_N.out) to avoid also picking up + # SBATCH %t output (madengine-*__0.out, _1.out), which would duplicate metrics. 
+ if self.nodes > 1: + flat_out_files = [f for f in flat_out_files if "_node_" in f.name] + results["logs"] = [str(f) for f in flat_out_files] + + for i, out_path in enumerate(flat_out_files): + content = out_path.read_text(encoding="utf-8", errors="ignore") + per_node_log_contents.append((i, content)) + + # If we have node subdirs (multi-node job script wrote them), prefer stdout.out there + for node_dir in sorted(job_dir.glob("node_*")): + stdout_path = node_dir / "stdout.out" + if stdout_path.exists(): + try: + idx = int(node_dir.name.replace("node_", "")) + if idx >= self.nodes: + continue # ignore stale node_N dirs from previous runs + content = stdout_path.read_text(encoding="utf-8", errors="ignore") + # Replace or append for this index + per_node_log_contents = [ + (n, c) for n, c in per_node_log_contents if n != idx + ] + per_node_log_contents.append((idx, content)) + per_node_log_contents.sort(key=lambda x: x[0]) + except (ValueError, OSError): + pass + + # Multi-node: keep only log entries for actual node indices [0, nodes-1] + if self.nodes > 1: + per_node_log_contents = [(n, c) for n, c in per_node_log_contents if n < self.nodes] + + # Copy flat logs into job_dir/node_/ for consistency if not already there. + # Only create dirs for indices in [0, nodes-1] so we never create extra node_2, etc. 
+ for idx, content in per_node_log_contents: + if idx >= self.nodes: + continue + node_subdir = job_dir / f"node_{idx}" + node_subdir.mkdir(parents=True, exist_ok=True) + if not (node_subdir / "stdout.out").exists(): + (node_subdir / "stdout.out").write_text(content) + + # Parse performance from each node's log + all_parsed: List[Dict[str, Any]] = [] + for idx, content in sorted(per_node_log_contents, key=lambda x: x[0]): + perf_data = self._parse_performance_from_log(content, model_name) + if perf_data: + all_parsed.append(perf_data) + + # Deduplicate by node_id so each node is only counted once (avoids double-counting + # when multiple log sources exist for the same node, e.g. SBATCH vs node_*.out). + per_node_metrics: List[Dict[str, Any]] = [] + if self.nodes > 1 and all_parsed: + seen_node_ids: set = set() + for m in all_parsed: + nid = m.get("node_id") + if nid is not None and nid in seen_node_ids: + continue + if nid is not None: + seen_node_ids.add(nid) + per_node_metrics.append(m) + self.console.print( + f"[dim] Parsed node: {m.get('performance')} " + f"{m.get('metric', '')} (node_id={m.get('node_id')})[/dim]" + ) + else: + per_node_metrics = all_parsed + for m in per_node_metrics: + self.console.print( + f"[dim] Parsed node: {m.get('performance')} " + f"{m.get('metric', '')} (node_id={m.get('node_id')})[/dim]" + ) + + run_details_dict: Optional[Dict[str, Any]] = None + model_info_for_entry = (self.manifest.get("built_models") or {}).get( + model_key, {} + ) if model_key else {} + + # Multiple results path: resolve CSV from job_dir/node_*, then cwd/run_directory + mult_res = model_info_for_entry.get("multiple_results") + if mult_res: + resolved_csv: Optional[Path] = None + if (job_dir / mult_res).is_file(): + resolved_csv = job_dir / mult_res + else: + for i in range(self.nodes): + candidate = job_dir / f"node_{i}" / mult_res + if candidate.is_file(): + resolved_csv = candidate + break + if not resolved_csv and Path(mult_res).is_file(): + resolved_csv = 
Path(mult_res) + if not resolved_csv and Path("run_directory", mult_res).is_file(): + resolved_csv = Path("run_directory", mult_res) + if resolved_csv: + self._ensure_perf_csv_exists() + gpu_arch = "" + if per_node_metrics: + gpu_arch = per_node_metrics[0].get("gpu_architecture", "") or "" + common_info = self._build_common_info_dict( + model_info_for_entry, build_info, deployment_id, gpu_arch + ) + common_info_path = Path("common_info.json") + with open(common_info_path, "w", encoding="utf-8") as f: + json.dump(common_info, f, indent=2) + update_perf_csv( + perf_csv="perf.csv", + multiple_results=str(resolved_csv), + common_info=str(common_info_path), + model_name=model_info_for_entry.get("name", model_name), + ) + scripts_path = model_info_for_entry.get("scripts", "") + scripts_base_dir = scripts_base_dir_from(scripts_path) + num_entries = update_perf_super_json( + perf_super_json="perf_super.json", + multiple_results=str(resolved_csv), + common_info=str(common_info_path), + model_name=model_info_for_entry.get("name", model_name), + scripts_base_dir=scripts_base_dir, + ) + update_perf_super_csv( + perf_super_json="perf_super.json", + perf_super_csv="perf_super.csv", + num_entries=num_entries, + ) + results["perf_files"] = [str(Path("perf.csv").resolve())] + import csv as _csv + try: + with open(resolved_csv, "r", encoding="utf-8", errors="ignore") as f: + reader = _csv.DictReader(f) + for row in reader: + row = {k.strip(): v for k, v in row.items() if k} + if row.get("performance") and row.get("metric"): + results["successful_runs"].append({ + "model": model_info_for_entry.get("name", "") + "_" + row.get("model", ""), + "status": "SUCCESS", + "performance": str(row.get("performance", "")), + "metric": row.get("metric", ""), + "duration": row.get("test_duration", ""), + "gpu_arch": gpu_arch, + "deployment": "slurm", + "machine": deployment_id, + }) + except Exception: + pass + self.console.print( + f"[green]✓ Updated perf.csv, perf_super.* from multiple_results 
(Docker-compatible)[/green]" + ) + return results + # multiple_results set but CSV not found: fall through to single-result path (may write FAILURE) + + if self.nodes > 1 and per_node_metrics: + launcher_type = self.distributed_config.get("launcher", "torchrun") + aggregated = self._aggregate_node_metrics( + per_node_metrics, self.nodes, launcher_type + ) + if aggregated and model_info_for_entry: + run_details_dict = self._build_perf_entry_from_aggregated( + aggregated, model_info_for_entry, build_info, deployment_id + ) + self.console.print( + f"[green]✓ Aggregated from {len(per_node_metrics)} nodes " + f"({aggregated.get('aggregation_method', '')}): " + f"{aggregated.get('performance')} {aggregated.get('metric', '')}[/green]" + ) + elif self.nodes == 1 and per_node_metrics and model_info_for_entry: + single = per_node_metrics[0] + single_record = { + "model": single.get("model", model_name), + "n_gpus": self.gpus_per_node, + "nnodes": 1, + "gpus_per_node": self.gpus_per_node, + "performance": single.get("performance"), + "metric": single.get("metric", ""), + "status": "SUCCESS", + "test_duration": single.get("test_duration", ""), + "gpu_architecture": single.get("gpu_architecture", ""), + "data_name": single.get("data_name", ""), + "data_provider": single.get("data_provider", ""), + } + run_details_dict = self._build_perf_entry_from_aggregated( + single_record, model_info_for_entry, build_info, deployment_id + ) + elif self.nodes == 1: + # Single-node but no parsed metric (log parse failed or run failed before metrics). + # In-job run skips perf write (skip_perf_collection); write a FAILURE row so run appears in perf. 
+ if model_info_for_entry: + single_record = { + "model": model_info_for_entry.get("name", model_name), + "n_gpus": self.gpus_per_node, + "nnodes": 1, + "gpus_per_node": self.gpus_per_node, + "performance": "", + "metric": "", + "status": "FAILURE", + "test_duration": "", + "gpu_architecture": "", + "data_name": "", + "data_provider": "", + } + run_details_dict = self._build_perf_entry_from_aggregated( + single_record, model_info_for_entry, build_info, deployment_id + ) + else: workspace_perf = Path("perf.csv") if workspace_perf.exists(): results["perf_files"] = [str(workspace_perf)] - self.console.print("[dim]Note: Using perf.csv from shared workspace[/dim]") - - # Parse perf.csv to populate successful_runs and failed_runs - # Filter based on session_start_row passed as parameter (no external files!) - if results["perf_files"]: - perf_file = Path(results["perf_files"][0]) - try: - import csv - - with open(perf_file, 'r') as f: - reader = csv.DictReader(f) - rows = list(reader) - - # Filter to only include rows from current session if session_start_row provided - if session_start_row is not None and session_start_row < len(rows): - rows = rows[session_start_row:] - self.console.print(f"[cyan]📊 Filtered to current session: {len(rows)} runs (from row {session_start_row} of {len(rows) + session_start_row} total)[/cyan]") - elif session_start_row is not None: - # Session start equals or exceeds current rows - no new runs yet - self.console.print(f"[yellow]⚠️ No new runs in this session (session started at row {session_start_row}, CSV has {len(rows)} rows)[/yellow]") - rows = [] - else: - # No session info provided - show all rows (for backward compatibility) - self.console.print(f"[dim]Showing all {len(rows)} runs from perf.csv (no session filtering)[/dim]") - - for row in rows: - run_data = { - "model": row.get("model", ""), - "status": row.get("status", ""), - "performance": row.get("performance", ""), - "metric": row.get("metric", ""), - "duration": 
row.get("test_duration", ""), - "gpu_arch": row.get("gpu_architecture", ""), - "deployment": row.get("deployment_type", ""), - "machine": row.get("machine_name", ""), - } - - if row.get("status") == "SUCCESS": - results["successful_runs"].append(run_data) - else: - results["failed_runs"].append(run_data) - except Exception as parse_error: - import traceback - self.console.print(f"[red]ERROR parsing perf.csv: {parse_error}[/red]") - self.console.print(f"[dim]{traceback.format_exc()}[/dim]") + self.console.print( + f"[green]✓ Collected results: {len(results['perf_files'])} perf files, " + f"{len(results['logs'])} log files[/green]" + ) + self._collect_results_parse_perf_csv(results, session_start_row) + return results + else: + # Multi-node but no metrics parsed - optional failure record + if per_node_metrics and model_info_for_entry: + launcher_type = self.distributed_config.get("launcher", "torchrun") + aggregated = self._aggregate_node_metrics( + per_node_metrics, self.nodes, launcher_type + ) + if aggregated: + aggregated["status"] = "FAILURE" + run_details_dict = self._build_perf_entry_from_aggregated( + aggregated, model_info_for_entry, build_info, deployment_id + ) - self.console.print( - f"[green]✓ Collected results: {len(results['perf_files'])} perf files, " - f"{len(results['logs'])} log files[/green]" + if run_details_dict is not None: + perf_entry_path = Path("perf_entry.json") + with open(perf_entry_path, "w") as f: + json.dump(run_details_dict, f, indent=2) + perf_csv_path = "perf.csv" + self._ensure_perf_csv_exists() + if run_details_dict.get("status") == "SUCCESS": + update_perf_csv(perf_csv=perf_csv_path, single_result=str(perf_entry_path)) + else: + update_perf_csv(perf_csv=perf_csv_path, exception_result=str(perf_entry_path)) + try: + scripts_path = model_info_for_entry.get("scripts", "") + scripts_base_dir = scripts_base_dir_from(scripts_path) + if run_details_dict.get("status") == "SUCCESS": + num_entries = update_perf_super_json( + 
single_result=str(perf_entry_path), + perf_super_json="perf_super.json", + scripts_base_dir=scripts_base_dir, + ) + else: + num_entries = update_perf_super_json( + exception_result=str(perf_entry_path), + perf_super_json="perf_super.json", + scripts_base_dir=scripts_base_dir, + ) + update_perf_super_csv( + perf_super_json="perf_super.json", + perf_super_csv="perf_super.csv", + num_entries=num_entries, + ) + except Exception as e: + self.console.print(f"[yellow]⚠ Could not update perf_super: {e}[/yellow]") + results["perf_files"] = [str(Path(perf_csv_path).resolve())] + run_data = { + "model": run_details_dict.get("model", ""), + "status": run_details_dict.get("status", ""), + "performance": str(run_details_dict.get("performance", "")), + "metric": run_details_dict.get("metric", ""), + "duration": run_details_dict.get("test_duration", ""), + "gpu_arch": run_details_dict.get("gpu_architecture", ""), + "deployment": run_details_dict.get("deployment_type", ""), + "machine": run_details_dict.get("machine_name", ""), + } + if run_details_dict.get("status") == "SUCCESS": + results["successful_runs"].append(run_data) + else: + results["failed_runs"].append(run_data) + summary = { + "job_id": deployment_id, + "model": model_name, + "nodes": self.nodes, + "per_node_metrics": per_node_metrics, + "final_metric": run_details_dict.get("metric", ""), + "final_performance": str(run_details_dict.get("performance", "")), + "perf_entry_path": str(perf_entry_path), + } + (job_dir / "results_summary.json").write_text( + json.dumps(summary, indent=2), encoding="utf-8" ) - except Exception as e: - self.console.print(f"[yellow]⚠ Results collection incomplete: {e}[/yellow]") - + if not results["perf_files"]: + workspace_perf = Path("perf.csv") + if workspace_perf.exists(): + results["perf_files"] = [str(workspace_perf)] + # When we already appended the current run from run_details_dict, skip re-parsing + # the whole perf.csv so Execution Results shows only the current run. 
+ if run_details_dict is None: + self._collect_results_parse_perf_csv(results, session_start_row) + self.console.print( + f"[green]✓ Collected results: {len(results['perf_files'])} perf files, " + f"{len(results['logs'])} log files[/green]" + ) return results + def _collect_results_parse_perf_csv( + self, results: Dict[str, Any], session_start_row: Optional[int] + ) -> None: + """Parse perf.csv to populate results['successful_runs'] and results['failed_runs'].""" + if not results.get("perf_files"): + return + import csv + + perf_file = Path(results["perf_files"][0]) + try: + with open(perf_file, "r") as f: + reader = csv.DictReader(f) + rows = list(reader) + if session_start_row is not None and session_start_row < len(rows): + rows = rows[session_start_row:] + elif session_start_row is not None and session_start_row >= len(rows): + rows = [] + for row in rows: + run_data = { + "model": row.get("model", ""), + "status": row.get("status", ""), + "performance": row.get("performance", ""), + "metric": row.get("metric", ""), + "duration": row.get("test_duration", ""), + "gpu_arch": row.get("gpu_architecture", ""), + "deployment": row.get("deployment_type", ""), + "machine": row.get("machine_name", ""), + } + if row.get("status") == "SUCCESS": + results["successful_runs"].append(run_data) + else: + results["failed_runs"].append(run_data) + except Exception as e: + self.console.print(f"[yellow]⚠ Could not parse perf.csv: {e}[/yellow]") + def cleanup(self, deployment_id: str) -> bool: """Cancel SLURM job if still running (locally).""" try: diff --git a/src/madengine/deployment/slurm_node_selector.py b/src/madengine/deployment/slurm_node_selector.py index b52f53d7..408e8d3c 100644 --- a/src/madengine/deployment/slurm_node_selector.py +++ b/src/madengine/deployment/slurm_node_selector.py @@ -10,6 +10,7 @@ Copyright (c) Advanced Micro Devices, Inc. All rights reserved. 
""" +import os import subprocess import time from dataclasses import dataclass @@ -86,6 +87,9 @@ def __init__( self.verbose = verbose self.timeout = timeout + # Max candidates to check (avoids excessive checks on large clusters) + MAX_CANDIDATES_CAP = 100 + def get_candidate_nodes( self, partition: str, @@ -94,16 +98,16 @@ def get_candidate_nodes( constraint: Optional[str] = None, ) -> Optional[List[str]]: """ - Query SLURM for candidate nodes in partition. - + Query SLURM for idle candidate nodes in partition. + Args: partition: SLURM partition name - count: Number of nodes needed + count: Number of nodes needed (used for optional cap) exclude: Comma-separated nodes to exclude constraint: SLURM constraint filter - + Returns: - List of candidate node names (2x count for redundancy) + List of idle node names (all idle, up to MAX_CANDIDATES_CAP) """ cmd = [ "sinfo", @@ -111,12 +115,12 @@ def get_candidate_nodes( "-N", # Node-oriented format "-h", # No header "-o", "%N", # Node name only - "-t", "idle,alloc,mix", # Available states + "-t", "idle", # Idle nodes only ] - + if constraint: cmd.extend(["-C", constraint]) - + try: result = subprocess.run( cmd, @@ -124,32 +128,30 @@ def get_candidate_nodes( text=True, timeout=10, ) - + if result.returncode != 0: if self.verbose: self.console.print( f"[yellow]⚠ sinfo failed: {result.stderr}[/yellow]" ) return None - + # Parse nodes all_nodes = set() for line in result.stdout.strip().split('\n'): line = line.strip() if line: - # Handle node ranges like "node[01-04]" all_nodes.add(line) - + # Remove excluded nodes if exclude: excluded = set(exclude.split(',')) all_nodes -= excluded - - # Return 2x count for redundancy (check more nodes than needed) - candidates = sorted(list(all_nodes))[:(count * 2)] - + + # Return all idle nodes, capped to avoid excessive checks + candidates = sorted(list(all_nodes))[: self.MAX_CANDIDATES_CAP] return candidates - + except subprocess.TimeoutExpired: self.console.print("[yellow]⚠ sinfo timed 
out[/yellow]") return None @@ -158,16 +160,17 @@ def get_candidate_nodes( self.console.print(f"[yellow]⚠ Query failed: {e}[/yellow]") return None - def check_node_health(self, node: str) -> NodeStatus: + def check_node_health(self, node: str, job_name: Optional[str] = None) -> NodeStatus: """ Check GPU health on a node using srun. - + Uses srun to execute GPU check on the node without SSH. Checks for stale Ray/vLLM processes and GPU memory usage. - + Args: node: Node name to check - + job_name: Optional SLURM job name for this srun (enables cleanup of health-check jobs) + Returns: NodeStatus with health information """ @@ -196,19 +199,21 @@ def check_node_health(self, node: str) -> NodeStatus: ps aux | grep -E "(ray::|RayWorkerWrapper|raylet|vllm)" | grep -v grep || echo "NO_PROCESSES" echo "===END_PROCESSES===" """ - + srun_cmd = [ + "srun", + f"--nodelist={node}", + "--ntasks=1", + "--time=00:01:00", + "--overlap", # Allow overlap with running jobs + "--quiet", + ] + if job_name: + srun_cmd.append(f"--job-name={job_name}") + srun_cmd.extend(["bash", "-c", check_script]) + try: - # Use srun to execute check on specific node result = subprocess.run( - [ - "srun", - f"--nodelist={node}", - "--ntasks=1", - "--time=00:01:00", - "--overlap", # Allow overlap with running jobs - "--quiet", - "bash", "-c", check_script - ], + srun_cmd, capture_output=True, text=True, timeout=self.timeout, @@ -279,13 +284,14 @@ def check_node_health(self, node: str) -> NodeStatus: error_message=str(e)[:100], ) - def cleanup_node(self, node: str) -> bool: + def cleanup_node(self, node: str, job_name: Optional[str] = None) -> bool: """ Clean up stale processes on a node using srun. 
- + Args: node: Node name to clean - + job_name: Optional SLURM job name for this srun (enables cleanup of health-check jobs) + Returns: True if cleanup successful """ @@ -307,18 +313,21 @@ def cleanup_node(self, node: str) -> bool: echo "CLEANUP_OK" """ - + srun_cmd = [ + "srun", + f"--nodelist={node}", + "--ntasks=1", + "--time=00:01:00", + "--overlap", + "--quiet", + ] + if job_name: + srun_cmd.append(f"--job-name={job_name}") + srun_cmd.extend(["bash", "-c", cleanup_script]) + try: result = subprocess.run( - [ - "srun", - f"--nodelist={node}", - "--ntasks=1", - "--time=00:01:00", - "--overlap", - "--quiet", - "bash", "-c", cleanup_script - ], + srun_cmd, capture_output=True, text=True, timeout=self.timeout, @@ -345,16 +354,16 @@ def select_nodes( ) -> Tuple[List[str], str]: """ Select clean nodes for SLURM job. - - This is the main entry point. Checks candidate nodes and returns - a list of clean nodes plus an updated exclude list. - + + Checks idle nodes on-demand and stops as soon as enough clean nodes + are found. Excludes dirty, unreachable, and unknown nodes from allocation. 
+ Args: partition: SLURM partition name nodes_needed: Number of nodes required for job exclude: Current exclude list (comma-separated) constraint: SLURM constraint filter - + Returns: Tuple of (clean_nodes, updated_exclude_list) - clean_nodes: List of clean node names (may be empty) @@ -365,28 +374,31 @@ def select_nodes( f"Partition: [cyan]{partition}[/cyan] | " f"Nodes needed: [cyan]{nodes_needed}[/cyan]\n" ) - - # Get candidate nodes + + # Unique job name for all health-check srun invocations (enables cleanup) + self._health_check_job_name = f"madengine_nodecheck_{os.getpid()}_{int(time.time())}" + + # Get all idle candidate nodes candidates = self.get_candidate_nodes(partition, nodes_needed, exclude, constraint) - + if not candidates: self.console.print( "[yellow]⚠ Cannot query candidate nodes, skipping preflight check[/yellow]\n" ) + self._health_check_job_name = None return [], exclude or "" - + if self.verbose: - self.console.print(f"[dim]Checking {len(candidates)} candidate nodes...[/dim]\n") - - # Check health of each candidate - statuses = [] + self.console.print(f"[dim]Idle candidates: {len(candidates)} (checking on-demand until {nodes_needed} clean)[/dim]\n") + + # On-demand check: stop as soon as we have enough clean nodes + statuses: List[NodeStatus] = [] + clean_nodes: List[str] = [] for node in candidates: if self.verbose: self.console.print(f" Checking {node}...", end="") - - status = self.check_node_health(node) + status = self.check_node_health(node, job_name=self._health_check_job_name) statuses.append(status) - if self.verbose: emoji = { NodeHealth.CLEAN: "✓", @@ -395,56 +407,61 @@ def select_nodes( NodeHealth.UNKNOWN: "?", }[status.health] self.console.print(f" {emoji}") - - # Display summary table + if status.health == NodeHealth.CLEAN: + clean_nodes.append(node) + if len(clean_nodes) >= nodes_needed: + break + + # Display summary table (only nodes we checked) self._display_status_table(statuses) - - # Identify dirty nodes + + # Nodes to 
exclude: DIRTY, UNREACHABLE, and UNKNOWN dirty_nodes = [s for s in statuses if s.health == NodeHealth.DIRTY] - clean_nodes = [s.node for s in statuses if s.health == NodeHealth.CLEAN] - - # Handle dirty nodes + unreachable_nodes = [s for s in statuses if s.health == NodeHealth.UNREACHABLE] + unknown_nodes = [s for s in statuses if s.health == NodeHealth.UNKNOWN] + nodes_to_exclude = set() + nodes_to_exclude.update(s.node for s in dirty_nodes) + nodes_to_exclude.update(s.node for s in unreachable_nodes) + nodes_to_exclude.update(s.node for s in unknown_nodes) + + # Handle dirty nodes (optional auto-cleanup) if dirty_nodes: self.console.print( f"\n[yellow]⚠ Found {len(dirty_nodes)} dirty node(s) " f"with stale Ray/vLLM processes[/yellow]" ) - if self.auto_cleanup: self.console.print("[yellow]Running automatic cleanup...[/yellow]\n") - for status in dirty_nodes: self.console.print(f" Cleaning {status.node}...") - if self.cleanup_node(status.node): - # Re-check after cleanup + if self.cleanup_node(status.node, job_name=self._health_check_job_name): time.sleep(2) - new_status = self.check_node_health(status.node) + new_status = self.check_node_health(status.node, job_name=self._health_check_job_name) if new_status.health == NodeHealth.CLEAN: clean_nodes.append(new_status.node) + nodes_to_exclude.discard(status.node) self.console.print(f" [green]✓ {status.node} is now clean[/green]") else: self.console.print(f" [red]✗ {status.node} still dirty[/red]") else: self.console.print(f" [red]✗ Cleanup failed[/red]") - - # Update dirty nodes list - dirty_nodes = [s for s in statuses - if s.health == NodeHealth.DIRTY and s.node not in clean_nodes] - - # Build updated exclude list - dirty_node_names = [s.node for s in dirty_nodes] - existing_exclude = set(exclude.split(',')) if exclude else set() - existing_exclude.update(dirty_node_names) - updated_exclude = ','.join(sorted(existing_exclude)) - - if dirty_node_names: - self.console.print( - f"\n[yellow]Adding dirty nodes to 
exclude list: " - f"{', '.join(dirty_node_names)}[/yellow]" - ) - else: - updated_exclude = exclude or "" - + + # Build updated exclude list (dirty + unreachable + unknown) + existing_exclude = set(exclude.split(',')) if exclude else set() + existing_exclude.update(nodes_to_exclude) + updated_exclude = ','.join(sorted(existing_exclude)) + + if unreachable_nodes or unknown_nodes: + bad = [s.node for s in unreachable_nodes] + [s.node for s in unknown_nodes] + self.console.print( + f"\n[yellow]Excluding unreachable/unknown nodes: {', '.join(bad)}[/yellow]" + ) + if dirty_nodes and not self.auto_cleanup: + self.console.print( + f"\n[yellow]Adding dirty nodes to exclude list: " + f"{', '.join(s.node for s in dirty_nodes)}[/yellow]" + ) + # Final summary if len(clean_nodes) >= nodes_needed: self.console.print( @@ -464,7 +481,7 @@ def select_nodes( self.console.print( "[yellow]Recommendation: Wait for nodes to be cleaned or run manual cleanup[/yellow]\n" ) - + return clean_nodes, updated_exclude def _extract_section(self, text: str, start_marker: str, end_marker: str) -> str: @@ -516,3 +533,41 @@ def _display_status_table(self, statuses: List[NodeStatus]): self.console.print(table) self.console.print() + @staticmethod + def cancel_health_check_jobs(job_name: Optional[str], console: Optional[Console] = None) -> None: + """ + Cancel any SLURM jobs created by the node health check (srun invocations). + + Call this after select_nodes() so pending health-check jobs do not stay in the queue. + + Args: + job_name: Job name used for health-check srun (e.g. 
selector._health_check_job_name) + console: Optional Rich console for messages + """ + if not job_name: + return + _console = console or Console() + try: + user = os.environ.get("USER", "") + if not user: + return + result = subprocess.run( + ["squeue", "-u", user, "-n", job_name, "-h", "-o", "%i"], + capture_output=True, + text=True, + timeout=10, + ) + if result.returncode != 0 or not result.stdout.strip(): + return + job_ids = result.stdout.strip().split() + for jid in job_ids: + if jid.isdigit(): + subprocess.run( + ["scancel", jid], + capture_output=True, + timeout=5, + ) + if job_ids and _console: + _console.print(f"[dim]Cancelled {len(job_ids)} health-check job(s)[/dim]") + except Exception: + pass diff --git a/src/madengine/deployment/templates/slurm/job.sh.j2 b/src/madengine/deployment/templates/slurm/job.sh.j2 index 06ee4c64..46a59b20 100644 --- a/src/madengine/deployment/templates/slurm/job.sh.j2 +++ b/src/madengine/deployment/templates/slurm/job.sh.j2 @@ -275,6 +275,11 @@ if 'deployment_config' in manifest: if gpus_per_node: manifest['deployment_config']['gpus_per_node'] = gpus_per_node +# SLURM (single- and multi-node): skip in-job perf write; collect_results will add one record +if manifest.get('context') is None: + manifest['context'] = {} +manifest['context']['skip_perf_collection'] = True + with open(output_file, 'w') as f: json.dump(manifest, f, indent=2) print('Created Docker execution manifest for SLURM compute node') @@ -352,6 +357,10 @@ fi # ============================================================================= # Multi-node: Execute with per-task setup # ============================================================================= +# PyTorch elastic rendezvous: allow time for all nodes to finish image pull/startup. +# Default ~900s is too short when each node does a fresh pull; 3600s avoids "Timed out waiting for clients". +export TORCH_ELASTIC_RDZV_TIMEOUT=3600 + # For multi-node distributed execution: # 1. 
Each srun task runs on a separate node with unique SLURM_PROCID # 2. All nodes participate in workload via launcher (torchrun/vLLM/SGLang/etc.) @@ -369,7 +378,7 @@ fi # Use submission directory (shared filesystem) for task script # /tmp is local to each node and won't be accessible by srun on other nodes -TASK_SCRIPT="{{ manifest_file | dirname }}/slurm_output/madengine_task_${SLURM_JOB_ID}.sh" +TASK_SCRIPT="{{ manifest_file | dirname }}/{{ output_dir }}/madengine_task_${SLURM_JOB_ID}.sh" cat > "$TASK_SCRIPT" << 'TASK_SCRIPT_EOF' #!/bin/bash @@ -453,13 +462,22 @@ rsync -a --quiet \ --exclude='.venv' \ --exclude='env' \ --exclude='.env' \ - --exclude='slurm_output/*.out' \ - --exclude='slurm_output/*.log' \ + --exclude='{{ output_dir }}/*.out' \ + --exclude='{{ output_dir }}/*.log' \ "$SUBMISSION_DIR/" "$WORKSPACE/" echo " ✓ Project copied to local workspace" echo "" +# ============================================================================= +# Debug: set node log paths early and trap EXIT so we always see which node failed +# ============================================================================= +RESULTS_DIR="$SUBMISSION_DIR" +NODE_LOG_OUT="${RESULTS_DIR}/{{ output_dir }}/madengine-{{ model_name }}_${SLURM_JOB_ID}_node_${SLURM_PROCID}.out" +NODE_LOG_ERR="${RESULTS_DIR}/{{ output_dir }}/madengine-{{ model_name }}_${SLURM_JOB_ID}_node_${SLURM_PROCID}.err" +echo "[DEBUG] $(date -Iseconds) Node ${SLURM_PROCID} ($(hostname)): task script started" >> "${NODE_LOG_ERR}" +trap 'ec=$?; echo "[DEBUG] $(date -Iseconds) Node ${SLURM_PROCID} ($(hostname)): task script exiting with code $ec" >> "${NODE_LOG_ERR}"' EXIT + # ============================================================================= # Verify madengine Availability # ============================================================================= @@ -536,6 +554,11 @@ if 'deployment_config' in manifest: if gpus_per_node: manifest['deployment_config']['gpus_per_node'] = gpus_per_node +# SLURM (single- and 
multi-node): skip in-job perf write; collect_results will add one record +if manifest.get('context') is None: + manifest['context'] = {} +manifest['context']['skip_perf_collection'] = True + with open(output_file, 'w') as f: json.dump(manifest, f, indent=2) print('✓ Created Docker execution manifest for SLURM compute node') @@ -604,8 +627,8 @@ echo "" # Create node-specific log files in results directory RESULTS_DIR={{ manifest_file | dirname }} -NODE_LOG_OUT="${RESULTS_DIR}/slurm_output/madengine-{{ model_name }}_${SLURM_JOB_ID}_node_${SLURM_PROCID}.out" -NODE_LOG_ERR="${RESULTS_DIR}/slurm_output/madengine-{{ model_name }}_${SLURM_JOB_ID}_node_${SLURM_PROCID}.err" +NODE_LOG_OUT="${RESULTS_DIR}/{{ output_dir }}/madengine-{{ model_name }}_${SLURM_JOB_ID}_node_${SLURM_PROCID}.out" +NODE_LOG_ERR="${RESULTS_DIR}/{{ output_dir }}/madengine-{{ model_name }}_${SLURM_JOB_ID}_node_${SLURM_PROCID}.err" echo "Node ${SLURM_PROCID} logs:" echo " stdout: ${NODE_LOG_OUT}" @@ -619,85 +642,65 @@ else export MAD_SKIP_PERF_COLLECTION="false" fi +# Debug: log immediately before madengine so we know which node reached this point +echo "[DEBUG] $(date -Iseconds) Node ${SLURM_PROCID} ($(hostname)): about to run madengine" >> "${NODE_LOG_ERR}" + # Run madengine with output redirected to node-specific log files +# Use append (2>>) for stderr so debug lines written above are not overwritten # Environment variables (MASTER_ADDR, MAD_MULTI_NODE_RUNNER, etc.) are inherited $MAD_CLI_COMMAND run \ --manifest-file "$EXEC_MANIFEST" \ --timeout {{ timeout | default(3600) }} \ {% if shared_data %}--force-mirror-local {{ shared_data }}{% endif %} \ {% if live_output %}--live-output{% endif %} \ - > "${NODE_LOG_OUT}" 2> "${NODE_LOG_ERR}" + > "${NODE_LOG_OUT}" 2>> "${NODE_LOG_ERR}" TASK_EXIT=$? 
+echo "[DEBUG] $(date -Iseconds) Node ${SLURM_PROCID} ($(hostname)): madengine exited with code $TASK_EXIT" >> "${NODE_LOG_ERR}" echo "" echo "Task completed with exit code: $TASK_EXIT" # ============================================================================= -# Multi-Node Result Collection (Best Practice: Master Node Only) +# Multi-Node Per-Node Artifact Collection (Option 1: login node builds perf record) # ============================================================================= -# For distributed workloads, only the master node (SLURM_PROCID=0) should -# collect and report performance metrics to avoid: -# - Duplicate data in perf.csv -# - Race conditions in metric extraction -# - Failures from non-master nodes trying to report identical global metrics -# -# This follows distributed execution best practices where only rank 0 -# reports final metrics. +# Every node copies its workspace artifacts to submission dir for collect_results. +# Do NOT copy perf.csv/perf_entry.json/perf_super.* from nodes; login node will +# parse logs, aggregate, and run update_perf_csv/update_perf_super at project root. 
# ============================================================================= +SUBMISSION_DIR={{ manifest_file | dirname }} +JOB_COLLECTION_DIR="${SUBMISSION_DIR}/{{ output_dir }}/{{ model_name }}/${SLURM_JOB_ID}" +NODE_COLLECTION_DIR="${JOB_COLLECTION_DIR}/node_${SLURM_PROCID}" +mkdir -p "$NODE_COLLECTION_DIR" + if [ $TASK_EXIT -eq 0 ]; then - if [ "${SLURM_PROCID}" = "0" ]; then - # Master node: Collect and report results - RESULTS_DIR={{ manifest_file | dirname }} - echo "" - echo "========================================================================" - echo "Master Node (SLURM_PROCID=0): Collecting results" - echo "========================================================================" - echo "Copying results back to: $RESULTS_DIR" - - # Copy performance results (main metric file) - if [ -f "$WORKSPACE/perf.csv" ]; then - cp "$WORKSPACE/perf.csv" "$RESULTS_DIR/perf.csv" 2>/dev/null || true - echo " ✓ Copied: perf.csv (global metrics)" - fi - - # Copy log files - for log in "$WORKSPACE"/*.log; do - if [ -f "$log" ]; then - log_basename=$(basename "$log") - cp "$log" "$RESULTS_DIR/${log_basename}" 2>/dev/null || true - echo " ✓ Copied: ${log_basename}" - fi - done - - # Copy any workload results files - if [ -f "$WORKSPACE/results.txt" ]; then - cp "$WORKSPACE/results.txt" "$RESULTS_DIR/" 2>/dev/null || true - echo " ✓ Copied: results.txt" - fi - # Legacy support for training_results.txt - if [ -f "$WORKSPACE/training_results.txt" ]; then - cp "$WORKSPACE/training_results.txt" "$RESULTS_DIR/" 2>/dev/null || true - echo " ✓ Copied: training_results.txt" - fi - - echo " ✓ Master node results collection complete" - echo "========================================================================" - else - # Worker nodes: Exit cleanly without collecting results - echo "" - echo "========================================================================" - echo "Worker Node (SLURM_PROCID=${SLURM_PROCID}): Exiting cleanly" - echo 
"========================================================================" - echo " Note: Performance metrics collected by master node only (best practice)" - echo "========================================================================" - fi + echo "" + echo "========================================================================" + echo "Node ${SLURM_PROCID}: Copying artifacts to ${NODE_COLLECTION_DIR}" + echo "========================================================================" + # Copy node stdout/stderr into node dir for collect_results + if [ -f "${NODE_LOG_OUT}" ]; then cp "${NODE_LOG_OUT}" "$NODE_COLLECTION_DIR/stdout.out" 2>/dev/null || true; fi + if [ -f "${NODE_LOG_ERR}" ]; then cp "${NODE_LOG_ERR}" "$NODE_COLLECTION_DIR/stderr.err" 2>/dev/null || true; fi + # Copy env, logs, profiling, multi_results, etc. (not perf.csv/perf_entry/perf_super) + for f in "$WORKSPACE"/*_env.csv "$WORKSPACE"/*.log "$WORKSPACE"/multi_results*.csv "$WORKSPACE"/results* "$WORKSPACE"/gpu_info_*.csv "$WORKSPACE"/library_trace.csv; do + if [ -f "$f" ]; then cp "$f" "$NODE_COLLECTION_DIR/" 2>/dev/null || true; fi + done + {% if multiple_results %} + if [ -f "$WORKSPACE/run_directory/{{ multiple_results }}" ]; then cp "$WORKSPACE/run_directory/{{ multiple_results }}" "$NODE_COLLECTION_DIR/" 2>/dev/null || true; fi + if [ -f "$WORKSPACE/{{ multiple_results }}" ]; then cp "$WORKSPACE/{{ multiple_results }}" "$NODE_COLLECTION_DIR/" 2>/dev/null || true; fi + {% endif %} + if [ -d "$WORKSPACE/rocprof_output" ]; then cp -r "$WORKSPACE/rocprof_output" "$NODE_COLLECTION_DIR/" 2>/dev/null || true; fi + echo " ✓ Node ${SLURM_PROCID} artifacts copied" + echo "========================================================================" else echo "" echo "========================================================================" echo "Task FAILED with exit code: $TASK_EXIT" echo " Node: SLURM_PROCID=${SLURM_PROCID}" echo 
"========================================================================" + # Copy node logs so collect_results can still inspect them + if [ -f "${NODE_LOG_OUT}" ]; then cp "${NODE_LOG_OUT}" "$NODE_COLLECTION_DIR/stdout.out" 2>/dev/null || true; fi + if [ -f "${NODE_LOG_ERR}" ]; then cp "${NODE_LOG_ERR}" "$NODE_COLLECTION_DIR/stderr.err" 2>/dev/null || true; fi fi exit $TASK_EXIT @@ -741,17 +744,27 @@ $MAD_CLI_COMMAND run \ {% if live_output %}--live-output{% endif %} EXIT_CODE=$? + +# Single-node: copy multiple_results CSV to job collection dir so collect_results finds it +{% if multiple_results %} +if [ $EXIT_CODE -eq 0 ]; then + SUBMISSION_DIR={{ manifest_file | dirname }} + JOB_COLLECTION_DIR="${SUBMISSION_DIR}/{{ output_dir }}/{{ model_name }}/${SLURM_JOB_ID}" + NODE_COLLECTION_DIR="${JOB_COLLECTION_DIR}/node_0" + mkdir -p "$NODE_COLLECTION_DIR" + if [ -f "$WORKSPACE/run_directory/{{ multiple_results }}" ]; then cp "$WORKSPACE/run_directory/{{ multiple_results }}" "$NODE_COLLECTION_DIR/" 2>/dev/null || true; fi + if [ -f "$WORKSPACE/{{ multiple_results }}" ]; then cp "$WORKSPACE/{{ multiple_results }}" "$NODE_COLLECTION_DIR/" 2>/dev/null || true; fi +fi +{% endif %} {% endif %} # ============================================================================= # Job Completion # ============================================================================= -# Note: For multi-node jobs, only the master node (SLURM_PROCID=0) collects -# and reports performance metrics. This follows distributed execution best -# practices where: -# - Global metrics are identical across all nodes (computed via all_reduce) -# - Only rank 0 should report to avoid duplicate/conflicting data -# - Worker nodes exit cleanly after workload completes +# Note: For multi-node jobs, each node copies its artifacts to slurm_results/ +# //node_/. 
The login node runs collect_results to parse +# logs, aggregate metrics, and update project-root perf.csv/perf_super via +# the same reporting pipeline as local runs. echo "" if [ $EXIT_CODE -eq 0 ]; then diff --git a/src/madengine/execution/container_runner.py b/src/madengine/execution/container_runner.py index c3299049..0bc53335 100644 --- a/src/madengine/execution/container_runner.py +++ b/src/madengine/execution/container_runner.py @@ -27,6 +27,24 @@ from madengine.reporting.update_perf_super import update_perf_super_json, update_perf_super_csv from madengine.utils.gpu_config import resolve_runtime_gpus from madengine.utils.config_parser import ConfigParser +from madengine.utils.path_utils import scripts_base_dir_from +from madengine.utils.run_details import get_build_number, get_pipeline +from madengine.execution.container_runner_helpers import ( + make_run_log_file_path, + resolve_run_timeout, +) + + +def _resolve_multiple_results_path(multiple_results: str, model_dir: str) -> typing.Optional[str]: + """Resolve multiple_results CSV path: try cwd then model_dir. 
Return first that exists.""" + if not multiple_results: + return None + if os.path.isfile(multiple_results): + return multiple_results + path_in_model_dir = os.path.join(model_dir, multiple_results) + if os.path.isfile(path_in_model_dir): + return path_in_model_dir + return None class ContainerRunner: @@ -202,7 +220,7 @@ def create_run_details_dict( "nnodes": nnodes, "gpus_per_node": gpus_per_node, "training_precision": model_info.get("training_precision", ""), - "pipeline": os.environ.get("pipeline", ""), + "pipeline": get_pipeline(), "args": model_info.get("args", ""), "tags": model_info.get("tags", ""), "docker_file": build_info.get("dockerfile", ""), @@ -228,7 +246,7 @@ def create_run_details_dict( "data_provider_type": run_results.get("data_provider_type", ""), "data_size": run_results.get("data_size", ""), "data_download_duration": run_results.get("data_download_duration", ""), - "build_number": os.environ.get("BUILD_NUMBER", "0"), + "build_number": get_build_number(), "additional_docker_run_options": model_info.get( "additional_docker_run_options", "" ), @@ -240,7 +258,7 @@ def create_run_details_dict( # Parse and load config file if present in args for perf_entry_super.json try: scripts_path = model_info.get("scripts", "") - scripts_base_dir = os.path.dirname(scripts_path) if scripts_path else None + scripts_base_dir = scripts_base_dir_from(scripts_path) config_parser = ConfigParser(scripts_base_dir=scripts_base_dir) run_details["configs"] = config_parser.parse_and_load( model_info.get("args", ""), @@ -654,34 +672,8 @@ def run_container( """ self.rich_console.print(f"[bold green]🏃 Running model:[/bold green] [bold cyan]{model_info['name']}[/bold cyan] [dim]in container[/dim] [yellow]{docker_image}[/yellow]") - # Apply timeout logic: model timeout can override default timeout - # If model has a timeout in models.json and CLI timeout is default (7200), use model's timeout - # If CLI timeout is explicitly set (not default), it overrides model timeout - if 
"timeout" in model_info and model_info["timeout"] is not None and model_info["timeout"] > 0 and timeout == 7200: - # Model has a timeout and CLI is using default, so use model's timeout - timeout = model_info["timeout"] - - # Create log file for this run - # Extract dockerfile part from docker image name (remove "ci-" prefix and model name prefix) - image_name_without_ci = docker_image.replace("ci-", "") - model_name_clean = model_info["name"].replace("/", "_").lower() - - # Remove model name from the beginning to get the dockerfile part - if image_name_without_ci.startswith(model_name_clean + "_"): - dockerfile_part = image_name_without_ci[len(model_name_clean + "_") :] - else: - dockerfile_part = image_name_without_ci - - log_file_path = ( - model_info["name"].replace("/", "_") - + "_" - + dockerfile_part - + phase_suffix - + ".live.log" - ) - # Replace / with _ in log file path (already done above, but keeping for safety) - log_file_path = log_file_path.replace("/", "_") - + timeout = resolve_run_timeout(model_info, timeout) + log_file_path = make_run_log_file_path(model_info, docker_image, phase_suffix) print(f"Run log will be written to: {log_file_path}") # get machine name @@ -746,7 +738,7 @@ def run_container( # Add environment variables docker_options += f"--env MAD_MODEL_NAME='{model_info['name']}' " docker_options += ( - f"--env JENKINS_BUILD_NUMBER='{os.environ.get('BUILD_NUMBER','0')}' " + f"--env JENKINS_BUILD_NUMBER='{get_build_number()}' " ) # Gather data and environment @@ -782,6 +774,7 @@ def run_container( 'NNODES', 'NPROC_PER_NODE', 'MAD_MULTI_NODE_RUNNER', 'MAD_COLLECT_METRICS', 'NCCL_SOCKET_IFNAME', 'GLOO_SOCKET_IFNAME', 'NCCL_DEBUG', 'NCCL_IB_DISABLE', 'NCCL_NET_GDR_LEVEL', + 'TORCH_ELASTIC_RDZV_TIMEOUT', # Rendezvous timeout so all nodes can join after pull # GPU visibility variables for Ray-based launchers (vLLM, SGLang) # CRITICAL: These must be passed to Docker for proper GPU device mapping 'HIP_VISIBLE_DEVICES', 'ROCR_VISIBLE_DEVICES', 
'CUDA_VISIBLE_DEVICES' @@ -1071,11 +1064,14 @@ def run_container( f"cd {model_dir} && {script_name} {model_args}", timeout=timeout, ) - # Print output to ensure it gets captured in log file - print(model_output) + # When live_output is True, Console.sh() already streamed the output; avoid duplicate print. + if not self.live_output: + print(model_output) run_results["test_duration"] = time.time() - test_start_time print(f"Test Duration: {run_results['test_duration']} seconds") + # Parser-friendly line for SLURM log collection (test_duration: Xs) + print(f"test_duration: {run_results['test_duration']:.2f}s") # Run post-scripts if pre_encapsulate_post_scripts["post_scripts"]: @@ -1092,33 +1088,43 @@ def run_container( multiple_results = model_info.get("multiple_results", None) if multiple_results: - run_results["performance"] = multiple_results - # Validate multiple results file format using proper CSV parsing - try: - import csv - with open(multiple_results, "r") as f: - csv_reader = csv.DictReader(f) - - # Check if 'performance' column exists - if 'performance' not in csv_reader.fieldnames: - print("Error: 'performance' column not found in multiple results file.") - run_results["performance"] = None - else: - # Check if at least one row has a non-empty performance value - has_valid_perf = False - for row in csv_reader: - if row.get('performance', '').strip(): - has_valid_perf = True - break - - if not has_valid_perf: - run_results["performance"] = None - print("Error: Performance metric is empty in all rows of multiple results file.") - except Exception as e: + resolved_path = _resolve_multiple_results_path( + multiple_results, model_dir + ) + if not resolved_path: self.rich_console.print( - f"[yellow]Warning: Could not validate multiple results file: {e}[/yellow]" + f"[yellow]Warning: Could not find multiple results file " + f"(tried cwd and {model_dir}/): {multiple_results}[/yellow]" ) run_results["performance"] = None + else: + run_results["performance"] = 
resolved_path + # Validate multiple results file format using proper CSV parsing + try: + import csv + with open(resolved_path, "r") as f: + csv_reader = csv.DictReader(f) + + # Check if 'performance' column exists + if 'performance' not in csv_reader.fieldnames: + print("Error: 'performance' column not found in multiple results file.") + run_results["performance"] = None + else: + # Check if at least one row has a non-empty performance value + has_valid_perf = False + for row in csv_reader: + if row.get('performance', '').strip(): + has_valid_perf = True + break + + if not has_valid_perf: + run_results["performance"] = None + print("Error: Performance metric is empty in all rows of multiple results file.") + except Exception as e: + self.rich_console.print( + f"[yellow]Warning: Could not validate multiple results file: {e}[/yellow]" + ) + run_results["performance"] = None else: # Match the actual output format: "performance: 14164 samples_per_second" # Simple pattern to capture number and metric unit @@ -1326,8 +1332,13 @@ def run_container( # Handle multiple results if specified multiple_results = model_info.get("multiple_results", None) + resolved_multiple_results = ( + _resolve_multiple_results_path(multiple_results, model_dir) + if multiple_results + else None + ) if ( - multiple_results + resolved_multiple_results and run_results.get("status") == "SUCCESS" ): # Generate common info JSON for multiple results @@ -1341,7 +1352,7 @@ def run_container( # Update perf.csv with multiple results update_perf_csv( - multiple_results=multiple_results, + multiple_results=resolved_multiple_results, perf_csv=self.perf_csv_path, model_name=run_details_dict["model"], common_info="common_info.json", @@ -1353,11 +1364,11 @@ def run_container( # Update perf_super.json with multiple results try: scripts_path = model_info.get("scripts", "") - scripts_base_dir = os.path.dirname(scripts_path) if scripts_path else None + scripts_base_dir = scripts_base_dir_from(scripts_path) # Reuse 
common_info.json for super files (no need for duplicate) num_entries = update_perf_super_json( - multiple_results=multiple_results, + multiple_results=resolved_multiple_results, perf_super_json="perf_super.json", model_name=run_details_dict["model"], common_info="common_info.json", @@ -1395,7 +1406,7 @@ def run_container( # Update perf_super.json with single result try: scripts_path = model_info.get("scripts", "") - scripts_base_dir = os.path.dirname(scripts_path) if scripts_path else None + scripts_base_dir = scripts_base_dir_from(scripts_path) # Use perf_entry.json as input (already created above) if run_results.get("status") == "SUCCESS": @@ -1434,6 +1445,15 @@ def run_container( # Ignore errors if no profiler/trace output files exist pass + # Copy multiple_results CSV to workspace root before run_directory is removed + # so SLURM single-node copy can find it at $WORKSPACE/{{ multiple_results }} + mult_res = model_info.get("multiple_results") + if mult_res: + try: + model_docker.sh(f"cp {model_dir}/{mult_res} . 2>/dev/null || true") + except Exception: + pass + # Cleanup if not keeping alive and not keeping model directory if not keep_alive and not keep_model_dir: model_docker.sh(f"rm -rf {model_dir}", timeout=240) @@ -1480,7 +1500,7 @@ def run_container( # Update perf_super.json with exception result try: scripts_path = model_info.get("scripts", "") - scripts_base_dir = os.path.dirname(scripts_path) if scripts_path else None + scripts_base_dir = scripts_base_dir_from(scripts_path) # Use perf_entry.json as input (already created above) num_entries = update_perf_super_json( @@ -1545,7 +1565,10 @@ def run_models_from_manifest( # Load deployment_config from manifest for GPU resolution if "deployment_config" in manifest and not self.additional_context: self.additional_context = {"deployment_config": manifest["deployment_config"]} - + # Merge manifest context (e.g. 
skip_perf_collection for multi-node SLURM aggregation) + if "context" in manifest and isinstance(manifest["context"], dict): + self.additional_context = {**(self.additional_context or {}), **manifest["context"]} + if not built_images: self.rich_console.print("[yellow]⚠️ No images found in manifest[/yellow]") return {"successful_runs": [], "failed_runs": []} diff --git a/src/madengine/execution/container_runner_helpers.py b/src/madengine/execution/container_runner_helpers.py new file mode 100644 index 00000000..c6f7a25f --- /dev/null +++ b/src/madengine/execution/container_runner_helpers.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Pure helpers for container run flow (log paths, timeout resolution). + +Extracted so run_container logic is easier to test and maintain. +""" + +import typing + + +def resolve_run_timeout( + model_info: typing.Dict, + cli_timeout: int, + default_cli_timeout: int = 7200, +) -> int: + """ + Resolve effective run timeout from model config and CLI. + + - If model has a timeout and CLI is using default (7200), use model's timeout. + - If CLI timeout is explicitly set (not default), it overrides model timeout. + + Args: + model_info: Model info dict; may have "timeout" key. + cli_timeout: Timeout from CLI. + default_cli_timeout: Value considered "default" for CLI (typically 7200). + + Returns: + Effective timeout in seconds. + """ + if ( + "timeout" in model_info + and model_info["timeout"] is not None + and model_info["timeout"] > 0 + and cli_timeout == default_cli_timeout + ): + return model_info["timeout"] + return cli_timeout + + +def make_run_log_file_path( + model_info: typing.Dict, + docker_image: str, + phase_suffix: str = "", +) -> str: + """ + Build the log file path for a container run. + + Derives dockerfile part from image name (strip ci- and model prefix), + then: {model_safe}_{dockerfile_part}{phase_suffix}.live.log + + Args: + model_info: Must have "name" key. + docker_image: Full docker image name (e.g. 
ci-model_ubuntu.22.04). + phase_suffix: Optional suffix (e.g. ".run"). + + Returns: + Log file path string with "/" replaced by "_". + """ + image_name_without_ci = docker_image.replace("ci-", "") + model_name_clean = model_info["name"].replace("/", "_").lower() + + if image_name_without_ci.startswith(model_name_clean + "_"): + dockerfile_part = image_name_without_ci[len(model_name_clean + "_") :] + else: + dockerfile_part = image_name_without_ci + + log_file_path = ( + model_info["name"].replace("/", "_") + + "_" + + dockerfile_part + + phase_suffix + + ".live.log" + ) + return log_file_path.replace("/", "_") diff --git a/src/madengine/execution/docker_builder.py b/src/madengine/execution/docker_builder.py index 3901d864..6727ffda 100644 --- a/src/madengine/execution/docker_builder.py +++ b/src/madengine/execution/docker_builder.py @@ -17,20 +17,17 @@ from madengine.core.console import Console from madengine.core.context import Context from madengine.utils.ops import PythonicTee +from madengine.execution.dockerfile_utils import ( + is_compilation_arch_compatible, + is_target_arch_compatible_with_variable, + parse_dockerfile_gpu_variables, + parse_gpu_variable_value, +) class DockerBuilder: """Class responsible for building Docker images for models.""" - # GPU architecture variables used in MAD/DLM Dockerfiles - GPU_ARCH_VARIABLES = [ - "MAD_SYSTEM_GPU_ARCHITECTURE", - "PYTORCH_ROCM_ARCH", - "GPU_TARGETS", - "GFX_COMPILATION_ARCH", - "GPU_ARCHS" - ] - def __init__( self, context: Context, console: Console = None, live_output: bool = False ): @@ -582,7 +579,7 @@ def _check_dockerfile_has_gpu_variables(self, model_info: typing.Dict) -> typing dockerfile_content = f.read() # Parse GPU architecture variables from Dockerfile - dockerfile_gpu_vars = self._parse_dockerfile_gpu_variables(dockerfile_content) + dockerfile_gpu_vars = parse_dockerfile_gpu_variables(dockerfile_content) if dockerfile_gpu_vars: return True, dockerfile_path @@ -633,7 +630,7 @@ def 
_validate_target_arch_against_dockerfile(self, model_info: typing.Dict, targ dockerfile_content = f.read() # Parse GPU architecture variables from Dockerfile - dockerfile_gpu_vars = self._parse_dockerfile_gpu_variables(dockerfile_content) + dockerfile_gpu_vars = parse_dockerfile_gpu_variables(dockerfile_content) if not dockerfile_gpu_vars: # No GPU variables found - target arch is acceptable @@ -643,7 +640,7 @@ def _validate_target_arch_against_dockerfile(self, model_info: typing.Dict, targ # Validate target architecture against each GPU variable for var_name, var_values in dockerfile_gpu_vars.items(): - if not self._is_target_arch_compatible_with_variable( + if not is_target_arch_compatible_with_variable( var_name, var_values, target_arch ): self.rich_console.print(f"[red]Error: Target architecture '{target_arch}' is not compatible " @@ -662,119 +659,6 @@ def _validate_target_arch_against_dockerfile(self, model_info: typing.Dict, targ self.rich_console.print(f"[yellow]Warning: Error validating target architecture for model {model_info['name']}: {e}[/yellow]") return True # Assume compatible on parsing errors - def _parse_dockerfile_gpu_variables(self, dockerfile_content: str) -> typing.Dict[str, typing.List[str]]: - """Parse GPU architecture variables from Dockerfile content.""" - gpu_variables = {} - - for var_name in self.GPU_ARCH_VARIABLES: - # Look for ARG declarations - arg_pattern = rf"ARG\s+{var_name}=([^\s\n]+)" - arg_matches = re.findall(arg_pattern, dockerfile_content, re.IGNORECASE) - - # Look for ENV declarations - env_pattern = rf"ENV\s+{var_name}[=\s]+([^\s\n]+)" - env_matches = re.findall(env_pattern, dockerfile_content, re.IGNORECASE) - - # Process found values - all_matches = arg_matches + env_matches - if all_matches: - # Take the last defined value (in case of multiple definitions) - raw_value = all_matches[-1].strip('"\'') - parsed_values = self._parse_gpu_variable_value(var_name, raw_value) - if parsed_values: - gpu_variables[var_name] = 
parsed_values - - return gpu_variables - - def _parse_gpu_variable_value(self, var_name: str, raw_value: str) -> typing.List[str]: - """Parse GPU variable value based on variable type and format.""" - architectures = [] - - # Handle different variable formats - if var_name in ["GPU_TARGETS", "GPU_ARCHS", "PYTORCH_ROCM_ARCH"]: - # These often contain multiple architectures separated by semicolons or commas - if ";" in raw_value: - architectures = [arch.strip() for arch in raw_value.split(";") if arch.strip()] - elif "," in raw_value: - architectures = [arch.strip() for arch in raw_value.split(",") if arch.strip()] - else: - architectures = [raw_value.strip()] - else: - # Single architecture value (MAD_SYSTEM_GPU_ARCHITECTURE, GFX_COMPILATION_ARCH) - architectures = [raw_value.strip()] - - # Normalize architecture names - normalized_archs = [] - for arch in architectures: - normalized = self._normalize_architecture_name(arch) - if normalized: - normalized_archs.append(normalized) - - return normalized_archs - - def _normalize_architecture_name(self, arch: str) -> str: - """Normalize architecture name to standard format.""" - arch = arch.lower().strip() - - # Handle common variations and aliases - if arch.startswith("gfx"): - return arch - elif arch in ["mi100", "mi-100"]: - return "gfx908" - elif arch in ["mi200", "mi-200", "mi210", "mi250"]: - return "gfx90a" - elif arch in ["mi300", "mi-300", "mi300a"]: - return "gfx940" - elif arch in ["mi300x", "mi-300x"]: - return "gfx942" - elif arch.startswith("mi"): - # Unknown MI series - return as is for potential future support - return arch - - return arch if arch else None - - def _is_target_arch_compatible_with_variable( - self, - var_name: str, - var_values: typing.List[str], - target_arch: str - ) -> bool: - """ - Validate that target architecture is compatible with a specific GPU variable. - Used during build phase validation. 
- """ - if var_name == "MAD_SYSTEM_GPU_ARCHITECTURE": - # MAD_SYSTEM_GPU_ARCHITECTURE will be overridden by target_arch, so always compatible - return True - - elif var_name in ["PYTORCH_ROCM_ARCH", "GPU_TARGETS", "GPU_ARCHS"]: - # Multi-architecture variables - target arch must be in the list - return target_arch in var_values - - elif var_name == "GFX_COMPILATION_ARCH": - # Compilation architecture should be compatible with target arch - return len(var_values) == 1 and ( - var_values[0] == target_arch or - self._is_compilation_arch_compatible(var_values[0], target_arch) - ) - - # Unknown variable - assume compatible - return True - - def _is_compilation_arch_compatible(self, compile_arch: str, target_arch: str) -> bool: - """Check if compilation architecture is compatible with target architecture.""" - # Define compatibility rules for compilation - compatibility_matrix = { - "gfx908": ["gfx908"], # MI100 - exact match only - "gfx90a": ["gfx90a"], # MI200 - exact match only - "gfx940": ["gfx940"], # MI300A - exact match only - "gfx941": ["gfx941"], # MI300X - exact match only - "gfx942": ["gfx942"], # MI300X - exact match only - } - - compatible_archs = compatibility_matrix.get(compile_arch, [compile_arch]) - return target_arch in compatible_archs - def _build_model_single_arch( self, model_info: typing.Dict, @@ -976,20 +860,6 @@ def _determine_registry_image_name( return registry_image - def _is_compilation_arch_compatible(self, compile_arch: str, target_arch: str) -> bool: - """Check if compilation architecture is compatible with target architecture.""" - # Define compatibility rules for compilation - compatibility_matrix = { - "gfx908": ["gfx908"], # MI100 - exact match only - "gfx90a": ["gfx90a"], # MI200 - exact match only - "gfx940": ["gfx940"], # MI300A - exact match only - "gfx941": ["gfx941"], # MI300X - exact match only - "gfx942": ["gfx942"], # MI300X - exact match only - } - - compatible_archs = compatibility_matrix.get(compile_arch, [compile_arch]) - 
return target_arch in compatible_archs - def _build_model_for_arch( self, model_info: typing.Dict, diff --git a/src/madengine/execution/dockerfile_utils.py b/src/madengine/execution/dockerfile_utils.py new file mode 100644 index 00000000..7a7a1bf9 --- /dev/null +++ b/src/madengine/execution/dockerfile_utils.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +""" +Pure helpers for parsing Dockerfile GPU variables and validating target architecture. + +Used by DockerBuilder for build-phase validation. No I/O or context dependency. +""" + +import re +import typing + +# GPU architecture variables used in MAD/DLM Dockerfiles +GPU_ARCH_VARIABLES = [ + "MAD_SYSTEM_GPU_ARCHITECTURE", + "PYTORCH_ROCM_ARCH", + "GPU_TARGETS", + "GFX_COMPILATION_ARCH", + "GPU_ARCHS", +] + + +def parse_dockerfile_gpu_variables( + dockerfile_content: str, +) -> typing.Dict[str, typing.List[str]]: + """Parse GPU architecture variables from Dockerfile content.""" + gpu_variables: typing.Dict[str, typing.List[str]] = {} + + for var_name in GPU_ARCH_VARIABLES: + arg_pattern = rf"ARG\s+{var_name}=([^\s\n]+)" + arg_matches = re.findall(arg_pattern, dockerfile_content, re.IGNORECASE) + env_pattern = rf"ENV\s+{var_name}[=\s]+([^\s\n]+)" + env_matches = re.findall(env_pattern, dockerfile_content, re.IGNORECASE) + + all_matches = arg_matches + env_matches + if all_matches: + raw_value = all_matches[-1].strip('"\'') + parsed_values = parse_gpu_variable_value(var_name, raw_value) + if parsed_values: + gpu_variables[var_name] = parsed_values + + return gpu_variables + + +def parse_gpu_variable_value(var_name: str, raw_value: str) -> typing.List[str]: + """Parse GPU variable value based on variable type and format.""" + architectures: typing.List[str] = [] + + if var_name in ["GPU_TARGETS", "GPU_ARCHS", "PYTORCH_ROCM_ARCH"]: + if ";" in raw_value: + architectures = [a.strip() for a in raw_value.split(";") if a.strip()] + elif "," in raw_value: + architectures = [a.strip() for a in raw_value.split(",") if a.strip()] 
+ else: + architectures = [raw_value.strip()] + else: + architectures = [raw_value.strip()] + + normalized_archs = [] + for arch in architectures: + normalized = normalize_architecture_name(arch) + if normalized: + normalized_archs.append(normalized) + return normalized_archs + + +def normalize_architecture_name(arch: str) -> typing.Optional[str]: + """Normalize architecture name to standard format.""" + arch = arch.lower().strip() + if arch.startswith("gfx"): + return arch + if arch in ["mi100", "mi-100"]: + return "gfx908" + if arch in ["mi200", "mi-200", "mi210", "mi250"]: + return "gfx90a" + if arch in ["mi300", "mi-300", "mi300a"]: + return "gfx940" + if arch in ["mi300x", "mi-300x"]: + return "gfx942" + if arch.startswith("mi"): + return arch + return arch if arch else None + + +def is_target_arch_compatible_with_variable( + var_name: str, + var_values: typing.List[str], + target_arch: str, +) -> bool: + """ + Check if target architecture is compatible with a GPU variable's values. 
+ """ + if var_name == "MAD_SYSTEM_GPU_ARCHITECTURE": + return True + if var_name in ["PYTORCH_ROCM_ARCH", "GPU_TARGETS", "GPU_ARCHS"]: + return target_arch in var_values + if var_name == "GFX_COMPILATION_ARCH": + return len(var_values) == 1 and ( + var_values[0] == target_arch + or is_compilation_arch_compatible(var_values[0], target_arch) + ) + return True + + +def is_compilation_arch_compatible(compile_arch: str, target_arch: str) -> bool: + """Check if compilation architecture is compatible with target architecture.""" + compatibility_matrix = { + "gfx908": ["gfx908"], + "gfx90a": ["gfx90a"], + "gfx940": ["gfx940"], + "gfx941": ["gfx941"], + "gfx942": ["gfx942"], + } + compatible_archs = compatibility_matrix.get(compile_arch, [compile_arch]) + return target_arch in compatible_archs diff --git a/src/madengine/orchestration/image_filtering.py b/src/madengine/orchestration/image_filtering.py new file mode 100644 index 00000000..c65c2240 --- /dev/null +++ b/src/madengine/orchestration/image_filtering.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +""" +Pure image filtering logic for run orchestrator. + +Returns filtered image dicts and lists of skipped items so the orchestrator +can handle logging and CSV writes. No I/O or console here. +""" + +from typing import Any, Dict, List, Tuple + + +def filter_images_by_gpu_compatibility( + built_images: Dict[str, Any], + runtime_gpu_vendor: str, + runtime_gpu_arch: str, +) -> Tuple[Dict[str, Any], List[Tuple[str, str]]]: + """ + Filter images compatible with runtime GPU vendor and architecture. + + Args: + built_images: Dictionary of built images from manifest. + runtime_gpu_vendor: Runtime GPU vendor (AMD, NVIDIA, NONE). + runtime_gpu_arch: Runtime GPU architecture (gfx90a, sm_90, etc.). + + Returns: + (compatible_images, skipped_list) where skipped_list is + [(model_name, reason), ...] for orchestrator to log. 
+ """ + compatible: Dict[str, Any] = {} + skipped: List[Tuple[str, str]] = [] + + for model_name, image_info in built_images.items(): + image_gpu_vendor = image_info.get("gpu_vendor", "") + image_arch = image_info.get("gpu_architecture", "") + + if not image_gpu_vendor: + compatible[model_name] = image_info + continue + + if runtime_gpu_vendor == "NONE" or image_gpu_vendor == runtime_gpu_vendor: + if image_arch: + if image_arch == runtime_gpu_arch: + compatible[model_name] = image_info + else: + skipped.append( + (model_name, f"architecture mismatch ({image_arch} != {runtime_gpu_arch})") + ) + else: + compatible[model_name] = image_info + else: + skipped.append( + (model_name, f"GPU vendor mismatch ({image_gpu_vendor} != {runtime_gpu_vendor})") + ) + + return compatible, skipped + + +def filter_images_by_skip_gpu_arch( + built_images: Dict[str, Any], + built_models: Dict[str, Any], + runtime_gpu_arch: str, + disable_skip: bool = False, +) -> Tuple[Dict[str, Any], List[Tuple[str, Dict, str]]]: + """ + Filter out models that should skip the current GPU architecture. + + Implements skip_gpu_arch from model definitions. + + Args: + built_images: Dictionary of built images from manifest. + built_models: Dictionary of model metadata from manifest. + runtime_gpu_arch: Runtime GPU architecture (gfx90a, A100, etc.). + disable_skip: If True, return all images and empty skipped list. + + Returns: + (compatible_images, skipped_list) where skipped_list is + [(model_name, image_info, gpu_arch), ...] for orchestrator to call + _write_skipped_status(model_name, image_info, gpu_arch). 
+ """ + if disable_skip: + return built_images, [] + + compatible: Dict[str, Any] = {} + skipped: List[Tuple[str, Dict, str]] = [] + + for model_name, image_info in built_images.items(): + model_info = built_models.get(model_name, {}) + skip_gpu_arch_str = model_info.get("skip_gpu_arch", "") + + if not skip_gpu_arch_str: + compatible[model_name] = image_info + continue + + skip_list = [arch.strip() for arch in skip_gpu_arch_str.split(",")] + sys_gpu_arch = runtime_gpu_arch + if sys_gpu_arch and "NVIDIA" in sys_gpu_arch: + sys_gpu_arch = sys_gpu_arch.split()[1] + + if sys_gpu_arch in skip_list: + skipped.append((model_name, image_info, runtime_gpu_arch)) + else: + compatible[model_name] = image_info + + return compatible, skipped diff --git a/src/madengine/orchestration/run_orchestrator.py b/src/madengine/orchestration/run_orchestrator.py index e1513bb3..4843e79a 100644 --- a/src/madengine/orchestration/run_orchestrator.py +++ b/src/madengine/orchestration/run_orchestrator.py @@ -31,6 +31,10 @@ ) from madengine.core.constants import get_rocm_path from madengine.utils.session_tracker import SessionTracker +from madengine.orchestration.image_filtering import ( + filter_images_by_gpu_compatibility as _filter_by_gpu_compat, + filter_images_by_skip_gpu_arch as _filter_by_skip_gpu_arch, +) class RunOrchestrator: @@ -981,52 +985,22 @@ def _load_credentials(self) -> Optional[Dict]: def _filter_images_by_gpu_compatibility( self, built_images: Dict, runtime_gpu_vendor: str, runtime_gpu_arch: str ) -> Dict: - """Filter images compatible with runtime GPU vendor and architecture. - - Args: - built_images: Dictionary of built images from manifest - runtime_gpu_vendor: Runtime GPU vendor (AMD, NVIDIA, NONE) - runtime_gpu_arch: Runtime GPU architecture (gfx90a, sm_90, etc.) 
- - Returns: - Dictionary of compatible images - """ + """Filter images compatible with runtime GPU vendor and architecture.""" compatible_images = {} - for model_name, image_info in built_images.items(): - image_gpu_vendor = image_info.get("gpu_vendor", "") - image_arch = image_info.get("gpu_architecture", "") - - # Legacy images without vendor info - treat as compatible for backward compatibility - if not image_gpu_vendor: + if not image_info.get("gpu_vendor", ""): self.rich_console.print( f"[yellow] Warning: {model_name} has no gpu_vendor, treating as compatible (legacy)[/yellow]" ) compatible_images[model_name] = image_info continue - - # Check GPU vendor compatibility first (most important) - if runtime_gpu_vendor == "NONE" or image_gpu_vendor == runtime_gpu_vendor: - # Vendor matches or CPU-only, check architecture if specified - if image_arch: - # Architecture specified, must match - if image_arch == runtime_gpu_arch: - compatible_images[model_name] = image_info - else: - self.rich_console.print( - f"[dim] Skipping {model_name}: architecture mismatch " - f"({image_arch} != {runtime_gpu_arch})[/dim]" - ) - else: - # No architecture specified, vendor match is enough - compatible_images[model_name] = image_info - else: - # Vendor mismatch - self.rich_console.print( - f"[dim] Skipping {model_name}: GPU vendor mismatch " - f"({image_gpu_vendor} != {runtime_gpu_vendor})[/dim]" - ) - + built_with_vendor = {k: v for k, v in built_images.items() if v.get("gpu_vendor")} + compat, skipped = _filter_by_gpu_compat( + built_with_vendor, runtime_gpu_vendor, runtime_gpu_arch + ) + compatible_images.update(compat) + for model_name, reason in skipped: + self.rich_console.print(f"[dim] Skipping {model_name}: {reason}[/dim]") return compatible_images def _filter_images_by_gpu_architecture( @@ -1042,51 +1016,21 @@ def _filter_images_by_gpu_architecture( def _filter_images_by_skip_gpu_arch( self, built_images: Dict, built_models: Dict, runtime_gpu_arch: str ) -> Dict: - """Filter 
out models that should skip the current GPU architecture. - - This implements the skip_gpu_arch logic from model definitions, - where models can specify GPU architectures they don't support. - - Args: - built_images: Dictionary of built images from manifest - built_models: Dictionary of model metadata from manifest - runtime_gpu_arch: Runtime GPU architecture (gfx90a, A100, etc.) - - Returns: - Dictionary of images that should run (not skipped) - """ - if getattr(self.args, 'disable_skip_gpu_arch', False): - # User disabled skip logic, run all models - self.rich_console.print("[dim] --disable-skip-gpu-arch flag set, skipping GPU architecture checks[/dim]") + """Filter out models that should skip the current GPU architecture.""" + disable = getattr(self.args, "disable_skip_gpu_arch", False) + if disable: + self.rich_console.print( + "[dim] --disable-skip-gpu-arch flag set, skipping GPU architecture checks[/dim]" + ) return built_images - - compatible_images = {} - - for model_name, image_info in built_images.items(): - # Get model metadata to check skip_gpu_arch field - model_info = built_models.get(model_name, {}) - skip_gpu_arch_str = model_info.get("skip_gpu_arch", "") - - if skip_gpu_arch_str: - # Parse comma-separated list of architectures to skip - skip_list = [arch.strip() for arch in skip_gpu_arch_str.split(",")] - - # Normalize architecture comparison (handle "NVIDIA A100" -> "A100") - sys_gpu_arch = runtime_gpu_arch - if sys_gpu_arch and "NVIDIA" in sys_gpu_arch: - sys_gpu_arch = sys_gpu_arch.split()[1] - - if sys_gpu_arch in skip_list: - self.rich_console.print( - f"[yellow] Skipping model {model_name} as it is not supported on {runtime_gpu_arch} architecture.[/yellow]" - ) - - # Write SKIPPED status to perf CSV - self._write_skipped_status(model_name, image_info, runtime_gpu_arch) - continue - - compatible_images[model_name] = image_info - + compatible_images, skipped = _filter_by_skip_gpu_arch( + built_images, built_models, runtime_gpu_arch, 
disable_skip=False + ) + for model_name, image_info, gpu_arch in skipped: + self.rich_console.print( + f"[yellow] Skipping model {model_name} as it is not supported on {gpu_arch} architecture.[/yellow]" + ) + self._write_skipped_status(model_name, image_info, gpu_arch) return compatible_images def _write_skipped_status(self, model_name: str, image_info: Dict, gpu_arch: str) -> None: diff --git a/src/madengine/reporting/update_perf_super.py b/src/madengine/reporting/update_perf_super.py index f0d5cda5..f0b1753c 100644 --- a/src/madengine/reporting/update_perf_super.py +++ b/src/madengine/reporting/update_perf_super.py @@ -100,11 +100,11 @@ def handle_multiple_results_super( # Parse config file from args if present configs_data = None if 'args' in common_info_json and common_info_json['args']: - # Try to extract config path from args - scripts_path = common_info_json.get('pipeline', '') + # model_scripts_path: use None so resolution relies on config_parser.scripts_base_dir + # (callers pass scripts_base_dir when creating the parser; 'pipeline' is not a path) configs_data = config_parser.parse_and_load( common_info_json['args'], - scripts_path + None ) # Process each result row @@ -118,7 +118,10 @@ def handle_multiple_results_super( # Extract standard performance/metric columns record["performance"] = result_row.pop("performance") record["metric"] = result_row.pop("metric") - + # test_duration for Duration column in reports (avoid N/A when CSV has it) + _td = result_row.pop("test_duration", "") + record["test_duration"] = "" if (_td is None or _td == "" or pd.isna(_td)) else str(_td) + # Put remaining metrics into multi_results # Exclude internal fields that shouldn't be in multi_results extra_metrics = {k: v for k, v in result_row.items() diff --git a/src/madengine/utils/path_utils.py b/src/madengine/utils/path_utils.py new file mode 100644 index 00000000..637efb8f --- /dev/null +++ b/src/madengine/utils/path_utils.py @@ -0,0 +1,33 @@ +"""Path resolution helpers for 
madengine. + +Provides scripts_base_dir_from() and get_madengine_root() for consistent +path handling across execution and deployment. +""" + +import os +from pathlib import Path +from typing import Optional + + +def scripts_base_dir_from(scripts_path: Optional[str]) -> Optional[str]: + """Return the directory containing the given scripts path, or None. + + Args: + scripts_path: Path to a script file (or directory). Can be None or empty. + + Returns: + Parent directory as string, or None if scripts_path is None/empty. + """ + if not scripts_path or not str(scripts_path).strip(): + return None + return os.path.dirname(scripts_path) + + +def get_madengine_root() -> Path: + """Return the madengine package root directory (where madengine/__init__.py lives). + + Returns: + Path to the madengine package root. + """ + import madengine + return Path(madengine.__file__).resolve().parent diff --git a/src/madengine/utils/run_details.py b/src/madengine/utils/run_details.py new file mode 100644 index 00000000..c21776b0 --- /dev/null +++ b/src/madengine/utils/run_details.py @@ -0,0 +1,31 @@ +"""Shared helpers for run-details / perf row construction. + +Centralizes pipeline and build_number from environment so container_runner +and deployment (e.g. kubernetes) use the same values. Also provides +flatten_tags_in_place for consistent tags handling. +""" + +import os +from typing import Any, Dict + + +def get_pipeline() -> str: + """Return pipeline name from environment (e.g. CI pipeline).""" + return os.environ.get("pipeline", "") + + +def get_build_number() -> str: + """Return build number from environment (e.g. CI BUILD_NUMBER).""" + return os.environ.get("BUILD_NUMBER", "0") + + +def flatten_tags_in_place(record: Dict[str, Any]) -> None: + """Flatten 'tags' in a run-details record in place. + + If record['tags'] is a list, convert it to a comma-separated string. + Matches behavior expected by perf CSV and update_perf_csv. 
+ """ + if "tags" not in record: + return + if isinstance(record["tags"], list): + record["tags"] = ",".join(str(item) for item in record["tags"]) diff --git a/tests/e2e/test_build_workflows.py b/tests/e2e/test_build_workflows.py index 452c8af4..aec87459 100644 --- a/tests/e2e/test_build_workflows.py +++ b/tests/e2e/test_build_workflows.py @@ -24,6 +24,7 @@ from tests.fixtures.utils import BASE_DIR, MODEL_DIR from tests.fixtures.utils import global_data from tests.fixtures.utils import clean_test_temp_files +from tests.fixtures.utils import DEFAULT_CLEAN_FILES from tests.fixtures.utils import generate_additional_context_for_machine @@ -163,7 +164,7 @@ class TestDiscover: """Test the model discovery feature.""" @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_static(self, global_data, clean_test_temp_files): """ @@ -189,7 +190,7 @@ def test_static(self, global_data, clean_test_temp_files): pytest.fail("dummy2/model2 did not run successfully.") @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_dynamic(self, global_data, clean_test_temp_files): """ @@ -215,7 +216,7 @@ def test_dynamic(self, global_data, clean_test_temp_files): pytest.fail("dummy3/model4 did not run successfully.") @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_additional_args(self, global_data, clean_test_temp_files): """ @@ -245,7 +246,7 @@ def test_additional_args(self, global_data, clean_test_temp_files): pytest.fail("dummy2/model2:batch-size=32 did not run successfully.") @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def 
test_multiple(self, global_data, clean_test_temp_files): """ diff --git a/tests/e2e/test_data_workflows.py b/tests/e2e/test_data_workflows.py index 93709051..42e564e6 100644 --- a/tests/e2e/test_data_workflows.py +++ b/tests/e2e/test_data_workflows.py @@ -18,6 +18,7 @@ from tests.fixtures.utils import BASE_DIR, MODEL_DIR from tests.fixtures.utils import global_data from tests.fixtures.utils import clean_test_temp_files +from tests.fixtures.utils import DEFAULT_CLEAN_FILES from madengine.core.dataprovider import Data @@ -85,7 +86,7 @@ def test_reorder_data_provider_config(self): os.unlink(temp_file_path) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_local_data_provider_runs_successfully( self, global_data, clean_test_temp_files diff --git a/tests/e2e/test_execution_features.py b/tests/e2e/test_execution_features.py index 7a0ac120..fae4cb70 100644 --- a/tests/e2e/test_execution_features.py +++ b/tests/e2e/test_execution_features.py @@ -15,6 +15,12 @@ from tests.fixtures.utils import clean_test_temp_files from tests.fixtures.utils import is_nvidia from tests.fixtures.utils import generate_additional_context_for_machine +from tests.fixtures.utils import ( + DEFAULT_CLEAN_FILES, + build_run_command, + get_run_live_log_path, + get_timeout_seconds_from_log, +) @@ -25,180 +31,35 @@ class TestCustomTimeoutsFunctionality: @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) - def test_default_model_timeout_2hrs(self, global_data, clean_test_temp_files): - """ - default model timeout is 2 hrs - This test only checks if the timeout is set; it does not actually time the model. 
- """ - global_data["console"].sh( - "cd " - + BASE_DIR - + "; " - + "MODEL_DIR=" - + MODEL_DIR - + " " - + "python3 -m madengine.cli.app run --live-output --tags dummy" - ) - - regexp = re.compile(r"⏰ Setting timeout to ([0-9]*) seconds.") - foundTimeout = None - with open( - os.path.join( - BASE_DIR, - "dummy_dummy.ubuntu." - + ("amd" if not is_nvidia() else "nvidia") - + ".run.live.log", - ), - "r", - ) as f: - while True: - line = f.readline() - if not line: - break - match = regexp.search(line) - if match: - foundTimeout = match.groups()[0] - if foundTimeout != "7200": - pytest.fail("default model timeout is not 2 hrs (" + str(foundTimeout) + "s).") - - @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True - ) - def test_can_override_timeout_in_model(self, global_data, clean_test_temp_files): - """ - timeout can be overridden in model - This test only checks if the timeout is set; it does not actually time the model. - """ - global_data["console"].sh( - "cd " - + BASE_DIR - + "; " - + "MODEL_DIR=" - + MODEL_DIR - + " " - + "python3 -m madengine.cli.app run --live-output --tags dummy_timeout" - ) - - regexp = re.compile(r"⏰ Setting timeout to ([0-9]*) seconds.") - foundTimeout = None - with open( - os.path.join( - BASE_DIR, - "dummy_timeout_dummy.ubuntu." - + ("amd" if not is_nvidia() else "nvidia") - + ".run.live.log", - ), - "r", - ) as f: - while True: - line = f.readline() - if not line: - break - match = regexp.search(line) - if match: - foundTimeout = match.groups()[0] - if foundTimeout != "360": - pytest.fail( - "timeout in models.json (360s) could not override actual timeout (" - + str(foundTimeout) - + "s)." 
- ) - @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "tags,log_base_name,expected_seconds,extra_args", + [ + ("dummy", "dummy_dummy", "7200", ""), + ("dummy_timeout", "dummy_timeout_dummy", "360", ""), + ("dummy", "dummy_dummy", "120", "--timeout 120"), + ("dummy_timeout", "dummy_timeout_dummy", "120", "--timeout 120"), + ], ) - def test_can_override_timeout_in_commandline( - self, global_data, clean_test_temp_files - ): - """ - timeout command-line argument overrides default timeout - This test only checks if the timeout is set; it does not actually time the model. - """ - global_data["console"].sh( - "cd " - + BASE_DIR - + "; " - + "MODEL_DIR=" - + MODEL_DIR - + " " - + "python3 -m madengine.cli.app run --live-output --tags dummy --timeout 120" - ) - - regexp = re.compile(r"⏰ Setting timeout to ([0-9]*) seconds.") - foundTimeout = None - with open( - os.path.join( - BASE_DIR, - "dummy_dummy.ubuntu." - + ("amd" if not is_nvidia() else "nvidia") - + ".run.live.log", - ), - "r", - ) as f: - while True: - line = f.readline() - if not line: - break - match = regexp.search(line) - if match: - foundTimeout = match.groups()[0] - if foundTimeout != "120": - pytest.fail( - "timeout command-line argument (120s) could not override actual timeout (" - + foundTimeout - + "s)." - ) - - @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True - ) - def test_commandline_timeout_overrides_model_timeout( - self, global_data, clean_test_temp_files + def test_timeout_value_in_log( + self, global_data, clean_test_temp_files, tags, log_base_name, expected_seconds, extra_args ): """ - timeout command-line argument overrides model timeout - This test only checks if the timeout is set; it does not actually time the model. + Timeout is set as expected (default 2h, model override, CLI override). + Only checks the value in the log; does not actually time the model. 
""" - global_data["console"].sh( - "cd " - + BASE_DIR - + "; " - + "MODEL_DIR=" - + MODEL_DIR - + " " - + "python3 -m madengine.cli.app run --live-output --tags dummy_timeout --timeout 120" - ) - - regexp = re.compile(r"⏰ Setting timeout to ([0-9]*) seconds.") - foundTimeout = None - with open( - os.path.join( - BASE_DIR, - "dummy_timeout_dummy.ubuntu." - + ("amd" if not is_nvidia() else "nvidia") - + ".run.live.log", - ), - "r", - ) as f: - while True: - line = f.readline() - if not line: - break - match = regexp.search(line) - if match: - foundTimeout = match.groups()[0] - if foundTimeout != "120": + global_data["console"].sh(build_run_command(tags, extra_args=extra_args)) + log_path = get_run_live_log_path(log_base_name) + found = get_timeout_seconds_from_log(log_path) + if found != expected_seconds: pytest.fail( - "timeout in command-line argument (360s) could not override model.json timeout (" - + foundTimeout - + "s)." + f"expected timeout {expected_seconds}s in log, got {found}s (log: {log_path})." 
) @pytest.mark.parametrize( "clean_test_temp_files", - [["perf.csv", "perf.html", "run_directory"]], + [DEFAULT_CLEAN_FILES + ["run_directory"]], indirect=True, ) def test_timeout_in_commandline_timesout_correctly( @@ -226,7 +87,7 @@ def test_timeout_in_commandline_timesout_correctly( @pytest.mark.parametrize( "clean_test_temp_files", - [["perf.csv", "perf.html", "run_directory"]], + [DEFAULT_CLEAN_FILES + ["run_directory"]], indirect=True, ) def test_timeout_in_model_timesout_correctly( @@ -263,7 +124,7 @@ class TestDebuggingFunctionality: @pytest.mark.parametrize( "clean_test_temp_files", - [["perf.csv", "perf.html", "run_directory"]], + [DEFAULT_CLEAN_FILES + ["run_directory"]], indirect=True, ) def test_keepAlive_keeps_docker_alive(self, global_data, clean_test_temp_files): @@ -301,7 +162,7 @@ def test_keepAlive_keeps_docker_alive(self, global_data, clean_test_temp_files): @pytest.mark.parametrize( "clean_test_temp_files", - [["perf.csv", "perf.html", "run_directory"]], + [DEFAULT_CLEAN_FILES + ["run_directory"]], indirect=True, ) def test_no_keepAlive_does_not_keep_docker_alive( @@ -342,7 +203,7 @@ def test_no_keepAlive_does_not_keep_docker_alive( @pytest.mark.parametrize( "clean_test_temp_files", - [["perf.csv", "perf.html", "run_directory"]], + [DEFAULT_CLEAN_FILES + ["run_directory"]], indirect=True, ) def test_keepAlive_preserves_model_dir(self, global_data, clean_test_temp_files): @@ -374,7 +235,7 @@ def test_keepAlive_preserves_model_dir(self, global_data, clean_test_temp_files) @pytest.mark.parametrize( "clean_test_temp_files", - [["perf.csv", "perf.html", "run_directory"]], + [DEFAULT_CLEAN_FILES + ["run_directory"]], indirect=True, ) def test_keepModelDir_keeps_model_dir(self, global_data, clean_test_temp_files): @@ -398,7 +259,7 @@ def test_keepModelDir_keeps_model_dir(self, global_data, clean_test_temp_files): @pytest.mark.parametrize( "clean_test_temp_files", - [["perf.csv", "perf.html", "run_directory"]], + [DEFAULT_CLEAN_FILES + 
["run_directory"]], indirect=True, ) def test_no_keepModelDir_does_not_keep_model_dir( @@ -432,7 +293,7 @@ class TestLiveOutputFunctionality: """Test the live output functionality.""" @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_default_silent_run(self, global_data, clean_test_temp_files): """ @@ -458,7 +319,7 @@ def test_default_silent_run(self, global_data, clean_test_temp_files): pytest.fail("default run is not silent") @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_liveOutput_prints_output_to_screen( self, global_data, clean_test_temp_files diff --git a/tests/e2e/test_profiling_workflows.py b/tests/e2e/test_profiling_workflows.py index 907dcb7c..bd352d6c 100644 --- a/tests/e2e/test_profiling_workflows.py +++ b/tests/e2e/test_profiling_workflows.py @@ -19,6 +19,7 @@ from tests.fixtures.utils import ( BASE_DIR, MODEL_DIR, + DEFAULT_CLEAN_FILES, global_data, clean_test_temp_files, requires_gpu, @@ -270,7 +271,7 @@ def test_miopen_trace_runs_correctly(self, global_data, clean_test_temp_files): @pytest.mark.skipif(is_nvidia(), reason="test does not run on NVIDIA") @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_rccl_trace_runs_correctly(self, global_data, clean_test_temp_files): """ @@ -311,7 +312,7 @@ def test_rccl_trace_runs_correctly(self, global_data, clean_test_temp_files): ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_toolA_runs_correctly(self, global_data, clean_test_temp_files): """ @@ -357,7 +358,7 @@ def test_toolA_runs_correctly(self, global_data, 
clean_test_temp_files): pytest.fail("all strings were not matched in toolA test.") @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_stackable_design_runs_correctly(self, global_data, clean_test_temp_files): """ diff --git a/tests/e2e/test_run_workflows.py b/tests/e2e/test_run_workflows.py index 5b3f894d..bf770891 100644 --- a/tests/e2e/test_run_workflows.py +++ b/tests/e2e/test_run_workflows.py @@ -21,6 +21,11 @@ from tests.fixtures.utils import get_num_cpus from tests.fixtures.utils import requires_gpu from tests.fixtures.utils import generate_additional_context_for_machine +from tests.fixtures.utils import ( + DEFAULT_CLEAN_FILES, + build_run_command, + assert_model_in_perf_csv, +) from madengine.core.context import Context @@ -32,72 +37,33 @@ class TestContexts: @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", + [DEFAULT_CLEAN_FILES + ["ctx_test"]], + indirect=True, ) - def test_dockerfile_picked_on_detected_context_0( - self, global_data, clean_test_temp_files - ): - """ - picks dockerfile based on detected context and only those - """ - global_data["console"].sh( - "cd " - + BASE_DIR - + "; " - + "MODEL_DIR=" - + MODEL_DIR - + " " - + "python3 -m madengine.cli.app run --live-output --tags dummy_ctxtest " - ) - - success = False - with open(os.path.join(BASE_DIR, "perf.csv"), "r") as csv_file: - csv_reader = csv.DictReader(csv_file) - for row in csv_reader: - if row["model"] == "dummy_ctxtest": - if row["status"] == "SUCCESS" and row["performance"] == "0": - success = True - else: - pytest.fail("model in perf_test.csv did not run successfully.") - if not success: - pytest.fail("model did not pick correct context.") - @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html", "ctx_test"]], indirect=True + "ctx_value,expected_performance", + [(None, 
"0"), ("1", "1")], ) - def test_dockerfile_picked_on_detected_context_1( - self, global_data, clean_test_temp_files + def test_dockerfile_picked_on_detected_context( + self, global_data, clean_test_temp_files, ctx_value, expected_performance ): """ - picks dockerfile based on detected context and only those - """ - with open(os.path.join(BASE_DIR, "ctx_test"), "w") as ctx_test_file: - print("1", file=ctx_test_file) - - global_data["console"].sh( - "cd " - + BASE_DIR - + "; " - + "MODEL_DIR=" - + MODEL_DIR - + " " - + "python3 -m madengine.cli.app run --live-output --tags dummy_ctxtest " + Picks dockerfile based on detected context (no ctx_test file -> 0; ctx_test=1 -> 1). + """ + if ctx_value is not None: + with open(os.path.join(BASE_DIR, "ctx_test"), "w") as f: + print(ctx_value, file=f) + global_data["console"].sh(build_run_command("dummy_ctxtest")) + assert_model_in_perf_csv( + os.path.join(BASE_DIR, "perf.csv"), + "dummy_ctxtest", + status="SUCCESS", + performance=expected_performance, ) - success = False - with open(os.path.join(BASE_DIR, "perf.csv"), "r") as csv_file: - csv_reader = csv.DictReader(csv_file) - for row in csv_reader: - if row["model"] == "dummy_ctxtest": - if row["status"] == "SUCCESS" and row["performance"] == "1": - success = True - else: - pytest.fail("model in perf_test.csv did not run successfully.") - if not success: - pytest.fail("model did not pick correct context.") - @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html", "ctx_test"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES + ["ctx_test"]], indirect=True ) def test_all_dockerfiles_matching_context_executed( self, global_data, clean_test_temp_files @@ -146,7 +112,7 @@ def test_dockerfile_executed_if_contexts_keys_are_not_common(self): pass @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def 
test_can_override_context_with_additionalContext_commandline( self, global_data, clean_test_temp_files @@ -245,7 +211,7 @@ def test_additionalContext_commandline_overrides_additionalContextFile( pytest.fail("model did not pick correct context.") @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_base_docker_override(self, global_data, clean_test_temp_files): """ @@ -277,7 +243,7 @@ def test_base_docker_override(self, global_data, clean_test_temp_files): ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_docker_image_override(self, global_data, clean_test_temp_files): """ @@ -309,7 +275,7 @@ def test_docker_image_override(self, global_data, clean_test_temp_files): ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_docker_env_vars_override(self, global_data, clean_test_temp_files): """ @@ -340,7 +306,7 @@ def test_docker_env_vars_override(self, global_data, clean_test_temp_files): ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_docker_mounts_mount_host_paths_in_docker_container( self, global_data, clean_test_temp_files @@ -489,7 +455,7 @@ def test_gpu_product_name_matches_arch(self): class TestTagsFunctionality: @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_can_select_model_subset_with_commandline_tag_argument( self, global_data, clean_test_temp_files @@ -516,7 +482,7 @@ def test_can_select_model_subset_with_commandline_tag_argument( pytest.fail("dummy2 tag not 
selected with commandline --tags argument") @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_all_models_matching_any_tag_selected_with_multiple_tags( self, global_data, clean_test_temp_files @@ -546,7 +512,7 @@ def test_all_models_matching_any_tag_selected_with_multiple_tags( pytest.fail("dummy3 tag not selected with commandline --tags argument") @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_model_names_are_automatically_tags( self, global_data, clean_test_temp_files diff --git a/tests/e2e/test_scripting_workflows.py b/tests/e2e/test_scripting_workflows.py index 682eb53a..0f44d7d3 100644 --- a/tests/e2e/test_scripting_workflows.py +++ b/tests/e2e/test_scripting_workflows.py @@ -17,6 +17,7 @@ from tests.fixtures.utils import BASE_DIR, MODEL_DIR from tests.fixtures.utils import global_data from tests.fixtures.utils import clean_test_temp_files +from tests.fixtures.utils import DEFAULT_CLEAN_FILES from tests.fixtures.utils import is_nvidia from tests.fixtures.utils import generate_additional_context_for_machine @@ -24,7 +25,7 @@ class TestPrePostScriptsFunctionality: @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_pre_scripts_run_before_model(self, global_data, clean_test_temp_files): """ @@ -64,7 +65,7 @@ def test_pre_scripts_run_before_model(self, global_data, clean_test_temp_files): ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_post_scripts_run_after_model(self, global_data, clean_test_temp_files): """ @@ -104,7 +105,7 @@ def test_post_scripts_run_after_model(self, global_data, 
clean_test_temp_files): ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_pre_scripts_accept_arguments(self, global_data, clean_test_temp_files): """ @@ -144,7 +145,7 @@ def test_pre_scripts_accept_arguments(self, global_data, clean_test_temp_files): ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_post_scripts_accept_arguments(self, global_data, clean_test_temp_files): """ @@ -184,7 +185,7 @@ def test_post_scripts_accept_arguments(self, global_data, clean_test_temp_files) ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_both_pre_and_post_scripts_run_before_and_after_model( self, global_data, clean_test_temp_files @@ -249,7 +250,7 @@ def test_both_pre_and_post_scripts_run_before_and_after_model( ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_all_pre_scripts_run_in_order(self, global_data, clean_test_temp_files): """ @@ -297,7 +298,7 @@ def test_all_pre_scripts_run_in_order(self, global_data, clean_test_temp_files): ) @pytest.mark.parametrize( - "clean_test_temp_files", [["perf.csv", "perf.html"]], indirect=True + "clean_test_temp_files", [DEFAULT_CLEAN_FILES], indirect=True ) def test_all_post_scripts_run_in_order(self, global_data, clean_test_temp_files): """ diff --git a/tests/fixtures/dummy/credential.json b/tests/fixtures/dummy/credential.json index 04e514b5..67675fef 100644 --- a/tests/fixtures/dummy/credential.json +++ b/tests/fixtures/dummy/credential.json @@ -1,21 +1,42 @@ { - "NAS_NODES": [ + "ROCmSoftwarePlatform": { + "username": "your-username", + "password": 
"your-token-or-password" + }, + "PUBLIC_GITHUB_ROCM_KEY": { + "username": "your-github-username", + "password": "your-github-token" + }, + "gerrit-git.amd.com": { + "username": "your-gerrit-username", + "ssh_key_file": "id_rsa" + }, + "AMD_GITHUB": { + "username": "your-github-amd-username", + "password": "your-credential" + }, + "MAD_AWS_S3": { + "USERNAME": "your-aws-access-key", + "PASSWORD": "your-aws-secret-key" + }, + "NAS_NODES": [ { - "NAME": "default", - "HOST": "localhost", - "PORT": "22", - "USERNAME": "admin", - "PASSWORD": "admin" + "NAME": "default", + "HOST": "localhost", + "PORT": "22", + "USERNAME": "username", + "PASSWORD": "password" } - ], - "MAD_AWS_S3": { - "USERNAME": "admin", - "PASSWORD": "admin" - }, - "MAD_MINIO": { - "USERNAME": "admin-access-key", - "PASSWORD": "admin-secret-key", - "MINIO_ENDPOINT": "http://127.0.1:9000", - "AWS_ENDPOINT_URL_S3": "http://127.0.1:9000" - } -} \ No newline at end of file + ], + "MAD_MINIO": { + "USERNAME": "your-minio-username", + "PASSWORD": "your-minio-password", + "MINIO_ENDPOINT": "http://localhost:9000", + "AWS_ENDPOINT_URL_S3": "http://localhost:9000" + }, + "dockerhub": { + "repository": "your-org/your-repo", + "username": "your-docker-username", + "password": "your-docker-token" + } +} diff --git a/tests/fixtures/dummy/models.json b/tests/fixtures/dummy/models.json index 28b3db7f..fab99fae 100644 --- a/tests/fixtures/dummy/models.json +++ b/tests/fixtures/dummy/models.json @@ -264,6 +264,20 @@ ], "args": "" }, + { + "name": "dummy_torchrun_multi", + "dockerfile": "docker/dummy_torchrun", + "scripts": "scripts/dummy_torchrun/run_multi.sh", + "n_gpus": "1", + "owner": "mad.support@amd.com", + "training_precision": "", + "tags": [ + "dummies", + "dummy_distributed" + ], + "args": "", + "multiple_results": "perf_dummy_torchrun.csv" + }, { "name": "dummy_torchrun_helper", "dockerfile": "docker/dummy_torchrun", diff --git a/tests/fixtures/dummy/scripts/dummy_torchrun/run_multi.sh 
b/tests/fixtures/dummy/scripts/dummy_torchrun/run_multi.sh new file mode 100644 index 00000000..b6093ebe --- /dev/null +++ b/tests/fixtures/dummy/scripts/dummy_torchrun/run_multi.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Bash wrapper for dummy_torchrun_multi: torchrun + multiple_results CSV output +# Uses MAD_MULTI_NODE_RUNNER for torchrun launcher; writes perf_dummy_torchrun.csv +# + +set -e + +echo "========================================================================" +echo "madengine Torchrun Multi (multiple_results) Wrapper Script" +echo "========================================================================" + +# Get current directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +# Determine multi-node runner to use +if [ -z "$MAD_MULTI_NODE_RUNNER" ]; then + N_GPUS="${MAD_RUNTIME_NGPUS:-1}" + echo "ℹ️ MAD_MULTI_NODE_RUNNER not set, using standalone torchrun" + echo "ℹ️ Using $N_GPUS GPUs" + MAD_MULTI_NODE_RUNNER="torchrun --standalone --nproc_per_node=$N_GPUS" +fi + +echo "========================================================================" +echo "Launcher Command:" +echo "$MAD_MULTI_NODE_RUNNER" +echo "========================================================================" + +# Create MIOpen cache directory if MIOPEN_USER_DB_PATH is set +if [ -n "$MIOPEN_USER_DB_PATH" ]; then + MIOPEN_BASE_DIR=$(dirname "$MIOPEN_USER_DB_PATH") + mkdir -p "$MIOPEN_BASE_DIR" + echo "ℹ️ MIOpen cache directory: $MIOPEN_USER_DB_PATH" +fi + +# Execute the Python script (multiple_results variant) +echo "Executing: $MAD_MULTI_NODE_RUNNER run_torchrun_multi.py" +$MAD_MULTI_NODE_RUNNER run_torchrun_multi.py +PYTHON_EXIT_CODE=$? 
+ +echo "========================================================================" +echo "Training script completed with exit code: $PYTHON_EXIT_CODE" +echo "========================================================================" + +if [ $PYTHON_EXIT_CODE -ne 0 ]; then + echo "ERROR: Training script failed with exit code $PYTHON_EXIT_CODE" + exit $PYTHON_EXIT_CODE +fi diff --git a/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_multi.py b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_multi.py new file mode 100644 index 00000000..dc09b9cf --- /dev/null +++ b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_multi.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +""" +PyTorch Distributed Training Benchmark for madengine (multiple_results variant) + +Same as run_torchrun.py but writes a CSV file for multiple_results collection: +- perf_dummy_torchrun.csv with columns: model,temperature,performance,metric,test_duration +- Compatible with madengine multiple_results aggregation + +Usage: + torchrun --standalone --nproc_per_node=1 run_torchrun_multi.py + torchrun --standalone --nproc_per_node=8 run_torchrun_multi.py +""" + +import os +import sys +import time +import socket +import csv +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP + +# Configuration +BATCH_SIZE = 128 # Per-GPU batch size +NUM_EPOCHS = 5 +NUM_BATCHES = 100 # Number of synthetic batches per epoch +IMAGE_SIZE = 224 +NUM_CLASSES = 1000 +MULTI_RESULTS_CSV = "perf_dummy_torchrun.csv" + +# Get distributed environment variables (set by torchrun) +rank = int(os.environ.get("RANK", 0)) +local_rank = int(os.environ.get("LOCAL_RANK", 0)) +world_size = int(os.environ.get("WORLD_SIZE", 1)) +master_addr = os.environ.get("MASTER_ADDR", "localhost") +master_port = os.environ.get("MASTER_PORT", "29500") + + +def print_header(): + """Print benchmark header""" + print("=" * 70) + 
print("madengine PyTorch Distributed Training Benchmark (multi_results)") + print("=" * 70) + print(f"Hostname: {socket.gethostname()}") + print(f"Rank: {rank}/{world_size}") + print(f"Local Rank (GPU): {local_rank}") + if world_size > 1: + print(f"Master: {master_addr}:{master_port}") + print(f"\nConfiguration:") + print(f" Batch Size (per GPU): {BATCH_SIZE}") + print(f" Global Batch Size: {BATCH_SIZE * world_size}") + print(f" Epochs: {NUM_EPOCHS}") + print(f" Batches per Epoch: {NUM_BATCHES}") + print(f" Image Size: {IMAGE_SIZE}x{IMAGE_SIZE}") + print(f" Num Classes: {NUM_CLASSES}") + print("=" * 70) + + +class SimpleCNN(nn.Module): + """Simple CNN model for benchmarking""" + def __init__(self, num_classes=1000): + super(SimpleCNN, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) + self.bn1 = nn.BatchNorm2d(64) + self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1) + self.bn2 = nn.BatchNorm2d(128) + + self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1) + self.bn3 = nn.BatchNorm2d(256) + + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(256, num_classes) + + def forward(self, x): + x = self.pool(F.relu(self.bn1(self.conv1(x)))) + x = self.pool(F.relu(self.bn2(self.conv2(x)))) + x = self.pool(F.relu(self.bn3(self.conv3(x)))) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.fc(x) + return x + + +def generate_synthetic_batch(batch_size, device): + """Generate synthetic data for benchmarking""" + images = torch.randn(batch_size, 3, IMAGE_SIZE, IMAGE_SIZE, device=device) + labels = torch.randint(0, NUM_CLASSES, (batch_size,), device=device) + return images, labels + + +def train_epoch(model, optimizer, criterion, epoch, device): + """Train for one epoch with node-local throughput measurement""" + model.train() + epoch_start = time.time() + total_samples = 0 + total_loss = 0.0 + + for batch_idx in range(NUM_BATCHES): + batch_start 
= time.time() + + # Generate synthetic data + images, labels = generate_synthetic_batch(BATCH_SIZE, device) + + # Forward pass + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + # Backward pass (gradients are automatically synchronized across GPUs) + loss.backward() + + # Update weights + optimizer.step() + + batch_time = time.time() - batch_start + total_samples += BATCH_SIZE + total_loss += loss.item() + + # Print progress from local rank 0 on each node + if local_rank == 0 and (batch_idx + 1) % 20 == 0: + avg_loss = total_loss / (batch_idx + 1) + throughput = BATCH_SIZE / batch_time # Local throughput + print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] " + f"Batch [{batch_idx+1}/{NUM_BATCHES}] " + f"Loss: {loss.item():.4f} " + f"Throughput: {throughput:.2f} samples/sec (local)") + + epoch_time = time.time() - epoch_start + avg_loss = total_loss / NUM_BATCHES + + # ======================================================================== + # Node-Local Throughput Measurement + # ======================================================================== + local_samples = NUM_BATCHES * BATCH_SIZE + local_gpu_throughput = local_samples / epoch_time + local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE", 1)) + node_throughput = local_gpu_throughput * local_world_size + + metrics = { + 'avg_loss': avg_loss, + 'node_throughput': node_throughput, + 'epoch_time': epoch_time, + 'local_world_size': local_world_size + } + + return metrics + + +def main(): + """Main training function""" + test_start_time = time.time() + + print_header() + + # Create per-process MIOpen cache directory to avoid database conflicts + if "MIOPEN_USER_DB_PATH" in os.environ: + miopen_template = os.environ["MIOPEN_USER_DB_PATH"] + miopen_path = miopen_template.replace("${LOCAL_RANK:-0}", str(local_rank)).replace("$LOCAL_RANK", str(local_rank)) + os.makedirs(miopen_path, exist_ok=True) + print(f"[Rank {rank}] ✓ Created MIOpen cache directory: {miopen_path}") + + # 
Initialize distributed training + if world_size > 1: + print(f"\n[Rank {rank}] Initializing distributed process group...") + dist.init_process_group( + backend="nccl", + init_method=f"env://", + world_size=world_size, + rank=rank + ) + print(f"[Rank {rank}] ✓ Process group initialized") + print(f"[Rank {rank}] Backend: {dist.get_backend()}") + print(f"[Rank {rank}] World Size: {dist.get_world_size()}") + else: + print(f"\n=== Running in Standalone Mode (Single GPU) ===") + + # Set device + if torch.cuda.is_available(): + num_gpus = torch.cuda.device_count() + print(f"[Rank {rank}] PyTorch sees {num_gpus} GPU(s)") + print(f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}") + + if local_rank >= num_gpus: + print(f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}") + print(f"[Rank {rank}] Using cuda:0 instead") + device = torch.device("cuda:0") + else: + device = torch.device(f"cuda:{local_rank}") + + torch.cuda.set_device(device) + print(f"[Rank {rank}] Using GPU: {torch.cuda.get_device_name(device)}") + else: + device = torch.device("cpu") + print(f"[Rank {rank}] Warning: CUDA not available, using CPU") + + # Create model + print(f"\n[Rank {rank}] Creating model...") + model = SimpleCNN(num_classes=NUM_CLASSES).to(device) + + if world_size > 1: + model = DDP( + model, + device_ids=[local_rank], + output_device=local_rank, + broadcast_buffers=True, + find_unused_parameters=False + ) + print(f"[Rank {rank}] ✓ Model wrapped with DistributedDataParallel") + + optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9) + criterion = nn.CrossEntropyLoss() + + if world_size > 1: + dist.barrier(device_ids=[local_rank]) + + local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE", 1)) + node_rank = rank // local_world_size if local_world_size > 0 else 0 + + if local_rank == 0: + print(f"\n{'='*70}") + print(f"[Node {node_rank}] Starting Training") + print(f"{'='*70}") + + # Training loop + all_metrics = [] + for 
epoch in range(NUM_EPOCHS): + metrics = train_epoch( + model, optimizer, criterion, epoch, device + ) + all_metrics.append(metrics) + + if local_rank == 0: + print(f"\n[Node {node_rank}] Epoch [{epoch+1}/{NUM_EPOCHS}] Complete:") + print(f" Average Loss: {metrics['avg_loss']:.4f}") + print(f" Node Throughput: {metrics['node_throughput']:.2f} samples/sec") + print(f" Local GPUs: {metrics['local_world_size']}") + + avg_node_throughput = sum(m['node_throughput'] for m in all_metrics) / len(all_metrics) + avg_epoch_time = sum(m['epoch_time'] for m in all_metrics) / len(all_metrics) + + if world_size > 1: + dist.barrier(device_ids=[local_rank]) + + # Node-Local Performance Reporting + multiple_results CSV + if local_rank == 0: + print(f"\n{'='*70}") + print("Node Performance Summary") + print(f"{'='*70}") + print(f"Node ID: {node_rank}") + print(f"Node Hostname: {socket.gethostname()}") + print(f"Local GPUs: {local_world_size}") + print(f"Node Throughput: {avg_node_throughput:.2f} samples_per_second") + print(f"Avg Time per Epoch: {avg_epoch_time:.2f}s") + print(f"{'='*70}") + + print(f"performance: {avg_node_throughput:.2f} samples_per_second", flush=True) + print(f"node_id: {node_rank}", flush=True) + print(f"local_gpus: {local_world_size}", flush=True) + + test_duration = time.time() - test_start_time + print(f"test_duration: {test_duration:.2f}s", flush=True) + sys.stdout.flush() + + # Write multiple_results CSV (model,temperature,performance,metric,test_duration for duration in reports) + with open(MULTI_RESULTS_CSV, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["model", "temperature", "performance", "metric", "test_duration"]) + test_dur_str = f"{test_duration:.2f}s" + for i in range(4): + # Vary temperature for multiple rows; use throughput for performance + writer.writerow([ + i + 1, + 20 + i * 5, + f"{avg_node_throughput:.2f}", + "samples_per_sec", + test_dur_str, + ]) + print(f"Wrote {MULTI_RESULTS_CSV} for multiple_results collection", 
flush=True) + + # Cleanup + if world_size > 1: + dist.destroy_process_group() + if rank == 0: + print(f"✓ Process group destroyed") + + return 0 + + +if __name__ == "__main__": + try: + sys.exit(main()) + except Exception as e: + print(f"[Rank {rank}] ✗ Error: {e}", file=sys.stderr) + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/tests/fixtures/utils.py b/tests/fixtures/utils.py index 64b9d50b..21198b90 100644 --- a/tests/fixtures/utils.py +++ b/tests/fixtures/utils.py @@ -4,12 +4,13 @@ """ # built-in modules +import csv import os +import re +import shutil +import subprocess import sys import json -import subprocess -import shutil -import re import pytest from unittest.mock import MagicMock @@ -305,10 +306,10 @@ def get_num_cpus() -> int: int: Number of CPUs present. """ global _num_cpus_cache - + if _num_cpus_cache is not None: return _num_cpus_cache - + try: # Lazy import to avoid collection issues from madengine.core.console import Console @@ -319,3 +320,110 @@ def get_num_cpus() -> int: # Default to 64 CPUs if detection fails during collection _num_cpus_cache = 64 return _num_cpus_cache + + +# ============================================================================= +# E2E test helpers (run command, perf CSV, log path, timeout from log) +# ============================================================================= + +# Default list of perf output files to clean before/after e2e tests. +DEFAULT_CLEAN_FILES = ["perf.csv", "perf.html"] + + +def build_run_command(tags, extra_args="", output_file=None, additional_context=None): + """Build the base shell command for 'madengine run' e2e tests. + + Args: + tags: Model tag(s) string, e.g. 'dummy' or 'dummy_ctxtest'. + extra_args: Optional CLI args to append (e.g. '--timeout 120'). + output_file: Optional output CSV path (e.g. 'perf.csv' or 'perf_test.csv'). + additional_context: Optional dict or JSON-str for --additional-context. + If dict, it is json.dumps'd and wrapped in single quotes. 
+ + Returns: + str: Full command to run in shell (cd BASE_DIR; MODEL_DIR=... python3 -m ...). + """ + parts = [ + "cd " + BASE_DIR + ";", + "MODEL_DIR=" + MODEL_DIR, + "python3 -m madengine.cli.app run --live-output --tags " + tags, + ] + cmd = " ".join(parts) + if output_file: + cmd += " -o " + output_file + if additional_context is not None: + if isinstance(additional_context, dict): + ctx_str = json.dumps(additional_context) + cmd += ' --additional-context "' + ctx_str.replace('"', '\\"') + '"' + else: + ctx_str = additional_context + cmd += " --additional-context '" + ctx_str.replace("'", "'\"'\"'") + "'" + if extra_args: + cmd += " " + extra_args.strip() + return cmd + + +def assert_model_in_perf_csv(csv_path, model, status="SUCCESS", performance=None): + """Assert that a model row exists in perf CSV with given status and optional performance. + + Fails with pytest.fail if no matching row or row does not match expectations. + + Args: + csv_path: Path to the perf CSV file. + model: Model name to find (e.g. 'dummy_ctxtest' or 'dummy'). + status: Expected status string (default 'SUCCESS'). + performance: Optional expected performance value (compared as string to CSV). + """ + if not os.path.exists(csv_path): + pytest.fail(f"Perf CSV not found: {csv_path}") + with open(csv_path, "r") as csv_file: + csv_reader = csv.DictReader(csv_file) + for row in csv_reader: + if row.get("model") != model: + continue + if row.get("status") != status: + pytest.fail( + f"model {model} in perf CSV did not run successfully (status={row.get('status')})." + ) + if performance is not None and str(row.get("performance", "")) != str(performance): + pytest.fail( + f"model {model} expected performance {performance}, got {row.get('performance')}." + ) + return + pytest.fail(f"model {model} not found in perf CSV.") + + +def get_run_live_log_path(log_base_name, suffix=".run.live.log"): + """Return the path to a run live log file for the given base name and vendor. 
+ + Args: + log_base_name: Base name without extension, e.g. 'dummy_dummy' or 'dummy_timeout_dummy'. + suffix: Log file suffix (default '.run.live.log'). + + Returns: + str: Absolute path under BASE_DIR to the log file. + """ + vendor = "amd" if not is_nvidia() else "nvidia" + return os.path.join(BASE_DIR, log_base_name + ".ubuntu." + vendor + suffix) + + +def get_timeout_seconds_from_log(log_path, timeout_regex=None): + """Read the first 'Setting timeout to N seconds' line from a run log and return N. + + Args: + log_path: Path to the run live log file. + timeout_regex: Optional compiled regex; default matches '⏰ Setting timeout to ([0-9]*) seconds.' + + Returns: + str or None: The timeout seconds string, or None if not found. + """ + if timeout_regex is None: + timeout_regex = re.compile(r"⏰ Setting timeout to ([0-9]*) seconds.") + if not os.path.exists(log_path): + return None + with open(log_path, "r") as f: + for line in f: + match = timeout_regex.search(line) + if match: + return match.group(1) + return None diff --git a/tests/integration/test_batch_manifest_integration.py b/tests/integration/test_batch_manifest_integration.py deleted file mode 100644 index 86841b33..00000000 --- a/tests/integration/test_batch_manifest_integration.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Integration tests for batch manifest build workflow. - -Copyright (c) Advanced Micro Devices, Inc. All rights reserved. 
-""" - -import json -import os -import tempfile - -import pytest -from typer.testing import CliRunner - -from madengine.cli import app - - -class TestBatchManifestBuildIntegration: - """Integration tests for batch manifest build functionality.""" - - def test_batch_manifest_mutually_exclusive_with_tags(self): - """Test that --batch-manifest and --tags are mutually exclusive.""" - runner = CliRunner() - - # Create a simple batch manifest - batch_data = [{"model_name": "dummy", "build_new": True}] - - with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: - json.dump(batch_data, f) - batch_file = f.name - - try: - # Test that using both options is rejected - result = runner.invoke( - app, - [ - "build", - "--batch-manifest", batch_file, - "--tags", "dummy", - "--additional-context", '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}' - ] - ) - - # Should fail with mutual exclusivity error - assert result.exit_code != 0 - assert "Cannot specify both --batch-manifest and --tags" in result.output - finally: - os.unlink(batch_file) - diff --git a/tests/integration/test_cli_error_integration.py b/tests/integration/test_cli_error_integration.py deleted file mode 100644 index b04de1bb..00000000 --- a/tests/integration/test_cli_error_integration.py +++ /dev/null @@ -1,281 +0,0 @@ -#!/usr/bin/env python3 -""" -Unit tests for madengine CLI error handling integration. - -Tests the integration of unified error handling in mad_cli.py and -distributed_orchestrator.py components. 
-""" - -import pytest -import json -import os -import tempfile -from unittest.mock import Mock, patch, MagicMock, mock_open -from rich.console import Console - -# Add src to path for imports -import sys -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) - -from madengine.core.errors import ( - ErrorHandler, - ConfigurationError, - set_error_handler, - get_error_handler, - create_error_context -) - - -class TestMadCLIErrorIntegration: - """Test mad_cli.py error handling integration.""" - - @patch('madengine.cli.utils.Console') - def test_setup_logging_creates_error_handler(self, mock_console_class): - """Test that setup_logging initializes the unified error handler.""" - from madengine.cli import setup_logging - - mock_console = Mock(spec=Console) - mock_console_class.return_value = mock_console - - # Clear any existing global error handler - set_error_handler(None) - - # Call setup_logging - setup_logging(verbose=True) - - # Verify error handler was set - handler = get_error_handler() - assert handler is not None - assert isinstance(handler, ErrorHandler) - assert handler.verbose is True - - def test_setup_logging_verbose_flag(self): - """Test that verbose flag is properly passed to error handler.""" - from madengine.cli import setup_logging - - # Test with verbose=False - setup_logging(verbose=False) - handler = get_error_handler() - assert handler.verbose is False - - # Test with verbose=True - setup_logging(verbose=True) - handler = get_error_handler() - assert handler.verbose is True - - def test_build_command_error_handling(self): - """Test that build command imports and can use unified error handling.""" - from madengine.cli import ExitCode - - # Test that the import works and error handling is available - try: - # This tests the actual import in mad_cli.py - from madengine.cli import setup_logging - - # Verify error handler can be set up - setup_logging(verbose=False) - - # Verify handle_error can be imported in the context where it's 
used - from madengine.core.errors import handle_error, create_error_context - - # Create a test error to ensure the system works - error = Exception("Test build error") - context = create_error_context( - operation="build", - phase="build", - component="CLI" - ) - - # This should not raise an exception - handle_error(error, context=context) - - except ImportError as e: - pytest.fail(f"Error handling integration failed: {e}") - - @patch('madengine.cli.utils.console') - def test_cli_error_display_consistency(self, mock_console): - """Test that CLI errors are displayed consistently through unified handler.""" - from madengine.cli import setup_logging - - # Setup logging to initialize error handler - setup_logging(verbose=False) - - # Get the initialized error handler - handler = get_error_handler() - - # Create a test error - error = ConfigurationError( - "Invalid configuration", - context=create_error_context( - operation="cli_command", - component="CLI", - phase="validation" - ) - ) - - # Handle the error through the unified system - handler.handle_error(error) - - # The error should be displayed through Rich console - # (Note: The actual console calls depend on the handler implementation) - assert handler.console is not None - - -class TestErrorHandlingWorkflow: - """Test complete error handling workflow across components.""" - - @patch('madengine.cli.utils.console') - def test_end_to_end_error_flow(self, mock_console): - """Test complete error flow from CLI through orchestrator.""" - from madengine.cli import setup_logging - from madengine.core.errors import ValidationError - - # Setup unified error handling - setup_logging(verbose=True) - handler = get_error_handler() - - # Create an error that might occur in the orchestrator - orchestrator_error = ValidationError( - "Invalid model tag format", - context=create_error_context( - operation="model_discovery", - component="DistributedOrchestrator", - phase="validation", - model_name="invalid::tag" - ), - 
suggestions=[ - "Use format: model_name:version", - "Check model name contains only alphanumeric characters" - ] - ) - - # Handle the error through the unified system - handler.handle_error(orchestrator_error) - - # Verify the error was processed - assert handler.console is not None - assert orchestrator_error.context.operation == "model_discovery" - assert orchestrator_error.context.component == "DistributedOrchestrator" - assert len(orchestrator_error.suggestions) == 2 - - def test_error_logging_integration(self): - """Test that errors are properly logged with structured data.""" - from madengine.cli import setup_logging - from madengine.core.errors import BuildError - - # Setup logging - setup_logging(verbose=False) - handler = get_error_handler() - - # Create a build error with rich context - build_error = BuildError( - "Docker build failed", - context=create_error_context( - operation="docker_build", - component="DockerBuilder", - phase="build", - model_name="test_model", - additional_info={"dockerfile": "Dockerfile.ubuntu.amd"} - ), - suggestions=["Check Dockerfile syntax", "Verify base image availability"] - ) - - # Mock the logger to capture log calls - with patch.object(handler, 'logger') as mock_logger: - handler.handle_error(build_error) - - # Verify logging was called with structured data - mock_logger.error.assert_called_once() - log_call_args = mock_logger.error.call_args - - # Check the log message - assert "build: Docker build failed" in log_call_args[0][0] - - # Check the extra structured data - extra_data = log_call_args[1]['extra'] - assert extra_data['context']['operation'] == "docker_build" - assert extra_data['context']['component'] == "DockerBuilder" - assert extra_data['recoverable'] is False # BuildError is not recoverable - assert len(extra_data['suggestions']) == 2 - - def test_error_context_serialization(self): - """Test that error contexts can be serialized for logging and debugging.""" - from madengine.core.errors import RuntimeError - 
- context = create_error_context( - operation="model_execution", - component="ContainerRunner", - phase="runtime", - model_name="llama2", - node_id="worker-node-01", - file_path="/models/llama2/run.sh", - additional_info={ - "container_id": "abc123", - "gpu_count": 2, - "timeout": 3600 - } - ) - - error = RuntimeError( - "Model execution failed with exit code 1", - context=context - ) - - # Test that context can be serialized - context_dict = error.context.__dict__ - json_str = json.dumps(context_dict, default=str) - - # Verify all context information is in the serialized form - assert "model_execution" in json_str - assert "ContainerRunner" in json_str - assert "runtime" in json_str - assert "llama2" in json_str - assert "worker-node-01" in json_str - assert "abc123" in json_str - - -class TestErrorHandlingPerformance: - """Test performance aspects of error handling.""" - - def test_error_handler_initialization_performance(self): - """Test that error handler initialization is fast.""" - import time - from madengine.core.errors import ErrorHandler - from rich.console import Console - - start_time = time.time() - - # Create multiple error handlers - for _ in range(100): - console = Console() - handler = ErrorHandler(console=console, verbose=False) - - end_time = time.time() - - # Should be able to create 100 handlers in under 1 second - assert end_time - start_time < 1.0 - - def test_error_context_creation_performance(self): - """Test that error context creation is efficient.""" - import time - - start_time = time.time() - - # Create many error contexts - for i in range(1000): - context = create_error_context( - operation=f"operation_{i}", - component=f"Component_{i}", - phase="test", - model_name=f"model_{i}", - additional_info={"iteration": i} - ) - - end_time = time.time() - - # Should be able to create 1000 contexts in under 0.1 seconds - assert end_time - start_time < 0.1 - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) \ No newline at end of 
file diff --git a/tests/integration/test_error_system_integration.py b/tests/integration/test_error_system_integration.py deleted file mode 100644 index bf704da2..00000000 --- a/tests/integration/test_error_system_integration.py +++ /dev/null @@ -1,282 +0,0 @@ -#!/usr/bin/env python3 -""" -Integration tests for madengine unified error handling system. - -This test file focuses on testing the integration without requiring -optional dependencies like paramiko, ansible-runner, or kubernetes. -""" - -import pytest -import json -from unittest.mock import Mock, patch, MagicMock - -# Add src to path for imports -import sys -import os -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) - -from madengine.core.errors import ( - ErrorHandler, - MADEngineError, - ValidationError, - ConfigurationError, - RunnerError, - set_error_handler, - get_error_handler, - create_error_context -) - - -class TestUnifiedErrorSystem: - """Test the unified error handling system integration.""" - - def test_error_system_basic_functionality(self): - """Test basic error system functionality works.""" - # Create error handler - mock_console = Mock() - handler = ErrorHandler(console=mock_console, verbose=False) - - # Create error with context - context = create_error_context( - operation="test_operation", - component="TestComponent", - model_name="test_model" - ) - - error = ValidationError("Test validation error", context=context) - - # Handle the error - handler.handle_error(error) - - # Verify it was handled - mock_console.print.assert_called_once() - - # Verify error structure - assert error.context.operation == "test_operation" - assert error.context.component == "TestComponent" - assert error.recoverable is True - - def test_mad_cli_error_handler_setup(self): - """Test that mad_cli properly sets up error handling.""" - from madengine.cli import setup_logging - - # Clear existing handler - set_error_handler(None) - - # Setup logging - setup_logging(verbose=True) - - # 
Verify handler was created - handler = get_error_handler() - assert handler is not None - assert isinstance(handler, ErrorHandler) - assert handler.verbose is True - - - def test_runner_error_base_class(self): - """Test that RunnerError base class works properly.""" - context = create_error_context( - operation="runner_test", - component="TestRunner" - ) - - error = RunnerError("Test runner error", context=context) - - assert isinstance(error, MADEngineError) - assert error.recoverable is True - assert error.context.operation == "runner_test" - assert error.context.component == "TestRunner" - - def test_error_context_serialization(self): - """Test that error contexts can be serialized.""" - context = create_error_context( - operation="serialization_test", - component="TestComponent", - model_name="test_model", - node_id="test_node", - additional_info={"key": "value", "number": 42} - ) - - error = ValidationError("Test error", context=context) - - # Test serialization - context_dict = error.context.__dict__ - json_str = json.dumps(context_dict, default=str) - - # Verify content - assert "serialization_test" in json_str - assert "TestComponent" in json_str - assert "test_model" in json_str - assert "test_node" in json_str - assert "key" in json_str - assert "42" in json_str - - def test_error_hierarchy_consistency(self): - """Test that all error types maintain consistent behavior.""" - from madengine.core.errors import ( - ValidationError, ConnectionError, AuthenticationError, - RuntimeError, BuildError, DiscoveryError, OrchestrationError, - RunnerError, ConfigurationError, TimeoutError - ) - - error_classes = [ - ValidationError, ConnectionError, AuthenticationError, - RuntimeError, BuildError, DiscoveryError, OrchestrationError, - RunnerError, ConfigurationError, TimeoutError - ] - - for error_class in error_classes: - error = error_class("Test error message") - - # All should inherit from MADEngineError - assert isinstance(error, MADEngineError) - - # All should 
have context (even if default) - assert error.context is not None - - # All should have category - assert error.category is not None - - # All should have recoverable flag - assert isinstance(error.recoverable, bool) - - def test_global_error_handler_workflow(self): - """Test the complete global error handler workflow.""" - from madengine.core.errors import handle_error - - # Create and set global handler - mock_console = Mock() - handler = ErrorHandler(console=mock_console, verbose=False) - set_error_handler(handler) - - # Create error - error = ValidationError( - "Global handler test", - context=create_error_context( - operation="global_test", - component="TestGlobalHandler" - ) - ) - - # Use global handle_error function - handle_error(error) - - # Verify it was handled through the global handler - mock_console.print.assert_called_once() - - def test_error_suggestions_and_recovery(self): - """Test error suggestions and recovery information.""" - suggestions = [ - "Check your configuration file", - "Verify network connectivity", - "Try running with --verbose flag" - ] - - error = ConfigurationError( - "Configuration validation failed", - context=create_error_context( - operation="config_validation", - file_path="/path/to/config.json" - ), - suggestions=suggestions - ) - - assert error.suggestions == suggestions - assert error.recoverable is True - assert error.context.file_path == "/path/to/config.json" - - # Test error display includes suggestions - mock_console = Mock() - handler = ErrorHandler(console=mock_console) - handler.handle_error(error) - - # Should have been called to display the error - mock_console.print.assert_called_once() - - def test_nested_error_handling(self): - """Test handling of nested errors with causes.""" - from madengine.core.errors import RuntimeError as MADRuntimeError, OrchestrationError - - # Create a chain of errors - original_error = ConnectionError("Network timeout") - runtime_error = MADRuntimeError("Operation failed", 
cause=original_error) - final_error = OrchestrationError("Orchestration failed", cause=runtime_error) - - # Test the chain - assert final_error.cause == runtime_error - assert runtime_error.cause == original_error - - # Test handling preserves the chain - mock_console = Mock() - handler = ErrorHandler(console=mock_console, verbose=True) - handler.handle_error(final_error, show_traceback=True) - - # Should display error and potentially traceback - assert mock_console.print.call_count >= 1 - - def test_error_performance(self): - """Test that error handling is performant.""" - import time - - mock_console = Mock() - handler = ErrorHandler(console=mock_console) - - start_time = time.time() - - # Create and handle many errors - for i in range(100): - error = ValidationError( - f"Test error {i}", - context=create_error_context( - operation=f"test_op_{i}", - component="PerformanceTest" - ) - ) - handler.handle_error(error) - - end_time = time.time() - - # Should handle 100 errors in under 1 second - assert end_time - start_time < 1.0 - - # Verify all errors were handled - assert mock_console.print.call_count == 100 - - -class TestErrorSystemBackwardCompatibility: - """Test backward compatibility of the error system.""" - - def test_legacy_exception_handling_still_works(self): - """Test that legacy exception patterns still work.""" - try: - # Simulate old-style exception raising - raise ValueError("Legacy error") - except Exception as e: - # Should be able to handle with new system - mock_console = Mock() - handler = ErrorHandler(console=mock_console) - - context = create_error_context( - operation="legacy_handling", - component="LegacyTest" - ) - - handler.handle_error(e, context=context) - - # Should handle gracefully - mock_console.print.assert_called_once() - - def test_error_system_without_rich(self): - """Test error system fallback when Rich is not available.""" - # This test verifies the system degrades gracefully - # In practice, Rich is a hard dependency, but we 
test the concept - - with patch('madengine.core.errors.Console', side_effect=ImportError): - # Should still be able to create basic errors - error = ValidationError("Test without Rich") - assert str(error) == "Test without Rich" - assert error.recoverable is True - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/tests/integration/test_errors.py b/tests/integration/test_errors.py new file mode 100644 index 00000000..e325e6ac --- /dev/null +++ b/tests/integration/test_errors.py @@ -0,0 +1,327 @@ +"""Integration tests for error handling: CLI integration, workflow, unified system, backward compat. + +Merged from test_cli_error_integration and test_error_system_integration. +Deduplicated: single setup_logging/handler test, one context serialization test. +""" + +import json +import os +import sys +from unittest.mock import Mock, patch, MagicMock + +import pytest + +# Ensure src on path for imports +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) + +from madengine.core.errors import ( + ErrorHandler, + MADEngineError, + ValidationError, + ConfigurationError, + RunnerError, + set_error_handler, + get_error_handler, + create_error_context, +) + + +# ---- CLI error integration ---- + +class TestCLIErrorIntegration: + """CLI error handling setup and display.""" + + @patch("madengine.cli.utils.Console") + def test_setup_logging_creates_error_handler(self, mock_console_class): + """setup_logging initializes the unified error handler; verbose flag is respected.""" + from madengine.cli import setup_logging + from rich.console import Console + + mock_console = Mock(spec=Console) + mock_console_class.return_value = mock_console + set_error_handler(None) + setup_logging(verbose=True) + handler = get_error_handler() + assert handler is not None and isinstance(handler, ErrorHandler) + assert handler.verbose is True + setup_logging(verbose=False) + assert get_error_handler().verbose is False + + def 
test_build_command_error_handling(self): + """Build command can use unified error handling (imports and handle_error).""" + from madengine.cli import setup_logging + from madengine.core.errors import handle_error + + setup_logging(verbose=False) + error = Exception("Test build error") + context = create_error_context(operation="build", phase="build", component="CLI") + handle_error(error, context=context) + + @patch("madengine.cli.utils.console") + def test_cli_error_display_consistency(self, mock_console): + """CLI errors go through unified handler and handler has console.""" + from madengine.cli import setup_logging + + setup_logging(verbose=False) + handler = get_error_handler() + error = ConfigurationError( + "Invalid configuration", + context=create_error_context(operation="cli_command", component="CLI", phase="validation"), + ) + handler.handle_error(error) + assert handler.console is not None + + +# ---- Error workflow ---- + +class TestErrorWorkflow: + """End-to-end error flow and logging.""" + + @patch("madengine.cli.utils.console") + def test_end_to_end_error_flow(self, mock_console): + """Error flow from CLI through orchestrator-style error.""" + from madengine.cli import setup_logging + + setup_logging(verbose=True) + handler = get_error_handler() + err = ValidationError( + "Invalid model tag format", + context=create_error_context( + operation="model_discovery", + component="DistributedOrchestrator", + phase="validation", + model_name="invalid::tag", + ), + suggestions=["Use format: model_name:version", "Check model name"], + ) + handler.handle_error(err) + assert err.context.operation == "model_discovery" + assert len(err.suggestions) == 2 + + def test_error_logging_integration(self): + """Errors are logged with structured data.""" + from madengine.cli import setup_logging + from madengine.core.errors import BuildError + + setup_logging(verbose=False) + handler = get_error_handler() + build_error = BuildError( + "Docker build failed", + 
context=create_error_context( + operation="docker_build", + component="DockerBuilder", + phase="build", + model_name="test_model", + additional_info={"dockerfile": "Dockerfile.ubuntu.amd"}, + ), + suggestions=["Check Dockerfile syntax", "Verify base image availability"], + ) + with patch.object(handler, "logger") as mock_logger: + handler.handle_error(build_error) + mock_logger.error.assert_called_once() + extra = mock_logger.error.call_args[1]["extra"] + assert extra["context"]["operation"] == "docker_build" + assert extra["recoverable"] is False + assert len(extra["suggestions"]) == 2 + + def test_error_context_serialization(self): + """Error context can be serialized for logging.""" + from madengine.core.errors import RuntimeError + + context = create_error_context( + operation="model_execution", + component="ContainerRunner", + phase="runtime", + model_name="llama2", + node_id="worker-node-01", + additional_info={"container_id": "abc123", "gpu_count": 2}, + ) + error = RuntimeError("Model execution failed", context=context) + data = json.dumps(error.context.__dict__, default=str) + assert "model_execution" in data and "ContainerRunner" in data and "abc123" in data + + +# ---- Unified error system ---- + +class TestUnifiedErrorSystem: + """Unified error handling system.""" + + def test_error_system_basic_functionality(self): + """Handler handles ValidationError and context is preserved.""" + mock_console = Mock() + handler = ErrorHandler(console=mock_console, verbose=False) + context = create_error_context( + operation="test_operation", component="TestComponent", model_name="test_model" + ) + error = ValidationError("Test validation error", context=context) + handler.handle_error(error) + mock_console.print.assert_called_once() + assert error.context.operation == "test_operation" + assert error.recoverable is True + + def test_runner_error_base_class(self): + """RunnerError is MADEngineError with recoverable and context.""" + context = 
create_error_context(operation="runner_test", component="TestRunner") + error = RunnerError("Test runner error", context=context) + assert isinstance(error, MADEngineError) + assert error.recoverable is True + assert error.context.operation == "runner_test" + + def test_error_context_serialization_unified(self): + """Context with many fields serializes for logging.""" + context = create_error_context( + operation="serialization_test", + component="TestComponent", + model_name="test_model", + node_id="test_node", + additional_info={"key": "value", "number": 42}, + ) + error = ValidationError("Test error", context=context) + data = json.dumps(error.context.__dict__, default=str) + assert "serialization_test" in data and "test_node" in data and "42" in data + + def test_error_hierarchy_consistency(self): + """All error types inherit MADEngineError and have context/category/recoverable.""" + from madengine.core.errors import ( + ValidationError, + ConnectionError, + AuthenticationError, + RuntimeError, + BuildError, + DiscoveryError, + OrchestrationError, + RunnerError, + ConfigurationError, + TimeoutError, + ) + + for error_class in [ + ValidationError, + ConnectionError, + AuthenticationError, + RuntimeError, + BuildError, + DiscoveryError, + OrchestrationError, + RunnerError, + ConfigurationError, + TimeoutError, + ]: + err = error_class("Test error message") + assert isinstance(err, MADEngineError) + assert err.context is not None + assert err.category is not None + assert isinstance(err.recoverable, bool) + + def test_global_error_handler_workflow(self): + """handle_error uses global handler when set.""" + from madengine.core.errors import handle_error + + mock_console = Mock() + handler = ErrorHandler(console=mock_console, verbose=False) + set_error_handler(handler) + error = ValidationError( + "Global handler test", + context=create_error_context(operation="global_test", component="TestGlobalHandler"), + ) + handle_error(error) + 
mock_console.print.assert_called_once() + + def test_error_suggestions_and_recovery(self): + """Suggestions and context are stored and displayed.""" + suggestions = ["Check config", "Verify network", "Try --verbose"] + error = ConfigurationError( + "Configuration validation failed", + context=create_error_context( + operation="config_validation", file_path="/path/to/config.json" + ), + suggestions=suggestions, + ) + assert error.suggestions == suggestions + assert error.context.file_path == "/path/to/config.json" + mock_console = Mock() + ErrorHandler(console=mock_console).handle_error(error) + mock_console.print.assert_called_once() + + def test_nested_error_handling(self): + """Nested errors with cause chain are handled.""" + from madengine.core.errors import RuntimeError as MADRuntimeError, OrchestrationError + + orig = ConnectionError("Network timeout") + runtime = MADRuntimeError("Operation failed", cause=orig) + final = OrchestrationError("Orchestration failed", cause=runtime) + assert final.cause == runtime and runtime.cause == orig + mock_console = Mock() + ErrorHandler(console=mock_console, verbose=True).handle_error(final, show_traceback=True) + assert mock_console.print.call_count >= 1 + + def test_error_performance(self): + """Handle 100 errors in under 1 second.""" + import time + + mock_console = Mock() + handler = ErrorHandler(console=mock_console) + start = time.time() + for i in range(100): + err = ValidationError( + f"Test error {i}", + context=create_error_context(operation=f"test_op_{i}", component="PerformanceTest"), + ) + handler.handle_error(err) + assert time.time() - start < 1.0 + assert mock_console.print.call_count == 100 + + +# ---- Performance (lightweight) ---- + +class TestErrorHandlingPerformance: + """Error handler and context creation performance.""" + + def test_error_handler_initialization_performance(self): + """Create 100 handlers in under 1 second.""" + import time + from rich.console import Console + + start = time.time() + 
for _ in range(100): + ErrorHandler(console=Console(), verbose=False) + assert time.time() - start < 1.0 + + def test_error_context_creation_performance(self): + """Create 1000 contexts in under 0.1 seconds.""" + import time + + start = time.time() + for i in range(1000): + create_error_context( + operation=f"op_{i}", component=f"C_{i}", phase="test", model_name=f"m_{i}" + ) + assert time.time() - start < 0.1 + + +# ---- Backward compatibility ---- + +class TestErrorSystemBackwardCompatibility: + """Backward compatibility of the error system.""" + + def test_legacy_exception_handling_still_works(self): + """Legacy ValueError can be handled via handle_error with context.""" + try: + raise ValueError("Legacy error") + except Exception as e: + mock_console = Mock() + handler = ErrorHandler(console=mock_console) + context = create_error_context(operation="legacy_handling", component="LegacyTest") + handler.handle_error(e, context=context) + mock_console.print.assert_called_once() + + def test_error_system_without_rich(self): + """Errors can be created and have str/recoverable when Rich is unavailable.""" + with patch("madengine.core.errors.Console", side_effect=ImportError): + error = ValidationError("Test without Rich") + assert "Test without Rich" in str(error) + assert error.recoverable is True + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/integration/test_multi_gpu_arch.py b/tests/integration/test_multi_gpu_arch.py deleted file mode 100644 index 339d3fb9..00000000 --- a/tests/integration/test_multi_gpu_arch.py +++ /dev/null @@ -1,194 +0,0 @@ -"""Comprehensive unit tests for multi-GPU architecture support in madengine. 
- -Covers: -- Multi-arch DockerBuilder logic (image naming, manifest, legacy/override) -- Dockerfile GPU variable parsing/validation -- Target architecture normalization and compatibility -- Run-phase manifest filtering by gpu_architecture - -UPDATED: Now uses BuildOrchestrator instead of deprecated DistributedOrchestrator. - -All tests are logic/unit tests and do not require GPU hardware. -""" -import pytest -from unittest.mock import MagicMock, patch -from madengine.execution.docker_builder import DockerBuilder -from madengine.orchestration.build_orchestrator import BuildOrchestrator -from madengine.orchestration.run_orchestrator import RunOrchestrator - -class TestMultiGPUArch: - def setup_method(self): - self.context = MagicMock() - self.console = MagicMock() - self.builder = DockerBuilder(self.context, self.console) - - # Mock args for BuildOrchestrator (replacement for DistributedOrchestrator) - mock_args = MagicMock() - mock_args.additional_context = '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}' - mock_args.additional_context_file = None - mock_args.live_output = True - mock_args.data_config_file_name = "data.json" - mock_args.tags = [] - mock_args.target_archs = [] - mock_args.force_mirror_local = None - - # Create BuildOrchestrator with mocked args - self.orchestrator = BuildOrchestrator(mock_args) - - # --- DockerBuilder Multi-Arch Logic --- - @patch.object(DockerBuilder, "_get_dockerfiles_for_model") - @patch.object(DockerBuilder, "_check_dockerfile_has_gpu_variables") - @patch.object(DockerBuilder, "build_image") - def test_multi_arch_build_image_naming(self, mock_build_image, mock_check_gpu_vars, mock_get_dockerfiles): - model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"} - mock_get_dockerfiles.return_value = ["docker/dummy.Dockerfile"] - # GPU variable present - mock_check_gpu_vars.return_value = (True, "docker/dummy.Dockerfile") - mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd_gfx908", 
"build_duration": 1.0} - result = self.builder._build_model_for_arch(model_info, "gfx908", None, False, None, "", None) - assert result[0]["docker_image"].endswith("_gfx908") - # GPU variable absent - mock_check_gpu_vars.return_value = (False, "docker/dummy.Dockerfile") - mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0} - result = self.builder._build_model_for_arch(model_info, "gfx908", None, False, None, "", None) - assert not result[0]["docker_image"].endswith("_gfx908") - - @patch.object(DockerBuilder, "_get_dockerfiles_for_model") - @patch.object(DockerBuilder, "_check_dockerfile_has_gpu_variables") - @patch.object(DockerBuilder, "build_image") - def test_multi_arch_manifest_fields(self, mock_build_image, mock_check_gpu_vars, mock_get_dockerfiles): - model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"} - mock_get_dockerfiles.return_value = ["docker/dummy.Dockerfile"] - mock_check_gpu_vars.return_value = (True, "docker/dummy.Dockerfile") - mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd_gfx908", "build_duration": 1.0} - result = self.builder._build_model_for_arch(model_info, "gfx908", None, False, None, "", None) - assert result[0]["gpu_architecture"] == "gfx908" - - @patch.object(DockerBuilder, "_get_dockerfiles_for_model") - @patch.object(DockerBuilder, "build_image") - def test_legacy_single_arch_build(self, mock_build_image, mock_get_dockerfiles): - model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"} - mock_get_dockerfiles.return_value = ["docker/dummy.Dockerfile"] - mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0} - result = self.builder._build_model_single_arch(model_info, None, False, None, "", None) - assert result[0]["docker_image"] == "ci-dummy_dummy.ubuntu.amd" - - @patch.object(DockerBuilder, "_build_model_single_arch") - def test_additional_context_overrides_target_archs(self, 
mock_single_arch): - self.context.ctx = {"docker_build_arg": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx908"}} - model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"} - mock_single_arch.return_value = [{"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0}] - result = self.builder.build_all_models([model_info], target_archs=["gfx908", "gfx90a"]) - assert result["successful_builds"][0]["docker_image"] == "ci-dummy_dummy.ubuntu.amd" - - # --- Dockerfile GPU Variable Parsing/Validation --- - def test_parse_dockerfile_gpu_variables(self): - dockerfile_content = """ - ARG MAD_SYSTEM_GPU_ARCHITECTURE=gfx908 - ENV PYTORCH_ROCM_ARCH=gfx908;gfx90a - ARG GPU_TARGETS=gfx908,gfx942 - ENV GFX_COMPILATION_ARCH=gfx908 - ARG GPU_ARCHS=gfx908;gfx90a;gfx942 - """ - result = self.builder._parse_dockerfile_gpu_variables(dockerfile_content) - assert result["MAD_SYSTEM_GPU_ARCHITECTURE"] == ["gfx908"] - assert result["PYTORCH_ROCM_ARCH"] == ["gfx908", "gfx90a"] - assert result["GPU_TARGETS"] == ["gfx908", "gfx942"] - assert result["GFX_COMPILATION_ARCH"] == ["gfx908"] - assert result["GPU_ARCHS"] == ["gfx908", "gfx90a", "gfx942"] - - def test_parse_dockerfile_gpu_variables_env_delimiter(self): - dockerfile_content = "ENV PYTORCH_ROCM_ARCH = gfx908,gfx90a" - result = self.builder._parse_dockerfile_gpu_variables(dockerfile_content) - assert result["PYTORCH_ROCM_ARCH"] == ["gfx908", "gfx90a"] - - def test_parse_malformed_dockerfile(self): - dockerfile_content = "ENV BAD_LINE\nARG MAD_SYSTEM_GPU_ARCHITECTURE=\nENV PYTORCH_ROCM_ARCH=\n" - result = self.builder._parse_dockerfile_gpu_variables(dockerfile_content) - assert isinstance(result, dict) - - # --- Target Architecture Normalization/Compatibility --- - def test_normalize_architecture_name(self): - cases = { - "gfx908": "gfx908", - "GFX908": "gfx908", - "mi100": "gfx908", - "mi-100": "gfx908", - "mi200": "gfx90a", - "mi-200": "gfx90a", - "mi210": "gfx90a", - "mi250": "gfx90a", - "mi300": "gfx940", - "mi-300": 
"gfx940", - "mi300a": "gfx940", - "mi300x": "gfx942", - "mi-300x": "gfx942", - "unknown": "unknown", - "": None, - } - for inp, expected in cases.items(): - assert self.builder._normalize_architecture_name(inp) == expected - - def test_is_target_arch_compatible_with_variable(self): - assert self.builder._is_target_arch_compatible_with_variable("MAD_SYSTEM_GPU_ARCHITECTURE", ["gfx908"], "gfx942") - assert self.builder._is_target_arch_compatible_with_variable("PYTORCH_ROCM_ARCH", ["gfx908", "gfx942"], "gfx942") - assert not self.builder._is_target_arch_compatible_with_variable("PYTORCH_ROCM_ARCH", ["gfx908"], "gfx942") - assert self.builder._is_target_arch_compatible_with_variable("GFX_COMPILATION_ARCH", ["gfx908"], "gfx908") - assert not self.builder._is_target_arch_compatible_with_variable("GFX_COMPILATION_ARCH", ["gfx908"], "gfx942") - assert self.builder._is_target_arch_compatible_with_variable("UNKNOWN_VAR", ["foo"], "bar") - - def test_is_compilation_arch_compatible(self): - assert self.builder._is_compilation_arch_compatible("gfx908", "gfx908") - assert not self.builder._is_compilation_arch_compatible("gfx908", "gfx942") - assert self.builder._is_compilation_arch_compatible("foo", "foo") - - # --- Run-Phase Manifest Filtering --- - def test_filter_images_by_gpu_architecture(self): - """Test image filtering by GPU architecture using RunOrchestrator. - - Note: Current behavior treats images without gpu_vendor as compatible (legacy support). 
- """ - # Create RunOrchestrator which has _filter_images_by_gpu_architecture - mock_args = MagicMock() - mock_args.additional_context = '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}' - mock_args.additional_context_file = None - mock_args.tags = [] - mock_args.live_output = True - mock_args.data_config_file_name = "data.json" - mock_args.force_mirror_local = None - - run_orch = RunOrchestrator(mock_args) - - # Test exact match - both images have gpu_vendor set to "AMD" - built_images = { - "img1": {"gpu_architecture": "gfx908", "gpu_vendor": "AMD"}, - "img2": {"gpu_architecture": "gfx90a", "gpu_vendor": "AMD"} - } - filtered = run_orch._filter_images_by_gpu_architecture(built_images, "gfx908") - assert "img1" in filtered and "img2" not in filtered - - # Test legacy image (no gpu_vendor field) - should be included for compatibility - built_images = { - "img1": {"gpu_architecture": "gfx908"}, # No gpu_vendor - "img2": {"gpu_architecture": "gfx90a", "gpu_vendor": "AMD"} - } - filtered = run_orch._filter_images_by_gpu_architecture(built_images, "gfx908") - # Current behavior: legacy images (no gpu_vendor) are treated as compatible - assert "img1" in filtered # Legacy image included - # img2 may or may not be included depending on gpu_vendor matching logic - - # Test no match case with explicit gpu_vendor - built_images = { - "img1": {"gpu_architecture": "gfx90a", "gpu_vendor": "AMD"}, - "img2": {"gpu_architecture": "gfx942", "gpu_vendor": "AMD"} - } - filtered = run_orch._filter_images_by_gpu_architecture(built_images, "gfx908") - assert len(filtered) == 0 - - # Test all matching case with gpu_vendor - built_images = { - "img1": {"gpu_architecture": "gfx908", "gpu_vendor": "AMD"}, - "img2": {"gpu_architecture": "gfx908", "gpu_vendor": "AMD"} - } - filtered = run_orch._filter_images_by_gpu_architecture(built_images, "gfx908") - assert len(filtered) == 2 diff --git a/tests/integration/test_orchestrator_workflows.py b/tests/integration/test_orchestrator_workflows.py index 
78d9636b..12aa1a91 100644 --- a/tests/integration/test_orchestrator_workflows.py +++ b/tests/integration/test_orchestrator_workflows.py @@ -1,7 +1,7 @@ """Test the orchestration layer modules. This module tests the Build and Run orchestrators that coordinate -the build and execution workflows. +the build and execution workflows, and batch manifest CLI behavior. Copyright (c) Advanced Micro Devices, Inc. All rights reserved. """ @@ -14,13 +14,49 @@ # third-party modules import pytest +from typer.testing import CliRunner # project modules +from madengine.cli import app from madengine.orchestration.build_orchestrator import BuildOrchestrator from madengine.orchestration.run_orchestrator import RunOrchestrator from madengine.core.errors import BuildError, ConfigurationError, DiscoveryError +# ============================================================================ +# Batch manifest (CLI build options) +# ============================================================================ + +class TestBatchManifestBuildIntegration: + """Batch manifest and --tags are mutually exclusive.""" + + def test_batch_manifest_mutually_exclusive_with_tags(self): + """--batch-manifest and --tags cannot be used together.""" + runner = CliRunner() + batch_data = [{"model_name": "dummy", "build_new": True}] + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json.dump(batch_data, f) + batch_file = f.name + try: + result = runner.invoke( + app, + [ + "build", + "--batch-manifest", batch_file, + "--tags", "dummy", + "--additional-context", '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}', + ], + ) + assert result.exit_code != 0 + assert "Cannot specify both --batch-manifest and --tags" in result.output + finally: + os.unlink(batch_file) + + +# ============================================================================ +# Build orchestrator +# ============================================================================ + class TestBuildOrchestrator: """Test the 
Build Orchestrator module.""" diff --git a/tests/integration/test_platform_integration.py b/tests/integration/test_platform_integration.py index be0fe770..4f24d0c7 100644 --- a/tests/integration/test_platform_integration.py +++ b/tests/integration/test_platform_integration.py @@ -14,6 +14,13 @@ from unittest.mock import MagicMock, patch, mock_open import pytest +from madengine.execution.docker_builder import DockerBuilder +from madengine.execution.dockerfile_utils import ( + parse_dockerfile_gpu_variables, + normalize_architecture_name, + is_target_arch_compatible_with_variable, + is_compilation_arch_compatible, +) from madengine.orchestration.build_orchestrator import BuildOrchestrator from madengine.orchestration.run_orchestrator import RunOrchestrator from madengine.core.errors import BuildError, ConfigurationError, DiscoveryError @@ -543,6 +550,139 @@ def test_cpu_only_execution(self, cpu_context, mock_run_args, temp_manifest_file assert orchestrator.context.get_system_ngpus() == 0 +# ============================================================================ +# Multi-GPU architecture (Dockerfile parsing, normalization, image filtering) +# ============================================================================ + +class TestMultiGPUArch: + """Multi-arch DockerBuilder logic, dockerfile_utils, and run-phase image filtering.""" + + def setup_method(self): + self.context = MagicMock() + self.console = MagicMock() + self.builder = DockerBuilder(self.context, self.console) + mock_args = MagicMock() + mock_args.additional_context = '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}' + mock_args.additional_context_file = None + mock_args.live_output = True + mock_args.data_config_file_name = "data.json" + mock_args.tags = [] + mock_args.target_archs = [] + mock_args.force_mirror_local = None + self.orchestrator = BuildOrchestrator(mock_args) + + @patch.object(DockerBuilder, "_get_dockerfiles_for_model") + @patch.object(DockerBuilder, 
"_check_dockerfile_has_gpu_variables") + @patch.object(DockerBuilder, "build_image") + def test_multi_arch_build_image_naming(self, mock_build_image, mock_check_gpu_vars, mock_get_dockerfiles): + model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"} + mock_get_dockerfiles.return_value = ["docker/dummy.Dockerfile"] + mock_check_gpu_vars.return_value = (True, "docker/dummy.Dockerfile") + mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd_gfx908", "build_duration": 1.0} + result = self.builder._build_model_for_arch(model_info, "gfx908", None, False, None, "", None) + assert result[0]["docker_image"].endswith("_gfx908") + mock_check_gpu_vars.return_value = (False, "docker/dummy.Dockerfile") + mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0} + result = self.builder._build_model_for_arch(model_info, "gfx908", None, False, None, "", None) + assert not result[0]["docker_image"].endswith("_gfx908") + + @patch.object(DockerBuilder, "_get_dockerfiles_for_model") + @patch.object(DockerBuilder, "_check_dockerfile_has_gpu_variables") + @patch.object(DockerBuilder, "build_image") + def test_multi_arch_manifest_fields(self, mock_build_image, mock_check_gpu_vars, mock_get_dockerfiles): + model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"} + mock_get_dockerfiles.return_value = ["docker/dummy.Dockerfile"] + mock_check_gpu_vars.return_value = (True, "docker/dummy.Dockerfile") + mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd_gfx908", "build_duration": 1.0} + result = self.builder._build_model_for_arch(model_info, "gfx908", None, False, None, "", None) + assert result[0]["gpu_architecture"] == "gfx908" + + @patch.object(DockerBuilder, "_get_dockerfiles_for_model") + @patch.object(DockerBuilder, "build_image") + def test_legacy_single_arch_build(self, mock_build_image, mock_get_dockerfiles): + model_info = {"name": "dummy", "dockerfile": 
"docker/dummy.Dockerfile"} + mock_get_dockerfiles.return_value = ["docker/dummy.Dockerfile"] + mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0} + result = self.builder._build_model_single_arch(model_info, None, False, None, "", None) + assert result[0]["docker_image"] == "ci-dummy_dummy.ubuntu.amd" + + @patch.object(DockerBuilder, "_build_model_single_arch") + def test_additional_context_overrides_target_archs(self, mock_single_arch): + self.context.ctx = {"docker_build_arg": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx908"}} + model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"} + mock_single_arch.return_value = [{"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0}] + result = self.builder.build_all_models([model_info], target_archs=["gfx908", "gfx90a"]) + assert result["successful_builds"][0]["docker_image"] == "ci-dummy_dummy.ubuntu.amd" + + def test_parse_dockerfile_gpu_variables(self): + content = """ + ARG MAD_SYSTEM_GPU_ARCHITECTURE=gfx908 + ENV PYTORCH_ROCM_ARCH=gfx908;gfx90a + ARG GPU_TARGETS=gfx908,gfx942 + ENV GFX_COMPILATION_ARCH=gfx908 + ARG GPU_ARCHS=gfx908;gfx90a;gfx942 + """ + result = parse_dockerfile_gpu_variables(content) + assert result["MAD_SYSTEM_GPU_ARCHITECTURE"] == ["gfx908"] + assert result["PYTORCH_ROCM_ARCH"] == ["gfx908", "gfx90a"] + assert result["GPU_TARGETS"] == ["gfx908", "gfx942"] + assert result["GFX_COMPILATION_ARCH"] == ["gfx908"] + assert result["GPU_ARCHS"] == ["gfx908", "gfx90a", "gfx942"] + + def test_parse_dockerfile_gpu_variables_env_delimiter(self): + result = parse_dockerfile_gpu_variables("ENV PYTORCH_ROCM_ARCH = gfx908,gfx90a") + assert result["PYTORCH_ROCM_ARCH"] == ["gfx908", "gfx90a"] + + def test_parse_malformed_dockerfile(self): + result = parse_dockerfile_gpu_variables( + "ENV BAD_LINE\nARG MAD_SYSTEM_GPU_ARCHITECTURE=\nENV PYTORCH_ROCM_ARCH=\n" + ) + assert isinstance(result, dict) + + def test_normalize_architecture_name(self): + 
cases = { + "gfx908": "gfx908", "GFX908": "gfx908", "mi100": "gfx908", "mi-100": "gfx908", + "mi200": "gfx90a", "mi-200": "gfx90a", "mi210": "gfx90a", "mi250": "gfx90a", + "mi300": "gfx940", "mi-300": "gfx940", "mi300a": "gfx940", + "mi300x": "gfx942", "mi-300x": "gfx942", "unknown": "unknown", "": None, + } + for inp, expected in cases.items(): + assert normalize_architecture_name(inp) == expected + + def test_is_target_arch_compatible_with_variable(self): + assert is_target_arch_compatible_with_variable("MAD_SYSTEM_GPU_ARCHITECTURE", ["gfx908"], "gfx942") + assert is_target_arch_compatible_with_variable("PYTORCH_ROCM_ARCH", ["gfx908", "gfx942"], "gfx942") + assert not is_target_arch_compatible_with_variable("PYTORCH_ROCM_ARCH", ["gfx908"], "gfx942") + assert is_target_arch_compatible_with_variable("GFX_COMPILATION_ARCH", ["gfx908"], "gfx908") + assert not is_target_arch_compatible_with_variable("GFX_COMPILATION_ARCH", ["gfx908"], "gfx942") + assert is_target_arch_compatible_with_variable("UNKNOWN_VAR", ["foo"], "bar") + + def test_is_compilation_arch_compatible(self): + assert is_compilation_arch_compatible("gfx908", "gfx908") + assert not is_compilation_arch_compatible("gfx908", "gfx942") + assert is_compilation_arch_compatible("foo", "foo") + + def test_filter_images_by_gpu_architecture(self): + mock_args = MagicMock() + mock_args.additional_context = '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}' + mock_args.additional_context_file = None + mock_args.tags = [] + mock_args.live_output = True + mock_args.data_config_file_name = "data.json" + mock_args.force_mirror_local = None + run_orch = RunOrchestrator(mock_args) + built = {"img1": {"gpu_architecture": "gfx908", "gpu_vendor": "AMD"}, "img2": {"gpu_architecture": "gfx90a", "gpu_vendor": "AMD"}} + filtered = run_orch._filter_images_by_gpu_architecture(built, "gfx908") + assert "img1" in filtered and "img2" not in filtered + built_legacy = {"img1": {"gpu_architecture": "gfx908"}, "img2": {"gpu_architecture": 
"gfx90a", "gpu_vendor": "AMD"}} + filtered = run_orch._filter_images_by_gpu_architecture(built_legacy, "gfx908") + assert "img1" in filtered + built_nomatch = {"img1": {"gpu_architecture": "gfx90a", "gpu_vendor": "AMD"}, "img2": {"gpu_architecture": "gfx942", "gpu_vendor": "AMD"}} + assert len(run_orch._filter_images_by_gpu_architecture(built_nomatch, "gfx908")) == 0 + built_all = {"img1": {"gpu_architecture": "gfx908", "gpu_vendor": "AMD"}, "img2": {"gpu_architecture": "gfx908", "gpu_vendor": "AMD"}} + assert len(run_orch._filter_images_by_gpu_architecture(built_all, "gfx908")) == 2 + + if __name__ == "__main__": pytest.main([__file__, "-v", "--tb=short"]) diff --git a/tests/test_cleanup.py b/tests/test_cleanup.py deleted file mode 100644 index 458c4e4a..00000000 --- a/tests/test_cleanup.py +++ /dev/null @@ -1,177 +0,0 @@ -"""Test cleanup functionality for robust directory removal.""" - -import unittest -from unittest.mock import Mock, patch, call, MagicMock -import time -from madengine.tools.run_models import RunModels - - -class TestCleanupModelDirectory(unittest.TestCase): - """Test cases for the _cleanup_model_directory method.""" - - def setUp(self): - """Set up test fixtures.""" - # Create a mock args object with all required attributes - self.mock_args = Mock() - self.mock_args.keep_alive = False - self.mock_args.keep_model_dir = False - self.mock_args.generate_sys_env_details = False - self.mock_args.data_config_file_name = "/tmp/nonexistent_data.json" # Use non-existent path - self.mock_args.additional_context = "" - self.mock_args.additional_context_file = None - self.mock_args.force_mirror_local = False - - # Patch the dependencies before creating RunModels instance - with patch('madengine.tools.run_models.Console'), \ - patch('madengine.tools.run_models.Context') as mock_context_cls: - # Setup Context mock - mock_context = MagicMock() - mock_context.ctx = {} - mock_context_cls.return_value = mock_context - - self.run_models = RunModels(self.mock_args) 
- - # Create mock docker instance - self.mock_docker = Mock() - - def test_cleanup_success_first_attempt(self): - """Test successful cleanup on first attempt.""" - model_dir = "test_model_dir" - - # Mock successful removal - self.mock_docker.sh.return_value = "" - - # Call cleanup method - self.run_models._cleanup_model_directory(self.mock_docker, model_dir) - - # Verify rm command was called - self.mock_docker.sh.assert_called_with(f"rm -rf {model_dir}", timeout=240) - # Should only be called once on success - self.assertEqual(self.mock_docker.sh.call_count, 1) - - def test_cleanup_success_after_retries(self): - """Test successful cleanup after retries.""" - model_dir = "test_model_dir" - - # Mock failure on first 2 attempts, success on 3rd - self.mock_docker.sh.side_effect = [ - RuntimeError("Directory not empty"), # First rm -rf fails - RuntimeError("Directory not empty"), # fuser command - RuntimeError("Directory not empty"), # chmod command - RuntimeError("Directory not empty"), # Second rm -rf fails - RuntimeError("Directory not empty"), # fuser command - RuntimeError("Directory not empty"), # chmod command - "", # Third rm -rf succeeds - ] - - # Call cleanup method with shorter retry delay for testing - with patch('time.sleep'): # Mock sleep to speed up test - self.run_models._cleanup_model_directory( - self.mock_docker, model_dir, max_retries=3, retry_delay=0.1 - ) - - # Verify multiple attempts were made - self.assertGreater(self.mock_docker.sh.call_count, 1) - - def test_cleanup_all_attempts_fail_no_exception(self): - """Test that cleanup failure doesn't raise exception (only logs warning).""" - model_dir = "test_model_dir" - - # Mock all attempts failing - self.mock_docker.sh.side_effect = RuntimeError("Directory not empty") - - # Call cleanup method - should NOT raise exception - with patch('time.sleep'): # Mock sleep to speed up test - try: - self.run_models._cleanup_model_directory( - self.mock_docker, model_dir, max_retries=2, retry_delay=0.1 - ) - # 
Should complete without raising exception - cleanup_succeeded = True - except Exception as e: - cleanup_succeeded = False - self.fail(f"Cleanup should not raise exception, but raised: {e}") - - self.assertTrue(cleanup_succeeded, "Cleanup should complete even if all attempts fail") - - def test_cleanup_uses_fuser_and_chmod_on_retry(self): - """Test that retry attempts use fuser and chmod.""" - model_dir = "test_model_dir" - - # Track the commands called - commands_called = [] - - def track_commands(cmd, timeout): - commands_called.append(cmd) - if "rm -rf" in cmd and len([c for c in commands_called if "rm -rf" in c]) == 1: - # Fail first rm -rf - raise RuntimeError("Directory not empty") - return "" - - self.mock_docker.sh.side_effect = track_commands - - # Call cleanup method - with patch('time.sleep'): # Mock sleep to speed up test - self.run_models._cleanup_model_directory( - self.mock_docker, model_dir, max_retries=2, retry_delay=0.1 - ) - - # Verify fuser and chmod were called on retry - command_strings = ' '.join(commands_called) - self.assertIn('fuser', command_strings, "fuser should be called on retry") - self.assertIn('chmod', command_strings, "chmod should be called on retry") - - def test_cleanup_with_custom_retry_params(self): - """Test cleanup with custom retry parameters.""" - model_dir = "test_model_dir" - custom_retries = 5 - custom_delay = 0.5 - - self.mock_docker.sh.return_value = "" - - # Call with custom parameters - self.run_models._cleanup_model_directory( - self.mock_docker, model_dir, - max_retries=custom_retries, - retry_delay=custom_delay - ) - - # Verify it worked - self.mock_docker.sh.assert_called() - - -class TestCleanupIntegration(unittest.TestCase): - """Integration tests for cleanup in run_model_impl.""" - - def setUp(self): - """Set up test fixtures.""" - self.mock_args = Mock() - self.mock_args.keep_alive = False - self.mock_args.keep_model_dir = False - self.mock_args.generate_sys_env_details = False - 
self.mock_args.skip_model_run = True - self.mock_args.data_config_file_name = "/tmp/nonexistent_data.json" - self.mock_args.additional_context = "" - self.mock_args.additional_context_file = None - self.mock_args.force_mirror_local = False - - with patch('madengine.tools.run_models.Console'), \ - patch('madengine.tools.run_models.Context') as mock_context_cls: - mock_context = MagicMock() - mock_context.ctx = {} - mock_context_cls.return_value = mock_context - self.run_models = RunModels(self.mock_args) - - @patch('madengine.tools.run_models.RunModels._cleanup_model_directory') - def test_cleanup_called_when_not_keep_alive(self, mock_cleanup): - """Test that cleanup is called when keep_alive is False.""" - # This test verifies that our new method is called instead of direct rm -rf - # We can't easily test the full run_model_impl, but we've verified the code change - self.assertTrue(hasattr(self.run_models, '_cleanup_model_directory')) - - # Verify the method exists and is callable - self.assertTrue(callable(self.run_models._cleanup_model_directory)) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/unit/test_config_loader.py b/tests/unit/test_config_loader.py index 7e68833c..65184e68 100644 --- a/tests/unit/test_config_loader.py +++ b/tests/unit/test_config_loader.py @@ -19,7 +19,7 @@ from jinja2 import Template -from madengine.deployment.config_loader import ConfigLoader +from madengine.deployment.config_loader import ConfigLoader, apply_deployment_config from madengine.deployment.slurm import SlurmDeployment @@ -156,6 +156,30 @@ def test_override_behavior(self): assert "OMP_NUM_THREADS" in result["env_vars"] # Default still there +class TestApplyDeploymentConfig: + """Test apply_deployment_config helper.""" + + def test_apply_slurm_config_mutates_and_returns(self): + """apply_deployment_config mutates config.additional_context and returns full config.""" + class FakeConfig: + additional_context = {"slurm": {"nodes": 2}} + config = FakeConfig() 
+ result = apply_deployment_config(config, ConfigLoader.load_slurm_config) + assert result is config.additional_context + assert "slurm" in result + assert result["slurm"]["nodes"] == 2 + + def test_apply_k8s_config_mutates_and_returns(self): + """apply_deployment_config with load_k8s_config mutates and returns full config.""" + class FakeConfig: + additional_context = {"k8s": {"gpu_count": 1}} + config = FakeConfig() + result = apply_deployment_config(config, ConfigLoader.load_k8s_config) + assert result is config.additional_context + assert "k8s" in result + assert result["k8s"]["gpu_count"] == 1 + + class TestConfigLoaderK8sConfigs: """Test with actual K8s config files (if they exist).""" diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py new file mode 100644 index 00000000..a7230369 --- /dev/null +++ b/tests/unit/test_constants.py @@ -0,0 +1,105 @@ +"""Unit tests for core constants and _get_env_or_creds_or_default.""" + +import json +import os +from unittest.mock import patch + +import pytest + +from madengine.core.constants import ( + NAS_NODES, + MAD_AWS_S3, + MAD_MINIO, + PUBLIC_GITHUB_ROCM_KEY, + _get_env_or_creds_or_default, + _DEFAULT_NAS_NODES, + _DEFAULT_MAD_AWS_S3, + _DEFAULT_MAD_MINIO, + _DEFAULT_PUBLIC_GITHUB_ROCM_KEY, +) + + +class TestGetEnvOrCredsOrDefault: + """Test _get_env_or_creds_or_default helper.""" + + def test_env_override_returns_parsed_json(self): + """When env is set with valid JSON, that value is returned.""" + with patch.dict(os.environ, {"TEST_KEY": '[{"a": 1}]'}, clear=False): + # Need to pass creds - we patch CREDS via the module + import madengine.core.constants as constants_module + with patch.object(constants_module, "CREDS", {}): + result = _get_env_or_creds_or_default( + "TEST_KEY", "TEST_KEY", _DEFAULT_NAS_NODES + ) + assert result == [{"a": 1}] + + def test_env_invalid_json_returns_default(self): + """When env is set with invalid JSON, default is returned.""" + with patch.dict(os.environ, {"TEST_KEY": 
"not json"}, clear=False): + import madengine.core.constants as constants_module + with patch.object(constants_module, "CREDS", {}): + result = _get_env_or_creds_or_default( + "TEST_KEY", "TEST_KEY", _DEFAULT_NAS_NODES + ) + assert result == _DEFAULT_NAS_NODES + + def test_creds_fallback_when_env_unset(self): + """When env is unset and creds has key, creds value is returned.""" + with patch.dict(os.environ, {}, clear=False): + try: + orig = os.environ.get("TEST_KEY") + if "TEST_KEY" in os.environ: + del os.environ["TEST_KEY"] + except Exception: + pass + import madengine.core.constants as constants_module + with patch.object(constants_module, "CREDS", {"TEST_KEY": [{"from": "creds"}]}): + result = _get_env_or_creds_or_default( + "TEST_KEY", "TEST_KEY", _DEFAULT_NAS_NODES + ) + assert result == [{"from": "creds"}] + + def test_default_when_env_and_creds_unset(self): + """When env unset and creds missing key, default is returned.""" + import madengine.core.constants as constants_module + with patch.dict(os.environ, {}, clear=False): + if "TEST_KEY" in os.environ: + del os.environ["TEST_KEY"] + with patch.object(constants_module, "CREDS", {}): + result = _get_env_or_creds_or_default( + "TEST_KEY", "TEST_KEY", _DEFAULT_MAD_AWS_S3 + ) + assert result == _DEFAULT_MAD_AWS_S3 + + +class TestConstantsPublicAPI: + """Test that the four constants still expose correct shape (smoke).""" + + def test_nas_nodes_is_list_of_dicts(self): + """NAS_NODES is a list of node config dicts.""" + assert isinstance(NAS_NODES, list) + assert len(NAS_NODES) >= 1 + for node in NAS_NODES: + assert isinstance(node, dict) + assert "NAME" in node or "HOST" in node + + def test_mad_aws_s3_has_expected_keys(self): + """MAD_AWS_S3 has USERNAME and PASSWORD.""" + assert isinstance(MAD_AWS_S3, dict) + assert "USERNAME" in MAD_AWS_S3 + assert "PASSWORD" in MAD_AWS_S3 + + def test_mad_minio_has_expected_keys(self): + """MAD_MINIO has USERNAME, PASSWORD, MINIO_ENDPOINT, AWS_ENDPOINT_URL_S3.""" + assert 
isinstance(MAD_MINIO, dict) + assert "USERNAME" in MAD_MINIO + assert "PASSWORD" in MAD_MINIO + assert "MINIO_ENDPOINT" in MAD_MINIO + assert "AWS_ENDPOINT_URL_S3" in MAD_MINIO + + def test_public_github_rocm_key_has_expected_keys(self): + """PUBLIC_GITHUB_ROCM_KEY has username and token (no value assert to avoid leaking secrets).""" + assert isinstance(PUBLIC_GITHUB_ROCM_KEY, dict) + assert set(PUBLIC_GITHUB_ROCM_KEY.keys()) >= {"username", "token"}, ( + "PUBLIC_GITHUB_ROCM_KEY must have at least keys 'username' and 'token'" + ) diff --git a/tests/unit/test_deployment.py b/tests/unit/test_deployment.py new file mode 100644 index 00000000..1f031650 --- /dev/null +++ b/tests/unit/test_deployment.py @@ -0,0 +1,133 @@ +"""Unit tests for deployment: base (create_jinja_env) and common (launcher, rocprof, profiling).""" + +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from madengine.deployment.base import create_jinja_env +from madengine.deployment.common import ( + VALID_LAUNCHERS, + configure_multi_node_profiling, + is_rocprofv3_available, + normalize_launcher, +) + + +# ---- deployment.base (create_jinja_env) ---- + +class TestCreateJinjaEnv: + """Test create_jinja_env helper.""" + + def test_returns_environment_with_dirname_basename_filters(self): + """create_jinja_env returns Environment with dirname and basename filters.""" + with tempfile.TemporaryDirectory() as tmpdir: + p = Path(tmpdir) + (p / "test.j2").write_text("dir={{ path | dirname }} name={{ path | basename }}") + env = create_jinja_env(p) + template = env.get_template("test.j2") + out = template.render(path="/foo/bar/baz.txt") + assert "dir=/foo/bar" in out or "dir=foo/bar" in out + assert "name=baz.txt" in out + + def test_template_dir_must_exist(self): + """create_jinja_env works when template_dir exists.""" + with tempfile.TemporaryDirectory() as tmpdir: + env = create_jinja_env(Path(tmpdir)) + assert env is not None + assert 
env.filters.get("dirname") is not None + assert env.filters.get("basename") is not None + + +# ---- deployment.common ---- + +class TestValidLaunchers: + """VALID_LAUNCHERS constant.""" + + def test_contains_expected_launchers(self): + assert "torchrun" in VALID_LAUNCHERS + assert "vllm" in VALID_LAUNCHERS + assert "sglang-disagg" in VALID_LAUNCHERS + + +class TestNormalizeLauncher: + """normalize_launcher behavior.""" + + def test_valid_launcher_passthrough(self): + for lt in VALID_LAUNCHERS: + assert normalize_launcher(lt, "kubernetes") == lt + assert normalize_launcher(lt, "slurm") == lt + assert normalize_launcher(lt, "local") == lt + + @pytest.mark.parametrize("launcher", [None, "", "invalid"]) + def test_invalid_or_missing_launcher_kubernetes_returns_native(self, launcher): + assert normalize_launcher(launcher, "kubernetes") == "native" + + @pytest.mark.parametrize("deployment", ["slurm", "local", "unknown"]) + def test_invalid_or_missing_launcher_non_k8s_returns_docker(self, deployment): + assert normalize_launcher(None, deployment) == "docker" + + +class TestIsRocprofv3Available: + """is_rocprofv3_available (mocked subprocess).""" + + def test_returns_true_when_help_succeeds(self): + with patch("madengine.deployment.common.subprocess.run") as m: + m.return_value = MagicMock(returncode=0) + assert is_rocprofv3_available() is True + m.assert_called_once() + assert m.call_args[0][0] == ["rocprofv3", "--help"] + + def test_returns_false_when_not_found(self): + with patch("madengine.deployment.common.subprocess.run") as m: + m.side_effect = FileNotFoundError() + assert is_rocprofv3_available() is False + + def test_returns_false_on_timeout(self): + import subprocess + with patch("madengine.deployment.common.subprocess.run") as m: + m.side_effect = subprocess.TimeoutExpired("rocprofv3", 5) + assert is_rocprofv3_available() is False + + +class TestConfigureMultiNodeProfiling: + """configure_multi_node_profiling with mocked is_rocprofv3_available.""" + + def 
test_single_node_returns_tools_unchanged(self): + logger = MagicMock() + tools = [{"name": "rocprof"}] + out = configure_multi_node_profiling(1, tools, logger) + assert out["enabled"] is True + assert out["mode"] == "single_node" + assert out["tools"] is tools + assert out["per_node_collection"] is False + + @patch("madengine.deployment.common.is_rocprofv3_available", return_value=False) + def test_multi_node_no_rocprofv3_returns_disabled(self, _mock_avail): + logger = MagicMock() + out = configure_multi_node_profiling(2, [{"name": "rocprof"}], logger) + assert out["enabled"] is False + assert out["mode"] == "multi_node_unsupported" + assert out["tools"] == [] + logger.warning.assert_called_once() + + @patch("madengine.deployment.common.is_rocprofv3_available", return_value=True) + def test_multi_node_upgrades_rocprof_to_rocprofv3(self, _mock_avail): + logger = MagicMock() + tools = [{"name": "rocprof", "args": ["--trace"]}] + out = configure_multi_node_profiling(2, tools, logger) + assert out["enabled"] is True + assert out["mode"] == "multi_node" + assert out["per_node_collection"] is True + assert len(out["tools"]) == 1 + assert out["tools"][0]["name"] == "rocprofv3" + assert out["tools"][0]["args"] == ["--trace"] + + @patch("madengine.deployment.common.is_rocprofv3_available", return_value=True) + def test_multi_node_other_tools_unchanged(self, _mock_avail): + logger = MagicMock() + tools = [{"name": "rccl_trace"}, {"name": "rocprof"}] + out = configure_multi_node_profiling(2, tools, logger) + assert out["tools"][0]["name"] == "rccl_trace" + assert out["tools"][1]["name"] == "rocprofv3" diff --git a/tests/unit/test_execution.py b/tests/unit/test_execution.py new file mode 100644 index 00000000..a271d937 --- /dev/null +++ b/tests/unit/test_execution.py @@ -0,0 +1,154 @@ +"""Unit tests for execution: container_runner_helpers and dockerfile_utils.""" + +import pytest + +from madengine.execution.container_runner_helpers import ( + make_run_log_file_path, + 
resolve_run_timeout, +) +from madengine.execution.dockerfile_utils import ( + GPU_ARCH_VARIABLES, + is_compilation_arch_compatible, + is_target_arch_compatible_with_variable, + normalize_architecture_name, + parse_dockerfile_gpu_variables, +) + + +# ---- container_runner_helpers ---- + +class TestResolveRunTimeout: + """resolve_run_timeout behavior.""" + + def test_model_timeout_used_when_cli_default(self): + assert resolve_run_timeout({"timeout": 3600}, 7200) == 3600 + assert resolve_run_timeout({"timeout": 100}, 7200) == 100 + + def test_cli_timeout_used_when_explicit(self): + assert resolve_run_timeout({"timeout": 3600}, 6000) == 6000 + assert resolve_run_timeout({"timeout": 3600}, 100) == 100 + + def test_cli_default_returned_when_no_model_timeout(self): + assert resolve_run_timeout({}, 7200) == 7200 + assert resolve_run_timeout({"name": "x"}, 3600) == 3600 + + @pytest.mark.parametrize("model_timeout", [None, 0]) + def test_falsy_model_timeout_ignored_uses_cli(self, model_timeout): + assert resolve_run_timeout({"timeout": model_timeout}, 7200) == 7200 + + def test_custom_default_cli(self): + assert resolve_run_timeout({"timeout": 100}, 5000, default_cli_timeout=5000) == 100 + assert resolve_run_timeout({"timeout": 100}, 7200, default_cli_timeout=5000) == 7200 + + +class TestMakeRunLogFilePath: + """make_run_log_file_path behavior.""" + + def test_basic_format(self): + out = make_run_log_file_path( + {"name": "org/model"}, "ci-org_model_ubuntu.22.04", "", + ) + assert out == "org_model_ubuntu.22.04.live.log" + + def test_phase_suffix_appended(self): + out = make_run_log_file_path({"name": "a/b"}, "ci-a_b_cuda", ".run") + assert out == "a_b_cuda.run.live.log" + + def test_slashes_in_model_name_replaced(self): + out = make_run_log_file_path( + {"name": "foo/bar/baz"}, "ci-foo_bar_baz_ubuntu", "", + ) + assert "/" not in out + assert out.endswith(".live.log") + + def test_image_without_ci_prefix(self): + out = make_run_log_file_path({"name": "x/y"}, 
"registry/x_y_tag", "") + assert "registry_x_y_tag" in out or "x_y" in out + assert out.endswith(".live.log") + + def test_no_model_prefix_in_image(self): + out = make_run_log_file_path( + {"name": "other/model"}, "ci-some_ubuntu_22", "", + ) + assert out == "other_model_some_ubuntu_22.live.log" + + +# ---- dockerfile_utils ---- + +class TestGpuArchVariables: + def test_contains_expected_vars(self): + assert "MAD_SYSTEM_GPU_ARCHITECTURE" in GPU_ARCH_VARIABLES + assert "PYTORCH_ROCM_ARCH" in GPU_ARCH_VARIABLES + assert "GFX_COMPILATION_ARCH" in GPU_ARCH_VARIABLES + + +class TestParseDockerfileGpuVariables: + def test_empty_content(self): + assert parse_dockerfile_gpu_variables("") == {} + assert parse_dockerfile_gpu_variables("FROM ubuntu") == {} + + def test_arg_parsed(self): + content = "ARG PYTORCH_ROCM_ARCH=gfx90a" + out = parse_dockerfile_gpu_variables(content) + assert "PYTORCH_ROCM_ARCH" in out + assert out["PYTORCH_ROCM_ARCH"] == ["gfx90a"] + + def test_multi_arch_semicolon(self): + content = "ARG GPU_TARGETS=gfx90a;gfx908" + out = parse_dockerfile_gpu_variables(content) + assert "GPU_TARGETS" in out + assert set(out["GPU_TARGETS"]) == {"gfx90a", "gfx908"} + + def test_takes_last_definition(self): + content = "ARG PYTORCH_ROCM_ARCH=gfx908\nARG PYTORCH_ROCM_ARCH=gfx90a" + out = parse_dockerfile_gpu_variables(content) + assert out["PYTORCH_ROCM_ARCH"] == ["gfx90a"] + + +class TestNormalizeArchitectureName: + def test_gfx_passthrough(self): + assert normalize_architecture_name("gfx90a") == "gfx90a" + assert normalize_architecture_name("gfx942") == "gfx942" + + def test_mi_aliases(self): + assert normalize_architecture_name("mi100") == "gfx908" + assert normalize_architecture_name("mi-200") == "gfx90a" + assert normalize_architecture_name("mi300x") == "gfx942" + + def test_empty_returns_none(self): + assert normalize_architecture_name("") is None + assert normalize_architecture_name(" ") is None + + +class TestIsTargetArchCompatibleWithVariable: + def 
test_mad_system_always_compatible(self): + assert is_target_arch_compatible_with_variable( + "MAD_SYSTEM_GPU_ARCHITECTURE", ["gfx90a"], "gfx908" + ) is True + + def test_multi_arch_target_in_list(self): + assert is_target_arch_compatible_with_variable( + "PYTORCH_ROCM_ARCH", ["gfx90a", "gfx908"], "gfx90a" + ) is True + assert is_target_arch_compatible_with_variable( + "GPU_TARGETS", ["gfx90a"], "gfx908" + ) is False + + def test_gfx_compilation_exact_match(self): + assert is_target_arch_compatible_with_variable( + "GFX_COMPILATION_ARCH", ["gfx90a"], "gfx90a" + ) is True + + +class TestIsCompilationArchCompatible: + def test_exact_match(self): + assert is_compilation_arch_compatible("gfx90a", "gfx90a") is True + assert is_compilation_arch_compatible("gfx942", "gfx942") is True + + def test_mismatch(self): + assert is_compilation_arch_compatible("gfx90a", "gfx908") is False + assert is_compilation_arch_compatible("gfx908", "gfx90a") is False + + def test_unknown_arch_treated_as_exact(self): + assert is_compilation_arch_compatible("gfx999", "gfx999") is True + assert is_compilation_arch_compatible("gfx999", "gfx90a") is False diff --git a/tests/unit/test_orchestration.py b/tests/unit/test_orchestration.py new file mode 100644 index 00000000..6c82ff9b --- /dev/null +++ b/tests/unit/test_orchestration.py @@ -0,0 +1,179 @@ +"""Unit tests for orchestration: image_filtering and orchestrator init/validation.""" + +import pytest +from unittest.mock import MagicMock, patch + +from madengine.orchestration.image_filtering import ( + filter_images_by_gpu_compatibility, + filter_images_by_skip_gpu_arch, +) +from madengine.orchestration.build_orchestrator import BuildOrchestrator +from madengine.orchestration.run_orchestrator import RunOrchestrator +from madengine.core.errors import ConfigurationError + + +# ---- image_filtering ---- + +class TestFilterImagesByGpuCompatibility: + """filter_images_by_gpu_compatibility behavior.""" + + def test_empty_input(self): + compat, skipped = 
filter_images_by_gpu_compatibility({}, "AMD", "gfx90a") + assert compat == {} + assert skipped == [] + + def test_no_vendor_treated_as_compatible(self): + built = {"m1": {"gpu_vendor": "", "gpu_architecture": ""}} + compat, skipped = filter_images_by_gpu_compatibility(built, "AMD", "gfx90a") + assert compat == {"m1": built["m1"]} + assert skipped == [] + + def test_vendor_match_included_with_or_without_arch(self): + """Vendor match with empty arch or matching arch both include the image.""" + for gpu_arch in ["", "gfx90a"]: + built = {"m1": {"gpu_vendor": "AMD", "gpu_architecture": gpu_arch}} + compat, skipped = filter_images_by_gpu_compatibility(built, "AMD", "gfx90a") + assert compat == {"m1": built["m1"]} + assert skipped == [] + + def test_vendor_match_arch_mismatch_skipped(self): + built = {"m1": {"gpu_vendor": "AMD", "gpu_architecture": "gfx90a"}} + compat, skipped = filter_images_by_gpu_compatibility(built, "AMD", "sm_90") + assert compat == {} + assert len(skipped) == 1 + assert skipped[0][0] == "m1" + assert "architecture mismatch" in skipped[0][1] + + def test_vendor_mismatch_skipped(self): + built = {"m1": {"gpu_vendor": "NVIDIA", "gpu_architecture": "sm_90"}} + compat, skipped = filter_images_by_gpu_compatibility(built, "AMD", "gfx90a") + assert compat == {} + assert len(skipped) == 1 + assert "vendor mismatch" in skipped[0][1] + + def test_none_runtime_vendor_accepts_any_vendor(self): + built = {"m1": {"gpu_vendor": "AMD", "gpu_architecture": "gfx90a"}} + compat, skipped = filter_images_by_gpu_compatibility(built, "NONE", "gfx90a") + assert compat == {"m1": built["m1"]} + assert skipped == [] + + +class TestFilterImagesBySkipGpuArch: + """filter_images_by_skip_gpu_arch behavior.""" + + def test_disable_skip_returns_all(self): + built = {"m1": {}} + models = {"m1": {"skip_gpu_arch": "A100"}} + compat, skipped = filter_images_by_skip_gpu_arch( + built, models, "A100", disable_skip=True + ) + assert compat == built + assert skipped == [] + + def 
test_no_skip_gpu_arch_included(self): + built = {"m1": {"img": "x"}} + models = {"m1": {}} + compat, skipped = filter_images_by_skip_gpu_arch(built, models, "A100") + assert compat == {"m1": built["m1"]} + assert skipped == [] + + def test_skip_gpu_arch_match_skipped(self): + built = {"m1": {"img": "x"}} + models = {"m1": {"skip_gpu_arch": "A100"}} + compat, skipped = filter_images_by_skip_gpu_arch(built, models, "A100") + assert compat == {} + assert len(skipped) == 1 + assert skipped[0] == ("m1", built["m1"], "A100") + + def test_skip_gpu_arch_nvidia_prefix_normalized(self): + built = {"m1": {}} + models = {"m1": {"skip_gpu_arch": "A100"}} + compat, skipped = filter_images_by_skip_gpu_arch( + built, models, "NVIDIA A100" + ) + assert compat == {} + assert skipped[0][2] == "NVIDIA A100" + + def test_skip_gpu_arch_no_match_included(self): + built = {"m1": {}} + models = {"m1": {"skip_gpu_arch": "A100"}} + compat, skipped = filter_images_by_skip_gpu_arch(built, models, "gfx90a") + assert compat == {"m1": built["m1"]} + assert skipped == [] + + +# ---- orchestrator init and validation ---- + +@pytest.mark.unit +class TestBuildOrchestratorInit: + """Test Build Orchestrator initialization.""" + + @patch("madengine.orchestration.build_orchestrator.Context") + @patch("os.path.exists", return_value=False) + def test_initializes_with_minimal_args(self, mock_exists, mock_context): + """Should initialize with minimal arguments.""" + mock_args = MagicMock() + mock_args.additional_context = None + mock_args.live_output = True + + orchestrator = BuildOrchestrator(mock_args) + + assert orchestrator.args == mock_args + assert orchestrator.additional_context == {} + assert orchestrator.credentials is None + + @patch("madengine.orchestration.build_orchestrator.Context") + @patch("os.path.exists", return_value=False) + def test_parses_additional_context_json(self, mock_exists, mock_context): + """Should parse JSON additional context.""" + mock_args = MagicMock() + 
mock_args.additional_context = '{"key": "value"}' + mock_args.live_output = True + + orchestrator = BuildOrchestrator(mock_args) + + assert orchestrator.additional_context == {"key": "value"} + + +@pytest.mark.unit +class TestRunOrchestratorInit: + """Test Run Orchestrator initialization.""" + + @patch("madengine.orchestration.run_orchestrator.Context") + def test_initializes_with_args(self, mock_context): + """Should initialize with provided arguments.""" + mock_args = MagicMock() + mock_args.additional_context = None + mock_args.live_output = True + + orchestrator = RunOrchestrator(mock_args) + + assert orchestrator.args == mock_args + assert orchestrator.additional_context == {} + + def test_parses_deploy_type_from_context(self): + """Should extract deploy type from additional context.""" + mock_args = MagicMock() + mock_args.additional_context = '{"deploy": "slurm"}' + mock_args.live_output = True + + orchestrator = RunOrchestrator(mock_args) + + assert orchestrator.additional_context["deploy"] == "slurm" + + +@pytest.mark.unit +class TestManifestValidation: + """Test manifest validation logic.""" + + @patch("os.path.exists", return_value=False) + def test_run_without_manifest_or_tags_raises_error(self, mock_exists): + """Should raise ConfigurationError without manifest or tags.""" + mock_args = MagicMock() + mock_args.additional_context = None + mock_args.live_output = True + + orchestrator = RunOrchestrator(mock_args) + + with pytest.raises(ConfigurationError): + orchestrator.execute(manifest_file=None, tags=None) diff --git a/tests/unit/test_orchestrator_logic.py b/tests/unit/test_orchestrator_logic.py deleted file mode 100644 index 4f0aaa6d..00000000 --- a/tests/unit/test_orchestrator_logic.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Orchestrator logic unit tests. - -Pure unit tests for orchestrator initialization and logic without external dependencies. - -Copyright (c) Advanced Micro Devices, Inc. All rights reserved. 
-""" - -import pytest -from unittest.mock import MagicMock, mock_open, patch - -from madengine.orchestration.build_orchestrator import BuildOrchestrator -from madengine.orchestration.run_orchestrator import RunOrchestrator -from madengine.core.errors import ConfigurationError - - -@pytest.mark.unit -class TestBuildOrchestratorInit: - """Test Build Orchestrator initialization.""" - - @patch("madengine.orchestration.build_orchestrator.Context") - @patch("os.path.exists", return_value=False) - def test_initializes_with_minimal_args(self, mock_exists, mock_context): - """Should initialize with minimal arguments.""" - mock_args = MagicMock() - mock_args.additional_context = None - mock_args.live_output = True - - orchestrator = BuildOrchestrator(mock_args) - - assert orchestrator.args == mock_args - assert orchestrator.additional_context == {} - assert orchestrator.credentials is None - - @patch("madengine.orchestration.build_orchestrator.Context") - @patch("os.path.exists", return_value=False) - def test_parses_additional_context_json(self, mock_exists, mock_context): - """Should parse JSON additional context.""" - mock_args = MagicMock() - mock_args.additional_context = '{"key": "value"}' - mock_args.live_output = True - - orchestrator = BuildOrchestrator(mock_args) - - assert orchestrator.additional_context == {"key": "value"} - - -@pytest.mark.unit -class TestRunOrchestratorInit: - """Test Run Orchestrator initialization.""" - - @patch("madengine.orchestration.run_orchestrator.Context") - def test_initializes_with_args(self, mock_context): - """Should initialize with provided arguments.""" - mock_args = MagicMock() - mock_args.additional_context = None - mock_args.live_output = True - - orchestrator = RunOrchestrator(mock_args) - - assert orchestrator.args == mock_args - assert orchestrator.additional_context == {} - - def test_parses_deploy_type_from_context(self): - """Should extract deploy type from additional context.""" - mock_args = MagicMock() - 
mock_args.additional_context = '{"deploy": "slurm"}' - mock_args.live_output = True - - orchestrator = RunOrchestrator(mock_args) - - assert orchestrator.additional_context["deploy"] == "slurm" - - -@pytest.mark.unit -class TestManifestValidation: - """Test manifest validation logic.""" - - @patch("os.path.exists", return_value=False) - def test_run_without_manifest_or_tags_raises_error(self, mock_exists): - """Should raise ConfigurationError without manifest or tags.""" - mock_args = MagicMock() - mock_args.additional_context = None - mock_args.live_output = True - - orchestrator = RunOrchestrator(mock_args) - - with pytest.raises(ConfigurationError): - orchestrator.execute(manifest_file=None, tags=None) - - -# Total: 5 unit tests diff --git a/tests/unit/test_rocm_path.py b/tests/unit/test_rocm_path.py index f33916eb..c0e3dbd9 100644 --- a/tests/unit/test_rocm_path.py +++ b/tests/unit/test_rocm_path.py @@ -20,10 +20,14 @@ def test_get_rocm_path_default(self, monkeypatch): path = get_rocm_path(None) assert path == "/opt/rocm" - def test_get_rocm_path_override(self): - """Override argument takes precedence.""" + def test_get_rocm_path_override(self, monkeypatch): + """Override argument takes precedence over env.""" path = get_rocm_path("/custom/rocm") assert path == os.path.abspath("/custom/rocm").rstrip(os.sep) + monkeypatch.setenv("ROCM_PATH", "/env/rocm") + path_with_env = get_rocm_path("/cli/rocm") + assert path_with_env == os.path.abspath("/cli/rocm").rstrip(os.sep) + monkeypatch.delenv("ROCM_PATH", raising=False) def test_get_rocm_path_env(self, monkeypatch): """ROCM_PATH env is used when override is None.""" @@ -34,15 +38,6 @@ def test_get_rocm_path_env(self, monkeypatch): finally: monkeypatch.delenv("ROCM_PATH", raising=False) - def test_get_rocm_path_override_overrides_env(self, monkeypatch): - """Override takes precedence over ROCM_PATH env.""" - monkeypatch.setenv("ROCM_PATH", "/env/rocm") - try: - path = get_rocm_path("/cli/rocm") - assert path == 
os.path.abspath("/cli/rocm").rstrip(os.sep) - finally: - monkeypatch.delenv("ROCM_PATH", raising=False) - @pytest.mark.unit class TestContextRocmPath: diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py new file mode 100644 index 00000000..2e09b8c3 --- /dev/null +++ b/tests/unit/test_utils.py @@ -0,0 +1,93 @@ +"""Unit tests for utils: path_utils and run_details.""" + +import os +from pathlib import Path +from unittest.mock import patch + +import pytest + +from madengine.utils.path_utils import scripts_base_dir_from, get_madengine_root +from madengine.utils.run_details import ( + get_pipeline, + get_build_number, + flatten_tags_in_place, +) + + +# ---- path_utils ---- + +class TestScriptsBaseDirFrom: + """Test scripts_base_dir_from helper.""" + + @pytest.mark.parametrize("value", [None, "", " "]) + def test_falsy_input_returns_none(self, value): + """None, empty string, or whitespace return None.""" + assert scripts_base_dir_from(value) is None + + def test_file_path_returns_parent_dir(self): + """Path to a file returns its parent directory.""" + assert scripts_base_dir_from("/foo/bar/script.sh") == "/foo/bar" + assert scripts_base_dir_from("scripts/run.sh") == "scripts" + + def test_dir_path_returns_parent_of_dir(self): + """Path to a directory returns the parent of that directory.""" + assert scripts_base_dir_from("/foo/bar/baz") == "/foo/bar" + + def test_single_segment_returns_empty_string(self): + """Single segment (no slash) returns empty string from dirname.""" + assert scripts_base_dir_from("script.sh") == "" + + +class TestGetMadengineRoot: + """Test get_madengine_root helper.""" + + def test_returns_madengine_package_path(self): + """Returns a Path that exists, is a directory, and ends with 'madengine'.""" + root = get_madengine_root() + assert isinstance(root, Path) + assert root.name == "madengine" + assert root.exists() and root.is_dir() + + +# ---- run_details ---- + +class TestGetPipeline: + @pytest.mark.parametrize("env_val,expected", 
[({}, ""), ({"pipeline": "ci-mad"}, "ci-mad")]) + def test_pipeline_from_env_or_default(self, env_val, expected): + with patch.dict(os.environ, env_val, clear=False): + if not env_val and "pipeline" in os.environ: + del os.environ["pipeline"] + assert get_pipeline() == expected + + +class TestGetBuildNumber: + @pytest.mark.parametrize("env_val,expected", [({}, "0"), ({"BUILD_NUMBER": "42"}, "42")]) + def test_build_number_from_env_or_default(self, env_val, expected): + with patch.dict(os.environ, env_val, clear=False): + if not env_val and "BUILD_NUMBER" in os.environ: + del os.environ["BUILD_NUMBER"] + assert get_build_number() == expected + + +class TestFlattenTagsInPlace: + def test_list_converted_to_comma_string(self): + record = {"tags": ["a", "b", "c"], "other": 1} + flatten_tags_in_place(record) + assert record["tags"] == "a,b,c" + assert record["other"] == 1 + + def test_non_list_unchanged(self): + record = {"tags": "already,a,string"} + flatten_tags_in_place(record) + assert record["tags"] == "already,a,string" + + def test_missing_tags_unchanged(self): + record = {"model": "m1"} + flatten_tags_in_place(record) + assert "tags" not in record + assert record["model"] == "m1" + + def test_empty_list_converted_to_empty_string(self): + record = {"tags": []} + flatten_tags_in_place(record) + assert record["tags"] == ""