Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions cluster.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Per-task resource ceilings; process.resourceLimits in this file reads these values.
params.max_cpus   = 128
params.max_memory = 3000.GB
params.max_time   = 7.d

// Run tasks inside Apptainer containers. The bind-mount argument is taken from
// APPTAINER_BINDPATH in the launching environment, falling back to /scratch when
// that variable is unset or empty.
apptainer.enabled    = true
apptainer.runOptions = "-B ${System.getenv('APPTAINER_BINDPATH') ?: '/scratch'}"

process {
    // Caps on what any single task may request, sourced from params.max_*.
    resourceLimits = [
        memory: params.max_memory,
        cpus: params.max_cpus,
        time: params.max_time
    ]

    // Submit every task as a SLURM job.
    executor = 'slurm'

    // Extra flags passed to the scheduler at submission time.
    // SLURM_JOB_ACCOUNT is only present when Nextflow itself runs inside a SLURM
    // allocation. When it is missing, the --account flag is omitted so the
    // scheduler falls back to the user's default account instead of receiving
    // the literal string "--account=null" and rejecting the job.
    clusterOptions = {
        def account = System.getenv('SLURM_JOB_ACCOUNT')
        account ? "--account=${account} --export=NONE" : '--export=NONE'
    }

    // Retry only for exit statuses that indicate the scheduler killed the task
    // (e.g. out-of-memory or time limit); anything else lets running tasks
    // finish and then stops the pipeline.
    //   137 = 128 + SIGKILL, 143 = 128 + SIGTERM, 134 = 128 + SIGABRT,
    //   139 = 128 + SIGSEGV, 140 = 128 + SIGUSR2.
    // NOTE(review): 140 was added because SLURM time-limit kills commonly
    // surface with this status in Nextflow — confirm on this cluster.
    errorStrategy = { task.exitStatus in [143, 137, 104, 134, 139, 140] ? 'retry' : 'finish' }
    maxRetries = 3

    // Tolerates minor timestamp differences on shared filesystems.
    cache = 'lenient'
}

executor {
    // --- Submission throttling: keep load on the scheduler bounded ---
    queueSize       = 500
    submitRateLimit = '3 sec'   // at most 3 submissions per second (180 per minute)

    // --- Status polling: fewer queries means less squeue and shared-FS traffic ---
    pollInterval      = '10 sec'   // how often running jobs are checked
    queueStatInterval = '2 min'    // how often the queue status is refreshed

    // --- SLURM query optimisations (version-dependent; expected to be ignored
    //     by releases that do not recognise them) ---
    queueGlobalStatus = true   // query jobs globally rather than per partition (23.01.0+)
    onlyJobState      = true   // use --only-job-state for faster queries (25.12.0+, needs SLURM 24.05+)

    // Allow time for the exit file to appear when file sync lags across storage
    // nodes, so slow propagation is not reported as a task failure.
    exitReadTimeout = '10 min'

    // --- Retrying failed job submissions, with exponential backoff ---
    // NOTE(review): verify the exact key name (`maxAttempts` vs `maxAttempt`)
    // against the executor config reference for the deployed Nextflow version.
    retry.maxAttempts = 5         // give up after 5 attempts
    retry.delay       = '5 sec'   // wait before retrying a failed submission
    retry.maxDelay    = '1 min'   // upper bound on the backoff delay
    retry.jitter      = 0.25      // ±25% randomness to avoid retry storms
}
1 change: 1 addition & 0 deletions launch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ log_dir="/scratch/gencore/GENEFLOW/alpha/logs/${fcid}/pipeline"

nextflow_command="nextflow \
-log ${log_dir}/nextflow.log run /home/gencore/SCRIPTS/GENEFLOW/main.nf \
-c /home/gencore/SCRIPTS/GENEFLOW/cluster.config \
-c /home/gencore/SCRIPTS/GENEFLOW/nextflow.config \
--run_dir_path $run_dir_path \
--trace_file_path ${log_dir}/trace.txt \
Expand Down