diff --git a/conf/modules/bedtools_intersect.config b/conf/modules/bedtools_intersect.config index 629d072b..ee8287e1 100644 --- a/conf/modules/bedtools_intersect.config +++ b/conf/modules/bedtools_intersect.config @@ -3,12 +3,5 @@ process { ext.args = '' ext.prefix = { "${intervals1.baseName}" } ext.suffix = 'targeted.bedGraph' - publishDir = [ - [ - path: { params.aligner == 'bismark' ? "${params.outdir}/bismark/methylation_calls/bedGraph" : "${params.outdir}/methyldackel" }, - mode: params.publish_dir_mode, - pattern: "*targeted.bedGraph" - ] - ] } } diff --git a/conf/modules/bismark_align.config b/conf/modules/bismark_align.config index 5bd12ffe..c3b0ce46 100644 --- a/conf/modules/bismark_align.config +++ b/conf/modules/bismark_align.config @@ -15,24 +15,5 @@ process { ) ) ].join(' ').trim() } - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/alignments/unmapped" }, - mode: params.publish_dir_mode, - pattern: "*.fq.gz", - enabled: params.unmapped - ], - [ - path: { "${params.outdir}/${params.aligner}/alignments" }, - mode: params.publish_dir_mode, - pattern: "*.bam", - enabled: (params.save_align_intermeds || params.skip_deduplication || params.rrbs) - ], - [ - path: { "${params.outdir}/${params.aligner}/alignments/logs" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] - ] } } diff --git a/conf/modules/bismark_coverage2cytosine.config b/conf/modules/bismark_coverage2cytosine.config index 8737c561..500f43c6 100644 --- a/conf/modules/bismark_coverage2cytosine.config +++ b/conf/modules/bismark_coverage2cytosine.config @@ -1,22 +1,5 @@ process { withName: BISMARK_COVERAGE2CYTOSINE { ext.args = params.nomeseq ? 
'--nome-seq' : '' - publishDir = [ - [ - path: { "${params.outdir}/bismark/coverage2cytosine/summaries" }, - mode: params.publish_dir_mode, - pattern: "*_summary.txt" - ], - [ - path: { "${params.outdir}/bismark/coverage2cytosine/reports" }, - mode: params.publish_dir_mode, - pattern: "*_report.txt.gz" - ], - [ - path: { "${params.outdir}/bismark/coverage2cytosine/coverage" }, - mode: params.publish_dir_mode, - pattern: "*cov.gz" - ] - ] } } diff --git a/conf/modules/bismark_deduplicate.config b/conf/modules/bismark_deduplicate.config index be770f44..3bde9e1c 100644 --- a/conf/modules/bismark_deduplicate.config +++ b/conf/modules/bismark_deduplicate.config @@ -1,12 +1,5 @@ process { withName: BISMARK_DEDUPLICATE { ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/deduplicated/logs" }, - mode: params.publish_dir_mode, - pattern: "*.txt" - ] - ] } } diff --git a/conf/modules/bismark_genomepreparation.config b/conf/modules/bismark_genomepreparation.config index af001049..5a618c09 100644 --- a/conf/modules/bismark_genomepreparation.config +++ b/conf/modules/bismark_genomepreparation.config @@ -4,11 +4,5 @@ process { (params.aligner == 'bismark_hisat') ? ' --hisat2' : ' --bowtie2', params.slamseq ? ' --slam' : '' ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/${params.aligner}/reference_genome" }, - saveAs: { it =~ /.*\.yml/ ? null : it }, - mode: params.publish_dir_mode, - enabled: params.save_reference - ] } } diff --git a/conf/modules/bismark_methylationextractor.config b/conf/modules/bismark_methylationextractor.config index 5f57a403..43625a8f 100644 --- a/conf/modules/bismark_methylationextractor.config +++ b/conf/modules/bismark_methylationextractor.config @@ -10,32 +10,5 @@ process { meta.single_end ? '' : (params.ignore_r2 > 0 ? "--ignore_r2 ${params.ignore_r2}" : ""), meta.single_end ? '' : (params.ignore_3prime_r2 > 0 ? 
"--ignore_3prime_r2 ${params.ignore_3prime_r2}": "") ].join(' ').trim() } - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/methylation_calls/mbias" }, - mode: params.publish_dir_mode, - pattern: "*M-bias.txt" - ], - [ - path: { "${params.outdir}/${params.aligner}/methylation_calls/methylation_coverage" }, - mode: params.publish_dir_mode, - pattern: "*cov.gz" - ], - [ - path: { "${params.outdir}/${params.aligner}/methylation_calls/bedGraph" }, - mode: params.publish_dir_mode, - pattern: "*bedGraph.gz" - ], - [ - path: { "${params.outdir}/${params.aligner}/methylation_calls/splitting_report" }, - mode: params.publish_dir_mode, - pattern: "*splitting_report.txt" - ], - [ - path: { "${params.outdir}/${params.aligner}/methylation_calls/methylation_calls" }, - mode: params.publish_dir_mode, - pattern: "*txt.gz" - ] - ] } } diff --git a/conf/modules/bismark_report.config b/conf/modules/bismark_report.config index 27e98821..74667a41 100644 --- a/conf/modules/bismark_report.config +++ b/conf/modules/bismark_report.config @@ -1,10 +1,5 @@ process { withName: BISMARK_REPORT { ext.args = '' - publishDir = [ - path: "${params.outdir}/${params.aligner}/reports", - mode: params.publish_dir_mode, - pattern: "*.html" - ] } } diff --git a/conf/modules/bismark_summary.config b/conf/modules/bismark_summary.config index 92a5d4c0..4cdfe69b 100644 --- a/conf/modules/bismark_summary.config +++ b/conf/modules/bismark_summary.config @@ -1,10 +1,5 @@ process { withName: BISMARK_SUMMARY { ext.args = '' - publishDir = [ - path: "${params.outdir}/${params.aligner}/summary", - mode: params.publish_dir_mode, - pattern: "*.{html,txt}" - ] } } diff --git a/conf/modules/bwameth_align.config b/conf/modules/bwameth_align.config index bc4ab758..3467d4a8 100644 --- a/conf/modules/bwameth_align.config +++ b/conf/modules/bwameth_align.config @@ -2,11 +2,5 @@ process { withName: BWAMETH_ALIGN { cache = 'lenient' // This is set because in the module command the index files are touched so 
as to have bwameth not complain ext.args = '' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/alignments" }, - pattern: "*.bam", - mode: params.publish_dir_mode, - enabled: params.save_align_intermeds - ] } } diff --git a/conf/modules/bwameth_index.config b/conf/modules/bwameth_index.config index 74559b06..b5883cdc 100644 --- a/conf/modules/bwameth_index.config +++ b/conf/modules/bwameth_index.config @@ -1,11 +1,5 @@ process { withName: BWAMETH_INDEX { ext.args = '' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/reference_genome" }, - mode: params.publish_dir_mode, - saveAs: { it.equals('versions.yml') ? null : it.tokenize("/").last() }, - enabled: params.save_reference - ] } } diff --git a/conf/modules/fastqc.config b/conf/modules/fastqc.config index f611d032..64bd5879 100644 --- a/conf/modules/fastqc.config +++ b/conf/modules/fastqc.config @@ -1,17 +1,5 @@ process { withName: FASTQC { ext.args = '--quiet' - publishDir = [ - [ - path: { "${params.outdir}/fastqc" }, - mode: params.publish_dir_mode, - pattern: "*.html" - ], - [ - path: { "${params.outdir}/fastqc/zips" }, - mode: params.publish_dir_mode, - pattern: "*.zip" - ] - ] } } diff --git a/conf/modules/gunzip.config b/conf/modules/gunzip.config deleted file mode 100644 index 3a03a67b..00000000 --- a/conf/modules/gunzip.config +++ /dev/null @@ -1,12 +0,0 @@ -process { - withName: GUNZIP { - publishDir = [ - [ - path: { "${params.outdir}/gunzip/" }, - mode: params.publish_dir_mode, - pattern: "*.{fa,fasta}", - enabled: false - ] - ] - } -} diff --git a/conf/modules/methyldackel_extract.config b/conf/modules/methyldackel_extract.config index 651fecc0..3e045e05 100644 --- a/conf/modules/methyldackel_extract.config +++ b/conf/modules/methyldackel_extract.config @@ -7,17 +7,5 @@ process { params.methyl_kit ? " --methylKit" : '', params.min_depth > 0 ? 
" --minDepth ${params.min_depth}" : '' ].join(" ").trim() - publishDir = [ - [ - path: { "${params.outdir}/methyldackel" }, - mode: params.publish_dir_mode, - pattern: "*.bedGraph" - ], - [ - path: { "${params.outdir}/methyldackel" }, - mode: params.publish_dir_mode, - pattern: "*.methylKit" - ] - ] } } diff --git a/conf/modules/methyldackel_mbias.config b/conf/modules/methyldackel_mbias.config index b16e38b3..24a5547d 100644 --- a/conf/modules/methyldackel_mbias.config +++ b/conf/modules/methyldackel_mbias.config @@ -4,12 +4,5 @@ process { params.all_contexts ? ' --CHG --CHH' : '', params.ignore_flags ? " --ignoreFlags" : '' ].join(" ").trim() - publishDir = [ - [ - path: { "${params.outdir}/methyldackel/mbias" }, - mode: params.publish_dir_mode, - pattern: "*mbias.txt" - ] - ] } } diff --git a/conf/modules/multiqc.config b/conf/modules/multiqc.config index 3334a8ec..a1ba09bf 100644 --- a/conf/modules/multiqc.config +++ b/conf/modules/multiqc.config @@ -1,10 +1,5 @@ process { withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc/${params.aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] } } diff --git a/conf/modules/parabricks_fq2bammeth.config b/conf/modules/parabricks_fq2bammeth.config index 71e2596a..40127294 100644 --- a/conf/modules/parabricks_fq2bammeth.config +++ b/conf/modules/parabricks_fq2bammeth.config @@ -2,11 +2,5 @@ process { withName: PARABRICKS_FQ2BAMMETH { cache = 'lenient' // This is set because in the module command the index files are touched so as to have bwameth not complain ext.args = '--low-memory' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/alignments" }, - pattern: "*.bam", - mode: params.publish_dir_mode, - enabled: params.save_align_intermeds - ] } } diff --git a/conf/modules/picard_bedtointervallist.config b/conf/modules/picard_bedtointervallist.config index a9528349..1728bc93 100644 --- a/conf/modules/picard_bedtointervallist.config +++ b/conf/modules/picard_bedtointervallist.config @@ -1,10 +1,5 @@ process { withName: PICARD_BEDTOINTERVALLIST { ext.args = "" - publishDir = [ - path: { "${params.outdir}/enrichment_metrics" }, - mode: params.publish_dir_mode, - pattern: "*.intervallist" - ] } } diff --git a/conf/modules/picard_collecthsmetrics.config b/conf/modules/picard_collecthsmetrics.config index 7dbcac6b..4f02cfa0 100644 --- a/conf/modules/picard_collecthsmetrics.config +++ b/conf/modules/picard_collecthsmetrics.config @@ -1,10 +1,5 @@ process { withName: PICARD_COLLECTHSMETRICS { ext.args = "--MINIMUM_MAPPING_QUALITY 20 --COVERAGE_CAP 1000 --NEAR_DISTANCE 500" - publishDir = [ - path: { "${params.outdir}/enrichment_metrics" }, - mode: params.publish_dir_mode, - pattern: "*_metrics" - ] } } diff --git a/conf/modules/picard_createsequencedictionary.config b/conf/modules/picard_createsequencedictionary.config index 4c8990a6..9e3a913c 100644 --- a/conf/modules/picard_createsequencedictionary.config +++ b/conf/modules/picard_createsequencedictionary.config @@ -1,11 +1,5 @@ process { withName: PICARD_CREATESEQUENCEDICTIONARY { ext.args = "" - publishDir = [ - path: { 
"${params.outdir}/${params.aligner}/reference_genome" }, - mode: params.publish_dir_mode, - enabled: params.save_reference, - pattern: "*.dict" - ] } } diff --git a/conf/modules/picard_markduplicates.config b/conf/modules/picard_markduplicates.config index ea2368dc..ca936e66 100644 --- a/conf/modules/picard_markduplicates.config +++ b/conf/modules/picard_markduplicates.config @@ -2,17 +2,5 @@ process { withName: PICARD_MARKDUPLICATES { ext.args = "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --PROGRAM_RECORD_ID 'null' --TMP_DIR tmp" ext.prefix = { "${meta.id}.markdup.sorted" } - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/deduplicated/picard_metrics" }, - pattern: "*.metrics.txt", - mode: params.publish_dir_mode - ], - [ - path: { "${params.outdir}/${params.aligner}/deduplicated" }, - pattern: "*.bam", - mode: params.publish_dir_mode - ] - ] } } diff --git a/conf/modules/preseq_lcextrap.config b/conf/modules/preseq_lcextrap.config index efc2a888..454037c0 100644 --- a/conf/modules/preseq_lcextrap.config +++ b/conf/modules/preseq_lcextrap.config @@ -1,19 +1,5 @@ process { withName: PRESEQ_LCEXTRAP { ext.args = " -verbose -bam" - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/preseq" }, - mode: params.publish_dir_mode, - pattern: "*.txt", - enabled: params.run_preseq - ], - [ - path: { "${params.outdir}/${params.aligner}/preseq/log" }, - mode: params.publish_dir_mode, - pattern: "*.log", - enabled: params.run_preseq - ] - ] } } diff --git a/conf/modules/qualimap_bamqc.config b/conf/modules/qualimap_bamqc.config index 2cc87871..ad9688ba 100644 --- a/conf/modules/qualimap_bamqc.config +++ b/conf/modules/qualimap_bamqc.config @@ -4,13 +4,5 @@ process { params.genome.toString().startsWith('GRCh') ? '-gd HUMAN' : '', params.genome.toString().startsWith('GRCm') ? 
'-gd MOUSE' : '' ].join(" ").trim() - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/qualimap/bamqc/" }, - mode: params.publish_dir_mode, - pattern: "*", - enabled: params.run_qualimap - ] - ] } } diff --git a/conf/modules/samtools_faidx.config b/conf/modules/samtools_faidx.config index 26bb23f9..c4d1dce3 100644 --- a/conf/modules/samtools_faidx.config +++ b/conf/modules/samtools_faidx.config @@ -1,11 +1,5 @@ process { withName: SAMTOOLS_FAIDX { ext.args = "" - publishDir = [ - path: { "${params.outdir}/${params.aligner}/reference_genome" }, - mode: params.publish_dir_mode, - enabled: params.save_reference, - pattern: "*.fai" - ] } } diff --git a/conf/modules/samtools_flagstat.config b/conf/modules/samtools_flagstat.config index 4b79875c..98361571 100644 --- a/conf/modules/samtools_flagstat.config +++ b/conf/modules/samtools_flagstat.config @@ -1,11 +1,4 @@ process { withName: SAMTOOLS_FLAGSTAT { - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/alignments/samtools_stats/" }, - mode: params.publish_dir_mode, - pattern: "*.flagstat" - ] - ] } } diff --git a/conf/modules/samtools_index.config b/conf/modules/samtools_index.config index 76cf0fd7..dd0f6f90 100644 --- a/conf/modules/samtools_index.config +++ b/conf/modules/samtools_index.config @@ -1,19 +1,5 @@ process { withName: SAMTOOLS_INDEX { ext.prefix = "" - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/deduplicated/" }, - mode: params.publish_dir_mode, - pattern: "*.bai", - enabled: !params.skip_deduplication - ], - [ - path: { "${params.outdir}/${params.aligner}/alignments/" }, - mode: params.publish_dir_mode, - pattern: "*.bai", - enabled: params.skip_deduplication - ] - ] } } diff --git a/conf/modules/samtools_sort.config b/conf/modules/samtools_sort.config index 95c78bd0..7a04319e 100644 --- a/conf/modules/samtools_sort.config +++ b/conf/modules/samtools_sort.config @@ -1,30 +1,5 @@ process { withName: SAMTOOLS_SORT { - ext.prefix = 
params.skip_deduplication ? { "${meta.id}.sorted" } : { "${meta.id}.deduplicated.sorted" } - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/deduplicated/" }, - mode: params.publish_dir_mode, - pattern: "*.deduplicated.sorted.bam" - ], - [ - path: { "${params.outdir}/${params.aligner}/alignments/" }, - mode: params.publish_dir_mode, - pattern: "*.sorted.bam", - enabled: params.skip_deduplication - ], - [ - path: { "${params.outdir}/${params.aligner}/deduplicated/" }, - mode: params.publish_dir_mode, - pattern: "*markdup*.bam", - enabled: params.save_align_intermeds - ], - [ - path: { "${params.outdir}/${params.aligner}/alignments/" }, - mode: params.publish_dir_mode, - pattern: "*.bam", - enabled: params.save_align_intermeds - ] - ] + ext.prefix = { params.skip_deduplication ? "${meta.id}.sorted" : "${meta.id}.deduplicated.sorted" } } } diff --git a/conf/modules/samtools_stats.config b/conf/modules/samtools_stats.config deleted file mode 100644 index 7574f62f..00000000 --- a/conf/modules/samtools_stats.config +++ /dev/null @@ -1,11 +0,0 @@ -process { - withName: SAMTOOLS_STATS { - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/alignments/samtools_stats/" }, - mode: params.publish_dir_mode, - pattern: "*.stats" - ] - ] - } -} diff --git a/conf/modules/trimgalore.config b/conf/modules/trimgalore.config index 465f4c08..9628c584 100644 --- a/conf/modules/trimgalore.config +++ b/conf/modules/trimgalore.config @@ -59,28 +59,5 @@ process { ) ), ].join(' ').trim() } - publishDir = [ - [ - path: { "${params.outdir}/trimgalore/fastqc" }, - mode: params.publish_dir_mode, - pattern: "*.html" - ], - [ - path: { "${params.outdir}/trimgalore/fastqc/zips" }, - mode: params.publish_dir_mode, - pattern: "*.zip" - ], - [ - path: { "${params.outdir}/trimgalore" }, - mode: params.publish_dir_mode, - pattern: "*.fq.gz", - enabled: params.save_trimmed - ], - [ - path: { "${params.outdir}/trimgalore/logs" }, - mode: params.publish_dir_mode, - pattern: 
"*.txt" - ] - ] } } diff --git a/conf/subworkflows/fasta_index_bismark_bwameth.config b/conf/subworkflows/fasta_index_bismark_bwameth.config index b170b48d..5c79b1b8 100644 --- a/conf/subworkflows/fasta_index_bismark_bwameth.config +++ b/conf/subworkflows/fasta_index_bismark_bwameth.config @@ -1,4 +1,3 @@ -includeConfig "../modules/gunzip.config" includeConfig "../modules/bismark_genomepreparation.config" includeConfig "../modules/samtools_faidx.config" includeConfig "../modules/bwameth_index.config" diff --git a/conf/subworkflows/fastq_align_dedup_bwameth.config b/conf/subworkflows/fastq_align_dedup_bwameth.config index fec290d8..3550bfc6 100644 --- a/conf/subworkflows/fastq_align_dedup_bwameth.config +++ b/conf/subworkflows/fastq_align_dedup_bwameth.config @@ -3,7 +3,6 @@ includeConfig "../modules/bwameth_align.config" includeConfig "../modules/parabricks_fq2bammeth.config" includeConfig "../modules/samtools_sort.config" includeConfig "../modules/samtools_flagstat.config" -includeConfig "../modules/samtools_stats.config" includeConfig "../modules/picard_markduplicates.config" includeConfig "../modules/methyldackel_extract.config" includeConfig "../modules/methyldackel_mbias.config" @@ -12,25 +11,10 @@ process { withName: SAMTOOLS_INDEX_ALIGNMENTS { ext.args = "" - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/alignments/" }, - mode: params.publish_dir_mode, - pattern: "*.bam.bai", - enabled: params.save_align_intermeds - ] - ] } withName: SAMTOOLS_INDEX_DEDUPLICATED { ext.args = "" - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/deduplicated/" }, - mode: params.publish_dir_mode, - pattern: "*.bam.bai" - ] - ] } } diff --git a/main.nf b/main.nf index 85bb5773..81fd4bc3 100644 --- a/main.nf +++ b/main.nf @@ -15,21 +15,260 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +nextflow.preview.types = true + include { FASTA_INDEX_BISMARK_BWAMETH } from 
'./subworkflows/nf-core/fasta_index_bismark_bwameth/main' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_methylseq_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_methylseq_pipeline' include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_methylseq_pipeline' include { METHYLSEQ } from './workflows/methylseq/' +include { Sample } from './utils/types.nf' +include { MethylseqParams } from './workflows/methylseq/' +include { MethylseqResult } from './workflows/methylseq/' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = getGenomeAttribute('fasta') -params.fasta_index = getGenomeAttribute('fasta_index') -params.bwameth_index = getGenomeAttribute('bwameth') -params.bismark_index = params.aligner == 'bismark_hisat' ? getGenomeAttribute('bismark_hisat2') : getGenomeAttribute('bismark') + +params { + + // Path to comma-separated file containing information about the samples in the experiment. + input: String + + // The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. + outdir: String + + /// MultiQC options + + // Custom config file to supply to MultiQC. + multiqc_config: String? + + // MultiQC report title. Printed as page header, used for filename if not otherwise specified. + multiqc_title: String? + + // Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file + multiqc_logo: String? + + // File size limit when attaching MultiQC reports to summary emails. + max_multiqc_email_size: String = '25.MB' + + // Custom MultiQC yaml file containing HTML including a methods description. + multiqc_methods_description: String? 
+ + /// Intermediate files + + // Save reference(s) to results directory + save_reference: Boolean + + // Save aligned intermediates to results directory + save_align_intermeds: Boolean + + // Bismark only - Save unmapped reads to FastQ files + unmapped: Boolean + + // Save trimmed reads to results directory. + save_trimmed: Boolean + + /// Reference options + + // Name of iGenomes reference. + genome: String? + + // Path to FASTA genome file + fasta: Path? + + // Path to Fasta index file. + fasta_index: Path? + + // Path to a directory containing a Bismark reference index. + bismark_index: Path? + + // bwameth index filename base + bwameth_index: Path? + + /// Alignment options + + // Alignment tool to use. + aligner: String = 'bismark' + + // Use BWA-MEM2 algorithm for BWA-Meth indexing and alignment. + use_mem2: Boolean + + /// Library presets + + // Preset for working with PBAT libraries. + pbat: Boolean + + // Turn on if dealing with MspI digested material. + rrbs: Boolean + + // Run bismark in SLAM-seq mode. + slamseq: Boolean + + // Preset for EM-seq libraries. + em_seq: Boolean + + // Trimming preset for single-cell bisulfite libraries. + single_cell: Boolean + + // Trimming preset for the Accel kit. + accel: Boolean + + // Trimming preset for the Zymo kit. + zymo: Boolean + + /// Trimming options + + // Trim bases from the 5' end of read 1 (or single-end reads). + clip_r1: Integer = 0 + + // Trim bases from the 5' end of read 2 (paired-end only). + clip_r2: Integer = 0 + + // Trim bases from the 3' end of read 1 AFTER adapter/quality trimming. + three_prime_clip_r1: Integer = 0 + + // Trim bases from the 3' end of read 2 AFTER adapter/quality trimming + three_prime_clip_r2: Integer = 0 + + // Trim bases below this quality value from the 3' end of the read, ignoring high-quality G bases + nextseq_trim: Integer = 0 + + // Discard reads that become shorter than INT because of either quality or adapter trimming. + length_trim: Integer? 
+ + // Skip presetting trimming parameters entirely + skip_trimming_presets: Boolean + + /// Bismark options + + // Run alignment against all four possible strands. + non_directional: Boolean + + // Output stranded cytosine report, following Bismark's bismark_methylation_extractor step. + cytosine_report: Boolean + + // Turn on to relax stringency for alignment (set allowed penalty with --num_mismatches). + relax_mismatches: Boolean + + // 0.6 will allow a penalty of bp * -0.6, i.e. -60 for 100bp reads (bismark default is 0.2) + num_mismatches: Float = 0.6 + + // Specify a minimum read coverage to report a methylation call + meth_cutoff: Integer? + + // Ignore read 2 methylation when it overlaps read 1 + no_overlap: Boolean = true + + // Ignore methylation in first n bases of 5' end of R1 + ignore_r1: Integer = 0 + + // Ignore methylation in first n bases of 5' end of R2 + ignore_r2: Integer = 2 + + // Ignore methylation in last n bases of 3' end of R1 + ignore_3prime_r1: Integer = 0 + + // Ignore methylation in last n bases of 3' end of R2 + ignore_3prime_r2: Integer = 2 + + // Supply a .gtf file containing known splice sites (bismark_hisat only). + known_splices: String? + + // Allow soft-clipping of reads (potentially useful for single-cell experiments). + local_alignment: Boolean + + // The minimum insert size for valid paired-end alignments. + minins: Integer? + + // The maximum insert size for valid paired-end alignments. + maxins: Integer? + + // Sample is NOMe-seq or NMT-seq. Runs coverage2cytosine. + nomeseq: Boolean + + // Merges methylation calls for every strand into a single, context dependent file. + comprehensive: Boolean + + /// bwa-meth options + + // Call methylation in all three CpG, CHG and CHH contexts. + all_contexts: Boolean + + // Merges methylation metrics of the Cytosines in a given context. + merge_context: Boolean + + // Specify a minimum read coverage for MethylDackel to report a methylation call.
+ min_depth: Integer = 0 + + // MethylDackel - ignore SAM flags + ignore_flags: Boolean + + // Save files for use with methylKit + methyl_kit: Boolean + + /// Qualimap options + + // A GFF or BED file containing the target regions which will be passed to Qualimap/Bamqc. + bamqc_regions_file: String? + + /// Targeted sequencing options + + // A BED file containing the target regions + target_regions_file: String? + + // Run Picard CollectHsMetrics in the targeted analysis + collecthsmetrics: Boolean + + /// Skipping options + + // Skip read trimming. + skip_trimming: Boolean + + // Skip deduplication step after alignment. + skip_deduplication: Boolean + + // Skip FastQC + skip_fastqc: Boolean + + // Skip MultiQC + skip_multiqc: Boolean + + /// Run options + + // Run preseq/lcextrap tool + run_preseq: Boolean + + // Run qualimap/bamqc tool + run_qualimap: Boolean + + // Run advanced analysis for targeted methylation kits with enrichment of specific regions + run_targeted_sequencing: Boolean + + // Email address for completion summary. + email: String? + + // Email address for completion summary, only when pipeline fails. + email_on_fail: String? + + // Send plain-text email instead of HTML. + plaintext_email: Boolean + + // Do not use coloured log outputs. + monochrome_logs: Boolean + + // Incoming hook URL for messaging service + hook_url: String? + + // Display version and exit. + version: Boolean + + // Boolean whether to validate parameters against the schema at runtime + validate_params: Boolean = true +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -43,53 +282,74 @@ params.bismark_index = params.aligner == 'bismark_hisat' ? 
getGenomeAttribute('b workflow NFCORE_METHYLSEQ { take: - samplesheet // channel: samplesheet read in from --input + ch_samples: Channel + params_index: IndexParams + params_methylseq: MethylseqParams main: - ch_versions = Channel.empty() - - // - // Initialize file channels or values based on params - // - ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map{ it -> [ [id:it.baseName], it ] } : Channel.empty() - ch_or_val_fasta_index = params.fasta_index ? Channel.fromPath(params.fasta_index).map{ it -> [ [id:it.baseName], it ] } : [] - ch_or_val_bismark_index = params.bismark_index ? Channel.fromPath(params.bismark_index).map{ it -> [ [id:it.baseName], it ] } : [] - ch_or_val_bwameth_index = params.bwameth_index ? Channel.fromPath(params.bwameth_index).map{ it -> [ [id:it.baseName], it ] } : [] - // // SUBWORKFLOW: Prepare any required reference genome indices // - FASTA_INDEX_BISMARK_BWAMETH( - ch_fasta, - ch_or_val_fasta_index, - ch_or_val_bismark_index, - ch_or_val_bwameth_index, - params.aligner, - params.collecthsmetrics, - params.use_mem2 + fasta = params_index.fasta ?: file(getGenomeAttribute('fasta', params)) + fasta_index = params_index.fasta_index ?: (getGenomeAttribute('fasta_index', params) ? file(getGenomeAttribute('fasta_index', params)) : null) // fasta_index is optional: file(null) throws, so guard it the same way bismarkIndex()/bwamethIndex() do + bismark_index = params_index.bismark_index ?: bismarkIndex(params_methylseq.aligner) + bwameth_index = params_index.bwameth_index ?: bwamethIndex() + + indices = FASTA_INDEX_BISMARK_BWAMETH( + fasta, + fasta_index, + bismark_index, + bwameth_index, + params_index.use_mem2, + params_methylseq ) - ch_versions = ch_versions.mix(FASTA_INDEX_BISMARK_BWAMETH.out.versions) // // WORKFLOW: Run pipeline // - METHYLSEQ ( - samplesheet, - ch_versions, - FASTA_INDEX_BISMARK_BWAMETH.out.fasta, - FASTA_INDEX_BISMARK_BWAMETH.out.fasta_index, - FASTA_INDEX_BISMARK_BWAMETH.out.bismark_index, - FASTA_INDEX_BISMARK_BWAMETH.out.bwameth_index, + methylseq = METHYLSEQ ( + ch_samples, + indices.fasta, + indices.fasta_index, + indices.bismark_index, + indices.bwameth_index,
+ params_methylseq ) - ch_versions = ch_versions.mix(METHYLSEQ.out.versions) emit: - multiqc_report = METHYLSEQ.out.multiqc_report // channel: [ path(multiqc_report.html ) ] - versions = ch_versions // channel: [ path(versions.yml) ] + fasta_index = indices.fasta_index + bismark_index = indices.bismark_index + bwameth_index = indices.bwameth_index + results = methylseq.results + bismark_summary = methylseq.bismark_summary + reference_dict = methylseq.reference_dict + intervallist = methylseq.intervallist + multiqc_report = methylseq.multiqc_report + +} + +record IndexParams { + fasta: Path? + fasta_index: Path? + bismark_index: Path? + bwameth_index: Path? + use_mem2: Boolean +} + +def bismarkIndex(aligner: String) -> Path? { + def indexPath = aligner == 'bismark_hisat' + ? getGenomeAttribute('bismark_hisat2', params) + : getGenomeAttribute('bismark', params) + return indexPath ? file(indexPath) : null +} +def bwamethIndex() -> Path? { + def indexPath = getGenomeAttribute('bwameth', params) + return indexPath ? 
file(indexPath) : null } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -102,7 +362,8 @@ workflow { // // SUBWORKFLOW: Run initialisation tasks // - PIPELINE_INITIALISATION ( + ch_samples = PIPELINE_INITIALISATION ( + params.input, params.version, params.validate_params, params.monochrome_logs, @@ -113,8 +374,10 @@ workflow { // // WORKFLOW: Run main workflow // - NFCORE_METHYLSEQ ( - PIPELINE_INITIALISATION.out.samplesheet + methylseq = NFCORE_METHYLSEQ ( + ch_samples, + params, + params ) // // SUBWORKFLOW: Run completion tasks @@ -126,8 +389,96 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_METHYLSEQ.out.multiqc_report + methylseq.multiqc_report ) + + publish: + fasta_index = methylseq.fasta_index + bismark_index = methylseq.bismark_index + bwameth_index = methylseq.bwameth_index + samples = methylseq.results + bismark_summary = methylseq.bismark_summary + reference_dict = methylseq.reference_dict + intervallist = methylseq.intervallist + multiqc_report = methylseq.multiqc_report +} + +output { + fasta_index: Path { + path "${params.aligner}/reference_genome" + enabled params.save_reference + } + + bismark_index: Path { + path "${params.aligner}/reference_genome" + enabled params.save_reference + } + + bwameth_index: Path { + path "${params.aligner}/reference_genome" + enabled params.save_reference + } + + samples: Channel { + path { r -> + r.fastqc_html >> "fastqc/" + r.fastqc_zip >> "fastqc/zips/" + + r.trim_reads >> (params.save_trimmed ? "trimgalore/" : null) + r.trim_log >> "trimgalore/logs/" + r.trim_unpaired >> (params.save_trimmed ? "trimgalore/" : null) + r.trim_html >> "trimgalore/fastqc/" + r.trim_zip >> "trimgalore/fastqc/zips/" + + r.bam >> (params.save_align_intermeds ? "${params.aligner}/alignments/" : null) + r.bai >> (params.skip_deduplication ? 
"${params.aligner}/alignments/" : "${params.aligner}/deduplicated/") + + r.align_report >> "${params.aligner}/alignments/logs/" + r.unmapped >> "${params.aligner}/alignments/unmapped/" + r.dedup_report >> "${params.aligner}/deduplicated/logs/" + r.coverage2cytosine_coverage >> "bismark/coverage2cytosine/coverage/" + r.coverage2cytosine_report >> "bismark/coverage2cytosine/reports/" + r.coverage2cytosine_summary >> "bismark/coverage2cytosine/summaries/" + r.methylation_bedgraph >> "${params.aligner}/methylation_calls/bedGraph/" + r.methylation_calls >> "${params.aligner}/methylation_calls/methylation_calls/" + r.methylation_coverage >> "${params.aligner}/methylation_calls/methylation_coverage/" + r.methylation_report >> "${params.aligner}/methylation_calls/splitting_report/" + r.methylation_mbias >> "${params.aligner}/methylation_calls/mbias/" + r.bismark_report >> "${params.aligner}/reports/" + + r.samtools_flagstat >> "${params.aligner}/alignments/samtools_stats/" + r.samtools_stats >> "${params.aligner}/alignments/samtools_stats/" + r.methyldackel_extract_bedgraph >> "methyldackel/" + r.methyldackel_extract_methylkit >> "methyldackel/" + r.methyldackel_mbias >> "methyldackel/mbias/" + r.picard_metrics >> "${params.aligner}/deduplicated/picard_metrics/" + + r.qualimap_bamqc >> "${params.aligner}/qualimap/bamqc/" + + r.bedgraph_intersect >> (params.aligner == 'bismark' ? 
"bismark/methylation_calls/bedGraph/" : "methyldackel/") + r.picard_hsmetrics >> "enrichment_metrics/" + + r.lc_extrap >> "${params.aligner}/preseq/" + r.lc_log >> "${params.aligner}/preseq/log/" + } + } + + bismark_summary: Record { + path "${params.aligner}/summary" + } + + reference_dict: Record { + path "${params.aligner}/reference_genome" + enabled params.save_reference + } + + intervallist: Record { + path "enrichment_metrics" + } + + multiqc_report: Path { + path "multiqc/${params.aligner}" + } } /* diff --git a/modules/local/writefile/main.nf b/modules/local/writefile/main.nf new file mode 100644 index 00000000..98c4014c --- /dev/null +++ b/modules/local/writefile/main.nf @@ -0,0 +1,23 @@ + +nextflow.preview.types = true + +process WRITE_FILE { + input: + record( + name: String, + items: List, + newLine: Boolean? + ) + + output: + file(name) + + exec: + def path = task.workDir.resolve(name) + path.delete() + items.each { item -> + path << item + if( newLine ) + path << '\n' + } +} diff --git a/modules/nf-core/bedtools/intersect/main.nf b/modules/nf-core/bedtools/intersect/main.nf index d9e79e7f..8369f9cd 100644 --- a/modules/nf-core/bedtools/intersect/main.nf +++ b/modules/nf-core/bedtools/intersect/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BEDTOOLS_INTERSECT { tag "$meta.id" label 'process_single' @@ -8,15 +10,22 @@ process BEDTOOLS_INTERSECT { 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" input: - tuple val(meta), path(intervals1), path(intervals2) - tuple val(meta2), path(chrom_sizes) + record( + meta: Record, + intervals1: Path, + intervals2: Path, + chrom_sizes: Path? 
+ ) output: - tuple val(meta), path("*.${extension}"), emit: intersect - path "versions.yml" , emit: versions + record( + id: meta.id, + meta: meta, + bedgraph_intersect: file("*.${extension}") + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/bismark/align/main.nf b/modules/nf-core/bismark/align/main.nf index b367a071..505df430 100644 --- a/modules/nf-core/bismark/align/main.nf +++ b/modules/nf-core/bismark/align/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BISMARK_ALIGN { tag "$meta.id" label 'process_high' @@ -8,25 +10,34 @@ process BISMARK_ALIGN { 'community.wave.seqera.io/library/bismark:0.25.1--1f50935de5d79c47' }" input: - tuple val(meta), path(reads) - tuple val(meta2), path(fasta, stageAs: 'tmp/*') // This change mounts as directory containing the FASTA file to prevent nested symlinks - tuple val(meta3), path(index) + record( + meta: Record, + reads: List, + fasta: Path, + bismark_index: Path + ) + + stage: + stageAs fasta, 'tmp/*' // This change mounts as directory containing the FASTA file to prevent nested symlinks output: - tuple val(meta), path("*bam") , emit: bam - tuple val(meta), path("*report.txt"), emit: report - tuple val(meta), path("*fq.gz") , emit: unmapped, optional: true - path "versions.yml" , emit: versions + record( + id: meta.id, + meta: meta, + bam: file("*bam"), + align_report: file("*report.txt"), + unmapped: file("*fq.gz", optional: true) + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' if(task.ext.prefix){ args += " --prefix ${task.ext.prefix}" } - def fastq = meta.single_end ? reads : "-1 ${reads[0]} -2 ${reads[1]}" + def fastq = meta.single_end ? 
"${reads[0]}" : "-1 ${reads[0]} -2 ${reads[1]}" // Try to assign sensible bismark --multicore if not already set if(!args.contains('--multicore') && task.cpus){ @@ -58,7 +69,7 @@ process BISMARK_ALIGN { """ bismark \\ ${fastq} \\ - --genome ${index} \\ + --genome ${bismark_index} \\ --bam \\ ${args} diff --git a/modules/nf-core/bismark/coverage2cytosine/main.nf b/modules/nf-core/bismark/coverage2cytosine/main.nf index c1e3a7b4..b9cf0e49 100644 --- a/modules/nf-core/bismark/coverage2cytosine/main.nf +++ b/modules/nf-core/bismark/coverage2cytosine/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BISMARK_COVERAGE2CYTOSINE { tag "$meta.id" label 'process_low' @@ -8,26 +10,35 @@ process BISMARK_COVERAGE2CYTOSINE { 'community.wave.seqera.io/library/bismark:0.25.1--1f50935de5d79c47' }" input: - tuple val(meta), path(coverage_file) - tuple val(meta2), path(fasta, stageAs: 'tmp/*') // This change mounts as directory containing the FASTA file to prevent nested symlinks - tuple val(meta3), path(index) + record( + meta: Record, + methylation_coverage: Path, + fasta: Path, + bismark_index: Path + ) + + stage: + stageAs fasta, 'tmp/*' // This change mounts as directory containing the FASTA file to prevent nested symlinks output: - tuple val(meta), path("*.cov.gz") , emit: coverage, optional: true - tuple val(meta), path("*report.txt.gz") , emit: report - tuple val(meta), path("*cytosine_context_summary.txt") , emit: summary - path "versions.yml" , emit: versions + record( + id : meta.id, + meta : meta, + coverage2cytosine_coverage : file("*.cov.gz", optional: true), + coverage2cytosine_report : file("*report.txt.gz"), + coverage2cytosine_summary : file("*cytosine_context_summary.txt") + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ coverage2cytosine \\ - ${coverage_file} \\ - --genome ${index} \\ + ${methylation_coverage} \\ + 
--genome ${bismark_index} \\ --output ${prefix} \\ --gzip \\ ${args} diff --git a/modules/nf-core/bismark/deduplicate/main.nf b/modules/nf-core/bismark/deduplicate/main.nf index 43629171..f4c048a8 100644 --- a/modules/nf-core/bismark/deduplicate/main.nf +++ b/modules/nf-core/bismark/deduplicate/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BISMARK_DEDUPLICATE { tag "$meta.id" label 'process_high' @@ -8,19 +10,25 @@ process BISMARK_DEDUPLICATE { 'community.wave.seqera.io/library/bismark:0.25.1--1f50935de5d79c47' }" input: - tuple val(meta), path(bam) + record( + meta: Record, + bam: Path + ) output: - tuple val(meta), path("*.deduplicated.bam") , emit: bam - tuple val(meta), path("*.deduplication_report.txt"), emit: report - path "versions.yml" , emit: versions + record( + id : meta.id, + meta : meta, + bam : file("*.deduplicated.bam"), + dedup_report : file("*.deduplication_report.txt") + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def seqtype = meta.single_end ? 
'-s' : '-p' """ deduplicate_bismark \\ diff --git a/modules/nf-core/bismark/genomepreparation/main.nf b/modules/nf-core/bismark/genomepreparation/main.nf index 9794084e..213313ab 100644 --- a/modules/nf-core/bismark/genomepreparation/main.nf +++ b/modules/nf-core/bismark/genomepreparation/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BISMARK_GENOMEPREPARATION { tag "$fasta" label 'process_high' @@ -8,14 +10,16 @@ process BISMARK_GENOMEPREPARATION { 'community.wave.seqera.io/library/bismark:0.25.1--1f50935de5d79c47' }" input: - tuple val(meta), path(fasta, name:"BismarkIndex/") + fasta: Path + + stage: + stageAs fasta, "BismarkIndex/" output: - tuple val(meta), path("BismarkIndex"), emit: index - path "versions.yml" , emit: versions + file("BismarkIndex") - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/bismark/methylationextractor/main.nf b/modules/nf-core/bismark/methylationextractor/main.nf index 94c9e134..0641b9fd 100644 --- a/modules/nf-core/bismark/methylationextractor/main.nf +++ b/modules/nf-core/bismark/methylationextractor/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BISMARK_METHYLATIONEXTRACTOR { tag "$meta.id" label 'process_high' @@ -8,19 +10,25 @@ process BISMARK_METHYLATIONEXTRACTOR { 'community.wave.seqera.io/library/bismark:0.25.1--1f50935de5d79c47' }" input: - tuple val(meta), path(bam) - tuple val(meta2), path(index) + record( + meta: Record, + bam: Path, + bismark_index: Path + ) output: - tuple val(meta), path("*.bedGraph.gz") , emit: bedgraph - tuple val(meta), path("*.txt.gz") , emit: methylation_calls - tuple val(meta), path("*.cov.gz") , emit: coverage - tuple val(meta), path("*_splitting_report.txt"), emit: report - tuple val(meta), path("*.M-bias.txt") , emit: mbias - path "versions.yml" , emit: versions + record( + id : meta.id, + meta : meta, + methylation_bedgraph : 
file("*.bedGraph.gz"), + methylation_calls : files("*.txt.gz"), + methylation_coverage : file("*.cov.gz"), + methylation_report : file("*_splitting_report.txt"), + methylation_mbias : file("*.M-bias.txt"), + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' @@ -29,8 +37,8 @@ process BISMARK_METHYLATIONEXTRACTOR { args += " --multicore ${(task.cpus / 3) as int}" } // Only set buffer_size when there are more than 6.GB of memory available - if(!args.contains('--buffer_size') && task.memory?.giga > 6){ - args += " --buffer_size ${task.memory.giga - 2}G" + if(!args.contains('--buffer_size') && task.memory?.toGiga() > 6){ + args += " --buffer_size ${task.memory.toGiga() - 2}G" } def seqtype = meta.single_end ? '-s' : '-p' diff --git a/modules/nf-core/bismark/report/main.nf b/modules/nf-core/bismark/report/main.nf index c95bae99..d2d32083 100644 --- a/modules/nf-core/bismark/report/main.nf +++ b/modules/nf-core/bismark/report/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BISMARK_REPORT { tag "$meta.id" label 'process_low' @@ -8,14 +10,23 @@ process BISMARK_REPORT { 'community.wave.seqera.io/library/bismark:0.25.1--1f50935de5d79c47' }" input: - tuple val(meta), path(align_report), path(dedup_report), path(splitting_report), path(mbias) + record( + meta: Record, + align_report: Path, + dedup_report: Path, + methylation_report: Path, + methylation_mbias: Path + ) output: - tuple val(meta), path("*report.{html,txt}"), emit: report - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + record( + id: meta.id, + meta: meta, + bismark_report: file("*report.{html,txt}") + ) + + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/bismark/summary/main.nf b/modules/nf-core/bismark/summary/main.nf index a8a0a83e..84d3505a 100644 --- a/modules/nf-core/bismark/summary/main.nf +++ 
b/modules/nf-core/bismark/summary/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BISMARK_SUMMARY { label 'process_low' @@ -7,18 +9,22 @@ process BISMARK_SUMMARY { 'community.wave.seqera.io/library/bismark:0.25.1--1f50935de5d79c47' }" input: - val(bam) - path(align_report) - path(dedup_report) - path(splitting_report) - path(mbias) + record( + bam: Set, + align_report: Set, + dedup_report: Set, + methylation_report: Set, + methylation_mbias: Set + ) output: - path("*report.{html,txt}"), emit: summary - path "versions.yml" , emit: versions + record( + html: file("*report.html"), + txt: file("*report.txt") + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/bwameth/align/main.nf b/modules/nf-core/bwameth/align/main.nf index d63061fc..11a2f17a 100644 --- a/modules/nf-core/bwameth/align/main.nf +++ b/modules/nf-core/bwameth/align/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BWAMETH_ALIGN { tag "${meta.id}" label 'process_high' @@ -8,16 +10,22 @@ process BWAMETH_ALIGN { 'biocontainers/bwameth:0.2.9--pyh7e72e81_0' }" input: - tuple val(meta), path(reads) - tuple val(meta2), path(fasta) - tuple val(meta3), path(index) + record( + meta: Record, + reads: Path, + fasta: Path, + index: Path + ) output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + record( + id: meta.id, + meta: meta, + bam: file("*.bam") + ) + + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/bwameth/index/main.nf b/modules/nf-core/bwameth/index/main.nf index 314a8874..5139af2d 100644 --- a/modules/nf-core/bwameth/index/main.nf +++ b/modules/nf-core/bwameth/index/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process BWAMETH_INDEX { tag "$fasta" label 'process_high' @@ -8,15 +10,17 @@ process 
BWAMETH_INDEX { 'biocontainers/bwameth:0.2.9--pyh7e72e81_0' }" input: - tuple val(meta), path(fasta, name:"BwamethIndex/") - val use_mem2 + fasta: Path + use_mem2: Boolean + + stage: + stageAs fasta, "BwamethIndex/" output: - tuple val(meta), path("BwamethIndex"), emit: index - path "versions.yml" , emit: versions + file("BwamethIndex") - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index acfb6d0e..cccd85b9 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process CAT_FASTQ { tag "${meta.id}" label 'process_single' @@ -8,20 +10,29 @@ process CAT_FASTQ { : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" input: - tuple val(meta), path(reads, stageAs: "input*/*") + record( + meta: Record, + reads: List + ) + + stage: + stageAs reads, "input*/*" output: - tuple val(meta), path("*.merged.fastq.gz"), emit: reads - path "versions.yml", emit: versions + record( + id: meta.id, + meta: meta, + reads: files("*.merged.fastq.gz").toSorted() + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads instanceof List ? reads.collect { it.toString() } : [reads.toString()] + def readList = reads.collect { file -> "${file}" }.toList() if (meta.single_end) { - if (readList.size >= 1) { + if (readList.size() >= 1) { """ cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz @@ -35,10 +46,10 @@ process CAT_FASTQ { } } else { - if (readList.size >= 2) { + if (readList.size() >= 2) { def read1 = [] def read2 = [] - readList.eachWithIndex { v, ix -> (ix & 1 ? read2 : read1) << v } + readList.withIndex().each { v, ix -> (ix & 1 ? 
read2 : read1) << v } """ cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz @@ -55,9 +66,9 @@ process CAT_FASTQ { stub: def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads instanceof List ? reads.collect { it.toString() } : [reads.toString()] + def readList = reads.collect { file -> "${file}" } if (meta.single_end) { - if (readList.size >= 1) { + if (readList.size() >= 1) { """ echo '' | gzip > ${prefix}.merged.fastq.gz @@ -71,7 +82,7 @@ process CAT_FASTQ { } } else { - if (readList.size >= 2) { + if (readList.size() >= 2) { """ echo '' | gzip > ${prefix}_1.merged.fastq.gz echo '' | gzip > ${prefix}_2.merged.fastq.gz diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 23e16634..99b06366 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process FASTQC { tag "${meta.id}" label 'process_medium' @@ -8,23 +10,29 @@ process FASTQC { 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: - tuple val(meta), path(reads) + record( + meta: Record, + reads: List + ) output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions + record( + id: meta.id, + meta: meta, + fastqc_html: files("*.html"), + fastqc_zip: files("*.zip") + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? 
[[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def old_new_pairs = reads.withIndex().collect { entry, index -> ["${entry}", "${prefix}_${index + 1}.${entry.extension}"] } def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ') + def renamed_files = old_new_pairs.collect { pair -> pair[1] }.join(' ') // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 3ffc8e92..4c34f41a 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process GUNZIP { tag "${archive}" label 'process_single' @@ -8,19 +10,18 @@ process GUNZIP { : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" input: - tuple val(meta), path(archive) + archive: Path output: - tuple val(meta), path("${gunzip}"), emit: gunzip - path "versions.yml", emit: versions + file(gunzip) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' - def extension = (archive.toString() - '.gz').tokenize('.')[-1] - def name = archive.toString() - '.gz' - ".${extension}" + def extension = archive.name.replace('.gz', '').tokenize('.')[-1] + def name = archive.name.replace(".${extension}.gz", '') def prefix = task.ext.prefix ?: name gunzip = prefix + ".${extension}" """ @@ -41,8 +42,8 @@ process GUNZIP { stub: def args = task.ext.args ?: '' - def extension = (archive.toString() - '.gz').tokenize('.')[-1] - def name = archive.toString() - '.gz' - ".${extension}" + def extension = 
archive.name.replace('.gz', '').tokenize('.')[-1] + def name = archive.name.replace(".${extension}.gz", '') def prefix = task.ext.prefix ?: name gunzip = prefix + ".${extension}" """ diff --git a/modules/nf-core/methyldackel/extract/main.nf b/modules/nf-core/methyldackel/extract/main.nf index 66b2745a..139ff7c0 100644 --- a/modules/nf-core/methyldackel/extract/main.nf +++ b/modules/nf-core/methyldackel/extract/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process METHYLDACKEL_EXTRACT { tag "$meta.id" label 'process_medium' @@ -8,17 +10,24 @@ process METHYLDACKEL_EXTRACT { 'biocontainers/methyldackel:0.6.1--he4a0461_7' }" input: - tuple val(meta), path(bam), path(bai) - path fasta - path fai + record( + meta: Record, + bam: Path, + bai: Path, + fasta: Path, + fai: Path + ) output: - tuple val(meta), path("*.bedGraph") , optional: true, emit: bedgraph - tuple val(meta), path("*.methylKit"), optional: true, emit: methylkit - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + record( + id : meta.id, + meta : meta, + methyldackel_extract_bedgraph : file("*.bedGraph", optional: true), + methyldackel_extract_methylkit : file("*.methylKit", optional: true) + ) + + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/methyldackel/mbias/main.nf b/modules/nf-core/methyldackel/mbias/main.nf index f0c48a34..ff35c40b 100644 --- a/modules/nf-core/methyldackel/mbias/main.nf +++ b/modules/nf-core/methyldackel/mbias/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process METHYLDACKEL_MBIAS { tag "$meta.id" label 'process_low' @@ -8,16 +10,23 @@ process METHYLDACKEL_MBIAS { 'biocontainers/methyldackel:0.6.1--he4a0461_7' }" input: - tuple val(meta), path(bam), path(bai) - path fasta - path fai + record( + meta: Record, + bam: Path, + bai: Path, + fasta: Path, + fai: Path + ) output: - tuple val(meta), path("*.mbias.txt"), emit: txt - path "versions.yml" , emit: versions - - when: - 
task.ext.when == null || task.ext.when + record( + id: meta.id, + meta: meta, + methyldackel_mbias: file("*.mbias.txt") + ) + + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index a508541b..70d50d8f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process MULTIQC { label 'process_single' @@ -7,21 +9,24 @@ process MULTIQC { 'biocontainers/multiqc:1.30--pyhdfd78af_0' }" input: - path multiqc_files, stageAs: "?/*" - path(multiqc_config) - path(extra_multiqc_config) - path(multiqc_logo) - path(replace_names) - path(sample_names) + record( + multiqc_files: Set, + multiqc_config: Path, + extra_multiqc_config: Path?, + multiqc_logo: Path?, + replace_names: Path?, + sample_names: Path? + ) - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions + stage: + stageAs multiqc_files, "?/*" - when: - task.ext.when == null || task.ext.when + output: + record( + report: file("*multiqc_report.html"), + data: file("*_data"), + plots: file("*_plots", optional: true) + ) script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/parabricks/fq2bammeth/main.nf b/modules/nf-core/parabricks/fq2bammeth/main.nf index 456413af..9fd3038c 100644 --- a/modules/nf-core/parabricks/fq2bammeth/main.nf +++ b/modules/nf-core/parabricks/fq2bammeth/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process PARABRICKS_FQ2BAMMETH { tag "$meta.id" label 'process_high' @@ -6,21 +8,27 @@ process PARABRICKS_FQ2BAMMETH { container "nvcr.io/nvidia/clara/clara-parabricks:4.3.2-1" input: - tuple val(meta), path(reads) - tuple val(meta2), path(fasta) - tuple val(meta3), path(index) - path(known_sites) + record( + meta: Record, + reads: Path, + fasta: Path, + bwameth_index: Path, + known_sites: 
Path? + ) output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.bai") , emit: bai - path("qc_metrics") , emit: qc_metrics, optional:true - path("*.table") , emit: bqsr_table, optional:true - path("duplicate-metrics.txt") , emit: duplicate_metrics, optional:true - path("versions.yml") , emit: versions + record( + id : meta.id, + meta : meta, + bam : file("*.bam"), + bai : file("*.bai"), + qc_metrics : file("qc_metrics", optional: true), + bqsr_table : file("*.table", optional: true), + duplicate_metrics : file("duplicate-metrics.txt", optional: true), + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: // Exit if running this module with -profile conda / -profile mamba @@ -30,19 +38,19 @@ process PARABRICKS_FQ2BAMMETH { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def in_fq_command = meta.single_end ? "--in-se-fq $reads" : "--in-fq $reads" - def known_sites_command = known_sites ? known_sites.collect{"--knownSites $it"}.join(' ') : "" + def known_sites_command = known_sites ? "--knownSites ${known_sites}" : "" def known_sites_output = known_sites ? "--out-recal-file ${prefix}.table" : "" def num_gpus = task.accelerator ? 
"--num-gpus $task.accelerator.request" : '' """ if [ -L $fasta ]; then - ln -sf \$(readlink $fasta) $index/$fasta + ln -sf \$(readlink $fasta) ${bwameth_index}/$fasta else - ln -sf ../$fasta $index/$fasta + ln -sf ../$fasta ${bwameth_index}/$fasta fi pbrun \\ fq2bam_meth \\ - --ref $index/$fasta \\ + --ref ${bwameth_index}/$fasta \\ $in_fq_command \\ --out-bam ${prefix}.bam \\ $known_sites_command \\ diff --git a/modules/nf-core/picard/bedtointervallist/main.nf b/modules/nf-core/picard/bedtointervallist/main.nf index 38c2eee7..a2072cf9 100644 --- a/modules/nf-core/picard/bedtointervallist/main.nf +++ b/modules/nf-core/picard/bedtointervallist/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process PICARD_BEDTOINTERVALLIST { tag "$meta.id" label 'process_low' @@ -8,16 +10,22 @@ process PICARD_BEDTOINTERVALLIST { 'biocontainers/picard:3.3.0--hdfd78af_0' }" input: - tuple val(meta) , path(bed) - tuple val(meta2), path(dict) - file arguments_file + record( + meta: Record, + bed: Path, + reference_dict: Path, + arguments_file: Path? + ) output: - tuple val(meta), path('*.intervallist'), emit: intervallist - path "versions.yml" , emit: versions + record( + id: meta.id, + meta: meta, + intervallist: file('*.intervallist') + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' @@ -27,7 +35,7 @@ process PICARD_BEDTOINTERVALLIST { if (!task.memory) { log.info '[Picard BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = (task.memory.mega*0.8).intValue() + avail_mem = (task.memory.toMega() * 0.8).intValue() } """ picard \\ @@ -35,7 +43,7 @@ process PICARD_BEDTOINTERVALLIST { BedToIntervalList \\ --INPUT ${bed} \\ --OUTPUT ${prefix}.intervallist \\ - --SEQUENCE_DICTIONARY ${dict} \\ + --SEQUENCE_DICTIONARY ${reference_dict} \\ --TMP_DIR . 
\\ ${args_file} \\ ${args} @@ -52,7 +60,7 @@ process PICARD_BEDTOINTERVALLIST { if (!task.memory) { log.info '[Picard BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = (task.memory.mega*0.8).intValue() + avail_mem = (task.memory.toMega() * 0.8).intValue() } def args_file = arguments_file ? "--arguments_file ${arguments_file}" : "" """ @@ -61,7 +69,7 @@ process PICARD_BEDTOINTERVALLIST { BedToIntervalList \\ --INPUT ${bed} \\ --OUTPUT ${prefix}.intervallist \\ - --SEQUENCE_DICTIONARY ${dict} \\ + --SEQUENCE_DICTIONARY ${reference_dict} \\ --TMP_DIR . \\ ${args_file} \\ ${args}" diff --git a/modules/nf-core/picard/collecthsmetrics/main.nf b/modules/nf-core/picard/collecthsmetrics/main.nf index 1d017ef8..5fd5f952 100644 --- a/modules/nf-core/picard/collecthsmetrics/main.nf +++ b/modules/nf-core/picard/collecthsmetrics/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process PICARD_COLLECTHSMETRICS { tag "$meta.id" label 'process_single' @@ -8,17 +10,30 @@ process PICARD_COLLECTHSMETRICS { 'biocontainers/picard:3.3.0--hdfd78af_0' }" input: - tuple val(meta), path(bam), path(bai), path(bait_intervals, stageAs: "baits/*"), path(target_intervals, stageAs: 'targets/*') - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - tuple val(meta4), path(dict) + record( + meta: Record, + bam: Path, + bai: Path, + bait_intervals: Set, + target_intervals: Set, + fasta: Path, + fai: Path, + reference_dict: Path + ) + + stage: + stageAs bait_intervals, "baits/*" + stageAs target_intervals, 'targets/*' output: - tuple val(meta), path("*_metrics") , emit: metrics - path "versions.yml" , emit: versions + record( + id: meta.id, + meta: meta, + picard_hsmetrics: file("*_metrics") + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' @@ -29,21 +44,21 @@ process PICARD_COLLECTHSMETRICS { if 
(!task.memory) { log.info '[Picard CollectHsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = (task.memory.mega*0.8).intValue() + avail_mem = (task.memory.toMega() * 0.8).intValue() } def bait_interval_list = bait_intervals def bait_intervallist_cmd = "" if (bait_intervals =~ /.(bed|bed.gz)$/){ bait_interval_list = bait_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") - bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." + bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${reference_dict} --TMP_DIR ." } def target_interval_list = target_intervals def target_intervallist_cmd = "" if (target_intervals =~ /.(bed|bed.gz)$/){ target_interval_list = target_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") - target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." + target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${reference_dict} --TMP_DIR ." 
} diff --git a/modules/nf-core/picard/createsequencedictionary/main.nf b/modules/nf-core/picard/createsequencedictionary/main.nf index 49637d18..3df10692 100644 --- a/modules/nf-core/picard/createsequencedictionary/main.nf +++ b/modules/nf-core/picard/createsequencedictionary/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process PICARD_CREATESEQUENCEDICTIONARY { tag "$meta.id" label 'process_medium' @@ -8,14 +10,20 @@ process PICARD_CREATESEQUENCEDICTIONARY { 'biocontainers/picard:3.3.0--hdfd78af_0' }" input: - tuple val(meta), path(fasta) + record( + meta: Record, + fasta: Path + ) output: - tuple val(meta), path("*.dict"), emit: reference_dict - path "versions.yml" , emit: versions + record( + id: meta.id, + meta: meta, + reference_dict: file("*.dict") + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' @@ -24,7 +32,7 @@ process PICARD_CREATESEQUENCEDICTIONARY { if (!task.memory) { log.info '[Picard CreateSequenceDictionary] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = (task.memory.mega*0.8).intValue() + avail_mem = (task.memory.toMega() * 0.8).intValue() } """ picard \\ diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf index 8a2ed64e..0b2bd007 100644 --- a/modules/nf-core/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' @@ -8,19 +10,25 @@ process PICARD_MARKDUPLICATES { 'biocontainers/picard:3.3.0--hdfd78af_0' }" input: - tuple val(meta), path(reads) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) + record( + meta: Record, + reads: Path, + fasta: Path, + fai: Path + ) output: - tuple val(meta), path("*.bam") , emit: bam, optional: true - tuple val(meta), path("*.bai") , emit: bai, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.metrics.txt"), emit: metrics - path "versions.yml" , emit: versions + record( + id : meta.id, + meta : meta, + bam : file("*.bam", optional: true), + bai : file("*.bai", optional: true), + cram : file("*.cram", optional: true), + picard_metrics : file("*.metrics.txt"), + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' @@ -31,7 +39,7 @@ process PICARD_MARKDUPLICATES { if (!task.memory) { log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = (task.memory.mega*0.8).intValue() + avail_mem = (task.memory.toMega() * 0.8).intValue() } if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
diff --git a/modules/nf-core/preseq/lcextrap/main.nf b/modules/nf-core/preseq/lcextrap/main.nf index 540a5fb2..7151b6fd 100644 --- a/modules/nf-core/preseq/lcextrap/main.nf +++ b/modules/nf-core/preseq/lcextrap/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process PRESEQ_LCEXTRAP { tag "$meta.id" label 'process_single' @@ -9,19 +11,25 @@ process PRESEQ_LCEXTRAP { 'biocontainers/preseq:3.2.0--hdcf5f25_6' }" input: - tuple val(meta), path(bam) + record( + meta: Record, + bam: Path + ) output: - tuple val(meta), path("*.lc_extrap.txt"), emit: lc_extrap - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions + record( + id: meta.id, + meta: meta, + lc_extrap: file("*.lc_extrap.txt"), + lc_log: file("*.log") + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' - args = task.attempt > 1 ? args.join(' -defects') : args // Disable testing for defects + args = task.attempt > 1 ? args + ' -defects' : args // Disable testing for defects def prefix = task.ext.prefix ?: "${meta.id}" def paired_end = meta.single_end ? 
'' : '-pe' """ diff --git a/modules/nf-core/qualimap/bamqc/main.nf b/modules/nf-core/qualimap/bamqc/main.nf index 8140e143..6cf7ad04 100644 --- a/modules/nf-core/qualimap/bamqc/main.nf +++ b/modules/nf-core/qualimap/bamqc/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process QUALIMAP_BAMQC { tag "$meta.id" label 'process_medium' @@ -8,36 +10,41 @@ process QUALIMAP_BAMQC { 'biocontainers/qualimap:2.3--hdfd78af_0' }" input: - tuple val(meta), path(bam) - path gff + record( + meta: Record, + strandedness: String?, + bam: Path, + gff: Path + ) output: - tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions + record( + id: meta.id, + meta: meta, + qualimap_bamqc: file("${prefix}") + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def collect_pairs = meta.single_end ? '' : '--collect-overlap-pairs' - def memory = (task.memory.mega*0.8).intValue() + 'M' + def memory = (task.memory.toMega() * 0.8).intValue() def regions = gff ? "--gff $gff" : '' - def strandedness = 'non-strand-specific' - if (meta.strandedness == 'forward') { - strandedness = 'strand-specific-forward' - } else if (meta.strandedness == 'reverse') { - strandedness = 'strand-specific-reverse' - } + strandedness = strandedness + ? 
"strand-specific-${strandedness}" + : 'non-strand-specific' + """ unset DISPLAY mkdir -p tmp export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp qualimap \\ - --java-mem-size=$memory \\ + --java-mem-size=${memory}M \\ bamqc \\ $args \\ -bam $bam \\ diff --git a/modules/nf-core/qualimap/bamqccram/main.nf b/modules/nf-core/qualimap/bamqccram/main.nf index 81e1a485..09e3a1ee 100644 --- a/modules/nf-core/qualimap/bamqccram/main.nf +++ b/modules/nf-core/qualimap/bamqccram/main.nf @@ -15,10 +15,7 @@ process QUALIMAP_BAMQCCRAM { output: tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + path "versions.yml" , topic: versions script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 6de0095d..57d418f5 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' @@ -8,19 +10,24 @@ process SAMTOOLS_FAIDX { 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: - tuple val(meta), path(fasta) - tuple val(meta2), path(fai) - val get_sizes + record( + meta: Record, + fasta: Path, + fai: Path?, + get_sizes: Boolean + ) output: - tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true - tuple val(meta), path ("*.sizes") , emit: sizes, optional: true - tuple val(meta), path ("*.fai") , emit: fai, optional: true - tuple val(meta), path ("*.gzi") , emit: gzi, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + record( + meta : meta, + fa : file("*.{fa,fasta}", optional: true), + sizes : file("*.sizes", optional: true), + fai : file("*.fai", optional: true), + gzi : file("*.gzi", optional: true) + ) + + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git 
a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index c23f3a5c..d8e06b01 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process SAMTOOLS_FLAGSTAT { tag "$meta.id" label 'process_single' @@ -8,14 +10,21 @@ process SAMTOOLS_FLAGSTAT { 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: - tuple val(meta), path(bam), path(bai) + record( + meta: Record, + bam: Path, + bai: Path + ) output: - tuple val(meta), path("*.flagstat"), emit: flagstat - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + record( + id: meta.id, + meta: meta, + samtools_flagstat: file("*.flagstat") + ) + + topic: + file("versions.yml") >> 'versions' script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 7019a72e..c5f458f8 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' @@ -8,16 +10,22 @@ process SAMTOOLS_INDEX { 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: - tuple val(meta), path(input) + record( + meta: Record, + input: Path + ) output: - tuple val(meta), path("*.bai") , optional:true, emit: bai - tuple val(meta), path("*.csi") , optional:true, emit: csi - tuple val(meta), path("*.crai"), optional:true, emit: crai - path "versions.yml" , emit: versions + record( + id : meta.id, + meta : meta, + bai : file("*.bai", optional: true), + csi : file("*.csi", optional: true), + crai : file("*.crai", optional: true), + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' @@ -36,7 +44,7 @@ process SAMTOOLS_INDEX { stub: def args = task.ext.args ?: '' - def extension = 
file(input).getExtension() == 'cram' ? + def extension = input.getExtension() == 'cram' ? "crai" : args.contains("-c") ? "csi" : "bai" """ touch ${input}.${extension} diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index caf3c61a..cb120ccc 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' @@ -8,18 +10,24 @@ process SAMTOOLS_SORT { 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: - tuple val(meta) , path(bam) - tuple val(meta2), path(fasta) + record( + meta: Record, + bam: Path, + fasta: Path? + ) output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - path "versions.yml", emit: versions + record( + id : meta.id, + meta : meta, + bam : file("*.bam", optional: true), + cram : file("*.cram", optional: true), + crai : file("*.crai", optional: true), + csi : file("*.csi", optional: true), + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 4443948b..083dae73 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process SAMTOOLS_STATS { tag "$meta.id" label 'process_single' @@ -8,15 +10,22 @@ process SAMTOOLS_STATS { 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: - tuple val(meta), path(input), path(input_index) - tuple val(meta2), path(fasta) + record( + meta: Record, + input: Path, + input_index: Path, + fasta: Path? 
+ ) output: - tuple val(meta), path("*.stats"), emit: stats - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when + record( + id: meta.id, + meta: meta, + samtools_stats: file("*.stats") + ) + + topic: + file("versions.yml") >> 'versions' script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/nf-core/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf index 5fe53669..34cbe09b 100644 --- a/modules/nf-core/trimgalore/main.nf +++ b/modules/nf-core/trimgalore/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process TRIMGALORE { tag "${meta.id}" label 'process_high' @@ -8,18 +10,24 @@ process TRIMGALORE { : 'community.wave.seqera.io/library/cutadapt_trim-galore_pigz:a98edd405b34582d'}" input: - tuple val(meta), path(reads) + record( + meta: Record, + reads: List + ) output: - tuple val(meta), path("*{3prime,5prime,trimmed,val}{,_1,_2}.fq.gz"), emit: reads - tuple val(meta), path("*report.txt") , emit: log, optional: true - tuple val(meta), path("*unpaired{,_1,_2}.fq.gz") , emit: unpaired, optional: true - tuple val(meta), path("*.html") , emit: html, optional: true - tuple val(meta), path("*.zip") , emit: zip, optional: true - path "versions.yml" , emit: versions + record( + id : meta.id, + meta : meta, + trim_reads : files("*{3prime,5prime,trimmed,val}{,_1,_2}.fq.gz").toSorted(), + trim_log : files("*report.txt", optional: true).toSorted(), + trim_unpaired : files("*unpaired{,_1,_2}.fq.gz", optional: true).toSorted(), + trim_html : files("*.html", optional: true).toSorted(), + trim_zip : files("*.zip", optional: true).toSorted(), + ) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' @@ -46,7 +54,7 @@ process TRIMGALORE { def args_list = args.split("\\s(?=--)").toList() args_list.removeAll { it.toLowerCase().contains('_r2 ') } """ - [ ! -f ${prefix}.fastq.gz ] && ln -s ${reads} ${prefix}.fastq.gz + [ ! 
-f ${prefix}.fastq.gz ] && ln -s ${reads[0]} ${prefix}.fastq.gz trim_galore \\ ${args_list.join(' ')} \\ --cores ${cores} \\ diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index e712ebe6..baedf9df 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + process UNTAR { tag "${archive}" label 'process_single' @@ -8,19 +10,18 @@ process UNTAR { : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" input: - tuple val(meta), path(archive) + archive: Path output: - tuple val(meta), path("${prefix}"), emit: untar - path "versions.yml", emit: versions + file(prefix) - when: - task.ext.when == null || task.ext.when + topic: + file("versions.yml") >> 'versions' script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + prefix = task.ext.prefix ?: archive.baseName.replaceFirst(/\.tar$/, "") """ mkdir ${prefix} @@ -50,7 +51,7 @@ process UNTAR { """ stub: - prefix = task.ext.prefix ?: (meta.id ? 
"${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + prefix = task.ext.prefix ?: archive.name.replaceFirst(/\.[^\.]+(.gz)?$/, "") """ mkdir ${prefix} ## Dry-run untaring the archive to get the files and place all in prefix diff --git a/nextflow.config b/nextflow.config index 2bf94045..4220fe5a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,109 +9,15 @@ // Global default params, used in configs params { - // Input options - input = null - // References - genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false - // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null - - // Intermediate files - save_reference = false - save_align_intermeds = false - unmapped = false - save_trimmed = false - - // Alignment options - aligner = 'bismark' - use_mem2 = false - - // Library presets - pbat = false - rrbs = false - slamseq = false - em_seq = false - single_cell = false - accel = false - zymo = false - - // Trimming options - clip_r1 = 0 - clip_r2 = 0 - three_prime_clip_r1 = 0 - three_prime_clip_r2 = 0 - nextseq_trim = 0 - length_trim = null - skip_trimming_presets = false - - // Bismark options - non_directional = false - cytosine_report = false - relax_mismatches = false - num_mismatches = 0.6 - // 0.6 will allow a penalty of bp * -0.6 - // For 100bp reads, this is -60. 
Mismatches cost -6, gap opening -5 and gap extension -2 - // So -60 would allow 10 mismatches or ~ 8 x 1-2bp indels - // Bismark default is 0.2 (L,0,-0.2), Bowtie2 default is 0.6 (L,0,-0.6) - meth_cutoff = null - no_overlap = true - ignore_r1 = 0 - ignore_r2 = 2 - ignore_3prime_r1 = 0 - ignore_3prime_r2 = 2 - known_splices = null - local_alignment = false - minins = null - maxins = null - nomeseq = false - comprehensive = false - - // bwa-meth options - all_contexts = false - merge_context = false - min_depth = 0 - ignore_flags = false - methyl_kit = false - - - // Skipping options - skip_trimming = false - skip_deduplication = false - skip_fastqc = false - skip_multiqc = false - - // Run options - run_preseq = false - run_qualimap = false - run_targeted_sequencing = false - - // Qualimap options - bamqc_regions_file = null - - // Targeted sequencing options - target_regions_file = null - collecthsmetrics = false - // Boilerplate options - outdir = null publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null help = false help_full = false show_hidden = false - version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/methylseq/' trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options config_profile_name = null @@ -121,11 +27,11 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - - // Schema validation default options - validate_params = true } +outputDir = params.outdir +workflow.output.mode = params.publish_dir_mode + // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -134,7 +40,6 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false - nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ 
-396,7 +301,7 @@ manifest { // Nextflow plugins plugins { - id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.6.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { @@ -407,27 +312,6 @@ validation { command = "nextflow run nf-core/methylseq -profile --input samplesheet.csv --outdir " fullParameter = "help_full" showHiddenParameter = "show_hidden" - beforeText = """ --\033[2m----------------------------------------------------\033[0m- - \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m nf-core/methylseq ${manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/methylseq/blob/master/CITATIONS.md -""" - } - summary { - beforeText = validation.help.beforeText - afterText = validation.help.afterText } } diff --git a/subworkflows/local/targeted_sequencing/main.nf b/subworkflows/local/targeted_sequencing/main.nf index 78bbf72a..88cd1320 100644 --- a/subworkflows/local/targeted_sequencing/main.nf +++ b/subworkflows/local/targeted_sequencing/main.nf @@ -6,6 +6,8 @@ * HS Library Size, Percent Duplicates, and Percent Off Bait. This is relevant for methylome experiments with targeted seq. 
*/ +nextflow.preview.types = true + include { BEDTOOLS_INTERSECT } from '../../../modules/nf-core/bedtools/intersect/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/picard/createsequencedictionary/main' include { PICARD_BEDTOINTERVALLIST } from '../../../modules/nf-core/picard/bedtointervallist/main' @@ -14,91 +16,82 @@ include { PICARD_COLLECTHSMETRICS } from '../../../modules/nf-core/picar workflow TARGETED_SEQUENCING { take: - ch_bedgraph // channel: [ val(meta), [ bedGraph(s) ]] when bwameth, [ val(meta), bedGraph ] when bismark - ch_target_regions // channel: path(target_regions.bed) - ch_fasta // channel: [ [:], /path/to/genome.fa] - ch_fasta_index // channel: [ val(meta), /path/to/genome.fa.fai] - ch_bam // channel: [ val(meta), [ bam ] ] ## BAM from alignment - ch_bai // channel: [ val(meta), [ bai ] ] ## BAI from alignment - collecthsmetrics // boolean: whether to run Picard CollectHsMetrics + ch_inputs: Channel + val_target_regions: Value + val_fasta: Value + val_fasta_index: Value + collecthsmetrics: Boolean main: - ch_versions = Channel.empty() - ch_picard_metrics = Channel.empty() - /* * Intersect bedGraph files with target regions * Ensure ch_bedgraph contains the bedGraph file(s) in an array and split into individual bedGraphs */ - ch_bedgraphs_target = ch_bedgraph - .map { meta, bedgraphs -> tuple(meta, bedgraphs instanceof List ? 
bedgraphs : [bedgraphs]) } - .flatMap { meta, bedgraphs -> bedgraphs.collect { bedgraph -> [meta, bedgraph] } } - .combine(ch_target_regions) + ch_intersect_inputs = ch_inputs + .map { r -> + record( + id: r.id, + intervals1: r.bedgraph, + ) + } + .combine(intervals2: val_target_regions) - BEDTOOLS_INTERSECT( - ch_bedgraphs_target, - [[:], []] - ) - ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions) + ch_results = BEDTOOLS_INTERSECT( ch_intersect_inputs ) /* * Run Picard CollectHSMetrics */ if (collecthsmetrics) { - // Create target regions with meta for Picard tools - target_regions_with_meta = ch_target_regions.map { target_file -> - tuple(["id": file(target_file).baseName], target_file) - } /* * Creation of a dictionary for the reference genome */ - PICARD_CREATESEQUENCEDICTIONARY(ch_fasta) - ch_sequence_dictionary = PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict - ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) + val_reference_dict_inputs = val_fasta.map { fa -> record(id: fa.baseName, fasta: fa) } + val_reference_dict = PICARD_CREATESEQUENCEDICTIONARY( val_reference_dict_inputs ).map { r -> r.reference_dict } /* * Conversion of the covered targets BED file to an interval list */ - PICARD_BEDTOINTERVALLIST( - target_regions_with_meta, - ch_sequence_dictionary, - [] - ) - ch_intervals = PICARD_BEDTOINTERVALLIST.out.intervallist.map { it[1] } - ch_versions = ch_versions.mix(PICARD_BEDTOINTERVALLIST.out.versions) + val_intervallist_inputs = val_target_regions.map { tr -> record(id: tr.baseName, bed: tr) } + val_intervallist = PICARD_BEDTOINTERVALLIST( val_intervallist_inputs.combine(reference_dict: val_reference_dict) ).map { r -> r.intervallist } /* * Generation of the metrics * Note: Using the same intervals for both target and bait as they are typically * the same for targeted methylation sequencing experiments */ - ch_picard_inputs = ch_bam.join(ch_bai) - .combine(ch_intervals) - .combine(ch_intervals) - 
.combine(ch_fasta) - .combine(ch_fasta_index) - .combine(ch_sequence_dictionary) - .multiMap { meta, bam, bai, intervals1, intervals2, meta_fasta, fasta, meta_fasta_index, fasta_index, meta_dict, dict -> - bam_etc: [ meta, bam, bai, intervals1, intervals2 ] // intervals: baits, targets - fasta: [ meta_fasta, fasta ] - fasta_index: [ meta_fasta_index, fasta_index ] - dict: [ meta_dict, dict ] - } - - PICARD_COLLECTHSMETRICS( - ch_picard_inputs.bam_etc, - ch_picard_inputs.fasta, - ch_picard_inputs.fasta_index, - ch_picard_inputs.dict + ch_picard_hsmetrics = PICARD_COLLECTHSMETRICS( + ch_inputs.combine( + bait_intervals: val_intervallist, + target_intervals: val_intervallist, + fasta: val_fasta, + fai: val_fasta_index, + reference_dict: val_reference_dict + ) ) - ch_picard_metrics = PICARD_COLLECTHSMETRICS.out.metrics - ch_versions = ch_versions.mix(PICARD_COLLECTHSMETRICS.out.versions) + ch_results = ch_results.join(ch_picard_hsmetrics, by: 'id') + } else { + val_reference_dict = null + val_intervallist = null } emit: - bedgraph_filtered = BEDTOOLS_INTERSECT.out.intersect // channel: [ val(meta), path("*.bedGraph") ] - picard_metrics = ch_picard_metrics // channel: [ val(meta), path("*_metrics") ] - versions = ch_versions // channel: path("*.version.txt") + results : Channel = ch_results + reference_dict : Value? = val_reference_dict + intervallist : Value? = val_intervallist +} + + +record AlignedSample { + id: String + bedgraph: Path + bam: Path + bai: Path +} + +record TargetedSequencingResult { + id: String + bedgraph_intersect: Path + picard_hsmetrics: Path? 
} diff --git a/subworkflows/local/utils_nfcore_methylseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_methylseq_pipeline/main.nf index 36f545f8..c7b5afcb 100644 --- a/subworkflows/local/utils_nfcore_methylseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_methylseq_pipeline/main.nf @@ -2,6 +2,8 @@ // Subworkflow with functionality specific to the nf-core/methylseq pipeline // +nextflow.preview.types = true + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS @@ -26,15 +28,15 @@ include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' workflow PIPELINE_INITIALISATION { take: - version // boolean: Display version and exit - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs - nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved + input: String + version: Boolean // Display version and exit + validate_params: Boolean // Boolean whether to validate parameters against the schema at runtime + monochrome_logs: Boolean // Do not use coloured log outputs + nextflow_cli_args: List // List of positional nextflow CLI args + outdir: String // string: The output directory where the results will be saved main: - ch_versions = Channel.empty() // // Print version and exit if required and dump pipeline parameters to JSON file @@ -73,26 +75,28 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // - Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) + .map { row -> row as Tuple } .map { meta, fastq_1, fastq_2, genome -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], 
[ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } + def single_end = !fastq_2 + def reads = single_end ? [ fastq_1 ] : [ fastq_1, fastq_2 ] + def sample = record( + id: meta.id, + meta: record(meta) + record(single_end: single_end), + reads: reads + ) + tuple(sample.id, sample) } - .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) + .groupBy() + .map { id, samples -> + validateInputSamplesheet(samples) } - .set { ch_samplesheet } - ch_samplesheet.dump(tag: "ch_samplesheet") + + ch_samplesheet.view(tag: "ch_samplesheet") emit: samplesheet = ch_samplesheet - versions = ch_versions } /* @@ -115,7 +119,6 @@ workflow PIPELINE_COMPLETION { main: summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - def multiqc_reports = multiqc_report.toList() // // Completion email and summary @@ -129,7 +132,7 @@ workflow PIPELINE_COMPLETION { plaintext_email, outdir, monochrome_logs, - multiqc_reports.getVal(), + multiqc_report.getVal(), ) } @@ -159,21 +162,23 @@ def validateInputParameters() { // // Validate channels from input samplesheet // -def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] +def validateInputSamplesheet(samples: Bag) -> Record { + + def sample = samples.toList().first() // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 + def endedness_ok = samples.collect{ r -> r.meta.single_end }.toUnique().size() == 1 if (!endedness_ok) { - error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end: ${sample.id}") } - return [ metas[0], fastqs ] + def reads = samples.collectMany { r -> r.reads } + return sample + record(reads: reads) } // // Get attribute from genome config file e.g. fasta // -def getGenomeAttribute(attribute) { +def getGenomeAttribute(attribute: String, params: Record) -> String? { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { return params.genomes[ params.genome ][ attribute ] diff --git a/subworkflows/nf-core/fasta_index_bismark_bwameth/main.nf b/subworkflows/nf-core/fasta_index_bismark_bwameth/main.nf index 5ba0e0b1..9cb153f1 100644 --- a/subworkflows/nf-core/fasta_index_bismark_bwameth/main.nf +++ b/subworkflows/nf-core/fasta_index_bismark_bwameth/main.nf @@ -1,131 +1,87 @@ +nextflow.preview.types = true + include { UNTAR } from '../../../modules/nf-core/untar/main' include { GUNZIP } from '../../../modules/nf-core/gunzip/main' include { BISMARK_GENOMEPREPARATION } from '../../../modules/nf-core/bismark/genomepreparation/main' include { BWAMETH_INDEX } from '../../../modules/nf-core/bwameth/index/main' include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { MethylseqParams } from '../../../workflows/methylseq/main' + +def isGzipped(file: Path) -> Boolean { + return file.name.endsWith('.gz') +} + workflow FASTA_INDEX_BISMARK_BWAMETH { take: - fasta // channel: [ val(meta), [ fasta ] ] - fasta_index // channel: [ val(meta), [ fasta index ] ] - bismark_index // channel: [ val(meta), [ bismark index ] ] - bwameth_index // channel: [ val(meta), [ bwameth index ] ] - aligner // string: bismark, bismark_hisat or bwameth - collecthsmetrics // boolean: whether to run picard collecthsmetrics - use_mem2 // boolean: generate mem2 index if no index provided, and bwameth is selected + fasta: Path? + fasta_index: Path? + bismark_index: Path? + bwameth_index: Path? 
+ use_mem2: Boolean // generate mem2 index if no index provided, and bwameth is selected + params: MethylseqParams main: - ch_fasta = Channel.empty() - ch_fasta_index = Channel.empty() - ch_bismark_index = Channel.empty() - ch_bwameth_index = Channel.empty() - ch_versions = Channel.empty() + val_fasta = null + val_fasta_index = null + val_bismark_index = null + val_bwameth_index = null // Check if fasta file is gzipped and decompress if needed - fasta - .branch { - gzipped: it[1].toString().endsWith('.gz') - unzipped: true - } - .set { ch_fasta_branched } - - GUNZIP ( - ch_fasta_branched.gzipped - ) - - ch_fasta = ch_fasta_branched.unzipped.mix(GUNZIP.out.gunzip) - ch_versions = ch_versions.mix(GUNZIP.out.versions) + if( fasta ) { + val_fasta = isGzipped(fasta) + ? GUNZIP( fasta ) + : channel.value(fasta) + } // Aligner: bismark or bismark_hisat - if( aligner =~ /bismark/ ){ + if( params.aligner =~ /bismark/ ){ /* * Generate bismark index if not supplied */ if (bismark_index) { // Handle channel-based bismark index - bismark_index - .branch { - gzipped: it[1].toString().endsWith('.gz') - unzipped: true - } - .set { ch_bismark_index_branched } - - UNTAR ( - ch_bismark_index_branched.gzipped - ) - - ch_bismark_index = ch_bismark_index_branched.unzipped.mix(UNTAR.out.untar) - ch_versions = ch_versions.mix(UNTAR.out.versions) + val_bismark_index = isGzipped(bismark_index) + ? 
UNTAR( bismark_index ) + : channel.value(bismark_index) } else { - BISMARK_GENOMEPREPARATION ( - ch_fasta - ) - ch_bismark_index = BISMARK_GENOMEPREPARATION.out.index - ch_versions = ch_versions.mix(BISMARK_GENOMEPREPARATION.out.versions) + val_bismark_index = BISMARK_GENOMEPREPARATION( val_fasta ) } } // Aligner: bwameth - else if ( aligner == 'bwameth' ){ + else if ( params.aligner == 'bwameth' ){ /* * Generate bwameth index if not supplied */ if (bwameth_index) { // Handle channel-based bwameth index - bwameth_index - .branch { - gzipped: it[1].toString().endsWith('.gz') - unzipped: true - } - .set { ch_bwameth_index_branched } - - UNTAR ( - ch_bwameth_index_branched.gzipped - ) - - ch_bwameth_index = ch_bwameth_index_branched.unzipped.mix(UNTAR.out.untar) - ch_versions = ch_versions.mix(UNTAR.out.versions) + val_bwameth_index = isGzipped(bwameth_index) + ? UNTAR( bwameth_index ) + : channel.value(bwameth_index) } else { - if (use_mem2) { - BWAMETH_INDEX ( - ch_fasta, - true - ) - } else { - BWAMETH_INDEX ( - ch_fasta, - false - ) - } - ch_bwameth_index = BWAMETH_INDEX.out.index - ch_versions = ch_versions.mix(BWAMETH_INDEX.out.versions) + val_bwameth_index = BWAMETH_INDEX( val_fasta, use_mem2 ) } } /* * Generate fasta index if not supplied for bwameth workflow or picard collecthsmetrics tool */ - if (aligner == 'bwameth' || collecthsmetrics) { + if (params.aligner == 'bwameth' || params.collecthsmetrics) { // already exising fasta index if (fasta_index) { - ch_fasta_index = fasta_index + val_fasta_index = channel.value(fasta_index) } else { - SAMTOOLS_FAIDX( - ch_fasta, - [[:], []], - false - ) - ch_fasta_index = SAMTOOLS_FAIDX.out.fai - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + val_faidx_inputs = val_fasta.map { fa -> record(fasta: fa, get_sizes: false) } + val_fasta_index = SAMTOOLS_FAIDX( val_faidx_inputs ).map { r -> r.fai } } } emit: - fasta = ch_fasta // channel: [ val(meta), [ fasta ] ] - fasta_index = ch_fasta_index // channel: [ 
val(meta), [ fasta index ] ] - bismark_index = ch_bismark_index // channel: [ val(meta), [ bismark index ] ] - bwameth_index = ch_bwameth_index // channel: [ val(meta), [ bwameth index ] ] - versions = ch_versions // channel: [ versions.yml ] + fasta : Value? = val_fasta + fasta_index : Value? = val_fasta_index + bismark_index : Value? = val_bismark_index + bwameth_index : Value? = val_bwameth_index } diff --git a/subworkflows/nf-core/fastq_align_dedup_bismark/main.nf b/subworkflows/nf-core/fastq_align_dedup_bismark/main.nf index e5f2ffe5..cdb3d09d 100644 --- a/subworkflows/nf-core/fastq_align_dedup_bismark/main.nf +++ b/subworkflows/nf-core/fastq_align_dedup_bismark/main.nf @@ -1,3 +1,5 @@ +nextflow.preview.types = true + include { BISMARK_ALIGN } from '../../../modules/nf-core/bismark/align/main' include { BISMARK_DEDUPLICATE } from '../../../modules/nf-core/bismark/deduplicate/main' include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' @@ -7,148 +9,143 @@ include { BISMARK_COVERAGE2CYTOSINE } from '../../../modules/nf-core/bismark/ include { BISMARK_REPORT } from '../../../modules/nf-core/bismark/report/main' include { BISMARK_SUMMARY } from '../../../modules/nf-core/bismark/summary/main' +include { Sample } from '../../../utils/types.nf' + workflow FASTQ_ALIGN_DEDUP_BISMARK { take: - ch_reads // channel: [ val(meta), [ reads ] ] - ch_fasta // channel: [ val(meta), [ fasta ] ] - ch_bismark_index // channel: [ val(meta), [ bismark index ] ] - skip_deduplication // boolean: whether to deduplicate alignments - cytosine_report // boolean: whether the run coverage2cytosine + ch_reads: Channel + val_fasta: Value + val_bismark_index: Value + skip_deduplication: Boolean + cytosine_report: Boolean main: - ch_alignments = Channel.empty() - ch_alignment_reports = Channel.empty() - ch_methylation_bedgraph = Channel.empty() - ch_methylation_calls = Channel.empty() - ch_methylation_coverage = Channel.empty() - ch_methylation_report = Channel.empty() - 
ch_methylation_mbias = Channel.empty() - ch_coverage2cytosine_coverage = Channel.empty() - ch_coverage2cytosine_report = Channel.empty() - ch_coverage2cytosine_summary = Channel.empty() - ch_bismark_report = Channel.empty() - ch_bismark_summary = Channel.empty() - ch_multiqc_files = Channel.empty() - ch_versions = Channel.empty() /* * Align with bismark */ - BISMARK_ALIGN ( - ch_reads, - ch_fasta, - ch_bismark_index + ch_alignment = BISMARK_ALIGN( + ch_reads.combine(fasta: val_fasta, bismark_index: val_bismark_index) ) - ch_alignments = BISMARK_ALIGN.out.bam - ch_alignment_reports = BISMARK_ALIGN.out.report.map{ meta, report -> [ meta, report, [] ] } - ch_versions = ch_versions.mix(BISMARK_ALIGN.out.versions) if (!skip_deduplication) { /* * Run deduplicate_bismark */ - BISMARK_DEDUPLICATE ( - BISMARK_ALIGN.out.bam - ) - ch_alignments = BISMARK_DEDUPLICATE.out.bam - ch_alignment_reports = BISMARK_ALIGN.out.report.join(BISMARK_DEDUPLICATE.out.report) - ch_versions = ch_versions.mix(BISMARK_DEDUPLICATE.out.versions) + ch_alignment_dedup = ch_alignment + .join( BISMARK_DEDUPLICATE( ch_alignment ) , by: 'id') + } else { + ch_alignment_dedup = ch_alignment } /* * MODULE: Run samtools sort on aligned or deduplicated bam */ - SAMTOOLS_SORT ( - ch_alignments, - [[:],[]] // [ [meta], [fasta]] - ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) + ch_bam = SAMTOOLS_SORT( ch_alignment_dedup ) /* * MODULE: Run samtools index on aligned or deduplicated bam */ - SAMTOOLS_INDEX ( - SAMTOOLS_SORT.out.bam + ch_bai = SAMTOOLS_INDEX( + ch_bam.map { r -> record(id: r.id, meta: r.meta, input: r.bam) } ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) /* * Run bismark_methylation_extractor */ - BISMARK_METHYLATIONEXTRACTOR ( - ch_alignments, - ch_bismark_index + ch_methylation = BISMARK_METHYLATIONEXTRACTOR( + ch_alignment_dedup.combine(bismark_index: val_bismark_index) ) - ch_methylation_bedgraph = BISMARK_METHYLATIONEXTRACTOR.out.bedgraph - ch_methylation_calls 
= BISMARK_METHYLATIONEXTRACTOR.out.methylation_calls - ch_methylation_coverage = BISMARK_METHYLATIONEXTRACTOR.out.coverage - ch_methylation_report = BISMARK_METHYLATIONEXTRACTOR.out.report - ch_methylation_mbias = BISMARK_METHYLATIONEXTRACTOR.out.mbias - ch_versions = ch_versions.mix(BISMARK_METHYLATIONEXTRACTOR.out.versions) /* * Run bismark coverage2cytosine */ if (cytosine_report) { - BISMARK_COVERAGE2CYTOSINE ( - ch_methylation_coverage, - ch_fasta, - ch_bismark_index + ch_coverage2cytosine = BISMARK_COVERAGE2CYTOSINE( + ch_methylation.combine( + fasta: val_fasta, + bismark_index: val_bismark_index + ) ) - ch_coverage2cytosine_coverage = BISMARK_COVERAGE2CYTOSINE.out.coverage - ch_coverage2cytosine_report = BISMARK_COVERAGE2CYTOSINE.out.report - ch_coverage2cytosine_summary = BISMARK_COVERAGE2CYTOSINE.out.summary - ch_versions = ch_versions.mix(BISMARK_COVERAGE2CYTOSINE.out.versions) + } else { + ch_coverage2cytosine = channel.empty() } /* * Generate bismark sample reports */ - BISMARK_REPORT ( - ch_alignment_reports - .join(ch_methylation_report) - .join(ch_methylation_mbias) + ch_bismark_report = BISMARK_REPORT( + ch_alignment_dedup.join(ch_methylation, by: 'id') ) - ch_bismark_report = BISMARK_REPORT.out.report - ch_versions = ch_versions.mix(BISMARK_REPORT.out.versions) + + /* + * Collect per-sample results + */ + ch_results = ch_alignment_dedup + .join(ch_bam, by: 'id') + .join(ch_bai, by: 'id') + .join(ch_coverage2cytosine, by: 'id', remainder: true) + .join(ch_methylation, by: 'id') + .join(ch_bismark_report, by: 'id') /* * Generate bismark summary report */ - BISMARK_SUMMARY ( - BISMARK_ALIGN.out.bam.collect{ meta, bam -> bam.name }, - ch_alignment_reports.collect{ meta, align_report, dedup_report -> align_report }, - ch_alignment_reports.collect{ meta, align_report, dedup_report -> dedup_report }.ifEmpty([]), - ch_methylation_report.collect{ meta, report -> report }, - ch_methylation_mbias.collect{ meta, mbias -> mbias } - ) - ch_bismark_summary = 
BISMARK_SUMMARY.out.summary - ch_versions = ch_versions.mix(BISMARK_SUMMARY.out.versions) + ch_bam_name = ch_alignment.map { r -> record(id: r.id, bam_name: r.bam.name) } + + ch_bismark_summary_inputs = ch_alignment_dedup + .join(ch_methylation, by: 'id') + .join(ch_bam_name, by: 'id') + .collect() + .map { rs -> + record( + bam: rs*.bam_name.toSet(), + align_report: rs*.align_report.toSet(), + dedup_report: rs*.dedup_report.findAll { v -> v != null }.toSet(), + methylation_report: rs*.methylation_report.toSet(), + methylation_mbias: rs*.methylation_mbias.toSet() + ) + } + + val_bismark_summary = BISMARK_SUMMARY( ch_bismark_summary_inputs ) /* * Collect MultiQC inputs */ - ch_multiqc_files = ch_bismark_summary - .mix(ch_alignment_reports.collect{ meta, align_report, dedup_report -> align_report }) - .mix(ch_alignment_reports.collect{ meta, align_report, dedup_report -> dedup_report }) - .mix(ch_methylation_report.collect{ meta, report -> report }) - .mix(ch_methylation_mbias.collect{ meta, mbias -> mbias }) - .mix(ch_bismark_report.collect{ meta, report -> report }) + ch_multiqc_files = channel.empty() + .mix( val_bismark_summary.flatMap { r -> [r.html, r.txt] } ) + .mix( ch_alignment_dedup.flatMap { r -> [r.align_report, r.dedup_report] }.filter { v -> v != null } ) + .mix( ch_methylation.flatMap { r -> [r.methylation_report, r.methylation_mbias] } ) + .mix( ch_bismark_report.map { r -> r.bismark_report } ) emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - coverage2cytosine_coverage = ch_coverage2cytosine_coverage // channel: [ val(meta), [ coverage ] ] - coverage2cytosine_report = ch_coverage2cytosine_report // channel: [ val(meta), [ report ] ] - coverage2cytosine_summary = ch_coverage2cytosine_summary // channel: [ val(meta), [ summary ] ] - methylation_bedgraph = ch_methylation_bedgraph // channel: [ val(meta), [ bedgraph ] ] - methylation_calls = ch_methylation_calls // 
channel: [ val(meta), [ methylation_calls ] ]
-    methylation_coverage = ch_methylation_coverage // channel: [ val(meta), [ coverage ] ]
-    methylation_report = ch_methylation_report // channel: [ val(meta), [ report ] ]
-    methylation_mbias = ch_methylation_mbias // channel: [ val(meta), [ mbias ] ]
-    bismark_report = ch_bismark_report // channel: [ val(meta), [ report ] ]
-    bismark_summary = ch_bismark_summary // channel: [ val(meta), [ summary ] ]
-    multiqc = ch_multiqc_files // path: *{html,txt}
-    versions = ch_versions // path: *.version.txt
+    results : Channel = ch_results
+    bismark_summary : Value = val_bismark_summary
+    multiqc : Channel = ch_multiqc_files
+}
+
+
+record BismarkResult { // per-sample record emitted on `results`
+    id: String
+    single_end: Boolean
+    bam: Path
+    bai: Path
+    align_report: Path
+    unmapped: Path?
+    dedup_report: Path? // absent when skip_deduplication is set (the summary step filters it for null)
+    coverage2cytosine_coverage: Path? // coverage2cytosine_* are joined with remainder: true; absent unless cytosine_report is set
+    coverage2cytosine_report: Path?
+    coverage2cytosine_summary: Path?
+    methylation_bedgraph: Path
+    methylation_calls: Path
+    methylation_coverage: Path
+    methylation_report: Path
+    methylation_mbias: Path
+    bismark_report: Path
+}
+
+record BismarkSummary { // the two files read from the BISMARK_SUMMARY output when collecting MultiQC inputs
+    html: Path
+    txt: Path
 }
diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf b/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf
index 90488250..7b4ec485 100644
--- a/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf
+++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf
@@ -1,3 +1,6 @@
+
+nextflow.preview.types = true
+
 include { BWAMETH_ALIGN } from '../../../modules/nf-core/bwameth/align/main'
 include { PARABRICKS_FQ2BAMMETH } from '../../../modules/nf-core/parabricks/fq2bammeth/main'
 include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'
@@ -9,29 +12,20 @@ include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DEDUPLICATED } from '../../../modules
 include { METHYLDACKEL_EXTRACT } from '../../../modules/nf-core/methyldackel/extract/main'
 include { METHYLDACKEL_MBIAS } from '../../../modules/nf-core/methyldackel/mbias/main'
+include { Sample } from '../../../utils/types.nf'
+
 workflow FASTQ_ALIGN_DEDUP_BWAMETH {

     take:
-    ch_reads // channel: [ val(meta), [ reads ] ]
-    ch_fasta // channel: [ val(meta), [ fasta ] ]
-    ch_fasta_index // channel: [ val(meta), [ fasta index ] ]
-    ch_bwameth_index // channel: [ val(meta), [ bwameth index ] ]
-    skip_deduplication // boolean: whether to deduplicate alignments
-    use_gpu // boolean: whether to use GPU or CPU for bwameth alignment
+    ch_reads: Channel
+    val_fasta: Value
+    val_fasta_index: Value
+    val_bwameth_index: Value
+    skip_deduplication: Boolean
+    use_gpu: Boolean

     main:
-    ch_alignment = Channel.empty()
-    ch_alignment_index = Channel.empty()
-    ch_samtools_flagstat = Channel.empty()
-    ch_samtools_stats = Channel.empty()
-    ch_methydackel_extract_bedgraph = Channel.empty()
-    ch_methydackel_extract_methylkit = Channel.empty()
-    ch_methydackel_mbias = Channel.empty()
-    ch_picard_metrics = Channel.empty()
-    ch_multiqc_files = Channel.empty()
-    ch_versions = Channel.empty()
-
     /*
      * Align with bwameth
      */
@@ -39,126 +33,107 @@ workflow FASTQ_ALIGN_DEDUP_BWAMETH {
         /*
          * Align with parabricks GPU enabled fq2bammeth implementation of bwameth
          */
-        PARABRICKS_FQ2BAMMETH (
-            ch_reads,
-            ch_fasta,
-            ch_bwameth_index,
-            [] // known sites
+        ch_alignment = PARABRICKS_FQ2BAMMETH (
+            ch_reads.combine(fasta: val_fasta, bwameth_index: val_bwameth_index)
         )
-        ch_alignment = PARABRICKS_FQ2BAMMETH.out.bam
-        ch_versions = ch_versions.mix(PARABRICKS_FQ2BAMMETH.out.versions)
     } else {
         /*
          * Align with CPU version of bwameth
          */
-        BWAMETH_ALIGN (
-            ch_reads,
-            ch_fasta,
-            ch_bwameth_index
+        ch_alignment = BWAMETH_ALIGN (
+            ch_reads.combine(fasta: val_fasta, bwameth_index: val_bwameth_index)
         )
-        ch_alignment = BWAMETH_ALIGN.out.bam
-        ch_versions = BWAMETH_ALIGN.out.versions
     }

     /*
      * Sort raw output BAM
      */
-    SAMTOOLS_SORT (
-        ch_alignment,
-        [[:],[]] // [ [meta], [fasta]]
-    )
-    ch_alignment = SAMTOOLS_SORT.out.bam
-    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions)
+    ch_alignment = SAMTOOLS_SORT( ch_alignment )

     /*
      * Run samtools index on alignment
      */
-    SAMTOOLS_INDEX_ALIGNMENTS (
-        ch_alignment
+    ch_alignment_index = SAMTOOLS_INDEX_ALIGNMENTS(
+        ch_alignment.map { r -> record(id: r.id, input: r.bam) }
     )
-    ch_alignment_index = SAMTOOLS_INDEX_ALIGNMENTS.out.bai
-    ch_versions = ch_versions.mix(SAMTOOLS_INDEX_ALIGNMENTS.out.versions)
+    ch_alignment = ch_alignment.join(ch_alignment_index, by: 'id')

     /*
      * Run samtools flagstat
      */
-    SAMTOOLS_FLAGSTAT (
-        ch_alignment.join(ch_alignment_index)
-    )
-    ch_samtools_flagstat = SAMTOOLS_FLAGSTAT.out.flagstat
-    ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions)
+    ch_samtools_flagstat = SAMTOOLS_FLAGSTAT( ch_alignment )

     /*
      * Run samtools stats
      */
-    SAMTOOLS_STATS (
-        ch_alignment.join(ch_alignment_index),
-        [[:],[]] // [ [meta], [fasta]]
+    ch_samtools_stats = SAMTOOLS_STATS(
+        ch_alignment.map { r -> record(id: r.id, input: r.bam, input_index: r.bai) }
     )
-    ch_samtools_stats = SAMTOOLS_STATS.out.stats
-    ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions)

     if (!skip_deduplication) {
         /*
          * Run Picard MarkDuplicates
          */
-        PICARD_MARKDUPLICATES (
-            ch_alignment,
-            ch_fasta,
-            ch_fasta_index
+        ch_picard = PICARD_MARKDUPLICATES(
+            ch_alignment.combine(fasta: val_fasta, fasta_index: val_fasta_index)
         )

         /*
          * Run samtools index on deduplicated alignment
          */
-        SAMTOOLS_INDEX_DEDUPLICATED (
-            PICARD_MARKDUPLICATES.out.bam
+        ch_alignment_index_dedup = SAMTOOLS_INDEX_DEDUPLICATED(
+            ch_picard.map { r -> record(id: r.id, input: r.bam) }
         )
-        ch_alignment = PICARD_MARKDUPLICATES.out.bam
-        ch_alignment_index = SAMTOOLS_INDEX_DEDUPLICATED.out.bai
-        ch_picard_metrics = PICARD_MARKDUPLICATES.out.metrics
-        ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions)
-        ch_versions = ch_versions.mix(SAMTOOLS_INDEX_DEDUPLICATED.out.versions)
+        ch_alignment = ch_alignment
+            .join(ch_picard, by: 'id')
+            .join(ch_alignment_index_dedup, by: 'id')
+    } else {
+        ch_picard = channel.empty() // keeps ch_picard defined for the results join and MultiQC mix below
     }

     /*
      * Extract per-base methylation and plot methylation bias
      */
-    METHYLDACKEL_EXTRACT (
-        ch_alignment.join(ch_alignment_index),
-        ch_fasta.map{ meta, fasta_file -> fasta_file },
-        ch_fasta_index.map{ meta, fasta_index -> fasta_index }
+    ch_methydackel_extract = METHYLDACKEL_EXTRACT (
+        ch_alignment.combine(fasta: val_fasta, fasta_index: val_fasta_index)
     )
-    ch_methydackel_extract_bedgraph = METHYLDACKEL_EXTRACT.out.bedgraph
-    ch_methydackel_extract_methylkit = METHYLDACKEL_EXTRACT.out.methylkit
-    ch_versions = ch_versions.mix(METHYLDACKEL_EXTRACT.out.versions)
-
-    METHYLDACKEL_MBIAS (
-        ch_alignment.join(ch_alignment_index),
-        ch_fasta.map{ meta, fasta_file -> fasta_file },
-        ch_fasta_index.map{ meta, fasta_index -> fasta_index }
+
+    ch_methydackel_mbias = METHYLDACKEL_MBIAS (
+        ch_alignment.combine(fasta: val_fasta, fasta_index: val_fasta_index)
     )
-    ch_methydackel_mbias = METHYLDACKEL_MBIAS.out.txt
-    ch_versions = ch_versions.mix(METHYLDACKEL_MBIAS.out.versions)
+
+    ch_results = ch_alignment
+        .join(ch_samtools_flagstat, by: 'id')
+        .join(ch_samtools_stats, by: 'id')
+        .join(ch_methydackel_extract, by: 'id')
+        .join(ch_methydackel_mbias, by: 'id')
+        .join(ch_picard, by: 'id', remainder: true)

     /*
      * Collect MultiQC inputs
      */
-    ch_multiqc_files = ch_picard_metrics.collect{ meta, metrics -> metrics }
-        .mix(ch_samtools_flagstat.collect{ meta, flagstat -> flagstat })
-        .mix(ch_samtools_stats.collect{ meta, stats -> stats })
-        .mix(ch_methydackel_extract_bedgraph.collect{ meta, bedgraph -> bedgraph })
-        .mix(ch_methydackel_mbias.collect{ meta, txt -> txt })
+    ch_multiqc_files = channel.empty()
+        .mix( ch_picard.map { r -> r.picard_metrics } )
+        .mix( ch_samtools_flagstat.map { r -> r.samtools_flagstat } )
+        .mix( ch_samtools_stats.map { r -> r.samtools_stats } )
+        .mix( ch_methydackel_extract.map { r -> r.methyldackel_extract_bedgraph } )
+        .mix( ch_methydackel_mbias.map { r -> r.methyldackel_mbias } )

     emit:
-    bam = ch_alignment // channel: [ val(meta), [ bam ] ]
-    bai = ch_alignment_index // channel: [ val(meta), [ bai ] ]
-    samtools_flagstat = ch_samtools_flagstat // channel: [ val(meta), [ flagstat ] ]
-    samtools_stats = ch_samtools_stats // channel: [ val(meta), [ stats ] ]
-    methydackel_extract_bedgraph = ch_methydackel_extract_bedgraph // channel: [ val(meta), [ bedgraph ] ]
-    methydackel_extract_methylkit = ch_methydackel_extract_methylkit // channel: [ val(meta), [ methylkit ] ]
-    methydackel_mbias = ch_methydackel_mbias // channel: [ val(meta), [ mbias ] ]
-    picard_metrics = ch_picard_metrics // channel: [ val(meta), [ metrics ] ]
-    multiqc = ch_multiqc_files // channel: [ *{html,txt} ]
-    versions = ch_versions // channel: [ versions.yml ]
+    results: Channel = ch_results
+    multiqc: Channel = ch_multiqc_files
+}
+
+
+record BwamethResult { // per-sample record emitted on `results`
+    id: String
+    single_end: Boolean
+    bam: Path
+    bai: Path
+    samtools_flagstat: Path
+    samtools_stats: Path
+    methyldackel_extract_bedgraph: Path
+    methyldackel_extract_methylkit: Path
+    methyldackel_mbias: Path
+    picard_metrics: Path? // joined with remainder: true; absent when skip_deduplication is set
 }
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
index d6e593e8..0237cb22 100644
--- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
@@ -74,7 +74,10 @@ def dumpParametersToJSON(outdir) {
     def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
     def filename = "params_${timestamp}.json"
     def temp_pf = new File(workflow.launchDir.toString(), ".${filename}")
-    def jsonStr = groovy.json.JsonOutput.toJson(params)
+    def jsonOutput = new groovy.json.JsonGenerator.Options()
+        .addConverter(Path) { value -> value.toUriString() }
+        .build()
+    def jsonStr = jsonOutput.toJson(params)
     temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr)

     nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json")
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
index
bfd25876..3331bafa 100644
--- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -2,6 +2,8 @@
 // Subworkflow with utility functions specific to the nf-core pipeline template
 //

+nextflow.preview.types = true
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     SUBWORKFLOW DEFINITION
@@ -77,7 +79,7 @@ def getWorkflowVersion() {
 //
 // Get software versions for pipeline
 //
-def processVersionsFromYAML(yaml_file) {
+def processVersionsFromYAML(yaml_file: Path) -> String { // loads one versions YAML, re-keys each entry by the last ':'-separated token, returns the re-dumped YAML text
     def yaml = new org.yaml.snakeyaml.Yaml()
     def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] }
     return yaml.dumpAsMap(versions).trim()
@@ -97,8 +99,16 @@ def workflowVersionToYAML() {
 //
 // Get channel of software versions used in pipeline in YAML format
 //
-def softwareVersionsToYAML(ch_versions) {
-    return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML()))
+workflow softwareVersionsToYAML { // workflow form of the former helper function; emits a single unnamed channel of YAML strings
+    take:
+    ch_versions: Channel // items are cast to Path below before being parsed
+
+    emit:
+    ch_versions
+        .unique() // drop duplicate incoming items
+        .map { version -> processVersionsFromYAML(version as Path) } // one normalised YAML string per item
+        .unique() // drop items whose normalised YAML text is identical
+        .mix(channel.of(workflowVersionToYAML())) // add the workflow's own version entry
 }

 //
diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf
index 4994303e..223e584a 100644
--- a/subworkflows/nf-core/utils_nfschema_plugin/main.nf
+++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf
@@ -28,18 +28,6 @@ workflow UTILS_NFSCHEMA_PLUGIN {
         log.info paramsSummaryLog(input_workflow)
     }

-    //
-    // Validate the parameters using nextflow_schema.json or the schema
-    // given via the validation.parametersSchema configuration option
-    //
-    if(validate_params) {
-        if(parameters_schema) {
-            validateParameters(parameters_schema:parameters_schema)
-        } else {
-            validateParameters()
-        }
-    }
-
     emit:
     dummy_emit = true
 }
diff --git a/utils/types.nf b/utils/types.nf
new file mode 100644
index 00000000..cef55eb8
--- /dev/null
+++ b/utils/types.nf
@@ -0,0 +1,6 @@
+
+record Sample { // one input sample; shared by the methylseq workflow and the bwameth subworkflow via include
+    id: String
+    meta: Record
+    reads: List
+}
diff --git a/workflows/methylseq/main.nf b/workflows/methylseq/main.nf
index 67e7faff..8c73f8bf 100644
--- a/workflows/methylseq/main.nf
+++ b/workflows/methylseq/main.nf
@@ -4,7 +4,10 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

+nextflow.preview.types = true
+
 include { paramsSummaryMap } from 'plugin/nf-schema'
+include { WRITE_FILE } from '../../modules/local/writefile/main'
 include { FASTQC } from '../../modules/nf-core/fastqc/main'
 include { TRIMGALORE } from '../../modules/nf-core/trimgalore/main'
 include { QUALIMAP_BAMQC } from '../../modules/nf-core/qualimap/bamqc/main'
@@ -19,6 +22,8 @@ include { TARGETED_SEQUENCING } from '../../subworkflows/local/targeted_s
 include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_methylseq_pipeline'
 include { validateInputSamplesheet } from '../../subworkflows/local/utils_nfcore_methylseq_pipeline'

+include { Sample } from '../../utils/types.nf'
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     RUN MAIN WORKFLOW
@@ -28,133 +33,82 @@ include { validateInputSamplesheet } from '../../subworkflows/local/utils_nfco
 workflow METHYLSEQ {

     take:
-    samplesheet // channel: [ path(samplesheet.csv) ]
-    ch_versions // channel: [ path(versions.yml) ]
-    ch_fasta // channel: [ path(fasta) ]
-    ch_fasta_index // channel: [ path(fasta index) ]
-    ch_bismark_index // channel: [ path(bismark index) ]
-    ch_bwameth_index // channel: [ path(bwameth index) ]
+    ch_samples: Channel
+    val_fasta: Value?
+    val_fasta_index: Value?
+    val_bismark_index: Value?
+    val_bwameth_index: Value?
+    params: MethylseqParams

     main:

-    ch_fastq = Channel.empty()
-    ch_fastqc_html = Channel.empty()
-    ch_fastqc_zip = Channel.empty()
-    ch_reads = Channel.empty()
-    ch_bam = Channel.empty()
-    ch_bai = Channel.empty()
-    ch_bedgraph = Channel.empty()
-    ch_aligner_mqc = Channel.empty()
-    ch_qualimap = Channel.empty()
-    ch_preseq = Channel.empty()
-    ch_multiqc_files = Channel.empty()
-
-    //
-    // Branch channels from input samplesheet channel
-    //
-    ch_samplesheet = samplesheet
-        .branch { meta, fastqs ->
-            single : fastqs.size() == 1
-                return [ meta, fastqs.flatten() ]
-            multiple: fastqs.size() > 1
-                return [ meta, fastqs.flatten() ]
-        }
+    ch_multiqc_files = channel.empty()

     //
     // MODULE: Concatenate FastQ files from same sample if required
     //
-    CAT_FASTQ (
-        ch_samplesheet.multiple
+    ch_samples_merged = CAT_FASTQ (
+        ch_samples.filter { r -> r.reads.size() > 1 } // route by file count, not by single_end, so multi-file single-end samples are still concatenated
     )
-    ch_fastq = CAT_FASTQ.out.reads.mix(ch_samplesheet.single)
-    ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)
+    ch_samples_single = ch_samples.filter { r -> r.reads.size() == 1 } // samples with exactly one FASTQ bypass CAT_FASTQ
+
+    ch_fastq = ch_samples_merged.mix(ch_samples_single)

     //
     // MODULE: Run FastQC
     //
     if (!params.skip_fastqc) {
-        FASTQC (
-            ch_fastq
-        )
-        ch_fastqc_html = FASTQC.out.html
-        ch_fastqc_zip = FASTQC.out.zip
-        ch_versions = ch_versions.mix(FASTQC.out.versions)
+        ch_fastqc = FASTQC( ch_fastq )
     } else {
-        ch_fastqc_html = Channel.empty()
-        ch_fastqc_zip = Channel.empty()
+        ch_fastqc = channel.empty()
     }

     //
     // MODULE: Run TrimGalore!
     //
     if (!params.skip_trimming) {
-        TRIMGALORE(
-            ch_fastq
-        )
-        ch_reads = TRIMGALORE.out.reads
-        ch_versions = ch_versions.mix(TRIMGALORE.out.versions)
+        ch_trimmed_fastq = TRIMGALORE( ch_fastq )
+        ch_reads = ch_trimmed_fastq.map { r -> r + record(reads: r.trim_reads) }
     } else {
-        ch_reads = ch_fastq
+        ch_trimmed_fastq = channel.empty()
+        ch_reads = ch_fastq
     }

     //
     // SUBWORKFLOW: Align reads, deduplicate and extract methylation with Bismark
     //

+    val_bismark_summary = null
+
     // Aligner: bismark or bismark_hisat
-    if ( params.aligner =~ /bismark/ ) {
+    if ( params.aligner =~ /bismark/ && val_fasta && val_bismark_index ) {
         //
         // Run Bismark alignment + downstream processing
         //
-        ch_bismark_inputs = ch_reads
-            .combine(ch_fasta)
-            .combine(ch_bismark_index)
-            .multiMap { meta, reads, meta_fasta, fasta, meta_bismark, bismark_index ->
-                reads: [ meta, reads ]
-                fasta: [ meta_fasta, fasta ]
-                bismark_index: [ meta_bismark, bismark_index ]
-            }
-
-        FASTQ_ALIGN_DEDUP_BISMARK (
-            ch_bismark_inputs.reads,
-            ch_bismark_inputs.fasta,
-            ch_bismark_inputs.bismark_index,
+        bismark = FASTQ_ALIGN_DEDUP_BISMARK (
+            ch_reads,
+            val_fasta,
+            val_bismark_index,
             params.skip_deduplication || params.rrbs,
             params.cytosine_report || params.nomeseq
         )
-        ch_bam = FASTQ_ALIGN_DEDUP_BISMARK.out.bam
-        ch_bai = FASTQ_ALIGN_DEDUP_BISMARK.out.bai
-        ch_bedgraph = FASTQ_ALIGN_DEDUP_BISMARK.out.methylation_bedgraph
-        ch_aligner_mqc = FASTQ_ALIGN_DEDUP_BISMARK.out.multiqc
-        ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BISMARK.out.versions)
+        ch_alignment = bismark.results
+        val_bismark_summary = bismark.bismark_summary
+        ch_aligner_mqc = bismark.multiqc
     }
     // Aligner: bwameth
-    else if ( params.aligner == 'bwameth' ){
-
-        ch_bwameth_inputs = ch_reads
-            .combine(ch_fasta)
-            .combine(ch_fasta_index)
-            .combine(ch_bwameth_index)
-            .multiMap { meta, reads, meta_fasta, fasta, meta_fasta_index, fasta_index, meta_bwameth, bwameth_index ->
-                reads: [ meta, reads ]
-                fasta: [ meta_fasta, fasta ]
-                fasta_index: [ meta_fasta_index, fasta_index ]
-                bwameth_index: [ meta_bwameth, bwameth_index ]
-            }
+    else if ( params.aligner == 'bwameth' && val_fasta && val_fasta_index && val_bwameth_index ) {

-        FASTQ_ALIGN_DEDUP_BWAMETH (
-            ch_bwameth_inputs.reads,
-            ch_bwameth_inputs.fasta,
-            ch_bwameth_inputs.fasta_index,
-            ch_bwameth_inputs.bwameth_index,
+        bwameth = FASTQ_ALIGN_DEDUP_BWAMETH (
+            ch_reads,
+            val_fasta,
+            val_fasta_index,
+            val_bwameth_index,
             params.skip_deduplication || params.rrbs,
             workflow.profile.tokenize(',').intersect(['gpu']).size() >= 1
         )
-        ch_bam = FASTQ_ALIGN_DEDUP_BWAMETH.out.bam
-        ch_bai = FASTQ_ALIGN_DEDUP_BWAMETH.out.bai
-        ch_bedgraph = FASTQ_ALIGN_DEDUP_BWAMETH.out.methydackel_extract_bedgraph
-        ch_aligner_mqc = FASTQ_ALIGN_DEDUP_BWAMETH.out.multiqc
-        ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BWAMETH.out.versions)
+        ch_alignment = bwameth.results
+        ch_aligner_mqc = bwameth.multiqc
     }
     else {
         error "ERROR: Invalid aligner '${params.aligner}'. Valid options are: 'bismark', 'bismark_hisat', or 'bwameth'"
@@ -165,12 +119,12 @@ workflow METHYLSEQ {
     // skipped by default. to use run with `--run_qualimap` param.
     //
     if(params.run_qualimap) {
-        QUALIMAP_BAMQC (
-            ch_bam,
-            params.bamqc_regions_file ? Channel.fromPath( params.bamqc_regions_file, checkIfExists: true ).toList() : []
+        bamqc_regions_file = params.bamqc_regions_file ? file( params.bamqc_regions_file, checkIfExists: true ) : null
+        ch_qualimap = QUALIMAP_BAMQC (
+            ch_alignment.combine(gff: bamqc_regions_file)
         )
-        ch_qualimap = QUALIMAP_BAMQC.out.results
-        ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions)
+    } else {
+        ch_qualimap = channel.empty()
     }

     //
@@ -181,16 +135,20 @@ workflow METHYLSEQ {
         if (!params.target_regions_file) {
             error "ERROR: --target_regions_file must be specified when using --run_targeted_sequencing"
         }
-        TARGETED_SEQUENCING (
-            ch_bedgraph,
-            Channel.fromPath(params.target_regions_file, checkIfExists: true),
-            ch_fasta,
-            ch_fasta_index,
-            ch_bam,
-            ch_bai,
+        targeted_sequencing = TARGETED_SEQUENCING (
+            ch_alignment,
+            channel.value(file(params.target_regions_file, checkIfExists: true)),
+            val_fasta,
+            val_fasta_index,
             params.collecthsmetrics
         )
-        ch_versions = ch_versions.mix(TARGETED_SEQUENCING.out.versions)
+        ch_targeted_sequencing = targeted_sequencing.results
+        val_reference_dict = targeted_sequencing.reference_dict
+        val_intervallist = targeted_sequencing.intervallist
+    } else {
+        ch_targeted_sequencing = channel.empty()
+        val_reference_dict = null
+        val_intervallist = null
     }

     //
@@ -198,92 +156,165 @@ workflow METHYLSEQ {
     // skipped by default. to use run with `--run_preseq` param.
     //
     if(params.run_preseq) {
-        PRESEQ_LCEXTRAP (
-            ch_bam
-        )
-        ch_preseq = PRESEQ_LCEXTRAP.out.lc_extrap
-        ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions)
+        ch_preseq = PRESEQ_LCEXTRAP ( ch_alignment )
+    } else {
+        ch_preseq = channel.empty()
     }

-    //
-    // Collate and save software versions
-    //
-    ch_collated_versions = softwareVersionsToYAML(ch_versions)
-        .collectFile(
-            storeDir: "${params.outdir}/pipeline_info",
-            name: 'nf_core_' + 'methylseq_software_' + 'mqc_' + 'versions.yml',
-            sort: true,
-            newLine: true
-        )
+    ch_results = ch_alignment
+        .join(ch_fastqc, by: 'id', remainder: true)
+        .join(ch_trimmed_fastq, by: 'id', remainder: true)
+        .join(ch_qualimap, by: 'id', remainder: true)
+        .join(ch_targeted_sequencing, by: 'id', remainder: true)
+        .join(ch_preseq, by: 'id', remainder: true)

     //
     // MODULE: MultiQC
     //
     if (!params.skip_multiqc) {
-        ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-        ch_multiqc_custom_config = params.multiqc_config ?
-            Channel.fromPath(params.multiqc_config, checkIfExists: true) :
-            Channel.empty()
-        ch_multiqc_logo = params.multiqc_logo ?
-            Channel.fromPath(params.multiqc_logo, checkIfExists: true) :
-            Channel.empty()
-
-        summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
-        ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
-
-        ch_multiqc_custom_methods_description = params.multiqc_methods_description ?
+        multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+        multiqc_custom_config = params.multiqc_config ? file(params.multiqc_config, checkIfExists: true) : null
+        multiqc_logo = params.multiqc_logo ? file(params.multiqc_logo, checkIfExists: true) : null
+
+        summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+        workflow_summary = record(
+            name: 'workflow_summary_mqc.yaml',
+            items: [paramsSummaryMultiqc(summary_params)]
+        )
+
+        multiqc_custom_methods_description = params.multiqc_methods_description ?
             file(params.multiqc_methods_description, checkIfExists: true) :
             file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-        ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description))
-
-        ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-        ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
-        ch_multiqc_files = ch_multiqc_files.mix(
-            ch_methods_description.collectFile(
-                name: 'methods_description_mqc.yaml',
-                sort: true
-            )
+        methods_description = record(
+            name: 'methods_description_mqc.yaml',
+            items: [methodsDescriptionText(multiqc_custom_methods_description)]
         )
+        val_versions = softwareVersionsToYAML( channel.topic('versions') )
+            .collect()
+            .map { items ->
+                record(name: 'nf_core_methylseq_software_mqc_versions.yml', items: items.toSorted(), newLine: true)
+            }
+
+        ch_collected_files = WRITE_FILE(
+            channel.of(workflow_summary, methods_description).mix(val_versions)
+        )
+        ch_multiqc_files = ch_multiqc_files.mix(ch_collected_files)
+
         if(params.run_qualimap) {
-            ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_BAMQC.out.results.collect{ it[1] }.ifEmpty([]))
+            ch_multiqc_files = ch_multiqc_files.mix(ch_qualimap.map { r -> r.qualimap_bamqc })
         }
         if (params.run_preseq) {
-            ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.log.collect{ it[1] }.ifEmpty([]))
+            ch_multiqc_files = ch_multiqc_files.mix(ch_preseq.map { r -> r.lc_log })
         }
-        ch_multiqc_files = ch_multiqc_files.mix(ch_aligner_mqc.ifEmpty([]))
+        ch_multiqc_files = ch_multiqc_files.mix(ch_aligner_mqc)
         if (!params.skip_trimming) {
-            ch_multiqc_files = ch_multiqc_files.mix(TRIMGALORE.out.log.collect{ it[1] })
+            ch_multiqc_files = ch_multiqc_files.mix(ch_reads.map { r -> r.trim_log })
         }
-        if (params.run_targeted_sequencing) {
-            if (params.collecthsmetrics) {
-                ch_multiqc_files = ch_multiqc_files.mix(TARGETED_SEQUENCING.out.picard_metrics.collect{ it[1] }.ifEmpty([]))
-            }
+        if (params.run_targeted_sequencing && params.collecthsmetrics) {
+            ch_multiqc_files = ch_multiqc_files.mix(ch_targeted_sequencing.map { r -> r.picard_hsmetrics })
         }
         if (!params.skip_fastqc) {
-            ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{ it[1] }.ifEmpty([]))
+            ch_multiqc_files = ch_multiqc_files.mix(ch_fastqc.map { r -> r.fastqc_zip })
         }

-        MULTIQC (
-            ch_multiqc_files.collect(),
-            ch_multiqc_config.toList(),
-            ch_multiqc_custom_config.toList(),
-            ch_multiqc_logo.toList(),
-            [],
-            []
-        )
-        ch_multiqc_report = MULTIQC.out.report.toList()
+        val_multiqc_inputs = ch_multiqc_files
+            .flatMap()
+            .collect()
+            .map { multiqc_files ->
+                record(
+                    multiqc_files: multiqc_files.toSet(),
+                    multiqc_config: multiqc_config,
+                    extra_multiqc_config: multiqc_custom_config,
+                    multiqc_logo: multiqc_logo
+                )
+            }
+        val_multiqc_report = MULTIQC ( val_multiqc_inputs ).map { r -> r.report }
     } else {
-        ch_multiqc_report = Channel.empty()
+        val_multiqc_report = null
     }

     emit:
-    bam = ch_bam // channel: [ val(meta), path(bam) ]
-    bai = ch_bai // channel: [ val(meta), path(bai) ]
-    qualimap = ch_qualimap // channel: [ val(meta), path(qualimap) ]
-    preseq = ch_preseq // channel: [ val(meta), path(preseq) ]
-    multiqc_report = ch_multiqc_report // channel: [ path(multiqc_report.html ) ]
-    versions = ch_versions // channel: [ path(versions.yml) ]
+    results : Channel = ch_results
+    bismark_summary : Value? = val_bismark_summary
+    reference_dict : Value? = val_reference_dict
+    intervallist : Value? = val_intervallist
+    multiqc_report : Value? = val_multiqc_report
+}
+
+record MethylseqParams {
+    skip_fastqc: Boolean
+    skip_trimming: Boolean
+    aligner: String
+    skip_deduplication: Boolean
+    rrbs: Boolean
+    cytosine_report: Boolean
+    nomeseq: Boolean
+    run_qualimap: Boolean
+    bamqc_regions_file: String
+    run_targeted_sequencing: Boolean
+    target_regions_file: String
+    collecthsmetrics: Boolean
+    run_preseq: Boolean
+    outdir: String
+    skip_multiqc: Boolean
+    multiqc_config: String
+    multiqc_logo: String
+    multiqc_methods_description: String
+}
+
+record MethylseqResult {
+    id: String
+    single_end: Boolean
+
+    // fastqc (joined with remainder: true; absent when skip_fastqc is set)
+    fastqc_html: Set?
+    fastqc_zip: Set?
+
+    // trimgalore (joined with remainder: true; absent when skip_trimming is set)
+    trim_reads: List?
+    trim_log: List?
+    trim_unpaired: List?
+    trim_html: List?
+    trim_zip: List?
+
+    // alignment (bismark / bwameth)
+    bam: Path
+    bai: Path
+
+    // bismark (only present when the bismark branch ran)
+    align_report: Path?
+    unmapped: Path?
+    dedup_report: Path?
+    coverage2cytosine_coverage: Path?
+    coverage2cytosine_report: Path?
+    coverage2cytosine_summary: Path?
+    methylation_bedgraph: Path?
+    methylation_calls: Path?
+    methylation_coverage: Path?
+    methylation_report: Path?
+    methylation_mbias: Path?
+    bismark_report: Path?
+
+    // bwameth (only present when the bwameth branch ran)
+    samtools_flagstat: Path?
+    samtools_stats: Path?
+    methyldackel_extract_bedgraph: Path?
+    methyldackel_extract_methylkit: Path?
+    methyldackel_mbias: Path?
+    picard_metrics: Path?
+
+    // qualimap
+    qualimap_bamqc: Path?
+
+    // targeted sequencing (joined with remainder: true; absent unless run_targeted_sequencing is set)
+    bedgraph_intersect: Path?
+    picard_hsmetrics: Path?
+
+    // preseq
+    lc_extrap: Path?
+    lc_log: Path?
+
 }

 /*
diff --git a/workflows/methylseq/nextflow.config b/workflows/methylseq/nextflow.config
index b286800e..67d77b35 100644
--- a/workflows/methylseq/nextflow.config
+++ b/workflows/methylseq/nextflow.config
@@ -10,13 +10,3 @@ includeConfig "../../conf/subworkflows/fasta_index_bismark_bwameth.config"
 includeConfig "../../conf/subworkflows/fastq_align_dedup_bismark.config"
 includeConfig "../../conf/subworkflows/fastq_align_dedup_bwameth.config"
 includeConfig "../../conf/subworkflows/targeted_sequencing.config"
-
-process {
-
-    publishDir = [
-        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
-        mode: params.publish_dir_mode,
-        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-    ]
-
-}