diff --git a/conf/modules/bedtools_intersect.config b/conf/modules/bedtools_intersect.config index 629d072b..46f881fb 100644 --- a/conf/modules/bedtools_intersect.config +++ b/conf/modules/bedtools_intersect.config @@ -1,8 +1,6 @@ process { - withName: BEDTOOLS_INTERSECT { - ext.args = '' - ext.prefix = { "${intervals1.baseName}" } - ext.suffix = 'targeted.bedGraph' + withName: FILTER_BEDGRAPH_TARGETS { + ext.prefix = { "${bedgraph.baseName}" } publishDir = [ [ path: { params.aligner == 'bismark' ? "${params.outdir}/bismark/methylation_calls/bedGraph" : "${params.outdir}/methyldackel" }, diff --git a/conf/modules/bedtools_intersect_cov.config b/conf/modules/bedtools_intersect_cov.config new file mode 100644 index 00000000..561925ca --- /dev/null +++ b/conf/modules/bedtools_intersect_cov.config @@ -0,0 +1,14 @@ +process { + withName: BEDTOOLS_INTERSECT_COV { + ext.args = '' + ext.prefix = { "${intervals1.baseName}" } + ext.suffix = 'targeted.cov' + publishDir = [ + [ + path: { "${params.outdir}/bismark/methylation_calls/methylation_coverage" }, + mode: params.publish_dir_mode, + pattern: "*targeted.cov" + ] + ] + } +} diff --git a/conf/subworkflows/targeted_sequencing.config b/conf/subworkflows/targeted_sequencing.config index 666eb0d7..3bb996ad 100644 --- a/conf/subworkflows/targeted_sequencing.config +++ b/conf/subworkflows/targeted_sequencing.config @@ -1,4 +1,5 @@ includeConfig "../modules/bedtools_intersect.config" +includeConfig "../modules/bedtools_intersect_cov.config" includeConfig "../modules/picard_bedtointervallist.config" includeConfig "../modules/picard_createsequencedictionary.config" includeConfig "../modules/picard_collecthsmetrics.config" diff --git a/modules/local/filter_bedgraph_targets.nf b/modules/local/filter_bedgraph_targets.nf new file mode 100644 index 00000000..b9a37735 --- /dev/null +++ b/modules/local/filter_bedgraph_targets.nf @@ -0,0 +1,57 @@ +/* + * Filter bedGraph files against target regions with CpG-aware boundary handling. + * + * Bismark bedGraph files use 0-based half-open coordinates for single-C positions + * (e.g., chr1 10788 10789 for a CpG at 1-based position 10789). When a CpG + * straddles a target boundary — C just outside, G just inside — a naive + * intersection drops it because the single-C interval [10788, 10789) does not + * overlap the target [10789, …). + * + * This process extends each bedGraph entry by 1 bp on the right to represent the + * full CpG dinucleotide, intersects with the target BED, then restores the + * original single-base coordinates. + */ +process FILTER_BEDGRAPH_TARGETS { + tag "${meta.id}" + label 'process_single' + + conda "bioconda::bedtools=2.31.1" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' + : 'biocontainers/bedtools:2.31.1--hf5e1c6e_0'}" + + input: + tuple val(meta), path(bedgraph), path(targets) + + output: + tuple val(meta), path("*.targeted.bedGraph"), emit: intersect + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # Decompress and extend end coordinate by 1 bp so the interval covers the full CpG dinucleotide + # Skip any track/header lines to avoid corrupting them + zcat ${bedgraph} | awk 'BEGIN{OFS="\t"} /^track/{print; next} {if(NF>=3) \$3=\$3+1; print}' > extended.bedGraph + + # Intersect with target regions (-wa: write original -a entry; -u: unique hits only) + bedtools intersect \\ + -a extended.bedGraph \\ + -b ${targets} \\ + -wa \\ + -u \\ + > intersected.bedGraph + + # Restore original single-base end coordinates + awk 'BEGIN{OFS="\t"} /^track/{print; next} {if(NF>=3) \$3=\$3-1; print}' intersected.bedGraph \\ + > ${prefix}.targeted.bedGraph + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/subworkflows/local/targeted_sequencing/main.nf b/subworkflows/local/targeted_sequencing/main.nf index f9d20aa9..d0dd9bc9 100644 --- a/subworkflows/local/targeted_sequencing/main.nf +++ b/subworkflows/local/targeted_sequencing/main.nf @@ -1,27 +1,28 @@ /* * targeted_sequencing subworkflow * - * Filters bedGraph files with the target_regions BED file so they contain positions only listed in the BED. + * Filters bedGraph and coverage files with the target_regions BED file so they contain positions only listed in the BED. * If specified, it also generates some performance metrics with Picard CollectHsMetrics, such as Fold-80 Base Penalty, * HS Library Size, Percent Duplicates, and Percent Off Bait. This is relevant for methylome experiments with targeted seq. */ -include { BEDTOOLS_INTERSECT } from '../../../modules/nf-core/bedtools/intersect/main' -include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/picard/createsequencedictionary/main' -include { PICARD_BEDTOINTERVALLIST } from '../../../modules/nf-core/picard/bedtointervallist/main' -include { PICARD_COLLECTHSMETRICS } from '../../../modules/nf-core/picard/collecthsmetrics/main' +include { FILTER_BEDGRAPH_TARGETS } from '../../../modules/local/filter_bedgraph_targets' +include { BEDTOOLS_INTERSECT as BEDTOOLS_INTERSECT_COV } from '../../../modules/nf-core/bedtools/intersect/main' +include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/picard/createsequencedictionary/main' +include { PICARD_BEDTOINTERVALLIST } from '../../../modules/nf-core/picard/bedtointervallist/main' +include { PICARD_COLLECTHSMETRICS } from '../../../modules/nf-core/picard/collecthsmetrics/main' workflow TARGETED_SEQUENCING { - take: - ch_bedgraph // channel: [ val(meta), [ bedGraph(s) ]] when bwameth, [ val(meta), bedGraph ] when bismark - ch_target_regions // channel: path(target_regions.bed) - ch_fasta // channel: [ [:], /path/to/genome.fa] - ch_fasta_index // channel: [ val(meta), /path/to/genome.fa.fai] - ch_bam // channel: [ val(meta), [ bam ] ] ## BAM from alignment - ch_bai // channel: [ val(meta), [ bai ] ] ## BAI from alignment - ch_gzi // channel: [ val(meta), [ gzi ] ] ## GZI from fasta - collecthsmetrics // boolean: whether to run Picard CollectHsMetrics + ch_bedgraph // channel: [ val(meta), [ bedGraph(s) ]] when bwameth, [ val(meta), bedGraph ] when bismark + ch_coverage // channel: [ val(meta), cov.gz ] from Bismark methylation extractor (empty for bwameth) + ch_target_regions // channel: path(target_regions.bed) + ch_fasta // channel: [ [:], /path/to/genome.fa] + ch_fasta_index // channel: [ val(meta), /path/to/genome.fa.fai] + ch_bam // channel: [ val(meta), [ bam ] ] ## BAM from alignment + ch_bai // channel: [ val(meta), [ bai ] ] ## BAI from alignment + ch_gzi // channel: [ val(meta), [ gzi ] ] ## GZI from fasta + collecthsmetrics // boolean: whether to run Picard CollectHsMetrics main: @@ -29,19 +30,31 @@ workflow TARGETED_SEQUENCING { ch_picard_metrics = Channel.empty() /* - * Intersect bedGraph files with target regions - * Ensure ch_bedgraph contains the bedGraph file(s) in an array and split into individual bedGraphs + * Intersect bedGraph files with target regions (CpG-aware boundary handling) + * Ensure ch_bedgraph contains the bedGraph file(s) in an array and split into individual bedGraphs. + * The FILTER_BEDGRAPH_TARGETS process extends single-C intervals by 1 bp before + * intersection so that CpGs straddling a target boundary are not lost. */ ch_bedgraphs_target = ch_bedgraph .map { meta, bedgraphs -> tuple(meta, bedgraphs instanceof List ? bedgraphs : [bedgraphs]) } .flatMap { meta, bedgraphs -> bedgraphs.collect { bedgraph -> [meta, bedgraph] } } .combine(ch_target_regions) - BEDTOOLS_INTERSECT( - ch_bedgraphs_target, - [[:], []] + FILTER_BEDGRAPH_TARGETS(ch_bedgraphs_target) + ch_versions = ch_versions.mix(FILTER_BEDGRAPH_TARGETS.out.versions) + + /* + * Intersect Bismark coverage files with target regions + * The .cov.gz files are filtered the same way as bedGraph files so that + * downstream tools (methylKit, bsseq, DSS) receive only on-target CpGs. + */ + ch_coverage_target = ch_coverage.combine(ch_target_regions) + + BEDTOOLS_INTERSECT_COV( + ch_coverage_target, + [[:], []], ) - ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions) + ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT_COV.out.versions) /* * Run Picard CollectHSMetrics @@ -65,7 +78,7 @@ workflow TARGETED_SEQUENCING { PICARD_BEDTOINTERVALLIST( target_regions_with_meta, ch_sequence_dictionary, - [] + [], ) ch_intervals = PICARD_BEDTOINTERVALLIST.out.intervallist.map { it[1] } ch_versions = ch_versions.mix(PICARD_BEDTOINTERVALLIST.out.versions) @@ -75,7 +88,8 @@ workflow TARGETED_SEQUENCING { * Note: Using the same intervals for both target and bait as they are typically * the same for targeted methylation sequencing experiments */ - ch_picard_inputs = ch_bam.join(ch_bai) + ch_picard_inputs = ch_bam + .join(ch_bai) .combine(ch_intervals) .combine(ch_intervals) .combine(ch_fasta) @@ -83,11 +97,11 @@ workflow TARGETED_SEQUENCING { .combine(ch_sequence_dictionary) .combine(ch_gzi) .multiMap { meta, bam, bai, intervals1, intervals2, meta_fasta, fasta, meta_fasta_index, fasta_index, meta_dict, dict, meta_gzi, gzi -> - bam_etc: [ meta, bam, bai, intervals1, intervals2 ] // intervals: baits, targets - fasta: [ meta_fasta, fasta ] - fasta_index: [ meta_fasta_index, fasta_index ] - dict: [ meta_dict, dict ] - gzi : [ meta_gzi, gzi ] + bam_etc: [meta, bam, bai, intervals1, intervals2] + fasta: [meta_fasta, fasta] + fasta_index: [meta_fasta_index, fasta_index] + dict: [meta_dict, dict] + gzi: [meta_gzi, gzi] } PICARD_COLLECTHSMETRICS( @@ -95,14 +109,15 @@ workflow TARGETED_SEQUENCING { ch_picard_inputs.fasta, ch_picard_inputs.fasta_index, ch_picard_inputs.dict, - ch_picard_inputs.gzi + ch_picard_inputs.gzi, ) ch_picard_metrics = PICARD_COLLECTHSMETRICS.out.metrics ch_versions = ch_versions.mix(PICARD_COLLECTHSMETRICS.out.versions) } emit: - bedgraph_filtered = BEDTOOLS_INTERSECT.out.intersect // channel: [ val(meta), path("*.bedGraph") ] - picard_metrics = ch_picard_metrics // channel: [ val(meta), path("*_metrics") ] - versions = ch_versions // channel: path("*.version.txt") + bedgraph_filtered = FILTER_BEDGRAPH_TARGETS.out.intersect // channel: [ val(meta), path("*.targeted.bedGraph") ] + coverage_filtered = BEDTOOLS_INTERSECT_COV.out.intersect // channel: [ val(meta), path("*.cov") ] + picard_metrics = ch_picard_metrics // channel: [ val(meta), path("*_metrics") ] + versions = ch_versions // channel: path("*.version.txt") } diff --git a/tests/targeted_sequencing_variants.nf.test.snap b/tests/targeted_sequencing_variants.nf.test.snap index fdf7eb65..96fcf4ff 100644 --- a/tests/targeted_sequencing_variants.nf.test.snap +++ b/tests/targeted_sequencing_variants.nf.test.snap @@ -2,7 +2,7 @@ "Params: bwameth with run_targeted_sequencing": { "content": [ { - "BEDTOOLS_INTERSECT": { + "FILTER_BEDGRAPH_TARGETS": { "bedtools": "2.31.1" }, "BWAMETH_ALIGN": { @@ -40,6 +40,9 @@ "Workflow": { "nf-core/methylseq": "v4.3.0dev" }, + "SAMTOOLS_STATS": { + "samtools": "1.22.1" + }, "FASTQC": { "fastqc": "0.12.1" }, @@ -48,9 +51,6 @@ }, "SAMTOOLS_SORT": { "samtools": "1.22.1" - }, - "SAMTOOLS_STATS": { - "samtools": "1.22.1" } }, [ @@ -316,7 +316,10 @@ "Params: bismark with run_targeted_sequencing and collecthsmetrics": { "content": [ { - "BEDTOOLS_INTERSECT": { + "BEDTOOLS_INTERSECT_COV": { + "bedtools": "2.31.1" + }, + "FILTER_BEDGRAPH_TARGETS": { "bedtools": "2.31.1" }, "BISMARK_DEDUPLICATE": { @@ -431,9 +434,13 @@ "bismark/methylation_calls/methylation_calls/CpG_OT_SRR389222_sub3_trimmed_bismark_bt2.deduplicated.txt.gz", "bismark/methylation_calls/methylation_coverage", "bismark/methylation_calls/methylation_coverage/Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz", + "bismark/methylation_calls/methylation_coverage/Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated.bismark.cov.targeted.cov", "bismark/methylation_calls/methylation_coverage/SRR389222_sub1_trimmed_bismark_bt2.deduplicated.bismark.cov.gz", + "bismark/methylation_calls/methylation_coverage/SRR389222_sub1_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov", "bismark/methylation_calls/methylation_coverage/SRR389222_sub2_trimmed_bismark_bt2.deduplicated.bismark.cov.gz", + "bismark/methylation_calls/methylation_coverage/SRR389222_sub2_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov", "bismark/methylation_calls/methylation_coverage/SRR389222_sub3_trimmed_bismark_bt2.deduplicated.bismark.cov.gz", + "bismark/methylation_calls/methylation_coverage/SRR389222_sub3_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov", "bismark/methylation_calls/splitting_report", "bismark/methylation_calls/splitting_report/Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated_splitting_report.txt", "bismark/methylation_calls/splitting_report/SRR389222_sub1_trimmed_bismark_bt2.deduplicated_splitting_report.txt", @@ -651,9 +658,13 @@ "CpG_OT_SRR389222_sub2_trimmed_bismark_bt2.deduplicated.txt.gz:md5,94f30db9814f457fd6dffa7e45b7883b", "CpG_OT_SRR389222_sub3_trimmed_bismark_bt2.deduplicated.txt.gz:md5,36cdcad80268738caa3e674de21c500c", "Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz:md5,8e6ba6fe66416d73e7f94b4da9ca77b1", + "Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated.bismark.cov.targeted.cov:md5,d41d8cd98f00b204e9800998ecf8427e", "SRR389222_sub1_trimmed_bismark_bt2.deduplicated.bismark.cov.gz:md5,5377d9fff4f1bf5b98dc2310fbeb6ec9", + "SRR389222_sub1_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov:md5,02abfa056950b4ae8c33b8bed3988a86", "SRR389222_sub2_trimmed_bismark_bt2.deduplicated.bismark.cov.gz:md5,4713bda7b4f2bb16ce23853aea763cbd", + "SRR389222_sub2_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov:md5,163091ce70e0b5e6552ac5eb6e7efd04", "SRR389222_sub3_trimmed_bismark_bt2.deduplicated.bismark.cov.gz:md5,f9a07c3e11e0caad6df3cd2e4d94d8ac", + "SRR389222_sub3_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov:md5,51fd19975d43808686cf59c7d4a298c8", "Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated_splitting_report.txt:md5,9b46b9f49f9b22e0565fb915c667cde2", "SRR389222_sub1_trimmed_bismark_bt2.deduplicated_splitting_report.txt:md5,23036cfc6e2745e5a5bf67400b460bc1", "SRR389222_sub2_trimmed_bismark_bt2.deduplicated_splitting_report.txt:md5,e06738420ad17564d41a789f2bdfd625", @@ -716,7 +727,10 @@ "Params: bismark with run_targeted_sequencing": { "content": [ { - "BEDTOOLS_INTERSECT": { + "BEDTOOLS_INTERSECT_COV": { + "bedtools": "2.31.1" + }, + "FILTER_BEDGRAPH_TARGETS": { "bedtools": "2.31.1" }, "BISMARK_DEDUPLICATE": { @@ -825,9 +839,13 @@ "bismark/methylation_calls/methylation_calls/CpG_OT_SRR389222_sub3_trimmed_bismark_bt2.deduplicated.txt.gz", "bismark/methylation_calls/methylation_coverage", "bismark/methylation_calls/methylation_coverage/Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz", + "bismark/methylation_calls/methylation_coverage/Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated.bismark.cov.targeted.cov", "bismark/methylation_calls/methylation_coverage/SRR389222_sub1_trimmed_bismark_bt2.deduplicated.bismark.cov.gz", + "bismark/methylation_calls/methylation_coverage/SRR389222_sub1_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov", "bismark/methylation_calls/methylation_coverage/SRR389222_sub2_trimmed_bismark_bt2.deduplicated.bismark.cov.gz", + "bismark/methylation_calls/methylation_coverage/SRR389222_sub2_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov", "bismark/methylation_calls/methylation_coverage/SRR389222_sub3_trimmed_bismark_bt2.deduplicated.bismark.cov.gz", + "bismark/methylation_calls/methylation_coverage/SRR389222_sub3_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov", "bismark/methylation_calls/splitting_report", "bismark/methylation_calls/splitting_report/Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated_splitting_report.txt", "bismark/methylation_calls/splitting_report/SRR389222_sub1_trimmed_bismark_bt2.deduplicated_splitting_report.txt", @@ -1043,9 +1061,13 @@ "CpG_OT_SRR389222_sub2_trimmed_bismark_bt2.deduplicated.txt.gz:md5,94f30db9814f457fd6dffa7e45b7883b", "CpG_OT_SRR389222_sub3_trimmed_bismark_bt2.deduplicated.txt.gz:md5,36cdcad80268738caa3e674de21c500c", "Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated.bismark.cov.gz:md5,8e6ba6fe66416d73e7f94b4da9ca77b1", + "Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated.bismark.cov.targeted.cov:md5,d41d8cd98f00b204e9800998ecf8427e", "SRR389222_sub1_trimmed_bismark_bt2.deduplicated.bismark.cov.gz:md5,5377d9fff4f1bf5b98dc2310fbeb6ec9", + "SRR389222_sub1_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov:md5,02abfa056950b4ae8c33b8bed3988a86", "SRR389222_sub2_trimmed_bismark_bt2.deduplicated.bismark.cov.gz:md5,4713bda7b4f2bb16ce23853aea763cbd", + "SRR389222_sub2_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov:md5,163091ce70e0b5e6552ac5eb6e7efd04", "SRR389222_sub3_trimmed_bismark_bt2.deduplicated.bismark.cov.gz:md5,f9a07c3e11e0caad6df3cd2e4d94d8ac", + "SRR389222_sub3_trimmed_bismark_bt2.deduplicated.bismark.cov.targeted.cov:md5,51fd19975d43808686cf59c7d4a298c8", "Ecoli_10K_methylated_1_val_1_bismark_bt2_pe.deduplicated_splitting_report.txt:md5,9b46b9f49f9b22e0565fb915c667cde2", "SRR389222_sub1_trimmed_bismark_bt2.deduplicated_splitting_report.txt:md5,23036cfc6e2745e5a5bf67400b460bc1", "SRR389222_sub2_trimmed_bismark_bt2.deduplicated_splitting_report.txt:md5,e06738420ad17564d41a789f2bdfd625", @@ -1108,7 +1130,7 @@ "Params: bwameth with run_targeted_sequencing and collecthsmetrics": { "content": [ { - "BEDTOOLS_INTERSECT": { + "FILTER_BEDGRAPH_TARGETS": { "bedtools": "2.31.1" }, "BWAMETH_ALIGN": { @@ -1152,6 +1174,9 @@ "Workflow": { "nf-core/methylseq": "v4.3.0dev" }, + "SAMTOOLS_STATS": { + "samtools": "1.22.1" + }, "FASTQC": { "fastqc": "0.12.1" }, @@ -1160,9 +1185,6 @@ }, "SAMTOOLS_SORT": { "samtools": "1.22.1" - }, - "SAMTOOLS_STATS": { - "samtools": "1.22.1" } }, [ @@ -1430,7 +1452,7 @@ "Params: bwameth with all_contexts and run_targeted_sequencing": { "content": [ { - "BEDTOOLS_INTERSECT": { + "FILTER_BEDGRAPH_TARGETS": { "bedtools": "2.31.1" }, "BWAMETH_ALIGN": { @@ -1468,6 +1490,9 @@ "Workflow": { "nf-core/methylseq": "v4.3.0dev" }, + "SAMTOOLS_STATS": { + "samtools": "1.22.1" + }, "FASTQC": { "fastqc": "0.12.1" }, @@ -1476,9 +1501,6 @@ }, "SAMTOOLS_SORT": { "samtools": "1.22.1" - }, - "SAMTOOLS_STATS": { - "samtools": "1.22.1" } }, [ @@ -1773,4 +1795,4 @@ }, "timestamp": "2025-12-04T11:14:14.394458" } -} \ No newline at end of file +} diff --git a/workflows/methylseq/main.nf b/workflows/methylseq/main.nf index 9630375d..b4d28e01 100644 --- a/workflows/methylseq/main.nf +++ b/workflows/methylseq/main.nf @@ -32,66 +32,66 @@ include { TARGETED_SEQUENCING } from '../../subworkflows/local/targete */ workflow METHYLSEQ { - take: - samplesheet // channel: [ path(samplesheet.csv) ] - ch_versions // channel: [ path(versions.yml) ] - ch_fasta // channel: [ path(fasta) ] - ch_fasta_index // channel: [ path(fasta index) ] - ch_bismark_index // channel: [ path(bismark index) ] - ch_bwameth_index // channel: [ path(bwameth index) ] - ch_bwamem_index // channel: [ path(bwamem_index) ] + samplesheet // channel: [ path(samplesheet.csv) ] + ch_versions // channel: [ path(versions.yml) ] + ch_fasta // channel: [ path(fasta) ] + ch_fasta_index // channel: [ path(fasta index) ] + ch_bismark_index // channel: [ path(bismark index) ] + ch_bwameth_index // channel: [ path(bwameth index) ] + ch_bwamem_index // channel: [ path(bwamem_index) ] main: - ch_fastq = channel.empty() - ch_fastqc_html = channel.empty() - ch_fastqc_zip = channel.empty() - ch_reads = channel.empty() - ch_bam = channel.empty() - ch_bai = channel.empty() - ch_gzi = channel.empty() - ch_bedgraph = channel.empty() - ch_aligner_mqc = channel.empty() + ch_fastq = channel.empty() + ch_fastqc_html = channel.empty() + ch_fastqc_zip = channel.empty() + ch_reads = channel.empty() + ch_bam = channel.empty() + ch_bai = channel.empty() + ch_gzi = channel.empty() + ch_bedgraph = channel.empty() + ch_coverage = channel.empty() + ch_aligner_mqc = channel.empty() ch_rastair_mbias = channel.empty() - ch_rastair_call = channel.empty() - ch_methylkit = channel.empty() - ch_mbias = channel.empty() - ch_qualimap = channel.empty() - ch_preseq = channel.empty() + ch_rastair_call = channel.empty() + ch_methylkit = channel.empty() + ch_mbias = channel.empty() + ch_qualimap = channel.empty() + ch_preseq = channel.empty() ch_multiqc_files = channel.empty() // // Branch channels from input samplesheet channel // - ch_samplesheet = samplesheet - .branch { meta, fastqs -> - single : fastqs.size() == 1 - return [ meta, fastqs.flatten() ] - multiple: fastqs.size() > 1 - return [ meta, fastqs.flatten() ] - } + ch_samplesheet = samplesheet.branch { meta, fastqs -> + single: fastqs.size() == 1 + return [meta, fastqs.flatten()] + multiple: fastqs.size() > 1 + return [meta, fastqs.flatten()] + } // // MODULE: Concatenate FastQ files from same sample if required // - CAT_FASTQ ( + CAT_FASTQ( ch_samplesheet.multiple ) - ch_fastq = CAT_FASTQ.out.reads.mix(ch_samplesheet.single) + ch_fastq = CAT_FASTQ.out.reads.mix(ch_samplesheet.single) ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) // // MODULE: Run FastQC // if (!params.skip_fastqc) { - FASTQC ( + FASTQC( ch_fastq ) - ch_fastqc_html = FASTQC.out.html - ch_fastqc_zip = FASTQC.out.zip - } else { - ch_fastqc_html = channel.empty() - ch_fastqc_zip = channel.empty() + ch_fastqc_html = FASTQC.out.html + ch_fastqc_zip = FASTQC.out.zip + } + else { + ch_fastqc_html = channel.empty() + ch_fastqc_zip = channel.empty() } // @@ -101,10 +101,11 @@ workflow METHYLSEQ { TRIMGALORE( ch_fastq ) - ch_reads = TRIMGALORE.out.reads + ch_reads = TRIMGALORE.out.reads ch_versions = ch_versions.mix(TRIMGALORE.out.versions) - } else { - ch_reads = ch_fastq + } + else { + ch_reads = ch_fastq } // @@ -112,11 +113,11 @@ workflow METHYLSEQ { // if (params.taps && params.aligner != 'bwamem') { - log.info "TAPS protocol detected and aligner is not 'bwamem'. We recommend using bwa-mem for TAPS protocol as it is optimized for this type of data." + log.info("TAPS protocol detected and aligner is not 'bwamem'. We recommend using bwa-mem for TAPS protocol as it is optimized for this type of data.") } // Aligner: bismark or bismark_hisat - if (params.aligner =~ /bismark/ ) { + if (params.aligner =~ /bismark/) { // // Run Bismark alignment + downstream processing // @@ -124,82 +125,79 @@ workflow METHYLSEQ { .combine(ch_fasta) .combine(ch_bismark_index) .multiMap { meta, reads, meta_fasta, fasta, meta_bismark, bismark_index -> - reads: [ meta, reads ] - fasta: [ meta_fasta, fasta ] - bismark_index: [ meta_bismark, bismark_index ] + reads: [meta, reads] + fasta: [meta_fasta, fasta] + bismark_index: [meta_bismark, bismark_index] } - FASTQ_ALIGN_DEDUP_BISMARK ( + FASTQ_ALIGN_DEDUP_BISMARK( ch_bismark_inputs.reads, ch_bismark_inputs.fasta, ch_bismark_inputs.bismark_index, params.skip_deduplication || params.rrbs, - params.cytosine_report || params.nomeseq + params.cytosine_report || params.nomeseq, ) - ch_bam = FASTQ_ALIGN_DEDUP_BISMARK.out.bam - ch_bai = FASTQ_ALIGN_DEDUP_BISMARK.out.bai - ch_bedgraph = FASTQ_ALIGN_DEDUP_BISMARK.out.methylation_bedgraph + ch_bam = FASTQ_ALIGN_DEDUP_BISMARK.out.bam + ch_bai = FASTQ_ALIGN_DEDUP_BISMARK.out.bai + ch_bedgraph = FASTQ_ALIGN_DEDUP_BISMARK.out.methylation_bedgraph + ch_coverage = FASTQ_ALIGN_DEDUP_BISMARK.out.methylation_coverage ch_aligner_mqc = FASTQ_ALIGN_DEDUP_BISMARK.out.multiqc - ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BISMARK.out.versions) + ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BISMARK.out.versions) } - // Aligner: bwameth - else if (params.aligner == 'bwameth'){ + else if (params.aligner == 'bwameth') { ch_bwameth_inputs = ch_reads .combine(ch_fasta) .combine(ch_fasta_index) .combine(ch_bwameth_index) .multiMap { meta, reads, meta_fasta, fasta, meta_fasta_index, fasta_index, meta_bwameth, bwameth_index -> - reads: [ meta, reads ] - fasta: [ meta_fasta, fasta ] - fasta_index: [ meta_fasta_index, fasta_index ] - bwameth_index: [ meta_bwameth, bwameth_index ] + reads: [meta, reads] + fasta: [meta_fasta, fasta] + fasta_index: [meta_fasta_index, fasta_index] + bwameth_index: [meta_bwameth, bwameth_index] } - FASTQ_ALIGN_DEDUP_BWAMETH ( + FASTQ_ALIGN_DEDUP_BWAMETH( ch_bwameth_inputs.reads, ch_bwameth_inputs.fasta, ch_bwameth_inputs.fasta_index, ch_bwameth_inputs.bwameth_index, params.skip_deduplication || params.rrbs, - workflow.profile.tokenize(',').intersect(['gpu']).size() >= 1 + workflow.profile.tokenize(',').intersect(['gpu']).size() >= 1, ) - ch_bam = FASTQ_ALIGN_DEDUP_BWAMETH.out.bam - ch_bai = FASTQ_ALIGN_DEDUP_BWAMETH.out.bai + ch_bam = FASTQ_ALIGN_DEDUP_BWAMETH.out.bam + ch_bai = FASTQ_ALIGN_DEDUP_BWAMETH.out.bai ch_aligner_mqc = FASTQ_ALIGN_DEDUP_BWAMETH.out.multiqc - ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BWAMETH.out.versions) + ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BWAMETH.out.versions) } - - // Aligner: bwamem - else if (params.aligner == 'bwamem'){ + else if (params.aligner == 'bwamem') { ch_bwamem_inputs = ch_reads .combine(ch_fasta) .combine(ch_fasta_index) .combine(ch_bwamem_index) .multiMap { meta, reads, meta_fasta, fasta, meta_fasta_index, fasta_index, meta_bwamem, bwamem_index -> - reads: [ meta, reads ] - fasta: [ meta_fasta, fasta ] - fasta_index: [ meta_fasta_index, fasta_index ] - bwamem_index: [ meta_bwamem, bwamem_index ] + reads: [meta, reads] + fasta: [meta_fasta, fasta] + fasta_index: [meta_fasta_index, fasta_index] + bwamem_index: [meta_bwamem, bwamem_index] } - FASTQ_ALIGN_DEDUP_BWAMEM ( + FASTQ_ALIGN_DEDUP_BWAMEM( ch_bwamem_inputs.reads, ch_bwamem_inputs.fasta, ch_bwamem_inputs.fasta_index, ch_bwamem_inputs.bwamem_index, - params.skip_deduplication + params.skip_deduplication, ) - ch_bam = FASTQ_ALIGN_DEDUP_BWAMEM.out.bam - ch_bai = FASTQ_ALIGN_DEDUP_BWAMEM.out.bai + ch_bam = FASTQ_ALIGN_DEDUP_BWAMEM.out.bam + ch_bai = FASTQ_ALIGN_DEDUP_BWAMEM.out.bai ch_aligner_mqc = FASTQ_ALIGN_DEDUP_BWAMEM.out.multiqc - ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BWAMEM.out.versions.unique{ it.baseName }) + ch_versions = ch_versions.mix(FASTQ_ALIGN_DEDUP_BWAMEM.out.versions.unique { it.baseName }) } - else { - error "ERROR: Invalid aligner '${params.aligner}'. Valid options are: 'bismark', 'bismark_hisat', 'bwameth' or 'bwamem'." + error("ERROR: Invalid aligner '${params.aligner}'. Valid options are: 'bismark', 'bismark_hisat', 'bwameth' or 'bwamem'.") } // @@ -209,62 +207,63 @@ workflow METHYLSEQ { ch_bam_bai = ch_bam.join(ch_bai) ch_taps_inputs = ch_bam_bai - .combine(ch_fasta) // broadcast fasta - .combine(ch_fasta_index) // broadcast fai + .combine(ch_fasta) + .combine(ch_fasta_index) .multiMap { meta, bam, bai, _meta_fasta, fasta, _meta_fai, fai -> - bam: [ meta, bam ] // use sample meta so subworkflow aligns properly - bai: [ meta, bai ] // use sample meta so subworkflow aligns properly - fasta: [ meta, fasta ] // use sample meta so subworkflow aligns properly - fasta_index: [ meta, fai ] // same here + bam: [meta, bam] + bai: [meta, bai] + fasta: [meta, fasta] + fasta_index: [meta, fai] } BAM_TAPS_CONVERSION( ch_taps_inputs.bam, ch_taps_inputs.bai, ch_taps_inputs.fasta, - ch_taps_inputs.fasta_index + ch_taps_inputs.fasta_index, ) - ch_rastair_mbias = BAM_TAPS_CONVERSION.out.mbias // channel: [ val(meta), [ txt ] ] - ch_rastair_call = BAM_TAPS_CONVERSION.out.call // channel: [ val(meta), [ txt ] ] - ch_versions = ch_versions.mix(BAM_TAPS_CONVERSION.out.versions) + ch_rastair_mbias = BAM_TAPS_CONVERSION.out.mbias + // channel: [ val(meta), [ txt ] ] + ch_rastair_call = BAM_TAPS_CONVERSION.out.call + // channel: [ val(meta), [ txt ] ] + ch_versions = ch_versions.mix(BAM_TAPS_CONVERSION.out.versions) } - - // - // Subworkflow: Count negative C->T conversion rates as a readout for DNA methylation - // else if (!params.taps && params.aligner == 'bwameth') { ch_bam_bai = ch_bam.join(ch_bai) ch_methyldackel_inputs = ch_bam_bai - .combine(ch_fasta) // broadcast fasta - .combine(ch_fasta_index) // broadcast fai + .combine(ch_fasta) + .combine(ch_fasta_index) .multiMap { meta, bam, bai, _meta_fasta, fasta, _meta_fai, fai -> - bam: [ meta, bam ] // use sample meta so subworkflow aligns properly - bai: [ meta, bai ] // use sample meta so subworkflow aligns properly - fasta: [ meta, fasta ] // use sample meta so subworkflow aligns properly - fasta_index: [ meta, fai ] // same here + bam: [meta, bam] + bai: [meta, bai] + fasta: [meta, fasta] + fasta_index: [meta, fai] } - BAM_METHYLDACKEL ( + BAM_METHYLDACKEL( ch_methyldackel_inputs.bam, ch_methyldackel_inputs.bai, ch_methyldackel_inputs.fasta, - ch_methyldackel_inputs.fasta_index + ch_methyldackel_inputs.fasta_index, ) - ch_bedgraph = BAM_METHYLDACKEL.out.methydackel_extract_bedgraph // channel: [ val(meta), [ bedgraph ] ] - ch_methylkit = BAM_METHYLDACKEL.out.methydackel_extract_methylkit // channel: [ val(meta), [ methylkit ] ] - ch_mbias = BAM_METHYLDACKEL.out.methydackel_mbias // channel: [ val(meta), [ mbias ] ] - ch_versions = ch_versions.mix(BAM_METHYLDACKEL.out.versions) + ch_bedgraph = BAM_METHYLDACKEL.out.methydackel_extract_bedgraph + // channel: [ val(meta), [ bedgraph ] ] + ch_methylkit = BAM_METHYLDACKEL.out.methydackel_extract_methylkit + // channel: [ val(meta), [ methylkit ] ] + ch_mbias = BAM_METHYLDACKEL.out.methydackel_mbias + // channel: [ val(meta), [ mbias ] ] + ch_versions = ch_versions.mix(BAM_METHYLDACKEL.out.versions) } // // MODULE: Qualimap BamQC // skipped by default. to use run with `--run_qualimap` param. // - if(params.run_qualimap) { - QUALIMAP_BAMQC ( + if (params.run_qualimap) { + QUALIMAP_BAMQC( ch_bam, - params.bamqc_regions_file ? channel.fromPath( params.bamqc_regions_file, checkIfExists: true ).toList() : [] + params.bamqc_regions_file ? channel.fromPath(params.bamqc_regions_file, checkIfExists: true).toList() : [], ) ch_qualimap = QUALIMAP_BAMQC.out.results ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions) @@ -274,22 +273,23 @@ workflow METHYLSEQ { // MODULE: Targeted sequencing analysis // skipped by default. to use run with `--run_targeted_sequencing` param. // - if (params.run_targeted_sequencing){ + if (params.run_targeted_sequencing) { if (params.taps || params.aligner == 'bwamem') { - error "ERROR: --run_targeted_sequencing can't be running using rastair (methylation caller for TAPS) " + error("ERROR: --run_targeted_sequencing can't be running using rastair (methylation caller for TAPS) ") } if (!params.target_regions_file) { - error "ERROR: --target_regions_file must be specified when using --run_targeted_sequencing" + error("ERROR: --target_regions_file must be specified when using --run_targeted_sequencing") } - TARGETED_SEQUENCING ( + TARGETED_SEQUENCING( ch_bedgraph, + ch_coverage, channel.fromPath(params.target_regions_file, checkIfExists: true), ch_fasta, ch_fasta_index, ch_bam, ch_bai, ch_gzi, - params.collecthsmetrics + params.collecthsmetrics, ) ch_versions = ch_versions.mix(TARGETED_SEQUENCING.out.versions) } @@ -298,11 +298,11 @@ workflow METHYLSEQ { // MODULE: Preseq LCEXTRAP // skipped by default. to use run with `--run_preseq` param. // - if(params.run_preseq) { - PRESEQ_LCEXTRAP ( + if (params.run_preseq) { + PRESEQ_LCEXTRAP( ch_bam ) - ch_preseq = PRESEQ_LCEXTRAP.out.lc_extrap + ch_preseq = PRESEQ_LCEXTRAP.out.lc_extrap ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions) } @@ -312,10 +312,11 @@ workflow METHYLSEQ { softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'methylseq_software_' + 'mqc_' + 'versions.yml', + name: 'nf_core_' + 'methylseq_software_' + 'mqc_' + 'versions.yml', sort: true, - newLine: true - ).set { ch_collated_versions } + newLine: true, + ) + .set { ch_collated_versions } // // Topic channel versions - written separately to avoid blocking MULTIQC @@ -325,93 +326,88 @@ workflow METHYLSEQ { .distinct() .filter { entry -> !(entry instanceof Path) } .map { process, tool, version -> - def processName = process[process.lastIndexOf(':')+1..-1] + def processName = process[process.lastIndexOf(':') + 1..-1] "${processName}:\n ${tool}: ${version}" } .collectFile( storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_methylseq_topic_versions.yml', sort: true, - newLine: true + newLine: true, ) // // MODULE: MultiQC // if (!params.skip_multiqc) { - ch_multiqc_config = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? - channel.fromPath(params.multiqc_config, checkIfExists: true) : - channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - channel.fromPath(params.multiqc_logo, checkIfExists: true) : - channel.empty() - - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) - - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? - file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_config = channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config + ? channel.fromPath(params.multiqc_config, checkIfExists: true) + : channel.empty() + ch_multiqc_logo = params.multiqc_logo + ? channel.fromPath(params.multiqc_logo, checkIfExists: true) + : channel.empty() + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) + + ch_multiqc_custom_methods_description = params.multiqc_methods_description + ? file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', - sort: true + sort: true, ) ) - if(params.run_qualimap) { - ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_BAMQC.out.results.collect{ it[1] }.ifEmpty([])) + if (params.run_qualimap) { + ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_BAMQC.out.results.collect { it[1] }.ifEmpty([])) } if (params.run_preseq) { - ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.log.collect{ it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.log.collect { it[1] }.ifEmpty([])) } ch_multiqc_files = ch_multiqc_files.mix(ch_aligner_mqc.ifEmpty([])) if (!params.skip_trimming) { - ch_multiqc_files = ch_multiqc_files.mix(TRIMGALORE.out.log.collect{ it[1] }) + ch_multiqc_files = ch_multiqc_files.mix(TRIMGALORE.out.log.collect { it[1] }) } if (params.run_targeted_sequencing) { if (params.collecthsmetrics) { - ch_multiqc_files = ch_multiqc_files.mix(TARGETED_SEQUENCING.out.picard_metrics.collect{ it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TARGETED_SEQUENCING.out.picard_metrics.collect { it[1] }.ifEmpty([])) } } if (!params.skip_fastqc) { - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{ it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect { it[1] }.ifEmpty([])) } - MULTIQC ( + MULTIQC( ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [] + [], ) ch_multiqc_report = MULTIQC.out.report.toList() - } else { + } + else { ch_multiqc_report = channel.empty() } emit: - bam = ch_bam // channel: [ val(meta), path(bam) ] - bai = ch_bai // channel: [ val(meta), path(bai) ] - rastair_mbias = ch_rastair_mbias // channel: [ val(meta), path(rastair_mbias) ] - rastair_call = ch_rastair_call // channel: [ val(meta), path(rastair_call) ] - methylkit = ch_methylkit // channel: [ val(meta), path(methylkit) ] - mbias = ch_mbias // channel: [ val(meta), path(mbias) ] - bedgraph = ch_bedgraph // channel: [ val(meta), path(bedgraph) ] - qualimap = ch_qualimap // channel: [ val(meta), path(qualimap) ] - preseq = ch_preseq // channel: [ val(meta), path(preseq) ] - multiqc_report = ch_multiqc_report // channel: [ path(multiqc_report.html ) ] - versions = ch_versions // channel: [ path(versions.yml) ] + bam = ch_bam // channel: [ val(meta), path(bam) ] + bai = ch_bai // channel: [ val(meta), path(bai) ] + rastair_mbias = ch_rastair_mbias // channel: [ val(meta), path(rastair_mbias) ] + rastair_call = ch_rastair_call // channel: [ val(meta), path(rastair_call) ] + methylkit = ch_methylkit // channel: [ val(meta), path(methylkit) ] + mbias = ch_mbias // channel: [ val(meta), path(mbias) ] + bedgraph = ch_bedgraph // channel: [ val(meta), path(bedgraph) ] + qualimap = ch_qualimap // channel: [ val(meta), path(qualimap) ] + preseq = ch_preseq // channel: [ val(meta), path(preseq) ] + multiqc_report = ch_multiqc_report // channel: [ path(multiqc_report.html ) ] + versions = ch_versions // channel: [ path(versions.yml) ] } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/