nf-core · FelixKrueger · Apr 10, 2026 · Apr 8, 2026 · Apr 9, 2026 · Apr 9, 2026
@@ -1,8 +1,6 @@
 process {
-    withName: BEDTOOLS_INTERSECT {
-        ext.args   = ''
-        ext.prefix = { "${intervals1.baseName}" }
-        ext.suffix = 'targeted.bedGraph'
+    withName: FILTER_BEDGRAPH_TARGETS {
+        ext.prefix = { "${bedgraph.baseName}" }
         publishDir = [
             [
                 path: { params.aligner == 'bismark' ? "${params.outdir}/bismark/methylation_calls/bedGraph" : "${params.outdir}/methyldackel" },

@@ -0,0 +1,14 @@
+process {
+    withName: BEDTOOLS_INTERSECT_COV {
+        ext.args   = ''
+        ext.prefix = { "${intervals1.baseName}" }
+        ext.suffix = 'targeted.cov'
+        publishDir = [
+            [
+                path: { "${params.outdir}/bismark/methylation_calls/methylation_coverage" },
+                mode: params.publish_dir_mode,
+                pattern: "*targeted.cov"
+            ]
+        ]
+    }
+}
@@ -1,4 +1,5 @@
 includeConfig "../modules/bedtools_intersect.config"
+includeConfig "../modules/bedtools_intersect_cov.config"
 includeConfig "../modules/picard_bedtointervallist.config"
 includeConfig "../modules/picard_createsequencedictionary.config"
 includeConfig "../modules/picard_collecthsmetrics.config"
@@ -0,0 +1,57 @@
+/*
+ * Filter bedGraph files against target regions with CpG-aware boundary handling.
+ *
+ * Bismark bedGraph files use 0-based half-open coordinates for single-C positions
+ * (e.g., chr1 10788 10789 for a CpG at 1-based position 10789). When a CpG
+ * straddles a target boundary — C just outside, G just inside — a naive
+ * intersection drops it because the single-C interval [10788, 10789) does not
+ * overlap the target [10789, …).
+ *
+ * This process extends each bedGraph entry by 1 bp on the right to represent the
+ * full CpG dinucleotide, intersects with the target BED, then restores the
+ * original single-base coordinates.
+ */
+process FILTER_BEDGRAPH_TARGETS {
+    tag "${meta.id}"
+    label 'process_single'
+
+    conda "bioconda::bedtools=2.31.1"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0'
+        : 'biocontainers/bedtools:2.31.1--hf5e1c6e_0'}"
+
+    input:
+    tuple val(meta), path(bedgraph), path(targets)
+
+    output:
+    tuple val(meta), path("*.targeted.bedGraph"), emit: intersect
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # Decompress and extend end coordinate by 1 bp so the interval covers the full CpG dinucleotide
+    # Skip any track/header lines to avoid corrupting them
+    zcat ${bedgraph} | awk 'BEGIN{OFS="\t"} /^track/{print; next} {if(NF>=3) \$3=\$3+1; print}' > extended.bedGraph
+
+    # Intersect with target regions (-wa: write original -a entry; -u: unique hits only)
+    bedtools intersect \\
+        -a extended.bedGraph \\
+        -b ${targets} \\
+        -wa \\
+        -u \\
+        > intersected.bedGraph
+
+    # Restore original single-base end coordinates
+    awk 'BEGIN{OFS="\t"} /^track/{print; next} {if(NF>=3) \$3=\$3-1; print}' intersected.bedGraph \\
+        > ${prefix}.targeted.bedGraph
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
+    END_VERSIONS
+    """
+}
@@ -1,47 +1,60 @@
 /*
  * targeted_sequencing subworkflow
  *
- * Filters bedGraph files with the target_regions BED file so they contain positions only listed in the BED.
+ * Filters bedGraph and coverage files with the target_regions BED file so they contain positions only listed in the BED.
  * If specified, it also generates some performance metrics with Picard CollectHsMetrics, such as Fold-80 Base Penalty,
  * HS Library Size, Percent Duplicates, and Percent Off Bait. This is relevant for methylome experiments with targeted seq.
  */
 
-include { BEDTOOLS_INTERSECT              } from '../../../modules/nf-core/bedtools/intersect/main'
-include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/picard/createsequencedictionary/main'
-include { PICARD_BEDTOINTERVALLIST        } from '../../../modules/nf-core/picard/bedtointervallist/main'
-include { PICARD_COLLECTHSMETRICS         } from '../../../modules/nf-core/picard/collecthsmetrics/main'
+include { FILTER_BEDGRAPH_TARGETS                      } from '../../../modules/local/filter_bedgraph_targets'
+include { BEDTOOLS_INTERSECT as BEDTOOLS_INTERSECT_COV } from '../../../modules/nf-core/bedtools/intersect/main'
+include { PICARD_CREATESEQUENCEDICTIONARY              } from '../../../modules/nf-core/picard/createsequencedictionary/main'
+include { PICARD_BEDTOINTERVALLIST                     } from '../../../modules/nf-core/picard/bedtointervallist/main'
+include { PICARD_COLLECTHSMETRICS                      } from '../../../modules/nf-core/picard/collecthsmetrics/main'
 
 workflow TARGETED_SEQUENCING {
-
     take:
-    ch_bedgraph            // channel: [ val(meta), [ bedGraph(s) ]] when bwameth, [ val(meta), bedGraph ] when bismark
-    ch_target_regions      // channel: path(target_regions.bed)
-    ch_fasta               // channel: [ [:], /path/to/genome.fa]
-    ch_fasta_index         // channel: [ val(meta), /path/to/genome.fa.fai]
-    ch_bam                 // channel: [ val(meta), [ bam ] ] ## BAM from alignment
-    ch_bai                 // channel: [ val(meta), [ bai ] ] ## BAI from alignment
-    ch_gzi                 // channel: [ val(meta), [ gzi ] ] ## GZI from fasta
-    collecthsmetrics       // boolean: whether to run Picard CollectHsMetrics
+    ch_bedgraph // channel: [ val(meta), [ bedGraph(s) ]] when bwameth, [ val(meta), bedGraph ] when bismark
+    ch_coverage // channel: [ val(meta), cov.gz ] from Bismark methylation extractor (empty for bwameth)
+    ch_target_regions // channel: path(target_regions.bed)
+    ch_fasta // channel: [ [:], /path/to/genome.fa]
+    ch_fasta_index // channel: [ val(meta), /path/to/genome.fa.fai]
+    ch_bam // channel: [ val(meta), [ bam ] ] ## BAM from alignment
+    ch_bai // channel: [ val(meta), [ bai ] ] ## BAI from alignment
+    ch_gzi // channel: [ val(meta), [ gzi ] ] ## GZI from fasta
+    collecthsmetrics // boolean: whether to run Picard CollectHsMetrics
 
     main:
 
     ch_versions = Channel.empty()
     ch_picard_metrics = Channel.empty()
 
     /*
-     * Intersect bedGraph files with target regions
-     * Ensure ch_bedgraph contains the bedGraph file(s) in an array and split into individual bedGraphs
+     * Intersect bedGraph files with target regions (CpG-aware boundary handling)
+     * Ensure ch_bedgraph contains the bedGraph file(s) in an array and split into individual bedGraphs.
+     * The FILTER_BEDGRAPH_TARGETS process extends single-C intervals by 1 bp before
+     * intersection so that CpGs straddling a target boundary are not lost.
      */
     ch_bedgraphs_target = ch_bedgraph
         .map { meta, bedgraphs -> tuple(meta, bedgraphs instanceof List ? bedgraphs : [bedgraphs]) }
         .flatMap { meta, bedgraphs -> bedgraphs.collect { bedgraph -> [meta, bedgraph] } }
         .combine(ch_target_regions)
 
-    BEDTOOLS_INTERSECT(
-        ch_bedgraphs_target,
-        [[:], []]
+    FILTER_BEDGRAPH_TARGETS(ch_bedgraphs_target)
+    ch_versions = ch_versions.mix(FILTER_BEDGRAPH_TARGETS.out.versions)
+
+    /*
+     * Intersect Bismark coverage files with target regions
+     * The .cov.gz files are filtered the same way as bedGraph files so that
+     * downstream tools (methylKit, bsseq, DSS) receive only on-target CpGs.
+     */
+    ch_coverage_target = ch_coverage.combine(ch_target_regions)
+
+    BEDTOOLS_INTERSECT_COV(
+        ch_coverage_target,
+        [[:], []],
     )
-    ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions)
+    ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT_COV.out.versions)
 
     /*
      * Run Picard CollectHSMetrics
@@ -65,7 +78,7 @@ workflow TARGETED_SEQUENCING {
         PICARD_BEDTOINTERVALLIST(
             target_regions_with_meta,
             ch_sequence_dictionary,
-            []
+            [],
         )
         ch_intervals = PICARD_BEDTOINTERVALLIST.out.intervallist.map { it[1] }
         ch_versions = ch_versions.mix(PICARD_BEDTOINTERVALLIST.out.versions)
@@ -75,34 +88,36 @@ workflow TARGETED_SEQUENCING {
          * Note: Using the same intervals for both target and bait as they are typically
          * the same for targeted methylation sequencing experiments
          */
-        ch_picard_inputs = ch_bam.join(ch_bai)
+        ch_picard_inputs = ch_bam
+            .join(ch_bai)
             .combine(ch_intervals)
             .combine(ch_intervals)
             .combine(ch_fasta)
             .combine(ch_fasta_index)
             .combine(ch_sequence_dictionary)
             .combine(ch_gzi)
             .multiMap { meta, bam, bai, intervals1, intervals2, meta_fasta, fasta, meta_fasta_index, fasta_index, meta_dict, dict, meta_gzi, gzi ->
-                bam_etc: [ meta, bam, bai, intervals1, intervals2 ] // intervals: baits, targets
-                fasta: [ meta_fasta, fasta ]
-                fasta_index: [ meta_fasta_index, fasta_index ]
-                dict: [ meta_dict, dict ]
-                gzi : [ meta_gzi, gzi ]
+                bam_etc: [meta, bam, bai, intervals1, intervals2]
+                fasta: [meta_fasta, fasta]
+                fasta_index: [meta_fasta_index, fasta_index]
+                dict: [meta_dict, dict]
+                gzi: [meta_gzi, gzi]
             }
 
         PICARD_COLLECTHSMETRICS(
             ch_picard_inputs.bam_etc,
             ch_picard_inputs.fasta,
             ch_picard_inputs.fasta_index,
             ch_picard_inputs.dict,
-            ch_picard_inputs.gzi
+            ch_picard_inputs.gzi,
         )
         ch_picard_metrics = PICARD_COLLECTHSMETRICS.out.metrics
         ch_versions = ch_versions.mix(PICARD_COLLECTHSMETRICS.out.versions)
     }
 
     emit:
-    bedgraph_filtered = BEDTOOLS_INTERSECT.out.intersect  // channel: [ val(meta), path("*.bedGraph") ]
-    picard_metrics    = ch_picard_metrics                 // channel: [ val(meta), path("*_metrics") ]
-    versions          = ch_versions                       // channel: path("*.version.txt")
+    bedgraph_filtered = FILTER_BEDGRAPH_TARGETS.out.intersect // channel: [ val(meta), path("*.targeted.bedGraph") ]
+    coverage_filtered = BEDTOOLS_INTERSECT_COV.out.intersect // channel: [ val(meta), path("*.cov") ]
+    picard_metrics    = ch_picard_metrics // channel: [ val(meta), path("*_metrics") ]
+    versions          = ch_versions // channel: path("*.version.txt")
 }