#!/usr/bin/env python

"""
Create Position per Sample Mask Matrix from BED Files

This script aggregates per-sample flagged position BED files into a single
mask matrix where rows are positions and columns are samples.

Each sample-specific BED file (*.flagged-pos.bed) contains positions that
should be masked only for that particular sample.

The output is a TSV with 1/0 values indicating whether a position should be
kept (1) or masked (0) for each sample.

Command-line Arguments
----------------------
bed-files : str
    Space-separated list of sample-specific BED files

Usage
-----
create_mask_matrix.py \\
    --bed-files sample1.flagged-pos.bed sample2.flagged-pos.bed ...
"""

import logging
from pathlib import Path

import pandas as pd

try:
    import click
except ImportError:  # NOTE(review): lets the helpers be imported without the CLI stack
    click = None

# Logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s - %(message)s",
    level=logging.INFO,
    datefmt="%m/%d/%Y %I:%M:%S %p"
)
LOG = logging.getLogger("create_mask_matrix")


def add_bed_positions(bed_df: pd.DataFrame,
                      sample_name: str,
                      masked_positions: set,
                      mask_data: list) -> int:
    """
    Add BED positions to mask data for a specific sample.

    Parameters
    ----------
    bed_df : pd.DataFrame
        BED dataframe with CHROM, START, END, FILTER columns
    sample_name : str
        Sample name to apply masking to
    masked_positions : set
        Set tracking already-masked (CHROM, POS, SAMPLE) tuples; mutated in place
    mask_data : list
        List to append mask entries (dicts with KEEP=0) to; mutated in place

    Returns
    -------
    int
        Number of new entries added
    """
    entries_added = 0

    for row in bed_df.itertuples():
        # NOTE(review): iterates START..END *inclusive*; standard BED intervals
        # are half-open [START, END) — confirm the producer of *.flagged-pos.bed
        # intends both endpoints to be masked.
        for pos in range(row.START, row.END + 1):
            key = (row.CHROM, pos, sample_name)
            if key in masked_positions:
                continue
            mask_data.append({
                'CHROM': row.CHROM,
                'POS': pos,
                'SAMPLE': sample_name,
                'KEEP': 0,
            })
            masked_positions.add(key)
            entries_added += 1

    return entries_added


def prepare_matrix(mask_data: list) -> pd.DataFrame:
    """
    Prepare the mask matrix dataframe from collected mask data.

    Parameters
    ----------
    mask_data : list
        List of dictionaries with CHROM, POS, SAMPLE, KEEP keys

    Returns
    -------
    pd.DataFrame
        Wide matrix with CHROM and POS columns followed by one 0/1 column
        per sample (1 = keep, 0 = mask), sorted by CHROM then POS
    """
    mask_df = pd.DataFrame(mask_data)

    # Pivot to matrix format: rows = (CHROM, POS), columns = samples
    mask_matrix = mask_df.pivot_table(
        index=['CHROM', 'POS'],
        columns='SAMPLE',
        values='KEEP',
        fill_value=1,      # positions not in a sample's BED should be kept
        aggfunc='first'    # in case of duplicates, take first
    )

    # Make CHROM and POS regular columns and sort for readability
    mask_matrix = mask_matrix.reset_index()
    mask_matrix = mask_matrix.sort_values(['CHROM', 'POS']).reset_index(drop=True)

    return mask_matrix


def create_mask_matrix(bed_files: list) -> None:
    """
    Create a position per sample mask matrix from per-sample BED files.

    Writes ``flagged_positions.mask.tsv.gz`` in the working directory. When no
    file contributes positions, an empty (header-only) matrix is written.

    Parameters
    ----------
    bed_files : list
        List of paths to sample-specific BED files
    """
    LOG.info(f"Processing {len(bed_files)} BED files...")

    # Track already-masked (CHROM, POS, SAMPLE) tuples for O(1) duplicate checks
    masked_positions = set()
    mask_data = []

    total_entries = 0  # total mask entries across all samples (not a sample count)
    for bed_file in bed_files:
        # Sample name is the file stem minus the '.flagged-pos' suffix
        sample_name = Path(bed_file).stem.replace('.flagged-pos', '')

        try:
            bed_df = pd.read_csv(bed_file, sep="\t", header=None,
                                 names=["CHROM", "START", "END", "FILTER"])

            if bed_df.empty:
                LOG.info(f"No flagged positions for {sample_name}")
                continue

            entries = add_bed_positions(bed_df, sample_name, masked_positions, mask_data)
            LOG.info(f"{sample_name}: {len(bed_df)} positions in BED, {entries} unique entries added")
            total_entries += entries

        except Exception as e:
            # Best-effort: a malformed file must not abort the whole cohort
            LOG.warning(f"Could not process {bed_file}: {e}")
            continue

    LOG.info(f"Total mask entries: {total_entries}")

    # Handle case where no positions need masking
    if not mask_data:
        LOG.info("No positions to mask across all samples, creating empty matrix")
        empty_df = pd.DataFrame(columns=['CHROM', 'POS'])
        empty_df.to_csv("flagged_positions.mask.tsv.gz", sep="\t", index=False, compression='gzip')
        return

    mask_matrix = prepare_matrix(mask_data)
    mask_matrix.to_csv("flagged_positions.mask.tsv.gz", sep="\t", index=False, compression='gzip')
    LOG.info("Mask matrix saved to: flagged_positions.mask.tsv.gz")


if click is not None:
    @click.command()
    @click.option('--bed-files', multiple=True, required=True, type=click.Path(exists=True),
                  help='Sample-specific flagged position BED files')
    def main(bed_files: tuple):
        """
        Create mask matrix from sample BED files.
        """
        try:
            create_mask_matrix(list(bed_files))
        except Exception as e:
            LOG.error(f"Error creating mask matrix: {e}")
            raise


if __name__ == '__main__':
    if click is None:
        raise SystemExit("click is required to run this script as a CLI")
    main()
#!/usr/bin/env python

"""
Filter Cohort - MAF Processing and Flagging Script

This script processes a Mutation Annotation Format (MAF) file to filter
variants by specific criteria and generates a final filtered MAF.

Command-line Arguments
----------------------
maf_df-file : str
    Path to the gzipped input MAF file.
sample_name : str
    Output sample name.
repetitive-variant-threshold : int
    Minimum occurrences threshold to flag a repetitive variant.
somatic-vaf-boundary : float
    VAF threshold to classify somatic mutations.
n-rich-cohort-proportion : float
    Proportion threshold for n-rich cohort filtering.

Authors
-------
Author : Ferriol Calvet (@FerriolCalvet)
Email : ferriol.calvet@irbbarcelona.org

Contributors
------------
- Raquel Blanco - @rblancomi (raquel.blanco@irbbarcelona.org)
- Federica Brando - @FedericaBrando (federica.brando@irbbarcelona.org)
- Marta Huertas - @m-huertasp (marta.huertas@irbbarcelona.org)

Usage
-----
python filter_cohort.py \\
    --maf-df-file input.maf.tsv.gz \\
    --sample-name sample1 \\
    --repetitive-variant-threshold 5 \\
    --somatic-vaf-boundary 0.4 \\
    --n-rich-cohort-proportion 0.1
"""
import logging
from pathlib import Path

import click
import pandas as pd

from utils import add_filter
from read_utils import custom_na_values
from utils_filter import expand_filter_column

# Logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s - %(message)s", level=logging.DEBUG, datefmt="%m/%d/%Y %I:%M:%S %p"
)
LOG = logging.getLogger("filter_cohort")


def flag_repetitive_variants(maf_df: pd.DataFrame,
                             repetitive_variant_threshold: int,
                             somatic_vaf_boundary: float) -> pd.DataFrame:
    """
    Flag repetitive variants in the FILTER column of the MAF dataframe.

    A variant is flagged ``repetitive_variant`` when it appears with a somatic
    VAF in at least ``repetitive_variant_threshold`` samples. Repetitive
    variants that always occur at the same position in the read (a single
    PMEAN value with PSTD == 0) are additionally flagged
    ``repetitive_mapping_variant`` as likely mapping artifacts.

    Parameters
    ----------
    maf_df : pd.DataFrame
        MAF dataframe
    repetitive_variant_threshold : int
        Minimum number of samples a variant must appear in to be considered repetitive
    somatic_vaf_boundary : float
        VAF threshold to classify somatic mutations

    Returns
    -------
    pandas.DataFrame
        MAF dataframe with the FILTER column updated
    """
    max_samples = len(pd.unique(maf_df["SAMPLE_ID"]))
    n_samples = list(range(repetitive_variant_threshold, max_samples + 1))

    # BUGFIX: `n_samples` is a list, so the previous `n_samples == 0` comparison
    # could never be true; test for emptiness instead.
    if not n_samples:
        LOG.warning("Not enough samples to identify potential repetitive variants!")
        return maf_df

    # Work on the somatic subset only (germline-level VAFs are not informative here)
    maf_df_f_somatic = maf_df.loc[maf_df["VAF"] <= somatic_vaf_boundary][
        ["MUT_ID", "SAMPLE_ID", "PMEAN", "PSTD"]
    ].reset_index(drop=True)

    # Count, per variant, the number of (sample) rows it appears in
    variant_counts = maf_df_f_somatic.groupby("MUT_ID").size().reset_index(name="count")

    repetitive_variants = variant_counts[variant_counts["count"] >= repetitive_variant_threshold]["MUT_ID"]
    LOG.info("%s repetitive_variants", len(repetitive_variants))

    # Flag repetitive variants in the original dataframe
    maf_df["repetitive_variant"] = maf_df["MUT_ID"].isin(repetitive_variants)
    maf_df["FILTER"] = maf_df[["FILTER", "repetitive_variant"]].apply(
        lambda x: add_filter(x["FILTER"], x["repetitive_variant"], "repetitive_variant"), axis=1
    )
    maf_df = maf_df.drop("repetitive_variant", axis=1)

    # Entries with usable read-position info: PMEAN present/valid and zero spread
    maf_df_f_somatic_pos_info = maf_df_f_somatic[~(maf_df_f_somatic["PMEAN"].isna()) &
                                                 (maf_df_f_somatic["PMEAN"] != -1) &
                                                 (maf_df_f_somatic["PSTD"] == 0)]

    if maf_df_f_somatic_pos_info.shape[0] == 0:
        LOG.info("No repetitive variants with fixed position found.")
        return maf_df

    # Variants always found in the same read position -> a single PMEAN value
    maf_df_f_somatic_compiled_pos = maf_df_f_somatic_pos_info.groupby("MUT_ID")["PMEAN"].nunique().reset_index()
    variants_with_rep_position = maf_df_f_somatic_compiled_pos[
        maf_df_f_somatic_compiled_pos["PMEAN"] == 1]["MUT_ID"]
    LOG.info("Variants always found in the same position: %d", len(variants_with_rep_position))

    # Only keep those that are also repetitive
    variants_with_rep_position = set(variants_with_rep_position).intersection(set(repetitive_variants))
    LOG.info("Repetitive variants always found in the same position: %d", len(variants_with_rep_position))

    maf_df["repetitive_mapping_variant"] = maf_df["MUT_ID"].isin(variants_with_rep_position)
    LOG.info("%s muts flagged as repetitive_mapping_variant", maf_df["repetitive_mapping_variant"].sum())

    maf_df["FILTER"] = maf_df[["FILTER", "repetitive_mapping_variant"]].apply(
        lambda x: add_filter(x["FILTER"], x["repetitive_mapping_variant"], "repetitive_mapping_variant"), axis=1
    )
    maf_df = maf_df.drop("repetitive_mapping_variant", axis=1)

    return maf_df


def flag_cohort_n_rich(maf_df: pd.DataFrame,
                       n_rich_cohort_proportion: float,
                       somatic_vaf_boundary: float) -> pd.DataFrame:
    """
    Flag FILTER column for cohort_n_rich variants from the MAF dataframe.

    Adds three flags:
    - ``cohort_n_rich``: variant is n_rich in at least
      ``max(2, floor(n_samples * n_rich_cohort_proportion))`` samples
    - ``cohort_n_rich_uni``: variant is n_rich in at least one sample
    - ``cohort_n_rich_threshold``: the row's VAF_Ns reaches the minimum VAF_Ns
      observed among the n_rich occurrences of that variant

    Parameters
    ----------
    maf_df : pandas.DataFrame
        MAF dataframe
    n_rich_cohort_proportion : float
        Proportion of samples to consider a variant as cohort_n_rich
    somatic_vaf_boundary : float
        VAF threshold to classify somatic mutations
        (kept for signature compatibility; currently unused in this function)

    Returns
    -------
    maf_df : pandas.DataFrame
        MAF dataframe with cohort_n_rich variants flagged; the merged
        N_rich_frequency / VAF_Ns_threshold columns remain in the output
    """
    LOG.info("Flagging cohort_n_rich...")
    max_samples = len(pd.unique(maf_df["SAMPLE_ID"]))

    # BUGFIX(review): guard restored — in the refactor the warning/early-return
    # became unconditional, so the function never flagged anything. Verify
    # against the pre-refactor behavior (`if max_samples < 2`).
    if max_samples < 2:
        LOG.warning("Not enough samples to identify cohort_n_rich mutations!")
        return maf_df

    number_of_samples = max(2, (max_samples * n_rich_cohort_proportion) // 1)
    LOG.info(f"Flagging mutations that are n_rich in at least: {number_of_samples} samples as cohort_n_rich")

    # Take only variant and sample info from the df
    maf_df_f = maf_df[["MUT_ID", "SAMPLE_ID", "VAF_Ns", "FILTER"]].reset_index(drop=True)

    # Aggregate n_rich variants: per variant, in how many samples it is n_rich
    # and the smallest VAF_Ns among those occurrences
    n_rich_vars_df = (
        maf_df_f[maf_df_f["FILTER"].str.contains("n_rich")]
        .groupby("MUT_ID")
        .agg(
            N_rich_frequency=('SAMPLE_ID', 'count'),
            VAF_Ns_threshold=('VAF_Ns', 'min')
        )
    )

    # Variants n_rich in at least number_of_samples samples -> cohort_n_rich
    n_rich_vars = set(n_rich_vars_df[n_rich_vars_df['N_rich_frequency'] >= number_of_samples].index)

    maf_df["cohort_n_rich"] = maf_df["MUT_ID"].isin(n_rich_vars)
    LOG.info("%s muts flagged as cohort_n_rich", maf_df["cohort_n_rich"].sum())

    maf_df["FILTER"] = maf_df[["FILTER", "cohort_n_rich"]].apply(
        lambda x: add_filter(x["FILTER"], x["cohort_n_rich"], "cohort_n_rich"), axis=1
    )

    # Variants n_rich in at least 1 sample -> cohort_n_rich_uni in all samples
    n_rich_vars_uni = set(n_rich_vars_df[n_rich_vars_df['N_rich_frequency'] > 0].index)

    maf_df["cohort_n_rich_uni"] = maf_df["MUT_ID"].isin(n_rich_vars_uni)
    LOG.info("%s muts flagged as cohort_n_rich_uni", maf_df["cohort_n_rich_uni"].sum())

    maf_df["FILTER"] = maf_df[["FILTER", "cohort_n_rich_uni"]].apply(
        lambda x: add_filter(x["FILTER"], x["cohort_n_rich_uni"], "cohort_n_rich_uni"), axis=1
    )

    # Rows whose VAF_Ns reaches the per-variant threshold -> cohort_n_rich_threshold
    maf_df = maf_df.merge(n_rich_vars_df, on='MUT_ID', how='left')
    maf_df['N_rich_frequency'] = maf_df['N_rich_frequency'].fillna(0)
    maf_df['VAF_Ns_threshold'] = maf_df['VAF_Ns_threshold'].fillna(1.1)  # 1.1 > any VAF, never triggers

    maf_df["cohort_n_rich_threshold"] = maf_df["VAF_Ns"] >= maf_df['VAF_Ns_threshold']
    LOG.info("%s muts flagged as cohort_n_rich_threshold", maf_df["cohort_n_rich_threshold"].sum())

    maf_df["FILTER"] = maf_df[["FILTER", "cohort_n_rich_threshold"]].apply(
        lambda x: add_filter(x["FILTER"], x["cohort_n_rich_threshold"], "cohort_n_rich_threshold"), axis=1
    )

    # Drop temporary boolean columns
    maf_df = maf_df.drop(["cohort_n_rich", "cohort_n_rich_uni", "cohort_n_rich_threshold"], axis=1)

    return maf_df


def flag_other_samples_snp(maf_df: pd.DataFrame,
                           somatic_vaf_boundary: float) -> pd.DataFrame:
    """
    Flag SNPs from other samples in the MAF dataframe.

    A variant that is germline (VAF, VAF_AM and vd_VAF all above the boundary)
    in any sample is flagged ``other_sample_SNP`` in the rows where it appears
    with a somatic VAF.

    Parameters
    ----------
    maf_df : pandas.DataFrame
        MAF dataframe
    somatic_vaf_boundary : float
        VAF boundary to consider a variant as somatic

    Returns
    -------
    maf_df : pandas.DataFrame
        MAF dataframe with other sample SNPs flagged
    """
    LOG.info("Flagging SNPs from other samples...")
    # Get all germline variants from all samples (unique and non-unique variants)
    germline_vars_all_samples = maf_df.loc[(maf_df["VAF"] > somatic_vaf_boundary) &
                                           (maf_df["VAF_AM"] > somatic_vaf_boundary) &
                                           (maf_df["vd_VAF"] > somatic_vaf_boundary),
                                           "MUT_ID"].unique()

    LOG.info(f"Using all germline variants of all samples, total: {len(germline_vars_all_samples)} variants.")

    # Germline elsewhere but somatic in the current row -> other_sample_SNP
    maf_df["other_sample_SNP"] = False
    maf_df.loc[(maf_df["MUT_ID"].isin(germline_vars_all_samples)) &
               (maf_df["VAF"] <= somatic_vaf_boundary), "other_sample_SNP"] = True
    LOG.info("%s muts flagged as other_sample_SNP", maf_df['other_sample_SNP'].sum())

    maf_df["FILTER"] = maf_df[["FILTER", "other_sample_SNP"]].apply(
        lambda x: add_filter(x["FILTER"], x["other_sample_SNP"], "other_sample_SNP"), axis=1
    )
    maf_df = maf_df.drop("other_sample_SNP", axis=1)

    return maf_df


def flag_gnomad_snp(maf_df: pd.DataFrame) -> pd.DataFrame:
    """
    Flag gnomAD SNPs in the MAF dataframe.

    No-op when the ``gnomAD_SNP`` column is absent.

    Parameters
    ----------
    maf_df : pandas.DataFrame
        MAF dataframe

    Returns
    -------
    maf_df : pandas.DataFrame
        MAF dataframe with gnomAD SNPs flagged
    """
    LOG.info("Flagging gnomAD SNPs...")

    if "gnomAD_SNP" in maf_df.columns:
        # Normalize string/na encodings to plain booleans before flagging
        maf_df["gnomAD_SNP"] = maf_df["gnomAD_SNP"].replace(
            {"True": True, "False": False, '-': False}).fillna(False).astype(bool)
        LOG.info("Out of %d positions, %d are gnomAD SNPs", maf_df["gnomAD_SNP"].shape[0], maf_df["gnomAD_SNP"].sum())

        maf_df["FILTER"] = maf_df[["FILTER", "gnomAD_SNP"]].apply(
            lambda x: add_filter(x["FILTER"], x["gnomAD_SNP"], "gnomAD_SNP"), axis=1
        )
        maf_df = maf_df.drop("gnomAD_SNP", axis=1)

    return maf_df


def flag_vaf_ns_threshold(maf_df: pd.DataFrame, vaf_ns_threshold: float) -> pd.DataFrame:
    """
    Flag variants that have a proportion of Ns higher than vaf_ns_threshold.

    A row is flagged ``high_n_vaf`` when either VAF_Ns or VAF_Ns_AM reaches
    the threshold.

    Parameters
    ----------
    maf_df : pandas.DataFrame
        MAF dataframe
    vaf_ns_threshold : float
        VAF of Ns threshold for filtering variants

    Returns
    -------
    maf_df : pandas.DataFrame
        MAF dataframe with variants exceeding VAF_Ns threshold flagged
    """
    LOG.info("Flagging variants with proportion of Ns higher than VAF_Ns threshold...")

    maf_df["high_n_vaf"] = maf_df[["VAF_Ns", "VAF_Ns_AM"]].ge(vaf_ns_threshold).any(axis=1)
    LOG.info("%s muts flagged as high_n_vaf", maf_df["high_n_vaf"].sum())

    maf_df["FILTER"] = maf_df[["FILTER", "high_n_vaf"]].apply(
        lambda x: add_filter(x["FILTER"], x["high_n_vaf"], "high_n_vaf"), axis=1
    )
    maf_df = maf_df.drop("high_n_vaf", axis=1)

    return maf_df


def flag_distorted_expanded(maf_df: pd.DataFrame) -> pd.DataFrame:
    """
    Flag variants with a distorted VAF distribution.

    No-op when the ``VAF_distorted_expanded_sq`` column is absent.

    Parameters
    ----------
    maf_df : pandas.DataFrame
        MAF dataframe

    Returns
    -------
    maf_df : pandas.DataFrame
        MAF dataframe with variants having distorted VAF distribution flagged
    """
    LOG.info("Flagging variants with distorted VAF distribution...")
    if 'VAF_distorted_expanded_sq' in maf_df.columns:
        maf_df["FILTER"] = maf_df[["FILTER", "VAF_distorted_expanded_sq"]].apply(
            lambda x: add_filter(x["FILTER"], x["VAF_distorted_expanded_sq"], "VAF_distorted_expanded_sq"), axis=1
        )

    return maf_df


def flag_maf(maf_df: pd.DataFrame, sample_name: str,
             repetitive_variant_threshold: int,
             somatic_vaf_boundary: float,
             n_rich_cohort_proportion: float,
             vaf_ns_threshold: float) -> None:
    """
    Run all cohort-level flagging steps on a MAF dataframe and save the result.

    Applies repetitive-variant, cohort_n_rich, high-N-VAF, other-sample SNP,
    gnomAD SNP and (optionally) distorted-VAF flagging, expands the FILTER
    column, and writes ``<sample_name>.cohort.filtered.tsv.gz``.

    Parameters
    ----------
    maf_df : pd.DataFrame
        Input MAF dataframe
    sample_name : str
        Name for the output sample
    repetitive_variant_threshold : int
        Minimum number of occurrences to flag a variant as repetitive
    somatic_vaf_boundary : float
        VAF threshold to distinguish somatic mutations
    n_rich_cohort_proportion : float
        Proportion threshold for n-rich cohort filtering
    vaf_ns_threshold : float
        VAF of Ns threshold for filtering variants
    """
    # Repetitive variants, by frequency and by position-in-read information
    maf_df = flag_repetitive_variants(maf_df, repetitive_variant_threshold, somatic_vaf_boundary)

    # cohort_n_rich variants
    maf_df = flag_cohort_n_rich(maf_df, n_rich_cohort_proportion, somatic_vaf_boundary)

    # Variants with high VAF of Ns
    maf_df = flag_vaf_ns_threshold(maf_df, vaf_ns_threshold)

    # Other samples' SNPs
    maf_df = flag_other_samples_snp(maf_df, somatic_vaf_boundary)

    # gnomAD SNPs
    maf_df = flag_gnomad_snp(maf_df)

    # Distorted VAF distribution (only if the corresponding column is present)
    maf_df = flag_distorted_expanded(maf_df)

    # Expand FILTER column into per-flag boolean columns
    maf_df = expand_filter_column(maf_df)

    # Save final filtered MAF
    maf_df.to_csv(f"{sample_name}.cohort.filtered.tsv.gz", sep="\t", header=True, index=False)

    LOG.info("Cohort flagging complete!")


@click.command()
@click.option('--maf-df-file', required=True, type=click.Path(exists=True), help='Input gzipped MAF file (TSV)')
@click.option('--sample-name', required=True, type=str, help='Sample name for output file')
@click.option('--repetitive-variant-threshold', required=True, type=int, help='Threshold for repetitive variants')
@click.option('--somatic-vaf-boundary', required=True, type=float, help='VAF boundary for somatic variants')
@click.option('--n-rich-cohort-proportion', required=True, type=float, help='Proportion for n-rich cohort filtering')
@click.option('--vaf-ns-threshold', required=False, type=float, default=0.1, help='VAF of Ns threshold for filtering variants')
def main(maf_df_file: str, sample_name: str, repetitive_variant_threshold: int,
         somatic_vaf_boundary: float, n_rich_cohort_proportion: float, vaf_ns_threshold: float):
    """
    CLI wrapper for flag_maf function.
    """
    # Load MAF dataframe
    maf_df = pd.read_csv(maf_df_file, compression='gzip', header=0, sep='\t', na_values=custom_na_values)
    LOG.debug(f"{maf_df_file}")
    # Flag MAF file
    flag_maf(maf_df,
             sample_name,
             repetitive_variant_threshold,
             somatic_vaf_boundary,
             n_rich_cohort_proportion,
             vaf_ns_threshold)


if __name__ == '__main__':
    main()
#!/usr/bin/env python

"""
Merge annotation depths - Depth Annotation and Flagging Script

This script merges a per-position depths table with its annotation table,
optionally applies a position-per-sample mask, and writes per-sample /
per-group / cohort-wide annotated depth files.

Command-line Arguments
----------------------
annotation : str
    Input annotation file (TSV with CHROM, POS, CONTEXT).
depths : str
    Input depths file (TSV with CHROM, POS and one column per sample).
json_file : str, optional
    JSON file mapping group names to lists of sample names.
mask_matrix : str, optional
    Position per sample mask matrix file (1 = keep, 0 = mask).

Authors
-------
Author : Ferriol Calvet (@FerriolCalvet)
Email : ferriol.calvet@irbbarcelona.org

Contributors
------------
- Federica Brando - @FedericaBrando (federica.brando@irbbarcelona.org)
- Marta Huertas - @m-huertasp (marta.huertas@irbbarcelona.org)
"""
import json
import logging
import subprocess

import click
import polars as pl

# Logging
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s - %(message)s", level=logging.DEBUG, datefmt="%m/%d/%Y %I:%M:%S %p"
)
LOG = logging.getLogger("merge_annotation_depths")

# Globals
COLS = ["CHROM", "POS", "CONTEXT"]


def _write_gzipped_tsv(df: pl.DataFrame, basename: str) -> None:
    """
    Write *df* to *basename* as TSV and gzip it in place (producing <basename>.gz).

    Polars' native gzip output is currently not working in this pipeline, so
    the file is written plain and compressed with the external gzip binary.
    """
    df.write_csv(basename, include_header=True, separator="\t")
    subprocess.run(["gzip", "-f", basename], check=True)


def preprocess(annotation_file: str, depths_file: str) -> tuple[pl.DataFrame, list]:
    """
    Merge annotation and depth files.

    Parameters
    ----------
    annotation_file : str
        Path to the annotation file.
    depths_file : str
        Path to the depths file.

    Returns
    -------
    pl.DataFrame
        Merged DataFrame with annotations and depths, columns ordered as
        CHROM, POS, CONTEXT, then one column per sample.
    list
        List of sample column names without the .*.bam suffix.
    """
    LOG.info("Preprocessing annotation and depths files...")
    _depths = pl.read_csv(depths_file, separator="\t")
    _annots = pl.read_csv(annotation_file, separator="\t")

    # Left join keeps every depth row; positions without annotation get '-'
    annot_depth = _depths.join(_annots, on=["CHROM", "POS"], how='left').fill_null(pl.lit('-'))

    sample_columns = [col for col in annot_depth.columns if col not in COLS]

    # Strip the .*.bam suffix from sample column names, when present
    rename_map = {col: col.split('.')[0] if "bam" in col else col for col in sample_columns}

    # All columns except CHROM and CONTEXT must be numeric (POS + depths)
    annot_depth = annot_depth.with_columns([
        pl.col(col).cast(pl.Int64) if col not in ["CHROM", "CONTEXT"] else pl.col(col)
        for col in annot_depth.columns
    ])

    LOG.info("Samples: %s", list(rename_map.values()))

    # CHROM/POS/CONTEXT first, then the (renamed) sample columns
    return annot_depth.select(COLS + sample_columns).rename(rename_map), list(rename_map.values())


def apply_mask_matrix(annotated_depths: pl.DataFrame, mask_matrix_file: str) -> pl.DataFrame:
    """
    Apply position per sample mask matrix to depths.

    Sets depth = 0 where mask value is 0 (position should be masked).
    Keeps depth as-is where mask value is 1 (position should be kept).

    Parameters
    ----------
    annotated_depths : pl.DataFrame
        Depths dataframe with CHROM, POS, CONTEXT, and sample columns
    mask_matrix_file : str
        Path to mask matrix file (TSV, gzipped)

    Returns
    -------
    pl.DataFrame
        Depths with masked positions (where mask=0) set to 0
    """
    LOG.info("Loading mask matrix...")
    mask_df = pl.read_csv(mask_matrix_file, separator="\t")

    if mask_df.is_empty():
        LOG.info("Mask matrix is empty, no masking applied")
        return annotated_depths

    LOG.info(f"Mask matrix loaded: {len(mask_df)} positions")

    # Sample columns present in both annotated_depths and mask_df
    sample_cols = [col for col in mask_df.columns
                   if col in annotated_depths.columns and col not in ["CHROM", "POS"]]

    LOG.info(f"Applying mask to {len(sample_cols)} samples")

    # Join on CHROM and POS; multiply each depth by its 0/1 mask value.
    # Positions absent from the mask keep their original depth (mask -> 1).
    annotated_depths_filtered = (
        annotated_depths.join(
            mask_df.select(["CHROM", "POS", *sample_cols]),
            on=["CHROM", "POS"],
            how="left",
            suffix="_mask"
        )
        .with_columns([
            (pl.col(s) * pl.col(f"{s}_mask").fill_null(1)).alias(s)
            for s in sample_cols
        ])
        .drop([f"{col}_mask" for col in sample_cols])
    )

    return annotated_depths_filtered


def output_annotate_dephts(annotated_depths: pl.DataFrame, json_f: str, samples: list):
    """
    Output annotated depths files.

    Always writes ``all_samples_indv.depths.tsv.gz`` (full per-sample matrix)
    and ``all_samples.depths.annotated.tsv.gz`` (row-wise total over samples).
    When *json_f* is given and readable, additionally writes one summed depths
    file per group; otherwise one file per sample.

    Parameters
    ----------
    annotated_depths : pl.DataFrame
        Annotated depths dataframe
    json_f : str
        JSON file with groups information (may be None/missing)
    samples : list
        List of samples
    """
    LOG.info("Outputting annotated depths file...")

    # Full per-sample matrix
    _write_gzipped_tsv(annotated_depths, "all_samples_indv.depths.tsv")

    if json_f is None:
        groups_info = None
    else:
        try:
            with open(json_f, 'r') as file:
                groups_info = json.load(file)
        except FileNotFoundError:
            groups_info = None

    if groups_info is not None:
        LOG.info("JSON file found. Outputting annotated depths file for each group.")
        # BUGFIX: loop variable renamed so it no longer shadows the `samples`
        # parameter (previously clobbered by `for group_name, samples in ...`).
        for group_name, group_samples in groups_info.items():
            annotated_depths = annotated_depths.with_columns(
                pl.sum_horizontal([pl.col(sample) for sample in group_samples]).alias(group_name)
            )
            _write_gzipped_tsv(annotated_depths.select(COLS + [group_name]),
                               f"{group_name}.depths.annotated.tsv")
    else:
        LOG.warning("JSON file not found. Outputting annotated depths file for each sample.")
        # Per sample
        for sample in samples:
            _write_gzipped_tsv(annotated_depths.select(COLS + [str(sample)]),
                               f"{sample}.depths.annotated.tsv")

    # Cohort-wide total.
    # BUGFIX: sum only the sample columns. Summing pl.exclude(COLS) after the
    # group loop also included the freshly added group columns, double-counting
    # every grouped sample in `all_samples`.
    annotated_depths = annotated_depths.with_columns(
        pl.sum_horizontal([pl.col(str(sample)) for sample in samples]).alias("all_samples"))

    _write_gzipped_tsv(annotated_depths.select(COLS + ["all_samples"]),
                       "all_samples.depths.annotated.tsv")


@click.command()
@click.option('--annotation', type=click.Path(exists=True), help='Input annotation file')
@click.option('--depths', type=click.Path(exists=True), help='Input depths file')
@click.option('--json_file', type=click.Path(exists=True), help='JSON groups file')
@click.option('--mask-matrix', type=click.Path(exists=True), help='Position per sample mask matrix file (1=keep, 0=mask)')
def main(annotation, depths, json_file, mask_matrix):
    """
    Merge annotation and depths, optionally mask positions, and write outputs.
    """
    LOG.info("Annotating depths file...")

    # Preprocess annotation and depths files
    annotated_depths, samples = preprocess(annotation, depths)

    # Apply position masking when a mask matrix is provided.
    # NOTE(review): the tail of this chunk was truncated; the calls below are
    # reconstructed from the visible structure — verify against the original.
    if mask_matrix:
        annotated_depths = apply_mask_matrix(annotated_depths, mask_matrix)

    # Write all output files
    output_annotate_dephts(annotated_depths, json_file, samples)


if __name__ == '__main__':
    main()
annotated_depths = apply_mask_matrix(annotated_depths, mask_matrix) + + output_annotate_dephts(annotated_depths, json_file, samples) -def main(annotation, depths, json_file, input_csv): - click.echo(f"Annotating depths file...") - annotate_depths(annotation, depths, json_file, input_csv) + LOG.info("Done!") if __name__ == '__main__': main() diff --git a/bin/plot_maf.py b/bin/plot_maf.py index b4a70d53..d89287fb 100755 --- a/bin/plot_maf.py +++ b/bin/plot_maf.py @@ -9,7 +9,7 @@ import seaborn as sns from matplotlib.backends.backend_pdf import PdfPages -from utils import filter_maf +from utils_filter import filter_maf from read_utils import custom_na_values diff --git a/bin/subset_maf.py b/bin/subset_maf.py index c4180df7..b01b71ee 100755 --- a/bin/subset_maf.py +++ b/bin/subset_maf.py @@ -5,7 +5,7 @@ import json import pandas as pd -from utils import filter_maf +from utils_filter import filter_maf from read_utils import custom_na_values diff --git a/bin/test/test_mask_matrix.py b/bin/test/test_mask_matrix.py new file mode 100755 index 00000000..6ccc1d7a --- /dev/null +++ b/bin/test/test_mask_matrix.py @@ -0,0 +1,561 @@ +#!/usr/bin/env python3 +""" +Unit tests for create_mask_matrix.py + +Tests the functionality of creating position × sample mask matrices from BED files. 
+""" + +import os +import sys +import tempfile +import shutil +import unittest +from pathlib import Path +import polars as pl + +# Add the bin directory to the path to import the module +sys.path.insert(0, str(Path(__file__).parent.parent)) +from create_mask_matrix import create_mask_matrix +from merge_annotation_depths import apply_mask_matrix + + +class TestCreateMaskMatrix(unittest.TestCase): + """Test suite for create_mask_matrix.py""" + + def setUp(self): + """Set up test fixtures""" + self.test_dir = tempfile.mkdtemp() + self.original_dir = os.getcwd() + os.chdir(self.test_dir) + + def tearDown(self): + """Clean up test fixtures""" + os.chdir(self.original_dir) + shutil.rmtree(self.test_dir) + + def create_mock_bed_file(self, sample_name, positions): + """ + Create a mock BED file with specified positions. + + Parameters + ---------- + sample_name : str + Name of the sample + positions : list of tuples + List of (chrom, start, end, filter) tuples + """ + bed_file = f"{sample_name}.flagged-pos.bed" + with open(bed_file, 'w') as f: + for chrom, start, end, filter_val in positions: + f.write(f"{chrom}\t{start}\t{end}\t{filter_val}\n") + return bed_file + + def read_mask_matrix(self, filename="flagged_positions.mask.tsv.gz"): + """Read and return the mask matrix""" + return pl.read_csv(filename, separator="\t").to_pandas() + + def test_single_sample_single_position(self): + """Test creating mask matrix with one sample and one position""" + bed_files = [ + self.create_mock_bed_file("sample1", [("chr1", 100, 100, "n_rich")]) + ] + + create_mask_matrix(bed_files) + + # Check output exists + self.assertTrue(Path("flagged_positions.mask.tsv.gz").exists()) + + # Check content + mask_df = self.read_mask_matrix() + self.assertEqual(len(mask_df), 1) + self.assertEqual(mask_df.iloc[0]['CHROM'], 'chr1') + self.assertEqual(mask_df.iloc[0]['POS'], 100) + self.assertEqual(mask_df.iloc[0]['sample1'], 0) # Should be masked + + def test_multiple_samples(self): + """Test creating 
mask matrix with multiple samples""" + bed_files = [ + self.create_mock_bed_file("sample1", [("chr1", 100, 100, "n_rich")]), + self.create_mock_bed_file("sample2", [("chr1", 200, 200, "nanoseq_snp")]), + self.create_mock_bed_file("sample3", [("chr2", 300, 300, "nanoseq_noise")]) + ] + + create_mask_matrix(bed_files) + + mask_df = self.read_mask_matrix() + + # Should have 3 positions total + self.assertEqual(len(mask_df), 3) + + # Check that each sample has correct columns + self.assertIn('sample1', mask_df.columns) + self.assertIn('sample2', mask_df.columns) + self.assertIn('sample3', mask_df.columns) + + # Check masking pattern: position should be 0 for its sample, 1 for others + row1 = mask_df[(mask_df['CHROM'] == 'chr1') & (mask_df['POS'] == 100)].iloc[0] + self.assertEqual(row1['sample1'], 0) # Masked + self.assertEqual(row1['sample2'], 1) # Not masked + self.assertEqual(row1['sample3'], 1) # Not masked + + def test_overlapping_positions(self): + """Test when multiple samples have the same position flagged""" + bed_files = [ + self.create_mock_bed_file("sample1", [("chr1", 100, 100, "n_rich")]), + self.create_mock_bed_file("sample2", [("chr1", 100, 100, "nanoseq_snp")]), + ] + + create_mask_matrix(bed_files) + + mask_df = self.read_mask_matrix() + + # Should have only 1 position + self.assertEqual(len(mask_df), 1) + + # Both samples should have 0 for this position + row = mask_df.iloc[0] + self.assertEqual(row['CHROM'], 'chr1') + self.assertEqual(row['POS'], 100) + self.assertEqual(row['sample1'], 0) + self.assertEqual(row['sample2'], 0) + + def test_range_expansion(self): + """Test that ranges are expanded to individual positions (1-based inclusive)""" + bed_files = [ + self.create_mock_bed_file("sample1", [("chr1", 100, 102, "nanoseq_noise")]) + ] + + create_mask_matrix(bed_files) + + mask_df = self.read_mask_matrix() + + # Should have 3 positions: 100, 101, 102 (1-based inclusive) + self.assertEqual(len(mask_df), 3) + positions = 
sorted(mask_df['POS'].tolist()) + self.assertEqual(positions, [100, 101, 102]) + + # All should be masked for sample1 + for pos in [100, 101, 102]: + row = mask_df[mask_df['POS'] == pos].iloc[0] + self.assertEqual(row['sample1'], 0) + + def test_empty_bed_file(self): + """Test handling of empty BED file""" + # Create an empty BED file + bed_file = "sample1.flagged-pos.bed" + with open(bed_file, 'w') as f: + pass # Empty file + + bed_files = [bed_file] + create_mask_matrix(bed_files) + + # Should create empty matrix with just CHROM and POS columns + mask_df = self.read_mask_matrix() + self.assertEqual(len(mask_df), 0) + self.assertIn('CHROM', mask_df.columns) + self.assertIn('POS', mask_df.columns) + + def test_multiple_chromosomes(self): + """Test handling of multiple chromosomes""" + bed_files = [ + self.create_mock_bed_file("sample1", [ + ("chr1", 100, 100, "n_rich"), + ("chr2", 200, 200, "nanoseq_snp"), + ("chrX", 300, 300, "nanoseq_noise") + ]) + ] + + create_mask_matrix(bed_files) + + mask_df = self.read_mask_matrix() + + # Should have 3 positions on different chromosomes + self.assertEqual(len(mask_df), 3) + chroms = sorted(mask_df['CHROM'].unique()) + self.assertIn('chr1', chroms) + self.assertIn('chr2', chroms) + self.assertIn('chrX', chroms) + + def test_sorting(self): + """Test that output is sorted by CHROM and POS""" + bed_files = [ + self.create_mock_bed_file("sample1", [ + ("chr2", 200, 200, "nanoseq_noise"), + ("chr1", 150, 150, "nanoseq_snp"), + ("chr1", 100, 100, "n_rich"), + ]) + ] + + create_mask_matrix(bed_files) + + mask_df = self.read_mask_matrix() + + # Check sorting + chroms = mask_df['CHROM'].tolist() + positions = mask_df['POS'].tolist() + + # chr1 should come before chr2 + chr1_indices = [i for i, c in enumerate(chroms) if c == 'chr1'] + chr2_indices = [i for i, c in enumerate(chroms) if c == 'chr2'] + + if chr1_indices and chr2_indices: + self.assertTrue(max(chr1_indices) < min(chr2_indices)) + + # Within chr1, positions should be sorted 
+ chr1_positions = [positions[i] for i in chr1_indices] + self.assertEqual(chr1_positions, sorted(chr1_positions)) + + def test_complex_scenario(self): + """Test a complex scenario with multiple samples, overlaps, and ranges""" + bed_files = [ + self.create_mock_bed_file("sample1", [ + ("chr1", 100, 102, "n_rich"), # 3 positions + ("chr2", 200, 200, "nanoseq_snp"), # 1 position + ]), + self.create_mock_bed_file("sample2", [ + ("chr1", 101, 101, "nanoseq_noise"), # Overlaps with sample1 + ("chr3", 300, 301, "n_rich"), # 2 positions + ]), + self.create_mock_bed_file("sample3", [ + ("chr2", 200, 200, "nanoseq_snp"), # Overlaps with sample1 + ]) + ] + + create_mask_matrix(bed_files) + + mask_df = self.read_mask_matrix() + + # Total unique positions: chr1:100,101,102 + chr2:200 + chr3:300,301 = 6 + self.assertEqual(len(mask_df), 6) + + # Check specific position: chr1:101 (masked in sample1 and sample2) + row_101 = mask_df[(mask_df['CHROM'] == 'chr1') & (mask_df['POS'] == 101)].iloc[0] + self.assertEqual(row_101['sample1'], 0) + self.assertEqual(row_101['sample2'], 0) + self.assertEqual(row_101['sample3'], 1) + + # Check specific position: chr2:200 (masked in sample1 and sample3) + row_200 = mask_df[(mask_df['CHROM'] == 'chr2') & (mask_df['POS'] == 200)].iloc[0] + self.assertEqual(row_200['sample1'], 0) + self.assertEqual(row_200['sample2'], 1) + self.assertEqual(row_200['sample3'], 0) + + def test_sample_name_extraction(self): + """Test that sample names are correctly extracted from filenames""" + # Create BED file with complex name + bed_file = "complex_sample_name-123.flagged-pos.bed" + with open(bed_file, 'w') as f: + f.write("chr1\t100\t100\tn_rich\n") + + create_mask_matrix([bed_file]) + + mask_df = self.read_mask_matrix() + + # Sample name should be extracted correctly + self.assertIn('complex_sample_name-123', mask_df.columns) + + def test_all_samples_empty(self): + """Test when all samples have empty BED files""" + bed_files = [ + 
self.create_mock_bed_file("sample1", []), + self.create_mock_bed_file("sample2", []), + ] + + create_mask_matrix(bed_files) + + # Should create empty matrix + mask_df = self.read_mask_matrix() + self.assertEqual(len(mask_df), 0) + + +class TestApplyMaskMatrix(unittest.TestCase): + """Test suite for apply_mask_matrix function""" + + def setUp(self): + """Set up test fixtures""" + self.test_dir = tempfile.mkdtemp() + self.original_dir = os.getcwd() + os.chdir(self.test_dir) + + def tearDown(self): + """Clean up test fixtures""" + os.chdir(self.original_dir) + shutil.rmtree(self.test_dir) + + def create_mock_depths(self, data): + """ + Create a mock annotated depths dataframe. + + Parameters + ---------- + data : dict + Dictionary with columns as keys and lists as values + + Returns + ------- + pl.DataFrame + Mock depths dataframe + """ + return pl.DataFrame(data) + + def create_mock_mask_matrix(self, data, filename="mask_matrix.tsv.gz"): + """ + Create a mock mask matrix file. + + Parameters + ---------- + data : dict + Dictionary with columns as keys and lists as values + filename : str + Output filename + """ + mask_df = pl.DataFrame(data) + mask_df.write_csv(filename, separator="\t") + return filename + + def test_basic_masking(self): + """Test basic masking: positions with 0 in mask should have depth=0""" + # Create mock depths + depths = self.create_mock_depths({ + 'CHROM': ['chr1', 'chr1', 'chr1'], + 'POS': [100, 200, 300], + 'CONTEXT': ['ACA', 'TCG', 'GGC'], + 'sample1': [50, 60, 70], + 'sample2': [40, 50, 60] + }) + + # Create mask matrix: sample1 position 200 should be masked + mask_file = self.create_mock_mask_matrix({ + 'CHROM': ['chr1', 'chr1', 'chr1'], + 'POS': [100, 200, 300], + 'sample1': [1, 0, 1], # Position 200 masked + 'sample2': [1, 1, 1] # All kept + }) + + # Apply masking + result = apply_mask_matrix(depths, mask_file) + + # Verify results + self.assertEqual(result.filter(pl.col('POS') == 100)['sample1'][0], 50) # Kept + 
self.assertEqual(result.filter(pl.col('POS') == 200)['sample1'][0], 0) # Masked + self.assertEqual(result.filter(pl.col('POS') == 300)['sample1'][0], 70) # Kept + + # sample2 should remain unchanged + self.assertEqual(result.filter(pl.col('POS') == 100)['sample2'][0], 40) + self.assertEqual(result.filter(pl.col('POS') == 200)['sample2'][0], 50) + self.assertEqual(result.filter(pl.col('POS') == 300)['sample2'][0], 60) + + def test_multiple_samples_masked(self): + """Test masking across multiple samples at the same position""" + depths = self.create_mock_depths({ + 'CHROM': ['chr1', 'chr1'], + 'POS': [100, 200], + 'CONTEXT': ['ACA', 'TCG'], + 'sample1': [50, 60], + 'sample2': [40, 50], + 'sample3': [30, 40] + }) + + # Mask chr1:100 for sample1 and sample2 + mask_file = self.create_mock_mask_matrix({ + 'CHROM': ['chr1', 'chr1'], + 'POS': [100, 200], + 'sample1': [0, 1], # 100 masked + 'sample2': [0, 1], # 100 masked + 'sample3': [1, 1] # All kept + }) + + result = apply_mask_matrix(depths, mask_file) + + # Position 100 should be 0 for sample1 and sample2, kept for sample3 + self.assertEqual(result.filter(pl.col('POS') == 100)['sample1'][0], 0) + self.assertEqual(result.filter(pl.col('POS') == 100)['sample2'][0], 0) + self.assertEqual(result.filter(pl.col('POS') == 100)['sample3'][0], 30) + + # Position 200 should be kept for all + self.assertEqual(result.filter(pl.col('POS') == 200)['sample1'][0], 60) + self.assertEqual(result.filter(pl.col('POS') == 200)['sample2'][0], 50) + self.assertEqual(result.filter(pl.col('POS') == 200)['sample3'][0], 40) + + def test_realistic_example(self): + """Test with realistic example from user's data""" + depths = self.create_mock_depths({ + 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1', 'chr1'], + 'POS': [1071610, 11059842, 19851697, 23495229, 23495334, 26729792], + 'CONTEXT': ['ACA', 'TCG', 'GGC', 'TAT', 'CGC', 'AAA'], + 'P19_0001_BDO_01': [45, 50, 55, 60, 65, 70], + 'P19_0001_BTR_01': [35, 40, 45, 50, 55, 60], + 
'P19_0004_BTR_01': [25, 30, 35, 40, 45, 50], + 'P19_0005_BTR_01': [15, 20, 25, 30, 35, 40] + }) + + # Create mask based on user's example + mask_file = self.create_mock_mask_matrix({ + 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1', 'chr1'], + 'POS': [1071610, 11059842, 19851697, 23495229, 23495334, 26729792], + 'P19_0001_BDO_01': [1, 0, 1, 1, 1, 0], + 'P19_0001_BTR_01': [1, 0, 1, 1, 1, 1], + 'P19_0004_BTR_01': [0, 0, 1, 0, 1, 1], + 'P19_0005_BTR_01': [1, 0, 1, 1, 1, 1] + }) + + result = apply_mask_matrix(depths, mask_file) + + # Check specific positions + # chr1:1071610 - P19_0004_BTR_01 should be masked (0) + self.assertEqual(result.filter(pl.col('POS') == 1071610)['P19_0004_BTR_01'][0], 0) + self.assertEqual(result.filter(pl.col('POS') == 1071610)['P19_0001_BDO_01'][0], 45) + + # chr1:11059842 - All samples should be masked except (P19_0001_BTR_01 and others have 0) + self.assertEqual(result.filter(pl.col('POS') == 11059842)['P19_0001_BDO_01'][0], 0) + self.assertEqual(result.filter(pl.col('POS') == 11059842)['P19_0001_BTR_01'][0], 0) + self.assertEqual(result.filter(pl.col('POS') == 11059842)['P19_0004_BTR_01'][0], 0) + self.assertEqual(result.filter(pl.col('POS') == 11059842)['P19_0005_BTR_01'][0], 0) + + # chr1:23495229 - P19_0004_BTR_01 should be masked + self.assertEqual(result.filter(pl.col('POS') == 23495229)['P19_0004_BTR_01'][0], 0) + self.assertEqual(result.filter(pl.col('POS') == 23495229)['P19_0001_BDO_01'][0], 60) + + # chr1:26729792 - P19_0001_BDO_01 should be masked + self.assertEqual(result.filter(pl.col('POS') == 26729792)['P19_0001_BDO_01'][0], 0) + self.assertEqual(result.filter(pl.col('POS') == 26729792)['P19_0001_BTR_01'][0], 60) + + def test_all_positions_kept(self): + """Test when all positions have mask=1 (nothing masked)""" + depths = self.create_mock_depths({ + 'CHROM': ['chr1', 'chr1'], + 'POS': [100, 200], + 'CONTEXT': ['ACA', 'TCG'], + 'sample1': [50, 60], + 'sample2': [40, 50] + }) + + mask_file = self.create_mock_mask_matrix({ + 
'CHROM': ['chr1', 'chr1'], + 'POS': [100, 200], + 'sample1': [1, 1], + 'sample2': [1, 1] + }) + + result = apply_mask_matrix(depths, mask_file) + + # All depths should remain unchanged + expected = depths.select(['CHROM', 'POS', 'sample1', 'sample2']) + actual = result.select(['CHROM', 'POS', 'sample1', 'sample2']) + self.assertTrue(expected.equals(actual)) + + def test_all_positions_masked(self): + """Test when all positions have mask=0 (everything masked)""" + depths = self.create_mock_depths({ + 'CHROM': ['chr1', 'chr1'], + 'POS': [100, 200], + 'CONTEXT': ['ACA', 'TCG'], + 'sample1': [50, 60], + 'sample2': [40, 50] + }) + + mask_file = self.create_mock_mask_matrix({ + 'CHROM': ['chr1', 'chr1'], + 'POS': [100, 200], + 'sample1': [0, 0], + 'sample2': [0, 0] + }) + + result = apply_mask_matrix(depths, mask_file) + + # All depths should be 0 + self.assertTrue(result['sample1'].to_list() == [0, 0]) + self.assertTrue(result['sample2'].to_list() == [0, 0]) + + def test_empty_mask_matrix(self): + """Test handling of empty mask matrix""" + depths = self.create_mock_depths({ + 'CHROM': ['chr1'], + 'POS': [100], + 'CONTEXT': ['ACA'], + 'sample1': [50] + }) + + # Create empty mask matrix + mask_file = self.create_mock_mask_matrix({ + 'CHROM': [], + 'POS': [] + }) + + result = apply_mask_matrix(depths, mask_file) + + # Should return original depths unchanged + self.assertTrue(result.equals(depths)) + + def test_partial_overlap(self): + """Test when mask matrix has only some positions from depths""" + depths = self.create_mock_depths({ + 'CHROM': ['chr1', 'chr1', 'chr1'], + 'POS': [100, 200, 300], + 'CONTEXT': ['ACA', 'TCG', 'GGC'], + 'sample1': [50, 60, 70] + }) + + # Mask only has positions 100 and 200 + mask_file = self.create_mock_mask_matrix({ + 'CHROM': ['chr1', 'chr1'], + 'POS': [100, 200], + 'sample1': [0, 1] + }) + + result = apply_mask_matrix(depths, mask_file) + + # Position 100 should be masked + self.assertEqual(result.filter(pl.col('POS') == 100)['sample1'][0], 
0) + # Position 200 should be kept + self.assertEqual(result.filter(pl.col('POS') == 200)['sample1'][0], 60) + # Position 300 not in mask, should remain unchanged + self.assertEqual(result.filter(pl.col('POS') == 300)['sample1'][0], 70) + + def test_structure_preservation(self): + """Test that CHROM, POS, and CONTEXT columns are preserved""" + depths = self.create_mock_depths({ + 'CHROM': ['chr1', 'chr2'], + 'POS': [100, 200], + 'CONTEXT': ['ACA', 'TCG'], + 'sample1': [50, 60] + }) + + mask_file = self.create_mock_mask_matrix({ + 'CHROM': ['chr1', 'chr2'], + 'POS': [100, 200], + 'sample1': [0, 1] + }) + + result = apply_mask_matrix(depths, mask_file) + + # Check structure is preserved + self.assertListEqual(list(result.columns), ['CHROM', 'POS', 'CONTEXT', 'sample1']) + self.assertListEqual(result['CHROM'].to_list(), ['chr1', 'chr2']) + self.assertListEqual(result['POS'].to_list(), [100, 200]) + self.assertListEqual(result['CONTEXT'].to_list(), ['ACA', 'TCG']) + + def test_zero_depth_remains_zero(self): + """Test that positions already at depth=0 remain at 0 regardless of mask""" + depths = self.create_mock_depths({ + 'CHROM': ['chr1', 'chr1'], + 'POS': [100, 200], + 'CONTEXT': ['ACA', 'TCG'], + 'sample1': [0, 60] # Already 0 + }) + + mask_file = self.create_mock_mask_matrix({ + 'CHROM': ['chr1', 'chr1'], + 'POS': [100, 200], + 'sample1': [1, 0] # Keep 100, mask 200 + }) + + result = apply_mask_matrix(depths, mask_file) + + # Position 100 was already 0, should remain 0 even with mask=1 + self.assertEqual(result.filter(pl.col('POS') == 100)['sample1'][0], 0) + # Position 200 should be masked to 0 + self.assertEqual(result.filter(pl.col('POS') == 200)['sample1'][0], 0) + +if __name__ == '__main__': + unittest.main() diff --git a/bin/utils.py b/bin/utils.py index 1ea2181b..9418d59a 100755 --- a/bin/utils.py +++ b/bin/utils.py @@ -5,9 +5,16 @@ def add_filter(old_filt, add_filt, filt_name): """ - old filt is the current FILTER field value - add_filt is a boolean, either 
True or False - filt_name is the name that should be added in the FILTER field in case the add_filt value is True + Adds a filter to the FILTER field in the MAF dataframe + + Parameters + ---------- + old_filt : str + The current FILTER field value + add_filt : bool + Either True or False + filt_name : str + The name that should be added in the FILTER field in case the add_filt value is True """ if add_filt: if old_filt == "PASS": @@ -23,68 +30,6 @@ def to_int_if_possible(string): except ValueError: return None - -def filter_maf(maf_df, filter_criteria): - ''' - Filter a MAF dataframe with filtering information coming from a list of tuples. - This can be either a dictionary transformed to list with the .items() method or by directly creating a list of tuples. - [('VAF', 'le 0.3'), ('VAF_AM', 'le 0.3'), ('vd_VAF', 'le 0.3'), - ('DEPTH', 'ge 40'), ('FILTER', 'notcontains n_rich'), - ('FILTER', 'notcontains cohort_n_rich_uni'), ('FILTER', 'notcontains NM20'), - ('FILTER', 'notcontains no_pileup_support'), ('FILTER', 'notcontains other_sample_SNP'), - ('FILTER', 'notcontains low_mappability')] - ''' - - # Define mappings for operators used in criteria - operators = { - 'eq': lambda x, y: x == y, - 'ne': lambda x, y: x != y, - 'lt': lambda x, y: x < y, - 'le': lambda x, y: x <= y, - 'gt': lambda x, y: x > y, - 'ge': lambda x, y: x >= y, - 'not': lambda x, y: x != y, - 'notcontains': lambda x, y: x.apply(lambda z : y not in z.split(";")), # (~maf_df["FILTER"].str.contains("not_in_panel")) - 'contains': lambda x, y: x.apply(lambda z : y in z.split(";")) - } - - # Apply filters based on criteria from the JSON file - for col, criterion in filter_criteria: - - if isinstance(criterion, bool): - pref_len = maf_df.shape[0] - maf_df = maf_df[maf_df[col] == criterion] - print(f"Applying {col}:{criterion} filter implied going from {pref_len} mutations to {maf_df.shape[0]} mutations.") - - elif ' ' in criterion: - operator, value = criterion.split(maxsplit=1) - - if len(operator) 
== 2 and operator in operators: - # 'VAF' : 'le 0.35' - pref_len = maf_df.shape[0] - maf_df = maf_df[operators[operator](maf_df[col], float(value))] - print(f"Applying {col}:{criterion} filter implied going from {pref_len} mutations to {maf_df.shape[0]} mutations.") - - elif operator in operators: - # 'FILTER' : 'notcontains n_rich', - pref_len = maf_df.shape[0] - maf_df = maf_df[operators[operator](maf_df[col], value)] - print(f"Applying {col}:{criterion} filter implied going from {pref_len} mutations to {maf_df.shape[0]} mutations.") - - else: - print(f"We have no filtering criteria defined for {col}:{criterion} filter.") - - - else: - # 'TYPE' : 'SNV' - pref_len = maf_df.shape[0] - maf_df = maf_df[maf_df[col] == criterion] - print(f"Applying {col}:{criterion} filter implied going from {pref_len} mutations to {maf_df.shape[0]} mutations.") - - return maf_df - - - def vartype(x, letters = ['A', 'T', 'C', 'G'], len_SV_lim = 50 diff --git a/bin/utils_filter.py b/bin/utils_filter.py new file mode 100644 index 00000000..8539b29f --- /dev/null +++ b/bin/utils_filter.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python +import logging +import pandas as pd +from pathlib import Path +""" +Utility functions for extracting filters from a MAF DataFrame. +""" + +LOG = logging.getLogger(__name__) + +def filter_maf(maf_df, filter_criteria): + ''' + Filter a MAF dataframe with filtering information coming from a list of tuples. + This can be either a dictionary transformed to list with the .items() method or by directly creating a list of tuples. 
+ [('VAF', 'le 0.3'), ('VAF_AM', 'le 0.3'), ('vd_VAF', 'le 0.3'), + ('DEPTH', 'ge 40'), ('FILTER', 'notcontains n_rich'), + ('FILTER', 'notcontains cohort_n_rich_uni'), ('FILTER', 'notcontains NM20'), + ('FILTER', 'notcontains no_pileup_support'), ('FILTER', 'notcontains other_sample_SNP'), + ('FILTER', 'notcontains low_mappability')] + ''' + + # Define mappings for operators used in criteria + operators = { + 'eq': lambda x, y: x == y, + 'ne': lambda x, y: x != y, + 'lt': lambda x, y: x < y, + 'le': lambda x, y: x <= y, + 'gt': lambda x, y: x > y, + 'ge': lambda x, y: x >= y, + 'not': lambda x, y: x != y, + 'notcontains': lambda x, y: x.apply(lambda z : y not in z.split(";")), # (~maf_df["FILTER"].str.contains("not_in_panel")) + 'contains': lambda x, y: x.apply(lambda z : y in z.split(";")) + } + + # Apply filters based on criteria from the JSON file + for col, criterion in filter_criteria: + + if isinstance(criterion, bool): + pref_len = maf_df.shape[0] + maf_df = maf_df[maf_df[col] == criterion] + print(f"Applying {col}:{criterion} filter implied going from {pref_len} mutations to {maf_df.shape[0]} mutations.") + + elif ' ' in criterion: + operator, value = criterion.split(maxsplit=1) + + if len(operator) == 2 and operator in operators: + # 'VAF' : 'le 0.35' + pref_len = maf_df.shape[0] + maf_df = maf_df[operators[operator](maf_df[col], float(value))] + print(f"Applying {col}:{criterion} filter implied going from {pref_len} mutations to {maf_df.shape[0]} mutations.") + + elif operator in operators: + # 'FILTER' : 'notcontains n_rich', + pref_len = maf_df.shape[0] + maf_df = maf_df[operators[operator](maf_df[col], value)] + print(f"Applying {col}:{criterion} filter implied going from {pref_len} mutations to {maf_df.shape[0]} mutations.") + + else: + print(f"We have no filtering criteria defined for {col}:{criterion} filter.") + + + else: + # 'TYPE' : 'SNV' + pref_len = maf_df.shape[0] + maf_df = maf_df[maf_df[col] == criterion] + print(f"Applying 
{col}:{criterion} filter implied going from {pref_len} mutations to {maf_df.shape[0]} mutations.") + + return maf_df + +def load_filter_criteria(filters: str, somatic_filters: str) -> list[str]: + """ + Parse filter criteria from comma-separated strings. + + Parameters + ---------- + filters : str + Comma-separated list of filter criteria + somatic_filters : str + Comma-separated list of somatic filter criteria + + Returns + ------- + list[str] + List of filter names to apply + """ + # Parse comma-separated strings into lists + filter_list = [f.strip() for f in filters.split(',') if f.strip()] + somatic_filter_list = [f.strip() for f in somatic_filters.split(',') if f.strip()] + + # Combine both lists + all_filters = filter_list + somatic_filter_list + + result = [f.replace("notcontains ", "") for f in all_filters if f.startswith("notcontains ")] + + LOG.info(f"Loaded {len(result)} filter criteria: {result}") + return result + +def expand_filter_column(maf_df: pd.DataFrame) -> pd.DataFrame: + """ + Expands the FILTER column by creating new columns for each unique filter. + Each new column indicates if the corresponding filter is present (True/False). 
+ """ + # Split FILTER column once per row and convert to set for O(1) lookup + filter_sets = maf_df["FILTER"].str.split(";").apply(lambda x: set(x) if x != [''] else set()) + + # Get all unique filter values (excluding empty strings) + all_filters = set( + filter_val + for filter_val in maf_df["FILTER"].str.split(";").explode().unique() + if filter_val and filter_val != '' + ) + + # Ensure "not_covered" and "not_in_exons" exist + required_filters = {"not_covered", "not_in_exons"} + all_filters.update(required_filters) + + # Create boolean columns efficiently + for filt in sorted(all_filters): + maf_df[f"FILTER.{filt}"] = filter_sets.apply(lambda x: filt in x) + + return maf_df + +def extract_flagged_regions_bed(maf_df: pd.DataFrame, name: str, FILTERS: list[str], specification: str = "") -> pd.DataFrame | None: + """ + Returns a BED file with the regions discarded, including the list of filters applied to each mutation. + Creates a properly formatted BED file with 0-based coordinates and half-open intervals. + + Parameters + ---------- + maf_df : pd.DataFrame + Input MAF dataframe with filter columns. POS column should contain 1-based coordinates. + name : str + Sample name to be used in the output BED file name. + FILTERS : list[str] + List of filter criteria to check for in the MAF dataframe. + specification : str, optional + Additional string to include in the output BED file name (e.g., "cohort-"), by default "". + + Returns + ------- + pd.DataFrame + A BED dataframe with discarded mutations and filters applied to each region. + Output coordinates are 0-based with half-open intervals [start, end). + """ + # List of filter columns you want to check for + filter_columns = [f"FILTER.{f}" for f in FILTERS if f in ','.join(list(maf_df.columns))] + + maf_df_filters = maf_df[maf_df[filter_columns].any(axis=1)] if filter_columns else pd.DataFrame() + + if maf_df_filters.empty: + LOG.warning("No mutations were flagged based on the applied filters. 
Creating empty BED file.") + # Create empty BED file to satisfy pipeline requirements + Path(f"{name}.flagged-pos.bed").touch() + return + + # Create BED-like dataframe with filter columns + bed_df = maf_df_filters[["CHROM", "POS"] + filter_columns] + + # Transform to long format + _bed_melt = (pd.melt(bed_df, + id_vars=["CHROM", "POS"], + value_vars=filter_columns, + var_name="FILTERS") + .query("value == True") + ) + + LOG.info("Mutations flagged: %s", _bed_melt.shape[0]) + + # Aggregate filters per position + bed_annotated = ( + _bed_melt + .drop_duplicates() + .sort_values(by=["CHROM", "POS"]) + .groupby(["CHROM","POS"])["FILTERS"] + .agg(','.join) + .reset_index() + .rename(columns={"POS": "START"}) + ) + + # The idea is to filter depth files at these positions, so make END = START (1-based) + bed_annotated["END"] = bed_annotated["START"] + + LOG.info("Unique regions flagged: %s", bed_annotated.shape[0]) + + # Write BED file without header or index + (bed_annotated[["CHROM", "START", "END", "FILTERS"]] + .to_csv(f"{name}.{specification}flagged-pos.bed", sep="\t", header=False, index=False) + ) \ No newline at end of file diff --git a/bin/write_mafs.py b/bin/write_mafs.py index 76f098da..4048b0c7 100755 --- a/bin/write_mafs.py +++ b/bin/write_mafs.py @@ -1,38 +1,141 @@ #!/usr/bin/env python +""" +Write MAFs - Create MAF file per sample and group. Extract flagged regions to BED file. +This script processes a Mutation Annotation Format (MAF) file to separate it +into individual MAF files for group defined in a provided JSON file. +It also extracts regions flagged by specified filters into a BED file for downstream analysis. +We obtain the flagged regions BED for both individual samples and the entire cohort. + +Command-line Arguments +---------------------- +maf-file : str + Path to the gzipped input MAF file. +groups-json : str + Path to the JSON file containing group/sample mapping. +filters : str + Comma-separated list of filter criteria to apply. 
+somatic-filters : str + Comma-separated list of somatic filter criteria to apply. + +Authors +------- +Author : Ferriol Calvet (@FerriolCalvet) +Email : ferriol.calvet@irbbarcelona.org + +Contributors +------------ +- Marta Huertas - @m-huertasp (marta.huertas@irbbarcelona.org) + +Usage +----- +python write_mafs.py \\ + --maf-file input.maf.tsv.gz \\ + --groups-json groups.json \\ + --filters "filter1,filter2,filter3" \\ + --somatic-filters "somatic_filter1,somatic_filter2" +""" +import logging import click import pandas as pd import json from read_utils import custom_na_values +from utils_filter import extract_flagged_regions_bed, load_filter_criteria + +# Logging +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s - %(message)s", + level=logging.INFO, + datefmt="%m/%d/%Y %I:%M:%S %p" +) +LOG = logging.getLogger("write_mafs") + +# Constants +# Define flags that are applied for all samples equally. +COHORT_FLAGS = ["cohort_n_rich", "cohort_n_rich_uni", "repetitive_variant", "gnomAD_SNP", "nanoseq_snp", "nanoseq_noise", "not_covered", "not_in_exons"] + +def create_group_mafs(maf_df: pd.DataFrame, group_name: str, samples: list[str]) -> None: + """ + Create separate MAF files for each group defined in the groups_info. + + Parameters + ---------- + maf_df : pd.DataFrame + Input MAF DataFrame + group_name : str + Name of the group + samples : list[str] + List of sample IDs belonging to the group + """ + # Create MAF for the group by filtering the samples + group_maf = maf_df[maf_df["SAMPLE_ID"].isin(samples)].sort_values(by=["CHROM", "POS"]) + # Generate output file + group_maf.to_csv(f"{group_name}.filtered.tsv.gz", sep="\t", header=True, index=False, compression='gzip') + LOG.info(f"Written MAF for group '{group_name}' with {len(group_maf)} records.") + +def create_sample_flagged_bed(maf_df: pd.DataFrame, samples: list[str], filter_criteria: list[str]) -> None: + """ + Extract the flagged regions for each sample and write them to BED files. 
+ + Parameters + ---------- + maf_df : pd.DataFrame + Input MAF DataFrame + samples : list[str] + List of sample IDs to process + filter_criteria : list[str] + List of filter criteria to apply when extracting flagged regions + """ + for sample in samples: + sample_maf = maf_df[maf_df["SAMPLE_ID"] == sample] + LOG.info(f"Extracting flagged positions from {sample}.") + extract_flagged_regions_bed(sample_maf, sample, filter_criteria) + @click.command() @click.option('--maf-file', required=True, type=click.Path(exists=True), help='Input gzipped MAF file (TSV)') @click.option('--groups-json', required=True, type=click.Path(exists=True), help='Optional JSON file with group/sample mapping') -def main(maf_file, groups_json): +@click.option('--filters', required=True, type=str, default='', help='Comma-separated list of filter criteria') +@click.option('--somatic-filters', required=True, type=str, default='', help='Comma-separated list of somatic filter criteria') +def main(maf_file, groups_json, filters: str, somatic_filters: str): maf_df = pd.read_csv(maf_file, compression='gzip', header=0, sep='\t', na_values=custom_na_values) maf_df["SAMPLE_ID"] = maf_df["SAMPLE_ID"].astype(str) - + filter_criteria = load_filter_criteria(filters, somatic_filters) + with open(groups_json, 'r') as file: groups_info = json.load(file) - all_samples = set([item for sublist in groups_info.values() for item in sublist]) - available_samples = set(maf_df["SAMPLE_ID"]) + # Obtain the set of all samples to be analyzed + all_samples = set(groups_info.get("all_samples")) + # Obtain the set of samples for which we have information in the MAF file + available_samples = set(maf_df["SAMPLE_ID"].unique()) + requested_n_available_samples = available_samples.intersection(all_samples) + if len(all_samples) != len(requested_n_available_samples): missing_samples = all_samples - available_samples - raise ValueError("Some SAMPLE_IDs listed in the features table have no matching entries in the MAF file." 
- "Missing SAMPLE_IDs: " + ", ".join(missing_samples)) - + error = f"Some SAMPLE_IDs listed in the features table have no matching entries in the MAF file. Missing SAMPLE_IDs: {', '.join(missing_samples)}" + LOG.error(error) + raise ValueError(error) + for group_name, samples in groups_info.items(): samples = [str(x) for x in samples] - maf_df[maf_df["SAMPLE_ID"].isin(samples)].sort_values(by=["CHROM", "POS"]).to_csv( - f"{group_name}.filtered.tsv.gz", - sep="\t", - header=True, - index=False - ) + # Extract group-specific MAFs + create_group_mafs(maf_df, group_name, samples) + + # Extract flagged regions for each sample in the group if not already done + create_sample_flagged_bed(maf_df, list(all_samples), filter_criteria) + + # Extract flagged regions to BED file (cohort-wide, applies to all samples) + LOG.info("Extracting all flagged positions to BED file.") + extract_flagged_regions_bed(maf_df, "all_samples", filter_criteria, "all-") + + # Extract flagged regions applied only for all the cohort, not sample-specific + LOG.info("Extracting only cohort-wide flagged positions to BED file.") + cohort_wide_filters = [f for f in COHORT_FLAGS if f in filter_criteria] + extract_flagged_regions_bed(maf_df, "all_samples", cohort_wide_filters, "cohort-wide-") if __name__ == '__main__': main() diff --git a/conf/modules.config b/conf/modules.config index cd46f8b9..a8a4cf35 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -228,11 +228,19 @@ process { withName: WRITEMAF { + ext.filters = { params.filter_criteria.join(',') } + ext.somatic_filters = { params.filter_criteria_somatic.join(',') } + publishDir = [ [ mode: params.publish_dir_mode, path: { "${params.outdir}/mutations/germline_somatic" }, pattern: "*{tsv.gz}", + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/flagged_positions" }, + pattern: "*{bed}", ] ] } @@ -311,14 +319,12 @@ process { ext.downsample_prop = params.downsample_proportion } - withName: FILTERBATCH { 
ext.repetitive_variant = params.repetitive_variant_thres ext.germline_threshold = params.germline_threshold + ext.prop_samples_nrich = params.prop_samples_nrich } - - - + withName: "TABLE2GROUP" { ext.unique_identifier = params.features_unique_identifier ext.feature_groups = params.features_groups_list @@ -627,7 +633,7 @@ process { } withLabel : deepcsa_core { - container = "docker.io/bbglab/deepcsa-core:0.0.2-alpha" + container = "docker.io/bbglab/deepcsa-core:0.1.0" } } diff --git a/conf/tools/panels.config b/conf/tools/panels.config index 7f1fc712..85559456 100644 --- a/conf/tools/panels.config +++ b/conf/tools/panels.config @@ -54,7 +54,6 @@ process { ] } - withName: 'BBGTOOLS:DEEPCSA:CREATEPANELS:CREATESAMPLEPANELSALL' { publishDir = [ [ diff --git a/modules/local/annotatedepth/main.nf b/modules/local/annotatedepth/main.nf index 903912cf..cdc9856f 100644 --- a/modules/local/annotatedepth/main.nf +++ b/modules/local/annotatedepth/main.nf @@ -4,12 +4,12 @@ process ANNOTATE_DEPTHS { label 'time_low' label 'deepcsa_core' - + input: tuple val(meta) , path(depths) tuple val(meta2), path(panel_all) path (json_groups) - path (input_csv) + path (mask_matrix) output: // tuple val(meta), path("*.depths.annotated.tsv.gz") , emit: annotated_depths @@ -25,8 +25,8 @@ process ANNOTATE_DEPTHS { --annotation ${panel_all}.contexts \\ --depths ${depths} \\ --json_file ${json_groups} \\ - --input_csv ${input_csv} - + --mask-matrix ${mask_matrix} + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') diff --git a/modules/local/createmaskmatrix/main.nf b/modules/local/createmaskmatrix/main.nf new file mode 100644 index 00000000..e703c218 --- /dev/null +++ b/modules/local/createmaskmatrix/main.nf @@ -0,0 +1,37 @@ +process CREATE_MASK_MATRIX { + tag "mask_matrix" + + label 'process_low' + label 'time_low' + + container "docker.io/bbglab/deepcsa-core:0.1.0" + + input: + path(bed_files) // List of sample-specific flagged position BED 
files + + output: + path("flagged_positions.mask.tsv.gz") , emit: mask_matrix + path "versions.yml" , topic: versions + + script: + def bed_args = bed_files.collect { "\"${it}\"" }.join(' --bed-files ') + """ + create_mask_matrix.py \\ + --bed-files ${bed_args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch flagged_positions.mask.tsv.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/createmaskmatrix/meta.yml b/modules/local/createmaskmatrix/meta.yml new file mode 100644 index 00000000..6aace8c2 --- /dev/null +++ b/modules/local/createmaskmatrix/meta.yml @@ -0,0 +1,38 @@ +name: "create_mask_matrix" +description: Create position × sample mask matrix from sample-specific flagged position BED files +keywords: + - masking + - bed + - matrix + - depths +tools: + - pandas: + description: | + Pandas is a Python library providing high-performance, easy-to-use data structures + and data analysis tools. + homepage: https://pandas.pydata.org/ + documentation: https://pandas.pydata.org/docs/ + +input: + - bed_files: + type: file + description: | + List of sample-specific flagged position BED files (*.flagged-pos.bed). + Each file contains regions to be masked for a specific sample. + pattern: "*.flagged-pos.bed" + +output: + - mask_matrix: + type: file + description: | + Position × sample mask matrix (TSV, gzipped). Contains CHROM, POS columns + followed by one 1/0 column per sample indicating whether the position + should be kept (1) or masked (0). 
+ pattern: "*.mask.tsv.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@mhuertas" diff --git a/modules/local/filtermaf/main.nf b/modules/local/filtermaf/main.nf index c070511e..7c0f8a98 100644 --- a/modules/local/filtermaf/main.nf +++ b/modules/local/filtermaf/main.nf @@ -10,23 +10,23 @@ process FILTER_BATCH { tuple val(meta), path(maf) output: - tuple val(meta), path("*.cohort.filtered.tsv.gz") , emit: cohort_maf - path "versions.yml" , topic: versions + tuple val(meta), path("*.cohort.filtered.tsv.gz") , emit: cohort_maf + path "versions.yml" , topic: versions script: def prefix = task.ext.prefix ?: "" prefix = "${meta.id}${prefix}" - def repetitive_variant = task.ext.repetitive_variant ?: "${params.repetitive_variant_thres}" - def germline_threshold = task.ext.germline_threshold ?: "${params.germline_threshold}" - def proportion_samples_nrich = task.ext.prop_samples_nrich ?: "${params.prop_samples_nrich}" + def repetitive_variant = task.ext.repetitive_variant ? "--repetitive-variant-threshold ${task.ext.repetitive_variant}" : "" + def germline_threshold = task.ext.germline_threshold ? "--somatic-vaf-boundary ${task.ext.germline_threshold}" : "" + def proportion_samples_nrich = task.ext.prop_samples_nrich ? 
"--n-rich-cohort-proportion ${task.ext.prop_samples_nrich}" : "" """ filter_cohort.py \\ --maf-df-file ${maf} \\ --sample-name ${prefix} \\ - --repetitive-variant-threshold ${repetitive_variant} \\ - --somatic-vaf-boundary ${germline_threshold} \\ - --n-rich-cohort-proportion ${proportion_samples_nrich} + ${repetitive_variant} \\ + ${germline_threshold} \\ + ${proportion_samples_nrich} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -36,7 +36,7 @@ process FILTER_BATCH { stub: """ - touch all_samples.cohort.filtered.tsv.gz + touch shared_cohort.filtered.tsv.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/writemaf/main.nf b/modules/local/writemaf/main.nf index dd82a01b..e11f1d0b 100644 --- a/modules/local/writemaf/main.nf +++ b/modules/local/writemaf/main.nf @@ -1,7 +1,7 @@ process WRITE_MAFS { tag "${meta.id}" - label 'process_low' + label 'process_high_memory' label 'deepcsa_core' @@ -10,14 +10,21 @@ process WRITE_MAFS { path (json_groups) output: - path("*.filtered.tsv.gz") , emit: mafs - path "versions.yml" , topic: versions + path("*.filtered.tsv.gz") , emit: mafs + path("*.flagged-pos.bed") , emit: sample_flagged_beds + path("all_samples.all-flagged-pos.bed") + path("all_samples.cohort-wide-flagged-pos.bed") + path "versions.yml" , topic: versions script: + def filters = task.ext.filters ?: "" + def somatic_filters = task.ext.somatic_filters ?: "" """ write_mafs.py \\ --maf-file ${maf} \\ - --groups-json ${json_groups} + --groups-json ${json_groups} \\ + --filters "${filters}" \\ + --somatic-filters "${somatic_filters}" cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -27,7 +34,10 @@ process WRITE_MAFS { stub: """ - touch all_samples.cohort.tsv.gz + touch all_samples.filtered.tsv.gz + touch all_samples.all-flagged-pos.bed + touch all_samples.cohort-wide-flagged-pos.bed + touch *.flagged-pos.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 
9a2e74e1..4faa435f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -83,8 +83,6 @@ params { use_custom_depths = false custom_depths_table = null - create_sample_panels = false - contamination = false downsample = false diff --git a/subworkflows/local/createpanels/main.nf b/subworkflows/local/createpanels/main.nf index ec86d14f..f8303044 100644 --- a/subworkflows/local/createpanels/main.nf +++ b/subworkflows/local/createpanels/main.nf @@ -1,33 +1,23 @@ -include { SITESFROMPOSITIONS } from '../../../modules/local/sitesfrompositions/main' -include { VCF_ANNOTATE_ENSEMBLVEP as VCFANNOTATEPANEL } from '../../../subworkflows/nf-core/vcf_annotate_ensemblvep_panel/main' +include { SITESFROMPOSITIONS } from '../../../modules/local/sitesfrompositions/main' +include { VCF_ANNOTATE_ENSEMBLVEP as VCFANNOTATEPANEL } from '../../../subworkflows/nf-core/vcf_annotate_ensemblvep_panel/main' -include { POSTPROCESS_VEP_ANNOTATION as POSTPROCESSVEPPANEL } from '../../../modules/local/process_annotation/panel/main' +include { POSTPROCESS_VEP_ANNOTATION as POSTPROCESSVEPPANEL } from '../../../modules/local/process_annotation/panel/main' -include { CUSTOM_ANNOTATION_PROCESSING as CUSTOMPROCESSING } from '../../../modules/local/process_annotation/panelcustom/main' -include { CUSTOM_ANNOTATION_PROCESSING as CUSTOMPROCESSINGRICH } from '../../../modules/local/process_annotation/panelcustom/main' +include { CUSTOM_ANNOTATION_PROCESSING as CUSTOMPROCESSING } from '../../../modules/local/process_annotation/panelcustom/main' +include { CUSTOM_ANNOTATION_PROCESSING as CUSTOMPROCESSINGRICH } from '../../../modules/local/process_annotation/panelcustom/main' -include { DOMAIN_ANNOTATION as DOMAINANNOTATION } from '../../../modules/local/process_annotation/domain/main' +include { DOMAIN_ANNOTATION as DOMAINANNOTATION } from '../../../modules/local/process_annotation/domain/main' +include { CREATECAPTUREDPANELS } from '../../../modules/local/createpanels/captured/main' -include { 
CREATECAPTUREDPANELS } from '../../../modules/local/createpanels/captured/main' - -include { CREATESAMPLEPANELS as CREATESAMPLEPANELSALL } from '../../../modules/local/createpanels/sample/main' -include { CREATESAMPLEPANELS as CREATESAMPLEPANELSPROTAFFECT } from '../../../modules/local/createpanels/sample/main' -include { CREATESAMPLEPANELS as CREATESAMPLEPANELSNONPROTAFFECT } from '../../../modules/local/createpanels/sample/main' -include { CREATESAMPLEPANELS as CREATESAMPLEPANELSEXONS } from '../../../modules/local/createpanels/sample/main' -include { CREATESAMPLEPANELS as CREATESAMPLEPANELSINTRONS } from '../../../modules/local/createpanels/sample/main' -include { CREATESAMPLEPANELS as CREATESAMPLEPANELSSYNONYMOUS } from '../../../modules/local/createpanels/sample/main' - - -include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSALL } from '../../../modules/local/createpanels/consensus/main' -include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSPROTAFFECT } from '../../../modules/local/createpanels/consensus/main' -include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSNONPROTAFFECT } from '../../../modules/local/createpanels/consensus/main' -include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSEXONS } from '../../../modules/local/createpanels/consensus/main' -include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSINTRONS } from '../../../modules/local/createpanels/consensus/main' -include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSSYNONYMOUS } from '../../../modules/local/createpanels/consensus/main' - -include { COMPARE_TRINUCLEOTIDE_PROPORTIONS_PANELS as COMPAREPANELPROPORTIONS } from '../../../modules/local/createpanels/compare/main' +include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSALL } from '../../../modules/local/createpanels/consensus/main' +include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSPROTAFFECT } from '../../../modules/local/createpanels/consensus/main' +include { CREATECONSENSUSPANELS as 
CREATECONSENSUSPANELSNONPROTAFFECT } from '../../../modules/local/createpanels/consensus/main' +include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSEXONS } from '../../../modules/local/createpanels/consensus/main' +include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSINTRONS } from '../../../modules/local/createpanels/consensus/main' +include { CREATECONSENSUSPANELS as CREATECONSENSUSPANELSSYNONYMOUS } from '../../../modules/local/createpanels/consensus/main' +include { COMPARE_TRINUCLEOTIDE_PROPORTIONS_PANELS as COMPAREPANELPROPORTIONS } from '../../../modules/local/createpanels/compare/main' def restructurePanel(panel) { // return panel.map{ it -> [[id: it[0].name.tokenize('.')[0..1].join('.')], it[1]] } @@ -35,15 +25,6 @@ def restructurePanel(panel) { return panel.map { it -> [[id: it.name.tokenize('.')[1]], it] } } - -def restructureSamplePanel(panel) { - // return panel.map{ it -> [[id: it[0].name.tokenize('.')[0..1].join('.')], it[1]] } - // return panel.map { it -> [[id: it.name.tokenize('.')[0..1].join('.')], it] } - return panel.map { it -> [[id: it.name.tokenize('.')[0]], it] } - } - - - workflow CREATE_PANELS { take: @@ -109,17 +90,6 @@ workflow CREATE_PANELS { restructurePanel(CREATECAPTUREDPANELS.out.captured_panel_synonymous).set{synonymous_panel} restructurePanel(CREATECAPTUREDPANELS.out.captured_panel_synonymous_bed).set{synonymous_bed} - if (params.create_sample_panels){ - // Create sample-specific panels: all modalities - CREATESAMPLEPANELSALL(all_panel, depths, params.sample_panel_min_depth) - CREATESAMPLEPANELSPROTAFFECT(prot_panel, depths, params.sample_panel_min_depth) - CREATESAMPLEPANELSNONPROTAFFECT(nonprot_panel, depths, params.sample_panel_min_depth) - CREATESAMPLEPANELSEXONS(exons_panel, depths, params.sample_panel_min_depth) - CREATESAMPLEPANELSINTRONS(introns_panel, depths, params.sample_panel_min_depth) - CREATESAMPLEPANELSSYNONYMOUS(synonymous_panel, depths, params.sample_panel_min_depth) - } - - // Create consensus panel: all 
modalities CREATECONSENSUSPANELSALL(all_panel, depths) CREATECONSENSUSPANELSPROTAFFECT(prot_panel, depths) @@ -128,6 +98,7 @@ workflow CREATE_PANELS { CREATECONSENSUSPANELSINTRONS(introns_panel, depths) CREATECONSENSUSPANELSSYNONYMOUS(synonymous_panel, depths) + // Compare trinucleotide proportions in the panels to WGS background - QC plots channel.empty() .concat(CREATECONSENSUSPANELSALL.out.consensus_panel.map{ it -> it[1]}.flatten()) .concat(CREATECONSENSUSPANELSPROTAFFECT.out.consensus_panel.map{ it -> it[1]}.flatten()) @@ -140,6 +111,7 @@ workflow CREATE_PANELS { COMPAREPANELPROPORTIONS(all_consensus_panels, wgs_trinucleotides) + emit: full_panel_annotated = VCFANNOTATEPANEL.out.tab.first() all_panel = all_panel.first() @@ -169,6 +141,7 @@ workflow CREATE_PANELS { synonymous_consensus_panel = CREATECONSENSUSPANELSSYNONYMOUS.out.consensus_panel.first() synonymous_consensus_bed = CREATECONSENSUSPANELSSYNONYMOUS.out.consensus_panel_bed.first() + panel_annotated_rich = rich_annotated.first() added_custom_regions = added_regions.first() domains_panel_bed = DOMAINANNOTATION.out.domains_bed.first() @@ -176,15 +149,4 @@ workflow CREATE_PANELS { postprocessed_panel = POSTPROCESSVEPPANEL.out.compact_panel_annotation.first() postprocessed_panel_rich = POSTPROCESSVEPPANEL.out.rich_panel_annotation.first() - // all_sample_panel = restructureSamplePanel(CREATESAMPLEPANELSALL.out.sample_specific_panel.flatten()) - // all_sample_bed = restructureSamplePanel(CREATESAMPLEPANELSALL.out.sample_specific_panel_bed.flatten()) - // prot_sample_panel = restructureSamplePanel(CREATESAMPLEPANELSPROTAFFECT.out.sample_specific_panel.flatten()) - // prot_sample_bed = restructureSamplePanel(CREATESAMPLEPANELSPROTAFFECT.out.sample_specific_panel_bed.flatten()) - // nonprot_sample_panel = restructureSamplePanel(CREATESAMPLEPANELSNONPROTAFFECT.out.sample_specific_panel.flatten()) - // nonprot_sample_bed = 
restructureSamplePanel(CREATESAMPLEPANELSNONPROTAFFECT.out.sample_specific_panel_bed.flatten()) - // exons_sample_panel = restructureSamplePanel(CREATESAMPLEPANELSEXONS.out.sample_specific_panel.flatten()) - // exons_sample_bed = restructureSamplePanel(CREATESAMPLEPANELSEXONS.out.sample_specific_panel_bed.flatten()) - // introns_sample_panel = restructureSamplePanel(CREATESAMPLEPANELSINTRONS.out.sample_specific_panel.flatten()) - // introns_sample_bed = restructureSamplePanel(CREATESAMPLEPANELSINTRONS.out.sample_specific_panel_bed.flatten()) - } diff --git a/subworkflows/local/createpanels/meta.yml b/subworkflows/local/createpanels/meta.yml index e69de29b..45312099 100644 --- a/subworkflows/local/createpanels/meta.yml +++ b/subworkflows/local/createpanels/meta.yml @@ -0,0 +1,213 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "CREATE_PANELS" +description: | + Creates comprehensive genomic panels from depth files through a multi-stage annotation pipeline. + This workflow generates both captured panels (all possible mutations in sequenced regions) and + consensus panels (regions with adequate depth coverage across samples). It performs VEP annotation, + optional custom annotation processing, protein domain mapping, and creates panel variants for + different genomic features (protein-affecting, exons, synonymous, etc.). The outputs are used + throughout the pipeline for mutation filtering, depth analysis, and selection analysis. +keywords: + - annotation + - panel + - vep + - ensembl + - domains + - consensus + - captured regions + - depth coverage +components: + - SITESFROMPOSITIONS + - VCF_ANNOTATE_ENSEMBLVEP + - POSTPROCESS_VEP_ANNOTATION + - CUSTOM_ANNOTATION_PROCESSING + - DOMAIN_ANNOTATION + - CREATECAPTUREDPANELS + - CREATECONSENSUSPANELS + +input: + - depths: + type: file + description: | + Channel containing per-sample depth information files for all captured/sequenced regions. 
+ These files are used to (1) generate all possible mutations within the captured panel and + (2) determine which regions have consensus coverage (adequate depth across the cohort). + pattern: "*.{txt,tsv,bed,gz}" + +output: + - full_panel_annotated: + type: file + description: | + Complete VEP annotation output in tab-separated format (raw VEP output). + Contains all transcript annotations before postprocessing. + pattern: "*.tab" + - complete_annotated_panel: + type: file + description: | + Compact annotated panel with one annotation per mutation (postprocessed). + Optionally includes custom annotation modifications if params.customize_annotation is true. + This is the primary annotation reference used throughout the pipeline. + pattern: "*.tsv" + - panel_annotated_rich: + type: file + description: | + Rich annotated panel with extended VEP annotation fields. + Used for detailed analyses and domain annotation mapping. + pattern: "*.tsv" + - added_custom_regions: + type: file + description: | + Regions added or modified during custom annotation processing. + Empty channel if params.customize_annotation is false. + pattern: "*.tsv" + - domains_panel_bed: + type: file + description: | + BED file with protein domain coordinates mapped to genomic positions. + Used for domain-level selection analysis. + pattern: "*.bed" + - domains_in_panel: + type: file + description: | + Tab-separated file listing all protein domains present in the captured panel. + pattern: "*.tsv" + - postprocessed_panel: + type: file + description: | + Compact panel annotation immediately after VEP postprocessing (before custom annotation). + pattern: "*.tsv" + - postprocessed_panel_rich: + type: file + description: | + Rich panel annotation immediately after VEP postprocessing (before custom annotation). + pattern: "*.tsv" + - all_panel: + type: file + description: | + Captured panel containing all possible mutations in sequenced regions (TSV format). 
+ pattern: "*.tsv" + - all_bed: + type: file + description: | + Captured panel BED file containing all sequenced genomic regions. + pattern: "*.bed" + - prot_panel: + type: file + description: | + Captured panel subset: protein-affecting mutations only (missense, nonsense, etc.). + pattern: "*.tsv" + - prot_bed: + type: file + description: | + Captured panel BED file for protein-affecting regions. + pattern: "*.bed" + - nonprot_panel: + type: file + description: | + Captured panel subset: non-protein-affecting mutations (introns, intergenic, UTRs). + pattern: "*.tsv" + - nonprot_bed: + type: file + description: | + Captured panel BED file for non-protein-affecting regions. + pattern: "*.bed" + - exons_panel: + type: file + description: | + Captured panel subset: exons and splice sites. + pattern: "*.tsv" + - exons_bed: + type: file + description: | + Captured panel BED file for exons and splice sites. Used by FILTEREXONS process. + pattern: "*.bed" + - introns_panel: + type: file + description: | + Captured panel subset: introns and intergenic regions. + pattern: "*.tsv" + - introns_bed: + type: file + description: | + Captured panel BED file for introns and intergenic regions. + pattern: "*.bed" + - synonymous_panel: + type: file + description: | + Captured panel subset: synonymous mutations only. + pattern: "*.tsv" + - synonymous_bed: + type: file + description: | + Captured panel BED file for regions containing synonymous mutations. + pattern: "*.bed" + - all_consensus_panel: + type: file + description: | + Consensus panel: regions with adequate depth coverage across the cohort (all regions). + Used for downstream depth analysis and mutation filtering. + pattern: "*.tsv" + - all_consensus_bed: + type: file + description: | + Consensus panel BED file for all regions with adequate coverage. + Used by FILTERPANEL to mark mutations outside consensus regions as "not_covered". 
+ pattern: "*.bed" + - prot_consensus_panel: + type: file + description: | + Consensus panel subset: protein-affecting regions with adequate coverage. + pattern: "*.tsv" + - prot_consensus_bed: + type: file + description: | + Consensus panel BED file for protein-affecting regions with adequate coverage. + pattern: "*.bed" + - nonprot_consensus_panel: + type: file + description: | + Consensus panel subset: non-protein-affecting regions with adequate coverage. + pattern: "*.tsv" + - nonprot_consensus_bed: + type: file + description: | + Consensus panel BED file for non-protein-affecting regions with adequate coverage. + pattern: "*.bed" + - exons_consensus_panel: + type: file + description: | + Consensus panel subset: exons and splice sites with adequate coverage. + pattern: "*.tsv" + - exons_consensus_bed: + type: file + description: | + Consensus panel BED file for exons and splice sites with adequate coverage. + Used for exon-specific depth plots and analyses. + pattern: "*.bed" + - introns_consensus_panel: + type: file + description: | + Consensus panel subset: introns and intergenic regions with adequate coverage. + pattern: "*.tsv" + - introns_consensus_bed: + type: file + description: | + Consensus panel BED file for introns and intergenic regions with adequate coverage. + pattern: "*.bed" + - synonymous_consensus_panel: + type: file + description: | + Consensus panel subset: synonymous mutation sites with adequate coverage. + pattern: "*.tsv" + - synonymous_consensus_bed: + type: file + description: | + Consensus panel BED file for synonymous mutation sites with adequate coverage. + Used for synonymous-specific selection analyses. 
+ pattern: "*.bed" + +authors: + - Ferriol Calvet (ferriol.calvet@irbbarcelona.org) +maintainers: + - Ferriol Calvet (ferriol.calvet@irbbarcelona.org) + - Marta Huertas (marta.huertas@irbbarcelona.org) \ No newline at end of file diff --git a/subworkflows/local/mutationpreprocessing/main.nf b/subworkflows/local/mutationpreprocessing/main.nf index de7f2918..d5265230 100644 --- a/subworkflows/local/mutationpreprocessing/main.nf +++ b/subworkflows/local/mutationpreprocessing/main.nf @@ -1,25 +1,26 @@ // Annotation -include { VCF_ANNOTATE_ENSEMBLVEP as VCFANNOTATE } from '../../nf-core/vcf_annotate_ensemblvep/main' - - -include { SUMMARIZE_ANNOTATION as SUMANNOTATION } from '../../../modules/local/process_annotation/mutations/main' -include { CUSTOM_MUTATION_PROCESSING as CUSTOMANNOTATION } from '../../../modules/local/process_annotation/mutations_custom/main' -include { VCF2MAF as VCF2MAF } from '../../../modules/local/vcf2maf/main' -include { FILTERBED as FILTERPANEL } from '../../../modules/local/filterbed/main' -include { FILTERBED as FILTEREXONS } from '../../../modules/local/filterbed/main' -include { FILTERBED as FILTERNANOSEQSNP } from '../../../modules/local/filterbed/main' -include { FILTERBED as FILTERNANOSEQNOISE} from '../../../modules/local/filterbed/main' -include { MERGE_BATCH as MERGEBATCH } from '../../../modules/local/mergemafs/main' -include { FILTER_BATCH as FILTERBATCH } from '../../../modules/local/filtermaf/main' -include { WRITE_MAFS as WRITEMAF } from '../../../modules/local/writemaf/main' -include { SUBSET_MAF as SOMATICMUTATIONS } from '../../../modules/local/subsetmaf/main' -include { SUBSET_MAF as CLEANMUTATIONS } from '../../../modules/local/subsetmaf/main' -include { BLACKLIST_MUTATIONS as BLACKLISTMUTS } from '../../../modules/local/blacklistmuts/main' -include { PLOT_MUTATIONS as PLOTMAF } from '../../../modules/local/plot/mutations_summary/main' -include { PLOT_MUTATIONS as PLOTSOMATICMAF } from 
'../../../modules/local/plot/mutations_summary/main' -include { PLOT_NEEDLES as PLOTNEEDLES } from '../../../modules/local/plot/needles/main' -include { DOWNSAMPLE_MUTATIONS as DOWNSAMPLEMUTS } from '../../../modules/local/downsample/mutations/main' -include { COMPUTE_CONTAMINATION as CONTAMINATION } from '../../../modules/local/contamination/main' +include { VCF_ANNOTATE_ENSEMBLVEP as VCFANNOTATE } from '../../nf-core/vcf_annotate_ensemblvep/main' + + +include { SUMMARIZE_ANNOTATION as SUMANNOTATION } from '../../../modules/local/process_annotation/mutations/main' +include { CUSTOM_MUTATION_PROCESSING as CUSTOMANNOTATION } from '../../../modules/local/process_annotation/mutations_custom/main' +include { VCF2MAF as VCF2MAF } from '../../../modules/local/vcf2maf/main' +include { FILTERBED as FILTERPANEL } from '../../../modules/local/filterbed/main' +include { FILTERBED as FILTEREXONS } from '../../../modules/local/filterbed/main' +include { FILTERBED as FILTERNANOSEQSNP } from '../../../modules/local/filterbed/main' +include { FILTERBED as FILTERNANOSEQNOISE } from '../../../modules/local/filterbed/main' +include { CREATE_MASK_MATRIX as CREATEMASKMATRIX } from '../../../modules/local/createmaskmatrix/main' +include { MERGE_BATCH as MERGEBATCH } from '../../../modules/local/mergemafs/main' +include { FILTER_BATCH as FILTERBATCH } from '../../../modules/local/filtermaf/main' +include { WRITE_MAFS as WRITEMAF } from '../../../modules/local/writemaf/main' +include { SUBSET_MAF as SOMATICMUTATIONS } from '../../../modules/local/subsetmaf/main' +include { SUBSET_MAF as CLEANMUTATIONS } from '../../../modules/local/subsetmaf/main' +include { BLACKLIST_MUTATIONS as BLACKLISTMUTS } from '../../../modules/local/blacklistmuts/main' +include { PLOT_MUTATIONS as PLOTMAF } from '../../../modules/local/plot/mutations_summary/main' +include { PLOT_MUTATIONS as PLOTSOMATICMAF } from '../../../modules/local/plot/mutations_summary/main' +include { PLOT_NEEDLES as PLOTNEEDLES } from 
'../../../modules/local/plot/needles/main' +include { DOWNSAMPLE_MUTATIONS as DOWNSAMPLEMUTS } from '../../../modules/local/downsample/mutations/main' +include { COMPUTE_CONTAMINATION as CONTAMINATION } from '../../../modules/local/contamination/main' workflow MUTATION_PREPROCESSING { @@ -88,9 +89,10 @@ workflow MUTATION_PREPROCESSING { filtered_maf_masks = params.nanoseq_noise ? FILTERNANOSEQNOISE.out.maf : filtered_maf_snp } filtered_maf_panels = masks_applied ? filtered_maf_masks : FILTERPANEL.out.maf + // Join all samples' MAFs and put them in a channel to be merged filtered_maf_panels.map{ it -> it[1] }.collect().map{ it -> [[ id:"all_samples" ], it]}.set{ samples_maf } - + MERGEBATCH(samples_maf) FILTERBATCH(MERGEBATCH.out.cohort_maf) @@ -98,6 +100,9 @@ workflow MUTATION_PREPROCESSING { PLOTMAF(FILTERBATCH.out.cohort_maf) WRITEMAF(FILTERBATCH.out.cohort_maf, all_groups) + + // Create the mask matrix used to mask positions in the depths + CREATEMASKMATRIX(WRITEMAF.out.sample_flagged_beds) // Here we flatten the output of the WRITEMAF module to have a channel where each item is a sample-maf pair WRITEMAF.out.mafs.flatten().map{ it -> [ [id : it.name.tokenize('.')[0]] , it] }.set{ named_mafs } @@ -125,7 +130,6 @@ workflow MUTATION_PREPROCESSING { // Keep only somatic mutations SOMATICMUTATIONS(CLEANMUTATIONS.out.mutations) - channel.of([["id": "all_samples"]]) .join(named_mafs).first() @@ -170,6 +174,7 @@ workflow MUTATION_PREPROCESSING { emit: mafs = named_mafs + mask_matrix = CREATEMASKMATRIX.out.mask_matrix somatic_mafs = SOMATICMUTATIONS.out.mutations clean_mafs = CLEANMUTATIONS.out.mutations mutations_all_samples = muts_all_samples diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index b04082f2..7ad9dfb7 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -23,7 +23,7 @@ include { PLOT_DEPTHS as PLOTDEPTHSEXONSCONS } from '../subworkfl include { MUTATION_PREPROCESSING as MUT_PREPROCESSING } from 
'../subworkflows/local/mutationpreprocessing/main' -include { ENRICHPANELS as ENRICHPANELS } from '../subworkflows/local/enrichpanels/main' +include { ENRICHPANELS as ENRICHPANELS } from '../subworkflows/local/enrichpanels/main' include { MUTATION_DENSITY as MUTDENSITYALL } from '../subworkflows/local/mutationdensity/main' @@ -194,10 +194,23 @@ workflow DEEPCSA{ // Depth analysis: compute and plots DEPTHANALYSIS(INPUT_CHECK.out.sample_inputs, custom_bed_file) - // Panels generation: all modalities + // Panels annotation CREATEPANELS(DEPTHANALYSIS.out.depths, wgs_trinucs) - ANNOTATEDEPTHS(DEPTHANALYSIS.out.depths, CREATEPANELS.out.all_panel, TABLE2GROUP.out.json_allgroups, file(params.input)) + // Mutation preprocessing + MUT_PREPROCESSING(meta_vcfs_alone, + CREATEPANELS.out.all_consensus_bed, + CREATEPANELS.out.exons_bed, + TABLE2GROUP.out.json_allgroups, + group_keys_ch, + seqinfo_df, + CREATEPANELS.out.added_custom_regions + ) + somatic_mutations = MUT_PREPROCESSING.out.somatic_mafs + + positive_selection_results = somatic_mutations + + ANNOTATEDEPTHS(DEPTHANALYSIS.out.depths, CREATEPANELS.out.all_panel, TABLE2GROUP.out.json_allgroups, MUT_PREPROCESSING.out.mask_matrix) ANNOTATEDEPTHS.out.annotated_depths.flatten().map{ it -> [ [id : it.name.tokenize('.')[0]] , it] }.set{ annotated_depths_full } // if (params.downsample && params.downsample_proportion < 1) { @@ -214,20 +227,6 @@ workflow DEEPCSA{ } PLOTDEPTHSEXONSCONS(ANNOTATEDEPTHS.out.all_samples_depths, CREATEPANELS.out.exons_consensus_bed, CREATEPANELS.out.exons_consensus_panel) - // Mutation preprocessing - MUT_PREPROCESSING(meta_vcfs_alone, - CREATEPANELS.out.all_consensus_bed, - CREATEPANELS.out.exons_bed, - TABLE2GROUP.out.json_allgroups, - group_keys_ch, - seqinfo_df, - CREATEPANELS.out.added_custom_regions - ) - somatic_mutations = MUT_PREPROCESSING.out.somatic_mafs - - positive_selection_results = somatic_mutations - - // Enrich regions in consensus panels 
ENRICHPANELS(MUT_PREPROCESSING.out.mutations_all_samples, ANNOTATEDEPTHS.out.all_samples_depths,