From 8d08d1f9d54fb139eb53587754e569c4317ddc37 Mon Sep 17 00:00:00 2001 From: Madeline Scyphers Date: Mon, 30 Mar 2026 16:30:54 -0600 Subject: [PATCH 1/3] feat: Add antiSMASH, CARD, RGI, TCDB Add antiSMASH nextflow module, right now just collect antismash raw output while we work on incorporating raw output into larger pipeline Add rgi nextflow module, right now like antiSMASH, only collect raw output while we work on incorporating ADD CARD db processing with mmseqs ADD TCDB processing with mmseqs --- modules.json | 15 + modules/local/annotate/mmseqs_search.nf | 10 +- .../antismash/antismash/environment.yml | 7 + modules/nf-core/antismash/antismash/main.nf | 85 +++++ modules/nf-core/antismash/antismash/meta.yml | 245 ++++++++++++++ .../antismash/antismash/tests/main.nf.test | 92 ++++++ .../antismash/tests/main.nf.test.snap | 308 ++++++++++++++++++ .../antismash/antismash/tests/nextflow.config | 5 + .../environment.yml | 7 + .../antismashdownloaddatabases/main.nf | 49 +++ .../antismashdownloaddatabases/meta.yml | 46 +++ .../tests/main.nf.test | 57 ++++ .../tests/main.nf.test.snap | 51 +++ .../tests/nextflow.config | 5 + modules/nf-core/rgi/main/environment.yml | 7 + modules/nf-core/rgi/main/main.nf | 91 ++++++ modules/nf-core/rgi/main/meta.yml | 102 ++++++ modules/nf-core/rgi/main/tests/main.nf.test | 94 ++++++ .../nf-core/rgi/main/tests/main.nf.test.snap | 143 ++++++++ nextflow.config | 10 + nextflow_schema.json | 31 ++ subworkflows/local/annotate.nf | 30 ++ subworkflows/local/call.nf | 2 + subworkflows/local/db_search.nf | 76 ++++- .../local/utils_nfcore_dram_pipeline/main.nf | 8 +- subworkflows/local/utils_pipeline_setup.nf | 6 +- workflows/dram.nf | 40 ++- 27 files changed, 1596 insertions(+), 26 deletions(-) create mode 100644 modules/nf-core/antismash/antismash/environment.yml create mode 100644 modules/nf-core/antismash/antismash/main.nf create mode 100644 modules/nf-core/antismash/antismash/meta.yml create mode 100644 
modules/nf-core/antismash/antismash/tests/main.nf.test create mode 100644 modules/nf-core/antismash/antismash/tests/main.nf.test.snap create mode 100644 modules/nf-core/antismash/antismash/tests/nextflow.config create mode 100644 modules/nf-core/antismash/antismashdownloaddatabases/environment.yml create mode 100644 modules/nf-core/antismash/antismashdownloaddatabases/main.nf create mode 100644 modules/nf-core/antismash/antismashdownloaddatabases/meta.yml create mode 100644 modules/nf-core/antismash/antismashdownloaddatabases/tests/main.nf.test create mode 100644 modules/nf-core/antismash/antismashdownloaddatabases/tests/main.nf.test.snap create mode 100644 modules/nf-core/antismash/antismashdownloaddatabases/tests/nextflow.config create mode 100644 modules/nf-core/rgi/main/environment.yml create mode 100644 modules/nf-core/rgi/main/main.nf create mode 100644 modules/nf-core/rgi/main/meta.yml create mode 100644 modules/nf-core/rgi/main/tests/main.nf.test create mode 100644 modules/nf-core/rgi/main/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index 48c04632..918b2ca5 100644 --- a/modules.json +++ b/modules.json @@ -5,10 +5,25 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "antismash/antismash": { + "branch": "master", + "git_sha": "96c57dfd98a0641886a67bd449fe33ee2ec0e374", + "installed_by": ["modules"] + }, + "antismash/antismashdownloaddatabases": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] + }, + "rgi/main": { + "branch": "master", + "git_sha": "5e748ff2b0f990949081c9e49792622eb3fe9ee9", + "installed_by": ["modules"] } } }, diff --git a/modules/local/annotate/mmseqs_search.nf b/modules/local/annotate/mmseqs_search.nf index 5407e3fe..fa2074b3 100644 --- a/modules/local/annotate/mmseqs_search.nf +++ 
b/modules/local/annotate/mmseqs_search.nf @@ -36,14 +36,14 @@ process MMSEQS_SEARCH { # Perform search mmseqs search query_database/${input_fasta}.mmsdb ${db_name}.mmsdb mmseqs_out/${input_fasta}_${db_name}.mmsdb mmseqs_out/tmp --threads ${task.cpus} - # Filter to only best hit - mmseqs filterdb mmseqs_out/${input_fasta}_${db_name}.mmsdb mmseqs_out/${input_fasta}_${db_name}_tophit.mmsdb --extract-lines 1 - # Filter to only hits with minimum bit score - mmseqs filterdb --filter-column 2 --comparison-operator ge --comparison-value ${bit_score_threshold} --threads ${task.cpus} mmseqs_out/${input_fasta}_${db_name}_tophit.mmsdb mmseqs_out/${input_fasta}_${db_name}_tophit_minbitscore${bit_score_threshold}.mmsdb + mmseqs filterdb --filter-column 2 --comparison-operator ge --comparison-value ${bit_score_threshold} --threads ${task.cpus} mmseqs_out/${input_fasta}_${db_name}.mmsdb mmseqs_out/${input_fasta}_${db_name}.mmsdb + + # Filter to only best hit + mmseqs filterdb mmseqs_out/${input_fasta}_${db_name}.mmsdb mmseqs_out/${input_fasta}_${db_name}.mmsdb --extract-lines 1 # Convert results to BLAST outformat 6 - mmseqs convertalis query_database/${input_fasta}.mmsdb ${db_name}.mmsdb mmseqs_out/${input_fasta}_${db_name}_tophit_minbitscore${bit_score_threshold}.mmsdb mmseqs_out/${input_fasta}___mmseqs_${db_name}.tsv --threads ${task.cpus} + mmseqs convertalis query_database/${input_fasta}.mmsdb ${db_name}.mmsdb mmseqs_out/${input_fasta}_${db_name}.mmsdb mmseqs_out/${input_fasta}___mmseqs_${db_name}.tsv --threads ${task.cpus} # if statement for kegg rbh goes here elif [ "${db_name}" == "pfam" ]; then diff --git a/modules/nf-core/antismash/antismash/environment.yml b/modules/nf-core/antismash/antismash/environment.yml new file mode 100644 index 00000000..f03e68e2 --- /dev/null +++ b/modules/nf-core/antismash/antismash/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json 
+channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::antismash=8.0.1" diff --git a/modules/nf-core/antismash/antismash/main.nf b/modules/nf-core/antismash/antismash/main.nf new file mode 100644 index 00000000..42b51699 --- /dev/null +++ b/modules/nf-core/antismash/antismash/main.nf @@ -0,0 +1,85 @@ +process ANTISMASH_ANTISMASH { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "nf-core/antismash:8.0.1--pyhdfd78af_0" + + input: + tuple val(meta), path(sequence_input) + path databases + path gff + + output: + tuple val(meta), path("${prefix}/{css,images,js}") , emit: html_accessory_files + tuple val(meta), path("${prefix}/*.gbk") , emit: gbk_input + tuple val(meta), path("${prefix}/*.json") , emit: json_results + tuple val(meta), path("${prefix}/*.log") , emit: log + tuple val(meta), path("${prefix}/*.zip") , emit: zip + tuple val(meta), path("${prefix}/index.html") , emit: html + tuple val(meta), path("${prefix}/regions.js") , emit: json_sideloading + tuple val(meta), path("${prefix}/clusterblast/*_c*.txt") , emit: clusterblast_file , optional: true + tuple val(meta), path("${prefix}/knownclusterblast/region*/ctg*.html"), emit: knownclusterblast_html , optional: true + tuple val(meta), path("${prefix}/knownclusterblast/") , emit: knownclusterblast_dir , optional: true + tuple val(meta), path("${prefix}/knownclusterblast/*_c*.txt") , emit: knownclusterblast_txt , optional: true + tuple val(meta), path("${prefix}/svg/clusterblast*.svg") , emit: svg_files_clusterblast , optional: true + tuple val(meta), path("${prefix}/svg/knownclusterblast*.svg") , emit: svg_files_knownclusterblast, optional: true + tuple val(meta), path("${prefix}/*region*.gbk") , emit: gbk_results , optional: true + tuple val(meta), path("${prefix}/clusterblastoutput.txt") , emit: clusterblastoutput , optional: true + tuple val(meta), path("${prefix}/knownclusterblastoutput.txt") , emit: knownclusterblastoutput , optional: true + path 
"versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + gff_flag = gff ? "--genefinding-gff3 ${gff}" : "" + + """ + ## We specifically do not include on-the-fly annotations (--genefinding-tool none) as + ## this should be run as a separate module for versioning purposes + + antismash \\ + ${args} \\ + ${gff_flag} \\ + -c ${task.cpus} \\ + --output-dir ${prefix} \\ + --output-basename ${prefix} \\ + --genefinding-tool none \\ + --logfile ${prefix}/${prefix}.log \\ + --databases ${databases} \\ + ${sequence_input} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + antismash: \$(echo \$(antismash --version) | sed 's/antiSMASH //;s/-.*//g') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}/css + mkdir ${prefix}/images + mkdir ${prefix}/js + touch ${prefix}/NZ_CP069563.1.region001.gbk + touch ${prefix}/NZ_CP069563.1.region002.gbk + touch ${prefix}/css/bacteria.css + touch ${prefix}/genome.gbk + touch ${prefix}/genome.json + touch ${prefix}/genome.zip + touch ${prefix}/images/about.svg + touch ${prefix}/index.html + touch ${prefix}/js/antismash.js + touch ${prefix}/js/jquery.js + touch ${prefix}/regions.js + touch ${prefix}/test.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + antismash: \$(echo \$(antismash --version) | sed 's/antiSMASH //;s/-.*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/antismash/antismash/meta.yml b/modules/nf-core/antismash/antismash/meta.yml new file mode 100644 index 00000000..8dcb610b --- /dev/null +++ b/modules/nf-core/antismash/antismash/meta.yml @@ -0,0 +1,245 @@ +name: antismash_antismash +description: | + antiSMASH allows the rapid genome-wide identification, annotation + and analysis of secondary metabolite biosynthesis gene clusters. 
+keywords: + - secondary metabolites + - BGC + - biosynthetic gene cluster + - genome mining + - NRPS + - RiPP + - antibiotics + - prokaryotes + - bacteria + - eukaryotes + - fungi + - antismash +tools: + - antismash: + description: "antiSMASH - the antibiotics and Secondary Metabolite Analysis SHell" + homepage: "https://docs.antismash.secondarymetabolites.org" + documentation: "https://docs.antismash.secondarymetabolites.org" + tool_dev_url: "https://github.com/antismash/antismash" + doi: "10.1093/nar/gkab335" + licence: ["AGPL v3"] + identifier: biotools:antismash +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - sequence_input: + type: file + description: Nucleotide sequence file (annotated) + pattern: "*.{gbk, gb, gbff, genbank, embl, fasta, fna}" + ontologies: [] + - databases: + type: directory + description: | + Downloaded AntiSMASH databases (e.g. in the AntiSMASH installation directory + "data/databases") + pattern: "*/" + - gff: + type: file + description: Optional GFF3 file containing premade annotations of the input sequence + pattern: "*.gff" + ontologies: [] +output: + html_accessory_files: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/{css,images,js}: + type: directory + description: Accessory files for the HTML output + pattern: "{css/,images/,js/}" + gbk_input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/*.gbk: + type: file + description: Nucleotide sequence and annotations in GenBank format; converted + from input file + pattern: "*.gbk" + ontologies: [] + json_results: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - ${prefix}/*.json: + type: file + description: Nucleotide sequence and annotations in JSON format; converted + from GenBank file (gbk_input) + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/*.log: + type: file + description: Contains all the logging output that antiSMASH produced during + its run + pattern: "*.log" + ontologies: [] + zip: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/*.zip: + type: file + description: Contains a compressed version of the output folder in zip format + pattern: "*.zip" + ontologies: + - edam: http://edamontology.org/format_3987 # ZIP format + html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/index.html: + type: file + description: Graphical web view of results in HTML format + patterN: "index.html" + ontologies: [] + json_sideloading: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/regions.js: + type: file + description: Sideloaded annotations of protoclusters and/or subregions (see + antiSMASH documentation "Annotation sideloading") + pattern: "regions.js" + ontologies: [] + clusterblast_file: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/clusterblast/*_c*.txt: + type: file + description: Output of ClusterBlast algorithm + pattern: "clusterblast/*_c*.txt" + ontologies: [] + knownclusterblast_html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - ${prefix}/knownclusterblast/region*/ctg*.html: + type: file + description: Tables with MIBiG hits in HTML format + pattern: "knownclusterblast/region*/ctg*.html" + ontologies: [] + knownclusterblast_dir: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/knownclusterblast/: + type: directory + description: Directory with MIBiG hits + pattern: "knownclusterblast/" + knownclusterblast_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/knownclusterblast/*_c*.txt: + type: file + description: Tables with MIBiG hits + pattern: "knownclusterblast/*_c*.txt" + ontologies: [] + svg_files_clusterblast: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/svg/clusterblast*.svg: + type: file + description: SVG images showing the % identity of the aligned hits against + their queries + pattern: "svg/clusterblast*.svg" + ontologies: [] + svg_files_knownclusterblast: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/svg/knownclusterblast*.svg: + type: file + description: SVG images showing the % identity of the aligned hits against + their queries + pattern: "svg/knownclusterblast*.svg" + ontologies: [] + gbk_results: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/*region*.gbk: + type: file + description: Nucleotide sequence and annotations in GenBank format; one file + per antiSMASH hit + pattern: "*region*.gbk" + ontologies: [] + clusterblastoutput: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - ${prefix}/clusterblastoutput.txt: + type: file + description: Raw BLAST output of known clusters previously predicted by antiSMASH + using the built-in ClusterBlast algorithm + pattern: "clusterblastoutput.txt" + ontologies: [] + knownclusterblastoutput: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}/knownclusterblastoutput.txt: + type: file + description: Raw BLAST output of known clusters of the MIBiG database + pattern: "knownclusterblastoutput.txt" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jasmezz" +maintainers: + - "@jasmezz" + - "@jfy133" diff --git a/modules/nf-core/antismash/antismash/tests/main.nf.test b/modules/nf-core/antismash/antismash/tests/main.nf.test new file mode 100644 index 00000000..9cfd3a01 --- /dev/null +++ b/modules/nf-core/antismash/antismash/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process ANTISMASH_ANTISMASH" + script "../main.nf" + process "ANTISMASH_ANTISMASH" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "antismash" + tag "antismash/antismash" + tag "antismash/antismashdownloaddatabases" + tag "gunzip" + + + setup { + + run("ANTISMASH_ANTISMASHDOWNLOADDATABASES") { + script "../../../antismash/antismashdownloaddatabases" + process { + """ + """ + } + } + + run("GUNZIP") { + script "../../../gunzip" + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.gbff.gz', checkIfExists: true) + ] + """ + } + } + } + + test("antismash - bacteroides_fragilis - genome") { + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = ANTISMASH_ANTISMASHDOWNLOADDATABASES.out.database + input[2] = [] + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.gbk_results.get(0).get(1).get(0)).text.contains("##antiSMASH-Data-START##"), + path(process.out.gbk_input.get(0).get(1).get(0)).text.contains("##antiSMASH-Data-END##"), + path(process.out.zip.get(0).get(1)).exists(), + path(process.out.html.get(0).get(1)).text.contains("https://antismash.secondarymetabolites.org/"), + path(process.out.json_sideloading.get(0).get(1)).text.contains("NZ_CP069563.1"), + path(process.out.log.get(0).get(1)).text.contains("antiSMASH status: SUCCESS"), + process.out.html_accessory_files, + process.out.versions, + path(process.out.versions[0]).yaml + ).match()} + ) + } + } + + test("antismash - bacteroides_fragilis - genome - stub") { + options "-stub" + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = ANTISMASH_ANTISMASHDOWNLOADDATABASES.out.database + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match()} + ) + } + } +} diff --git a/modules/nf-core/antismash/antismash/tests/main.nf.test.snap b/modules/nf-core/antismash/antismash/tests/main.nf.test.snap new file mode 100644 index 00000000..85735c18 --- /dev/null +++ b/modules/nf-core/antismash/antismash/tests/main.nf.test.snap @@ -0,0 +1,308 @@ +{ + "antismash - bacteroides_fragilis - genome": { + "content": [ + true, + true, + true, + true, + true, + true, + [ + [ + { + "id": "test" + }, + [ + [ + "bacteria.css:md5,e5b4d3ceaa91b03f6393d9b3d5f072e7" + ], + [ + "about.svg:md5,2573f954dd506e2d0878daed04f5420a", + "bacteria_about.png:md5,99cdc2aa09aee37553b10ca86b172170", + "bacteria_antismash_icon.svg:md5,23a265b0e1cf293a4743fe13030b636f", + "bacteria_antismash_logo.svg:md5,f80f639969ee6506571ffda2e197df93", + "bacteria_antismash_white.svg:md5,2c9da15cc168d8f796269d037b5e7f60", + "bacteria_download.png:md5,c3428df1cf17cb97e2897ca6daa93d48", + 
"bacteria_help.png:md5,359b68f90c73208eb389759c0f5c1091", + "bacteria_home.png:md5,6595d97ee49d251fe038207f82012eff", + "bacteria_logo.png:md5,013f84d6dd93cde96f07084ff63d855c", + "contact.svg:md5,53b878c2af4f8a80a647ac30f61e6bf6", + "download.svg:md5,722038156f4ece46747cbf6908501974", + "expand-arrows-alt-solid.svg:md5,21b37749f54320135a455ed266a7fc3a", + "external-link-alt-solid.svg:md5,ca337694c74e57f73d15ca9db30081ba", + "fungi_about.png:md5,4d55bf14df0340dca01a286487fa8448", + "fungi_antismash_icon.svg:md5,2acc19cc91d5d7285a72f0b3912e108a", + "fungi_antismash_icon_white.svg:md5,961f1c41e25036a625f115f209a961c7", + "fungi_antismash_logo.svg:md5,36560983a36f46786c98a05125b15724", + "fungi_download.png:md5,782580852674aab0b69b2b94a94c7615", + "fungi_help.png:md5,0ac06748f3177d150ab90997117c4f64", + "fungi_home.png:md5,880071898062d6dafe989ac73bb7bbea", + "fungi_logo.png:md5,29294392a3953fd1ba12d1a39cebaeeb", + "help.svg:md5,e7565a3cd74893422f2886a0af748df2", + "mail.png:md5,049f51233b29663e4e4e4c8097c2d096", + "minus-circle.svg:md5,b523305570d06b6e34cd7099bed22015", + "nostructure_icon.png:md5,fc982a5b84a1a99db607731625a87f88", + "plant_antismash_icon.svg:md5,e031de9570ef2809e52502481a5e77ea", + "plant_antismash_icon_white.svg:md5,10d25996b023dbdaed4a382471ab4877", + "plus-circle.svg:md5,cba2cdd9ef893274f572228b354718cf", + "question-circle-solid.svg:md5,6dbc83547e29ecedc7f2a5b81354353b", + "search-solid.svg:md5,aeab848c26357f3d120f3e58f1efa8f5" + ], + [ + "antismash.js:md5,b452a926645e2d4dd93f8a685275aa79", + "jquery.js:md5,397754ba49e9e0cf4e7c190da78dda05", + "jquery.tablesorter.min.js:md5,5e9e08cef4d1be0eaa538e6eb28809a7" + ] + ] + ] + ], + [ + "versions.yml:md5,48e6949487e113c0b097dcee63dc894d" + ], + { + "ANTISMASH_ANTISMASH": { + "antismash": "8.0.1" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-03T08:17:33.268167622" + }, + "antismash - bacteroides_fragilis - genome - stub": { + "content": [ + { + "0": 
[ + [ + { + "id": "test" + }, + [ + [ + "bacteria.css:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "about.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "antismash.js:md5,d41d8cd98f00b204e9800998ecf8427e", + "jquery.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "1": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + "versions.yml:md5,48e6949487e113c0b097dcee63dc894d" + ], + "2": [ + [ + { + "id": "test" + }, + "genome.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "genome.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "regions.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "clusterblast_file": [ + + ], + "clusterblastoutput": [ + + ], + "gbk_input": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "gbk_results": [ + [ + { + "id": "test" + }, + [ + "NZ_CP069563.1.region001.gbk:md5,d41d8cd98f00b204e9800998ecf8427e", + "NZ_CP069563.1.region002.gbk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "html": [ + [ + { + "id": "test" + }, + 
"index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "html_accessory_files": [ + [ + { + "id": "test" + }, + [ + [ + "bacteria.css:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "about.svg:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "antismash.js:md5,d41d8cd98f00b204e9800998ecf8427e", + "jquery.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "json_results": [ + [ + { + "id": "test" + }, + "genome.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json_sideloading": [ + [ + { + "id": "test" + }, + "regions.js:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "knownclusterblast_dir": [ + + ], + "knownclusterblast_html": [ + + ], + "knownclusterblast_txt": [ + + ], + "knownclusterblastoutput": [ + + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "svg_files_clusterblast": [ + + ], + "svg_files_knownclusterblast": [ + + ], + "versions": [ + "versions.yml:md5,48e6949487e113c0b097dcee63dc894d" + ], + "zip": [ + [ + { + "id": "test" + }, + "genome.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + }, + { + "ANTISMASH_ANTISMASH": { + "antismash": "8.0.1" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-01T15:56:34.83807227" + } +} diff --git a/modules/nf-core/antismash/antismash/tests/nextflow.config b/modules/nf-core/antismash/antismash/tests/nextflow.config new file mode 100644 index 00000000..d76b72bd --- /dev/null +++ b/modules/nf-core/antismash/antismash/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: ANTISMASH_ANTISMASH { + memory = 7.GB + } +} diff --git a/modules/nf-core/antismash/antismashdownloaddatabases/environment.yml b/modules/nf-core/antismash/antismashdownloaddatabases/environment.yml new file mode 100644 index 00000000..f03e68e2 --- /dev/null +++ b/modules/nf-core/antismash/antismashdownloaddatabases/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::antismash=8.0.1" diff --git a/modules/nf-core/antismash/antismashdownloaddatabases/main.nf b/modules/nf-core/antismash/antismashdownloaddatabases/main.nf new file mode 100644 index 00000000..3c1d33b0 --- /dev/null +++ b/modules/nf-core/antismash/antismashdownloaddatabases/main.nf @@ -0,0 +1,49 @@ +process ANTISMASH_ANTISMASHDOWNLOADDATABASES { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "nf-core/antismash:8.0.1--pyhdfd78af_0" + + output: + path "antismash_db", emit: database + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + download-antismash-databases \\ + --database-dir antismash_db \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + antismash: \$(echo \$(antismash --version) | sed 's/antiSMASH //;s/-.*//g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + echo "download-antismash-databases --database-dir antismash_db ${args}" + + mkdir antismash_db + mkdir antismash_db/as-js + mkdir antismash_db/clusterblast + mkdir antismash_db/clustercompare + mkdir antismash_db/comparippson + mkdir antismash_db/knownclusterblast + mkdir antismash_db/mite + mkdir antismash_db/nrps_pks + mkdir antismash_db/pfam + mkdir antismash_db/resfam + mkdir antismash_db/tigrfam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + antismash: \$(echo \$(antismash --version) | sed 's/antiSMASH //;s/-.*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/antismash/antismashdownloaddatabases/meta.yml b/modules/nf-core/antismash/antismashdownloaddatabases/meta.yml new file mode 100644 index 00000000..ad1e6cbc --- /dev/null +++ b/modules/nf-core/antismash/antismashdownloaddatabases/meta.yml @@ -0,0 +1,46 @@ +name: 
antismash_antismashdownloaddatabases +description: antiSMASH allows the rapid genome-wide identification, annotation and + analysis of secondary metabolite biosynthesis gene clusters. This module downloads + the antiSMASH databases for conda and docker/singularity runs. +keywords: + - secondary metabolites + - BGC + - biosynthetic gene cluster + - genome mining + - NRPS + - RiPP + - antibiotics + - prokaryotes + - bacteria + - eukaryotes + - fungi + - antismash + - database +tools: + - antismash: + description: antiSMASH - the antibiotics and Secondary Metabolite Analysis SHell + homepage: https://docs.antismash.secondarymetabolites.org + documentation: https://docs.antismash.secondarymetabolites.org + tool_dev_url: https://github.com/antismash/antismash + doi: "10.1093/nar/gkab335" + licence: ["AGPL v3"] + identifier: biotools:antismash +input: [] +output: + database: + - antismash_db: + type: directory + description: Download directory for antiSMASH databases + pattern: "antismash_db" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jasmezz" +maintainers: + - "@jasmezz" + - "@jfy133" diff --git a/modules/nf-core/antismash/antismashdownloaddatabases/tests/main.nf.test b/modules/nf-core/antismash/antismashdownloaddatabases/tests/main.nf.test new file mode 100644 index 00000000..e25dff2a --- /dev/null +++ b/modules/nf-core/antismash/antismashdownloaddatabases/tests/main.nf.test @@ -0,0 +1,57 @@ +nextflow_process { + + name "Test Process ANTISMASH_ANTISMASHDOWNLOADDATABASES" + script "../main.nf" + process "ANTISMASH_ANTISMASHDOWNLOADDATABASES" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "antismash" + tag "antismash/antismashdownloaddatabases" + + test("antismash/downloaddatabases") { + + when { + process { + """ + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert snapshot( + file(process.out.database.get(0)).list().sort(), + path(process.out.versions[0]).yaml, + file(process.out.versions[0]).name, + ).match() + } + ) + } + } + + test("antismash/downloaddatabases - stub") { + + options "-stub" + + when { + process { + """ + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.database.get(0)).list().sort(), + file(process.out.versions[0]).name, + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/antismash/antismashdownloaddatabases/tests/main.nf.test.snap b/modules/nf-core/antismash/antismashdownloaddatabases/tests/main.nf.test.snap new file mode 100644 index 00000000..6c8a33d7 --- /dev/null +++ b/modules/nf-core/antismash/antismashdownloaddatabases/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "antismash/downloaddatabases - stub": { + "content": [ + [ + "as-js", + "clusterblast", + "clustercompare", + "comparippson", + "knownclusterblast", + "mite", + "nrps_pks", + "pfam", + "resfam", + "tigrfam" + ], + "versions.yml" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-22T07:52:56.373189968" + }, + "antismash/downloaddatabases": { + "content": [ + [ + "as-js", + "clusterblast", + "clustercompare", + "comparippson", + "knownclusterblast", + "mite", + "nrps_pks", + "pfam", + "resfam", + "tigrfam" + ], + { + "ANTISMASH_ANTISMASHDOWNLOADDATABASES": { + "antismash": "8.0.1" + } + }, + "versions.yml" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-03T08:25:39.61984576" + } +} diff --git a/modules/nf-core/antismash/antismashdownloaddatabases/tests/nextflow.config b/modules/nf-core/antismash/antismashdownloaddatabases/tests/nextflow.config new file mode 100644 index 00000000..63ec101f --- /dev/null +++ b/modules/nf-core/antismash/antismashdownloaddatabases/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: ANTISMASH_ANTISMASHDOWNLOADDATABASES { + memory = 7.GB + } +} diff 
--git a/modules/nf-core/rgi/main/environment.yml b/modules/nf-core/rgi/main/environment.yml new file mode 100644 index 00000000..b6b2d343 --- /dev/null +++ b/modules/nf-core/rgi/main/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::rgi=6.0.5 diff --git a/modules/nf-core/rgi/main/main.nf b/modules/nf-core/rgi/main/main.nf new file mode 100644 index 00000000..744dbc92 --- /dev/null +++ b/modules/nf-core/rgi/main/main.nf @@ -0,0 +1,91 @@ +process RGI_MAIN { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/rgi:6.0.5--pyh05cac1d_0' + : 'biocontainers/rgi:6.0.5--pyh05cac1d_0'}" + + input: + tuple val(meta), path(fasta) + path card + path wildcard + + output: + tuple val(meta), path("*.json"), emit: json + tuple val(meta), path("*.txt"), emit: tsv + tuple val(meta), path("temp/"), emit: tmp + env 'RGI_VERSION', emit: tool_version + env 'DB_VERSION', emit: db_version + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // This customizes the command: rgi load + def args2 = task.ext.args2 ?: '' + // This customizes the command: rgi main + def prefix = task.ext.prefix ?: "${meta.id}" + def load_wildcard = "" + + if (wildcard) { + load_wildcard = """ \\ + --wildcard_annotation ${wildcard}/wildcard_database_v\$DB_VERSION.fasta \\ + --wildcard_annotation_all_models ${wildcard}/wildcard_database_v\$DB_VERSION\\_all.fasta \\ + --wildcard_index ${wildcard}/wildcard/index-for-model-sequences.txt \\ + --amr_kmers ${wildcard}/wildcard/all_amr_61mers.txt \\ + --kmer_database ${wildcard}/wildcard/61_kmer_db.json \\ + --kmer_size 61 + """ + } + 
+ """ + DB_VERSION=\$(ls ${card}/card_database_*_all.fasta | sed "s/${card}\\/card_database_v\\([0-9].*[0-9]\\).*/\\1/") + + rgi \\ + load \\ + ${args} \\ + --card_json ${card}/card.json \\ + --debug --local \\ + --card_annotation ${card}/card_database_v\$DB_VERSION.fasta \\ + --card_annotation_all_models ${card}/card_database_v\$DB_VERSION\\_all.fasta \\ + ${load_wildcard} + + rgi \\ + main \\ + ${args2} \\ + --num_threads ${task.cpus} \\ + --output_file ${prefix} \\ + --input_sequence ${fasta} + + mkdir temp/ + for FILE in *.xml *.fsa *.{nhr,nin,nsq} *.draft *.potentialGenes *{variant,rrna,protein,predictedGenes,overexpression,homolog}.json; do [[ -e \$FILE ]] && mv \$FILE temp/; done + + RGI_VERSION=\$(rgi main --version) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rgi: \$(echo \$RGI_VERSION) + rgi-database: \$(echo \$DB_VERSION) + END_VERSIONS + """ + + stub: + """ + mkdir -p temp + touch test.json + touch test.txt + + RGI_VERSION=\$(rgi main --version) + DB_VERSION=stub_version + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rgi: \$(echo \$RGI_VERSION) + rgi-database: \$(echo \$DB_VERSION) + END_VERSIONS + """ +} diff --git a/modules/nf-core/rgi/main/meta.yml b/modules/nf-core/rgi/main/meta.yml new file mode 100644 index 00000000..f8b102f6 --- /dev/null +++ b/modules/nf-core/rgi/main/meta.yml @@ -0,0 +1,102 @@ +name: rgi_main +description: Predict antibiotic resistance from protein or nucleotide data +keywords: + - bacteria + - fasta + - antibiotic resistance +tools: + - rgi: + description: This tool provides a preliminary annotation of your DNA sequence(s) + based upon the data available in The Comprehensive Antibiotic Resistance Database + (CARD). Hits to genes tagged with Antibiotic Resistance ontology terms will + be highlighted. 
As CARD expands to include more pathogens, genomes, plasmids, + and ontology terms this tool will grow increasingly powerful in providing first-pass + detection of antibiotic resistance associated genes. See license at CARD website + homepage: https://card.mcmaster.ca + documentation: https://github.com/arpcard/rgi + tool_dev_url: https://github.com/arpcard/rgi + doi: "10.1093/nar/gkz935" + licence: ["https://card.mcmaster.ca/about"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleotide or protein sequences in FASTA format + pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}" + ontologies: [] + - card: + type: directory + description: Directory containing the CARD database. This is expected to be the + unarchived but otherwise unaltered download folder (see RGI documentation for + download instructions). + pattern: "*/" + - wildcard: + type: directory + description: Directory containing the WildCARD database (optional). This is expected + to be the unarchived but otherwise unaltered download folder (see RGI documentation + for download instructions). + pattern: "*/" +output: + json: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: JSON formatted file with RGI results + pattern: "*.{json}" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: Tab-delimited file with RGI results + pattern: "*.{txt}" + ontologies: [] + tmp: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - temp/: + type: directory + description: Directory containing various intermediate files + pattern: "temp/" + tool_version: + - RGI_VERSION: + type: string + description: The version of the tool in string format (useful for downstream + tools such as hAMRronization) + db_version: + - DB_VERSION: + type: string + description: The version of the used database in string format (useful for downstream + tools such as hAMRronization) + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@rpetit3" + - "@jfy133" + - "@jasmezz" +maintainers: + - "@rpetit3" + - "@jfy133" + - "@jasmezz" diff --git a/modules/nf-core/rgi/main/tests/main.nf.test b/modules/nf-core/rgi/main/tests/main.nf.test new file mode 100644 index 00000000..fc4a5616 --- /dev/null +++ b/modules/nf-core/rgi/main/tests/main.nf.test @@ -0,0 +1,94 @@ +nextflow_process { + + name "Test Process RGI_MAIN" + script "../main.nf" + process "RGI_MAIN" + + tag "modules" + tag "modules_nfcore" + tag "rgi" + tag "rgi/main" + tag "rgi/cardannotation" + tag "untar" + + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + file('https://card.mcmaster.ca/latest/data', checkIfExists: true).copyTo('card-data.tar.bz2') + + input[0] = [ + [ ], + file("card-data.tar.bz2") + ] + """ + } + } + + run("RGI_CARDANNOTATION") { + script "modules/nf-core/rgi/cardannotation" + process { + """ + input[0] = UNTAR.out.untar.map{ it[1] } + """ + } + } + } + + + test("rgi/main - haemophilus_influenzae - genome_fna_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = RGI_CARDANNOTATION.out.db + input[2] = [] + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot( + process.out.versions, + process.out.tsv, + process.out.json, + file(process.out.tmp.get(0).get(1)).list().sort(), + process.out.tool_version, + process.out.db_version, + ).match() } + ) + } + } + + test("rgi/main - haemophilus_influenzae - genome_fna_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = RGI_CARDANNOTATION.out.db + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rgi/main/tests/main.nf.test.snap b/modules/nf-core/rgi/main/tests/main.nf.test.snap new file mode 100644 index 00000000..bb326ac6 --- /dev/null +++ b/modules/nf-core/rgi/main/tests/main.nf.test.snap @@ -0,0 +1,143 @@ +{ + "rgi/main - haemophilus_influenzae - genome_fna_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + [ + + ] + ] + ], + "3": [ + "6.0.5" + ], + "4": [ + "stub_version" + ], + "5": [ + "versions.yml:md5,b0808f9aef5a00d6542969c6dbd1c891" + ], + "db_version": [ + "stub_version" + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tmp": [ + [ + { + "id": "test", + "single_end": false + }, + [ + + ] + ] + ], + "tool_version": [ + "6.0.5" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b0808f9aef5a00d6542969c6dbd1c891" + ] + } + ], + "meta": { + 
"nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-03T10:03:18.046807023" + }, + "rgi/main - haemophilus_influenzae - genome_fna_gz": { + "content": [ + [ + "versions.yml:md5,1c882aa66647fa7275d0c9fd6d2dda5f" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,9d7754551163e020beed52a8bc14ce83" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.json:md5,6c403fb8e2f24b7c88be27ba5a30ca70" + ] + ], + [ + "genome.fna.gz.temp.uncompressed.fsa", + "genome.fna.gz.temp.uncompressed.fsa.temp.blastRes.rrna.xml", + "genome.fna.gz.temp.uncompressed.fsa.temp.contig.fsa", + "genome.fna.gz.temp.uncompressed.fsa.temp.contig.fsa.blastRes.xml", + "genome.fna.gz.temp.uncompressed.fsa.temp.contigToORF.fsa", + "genome.fna.gz.temp.uncompressed.fsa.temp.db.nhr", + "genome.fna.gz.temp.uncompressed.fsa.temp.db.nin", + "genome.fna.gz.temp.uncompressed.fsa.temp.db.nsq", + "genome.fna.gz.temp.uncompressed.fsa.temp.draft", + "genome.fna.gz.temp.uncompressed.fsa.temp.homolog.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.overexpression.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.potentialGenes", + "genome.fna.gz.temp.uncompressed.fsa.temp.predictedGenes.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.predictedGenes.protein.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.rrna.json", + "genome.fna.gz.temp.uncompressed.fsa.temp.variant.json" + ], + [ + "6.0.5" + ], + [ + "4.0.1" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-03T10:03:00.004608398" + } +} diff --git a/nextflow.config b/nextflow.config index 3dab9ef2..623e4770 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,6 +59,10 @@ params { use_merops = false use_uniref = false use_metals = false + use_antismash = false + use_tcdb = false + use_rgi = false + use_card = false use_vog = false // TODO: Add vog annotation, not well supported currently use_viral = false // TODO: Add viral annotation, not well 
supported currently // use_viral = false // TODO: Add viral annotation @@ -140,6 +144,12 @@ params { methyl_db = "${launchDir}/databases/methyl/" // Metals metals_db = "${launchDir}/databases/metals/" + // antiSMASH + antismash_db = "${launchDir}/databases/antismash/" + // rgi card + card_db = "${launchDir}/databases/card/" + // TCDB + tcdb_db = "${launchDir}/databases/tcdb/" // SQL annotation descriptions database sql_descriptions_db = "${launchDir}/databases/db_descriptions/description_db.sqlite" diff --git a/nextflow_schema.json b/nextflow_schema.json index 44854bf1..d0d0f99e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -186,6 +186,22 @@ "type": "boolean", "description": "Use the Metals database for annotation." }, + "use_antismash": { + "type": "boolean", + "description": "Use the antiSMASH database, currently experimental. Raw antiSMASH output only." + }, + "use_rgi": { + "type": "boolean", + "description": "Use RGI AMR analysis tool, currently experimental. Raw RGI output only." + }, + "use_card": { + "type": "boolean", + "description": "Use the CARD database for annotation." + }, + "use_tcdb": { + "type": "boolean", + "description": "Use the TCDB database for annotation." 
+ }, "use_vog": { "type": "boolean" }, @@ -340,6 +356,21 @@ "default": "${launchDir}/databases/metals/", "hidden": true }, + "antismash_db": { + "type": "string", + "default": "${launchDir}/databases/antismash/", + "hidden": true + }, + "card_db": { + "type": "string", + "default": "${launchDir}/databases/card/", + "hidden": true + }, + "tcdb_db": { + "type": "string", + "default": "${launchDir}/databases/tcdb/", + "hidden": true + }, "pfam_mmseq_db": { "type": "string", "default": "${launchDir}/databases/pfam/mmseqs/", diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index 3316fb82..94f48ad3 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -28,6 +28,10 @@ workflow ANNOTATE { use_merops use_uniref use_metals + use_antismash + use_rgi + use_card + use_tcdb use_vog main: @@ -38,6 +42,9 @@ workflow ANNOTATE { ch_quast_stats = default_sheet ch_collected_fna = default_sheet + ch_gene_gff = default_sheet + ch_filtered_fasta = default_sheet + ch_called_genes = default_sheet if (call){ fasta_name = ch_fasta.map { it[0] } @@ -65,6 +72,9 @@ workflow ANNOTATE { ch_gene_locs = CALL.out.ch_gene_locs ch_called_proteins = CALL.out.ch_called_proteins ch_collected_fna = CALL.out.ch_collected_fna + ch_gene_gff = CALL.out.ch_gene_gff + ch_filtered_fasta = CALL.out.ch_filtered_fasta + ch_called_genes = CALL.out.ch_called_genes } else { @@ -98,12 +108,28 @@ workflow ANNOTATE { ch_gene_locs = GENE_LOCS.out.prodigal_locs_tsv // n_fastas = file("$params.input_genes/${params.genes_fmt}").size() } + ch_antismash_map = ch_filtered_fasta + .map { file -> + def meta = [:] + meta.id = file.getBaseName() + tuple(meta, file) + } + ch_rgi_map = ch_called_genes + .map { + file_name, file -> + def meta = [:] + meta.id = file_name + tuple(meta, file) + } if (params.annotate){ DB_SEARCH( ch_gene_locs, ch_called_proteins, + ch_antismash_map, + ch_rgi_map, + ch_gene_gff, default_sheet, use_kegg, use_kofam, @@ -118,6 +144,10 @@ workflow 
ANNOTATE { use_merops, use_uniref, use_metals, + use_antismash, + use_rgi, + use_card, + use_tcdb, use_vog ) ch_combined_annotations = DB_SEARCH.out.ch_combined_annotations diff --git a/subworkflows/local/call.nf b/subworkflows/local/call.nf index fd34c1c4..9e5fd89f 100644 --- a/subworkflows/local/call.nf +++ b/subworkflows/local/call.nf @@ -61,4 +61,6 @@ workflow CALL { ch_collected_faa ch_collected_fna ch_collected_fasta + ch_gene_gff + ch_filtered_fasta } diff --git a/subworkflows/local/db_search.nf b/subworkflows/local/db_search.nf index a4edfec6..a9a2fe3c 100644 --- a/subworkflows/local/db_search.nf +++ b/subworkflows/local/db_search.nf @@ -22,6 +22,8 @@ include { MMSEQS_SEARCH as MMSEQS_SEARCH_CANTHYD } from "../../modules/lo include { MMSEQS_SEARCH as MMSEQS_SEARCH_KEGG } from "../../modules/local/annotate/mmseqs_search.nf" include { MMSEQS_SEARCH as MMSEQS_SEARCH_UNIREF } from "../../modules/local/annotate/mmseqs_search.nf" include { MMSEQS_SEARCH as MMSEQS_SEARCH_PFAM } from "../../modules/local/annotate/mmseqs_search.nf" +include { MMSEQS_SEARCH as MMSEQS_SEARCH_CARD } from "../../modules/local/annotate/mmseqs_search.nf" +include { MMSEQS_SEARCH as MMSEQS_SEARCH_TCDB } from "../../modules/local/annotate/mmseqs_search.nf" include { ADD_SQL_DESCRIPTIONS as SQL_UNIREF } from "../../modules/local/annotate/add_sql_descriptions.nf" include { ADD_SQL_DESCRIPTIONS as SQL_VIRAL } from "../../modules/local/annotate/add_sql_descriptions.nf" @@ -32,8 +34,8 @@ include { ADD_SQL_DESCRIPTIONS as SQL_DBCAN } from "../../modules/lo include { HMM_SEARCH as HMM_SEARCH_KOFAM } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_DBCAN } from "../../modules/local/annotate/hmmsearch.nf" -include { HMM_SEARCH as HMM_SEARCH_DBCAN3 } from "../../modules/local/annotate/hmmsearch.nf" -include { HMM_SEARCH as HMM_SEARCH_DBCAN3_SUB } from "../../modules/local/annotate/hmmsearch.nf" +include { HMM_SEARCH as HMM_SEARCH_DBCAN3 } from 
"../../modules/local/annotate/hmmsearch.nf" +include { HMM_SEARCH as HMM_SEARCH_DBCAN3_SUB } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_VOG } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_CAMPER } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_CANTHYD } from "../../modules/local/annotate/hmmsearch.nf" @@ -41,6 +43,9 @@ include { HMM_SEARCH as HMM_SEARCH_SULFUR } from "../../modules/lo include { HMM_SEARCH as HMM_SEARCH_FEGENIE } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_METALS } from "../../modules/local/annotate/hmmsearch.nf" +include { ANTISMASH_ANTISMASH } from '../../modules/nf-core/antismash/antismash/main' +include { RGI_MAIN } from '../../modules/nf-core/rgi/main/main' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW TO DB_SEARCH @@ -51,6 +56,9 @@ workflow DB_SEARCH { take: ch_gene_locs // channel: path(gene_locs_tsv) ] ch_called_proteins // channel: [ val(input_fasta name), path(called_proteins_file.faa) ] + ch_antismash_map + ch_rgi_map + ch_gene_gff default_sheet // Path to dummy sheet use_kegg use_kofam @@ -65,6 +73,10 @@ workflow DB_SEARCH { use_merops use_uniref use_metals + use_antismash + use_rgi + use_card + use_tcdb use_vog main: @@ -83,6 +95,10 @@ workflow DB_SEARCH { use_merops, use_uniref, use_metals, + use_antismash, + use_rgi, + use_card, + use_tcdb, use_vog ) @@ -112,6 +128,8 @@ workflow DB_SEARCH { pfam_name = "pfam" vogdb_name = "vogdb" metals_name = "metals" + card_name = "card" + tcdb_name = "tcdb" def formattedOutputChannels = channel.of() @@ -319,6 +337,31 @@ workflow DB_SEARCH { ch_metals_formatted = HMM_SEARCH_METALS.out.formatted_hits formattedOutputChannels = formattedOutputChannels.mix(ch_metals_formatted) } + // antiSMASH + if (use_antismash) { + ANTISMASH_ANTISMASH(ch_antismash_map, 
DB_CHANNEL_SETUP.out.ch_antismash_db, ch_gene_gff) + } + // RGI with CARD + if (use_rgi) { + + RGI_MAIN(ch_rgi_map, DB_CHANNEL_SETUP.out.ch_card_db, []) + } + // CARD annotation + if (use_card) { + ch_combined_query_locs_card = ch_mmseqs_query.join(ch_gene_locs) + MMSEQS_SEARCH_CARD( ch_combined_query_locs_card, DB_CHANNEL_SETUP.out.ch_card_db, params.bit_score_threshold, params.rbh_bit_score_threshold, default_sheet, card_name ) + ch_card_mmseqs_formatted = MMSEQS_SEARCH_CARD.out.mmseqs_search_formatted_out + + formattedOutputChannels = formattedOutputChannels.mix(ch_card_mmseqs_formatted) + } + // TCDB annotation + if (use_tcdb) { + ch_combined_query_locs_tcdb = ch_mmseqs_query.join(ch_gene_locs) + MMSEQS_SEARCH_TCDB( ch_combined_query_locs_tcdb, DB_CHANNEL_SETUP.out.ch_tcdb_db, params.bit_score_threshold, params.rbh_bit_score_threshold, default_sheet, tcdb_name ) + ch_tcdb_mmseqs_formatted = MMSEQS_SEARCH_TCDB.out.mmseqs_search_formatted_out + + formattedOutputChannels = formattedOutputChannels.mix(ch_tcdb_mmseqs_formatted) + } // VOGdb annotation if (use_vog) { ch_combined_proteins_locs = ch_called_proteins.join(ch_gene_locs) @@ -371,6 +414,10 @@ workflow DB_CHANNEL_SETUP { use_merops use_uniref use_metals + use_antismash + use_rgi + use_card + use_tcdb use_vog @@ -380,6 +427,8 @@ workflow DB_CHANNEL_SETUP { ch_kegg_db = Channel.empty() ch_kofam_db = Channel.empty() ch_dbcan_db = Channel.empty() + ch_dbcan3_db = Channel.empty() + ch_dbcan3_sub_db = Channel.empty() ch_camper_hmm_db = Channel.empty() ch_camper_mmseqs_db = Channel.empty() ch_camper_mmseqs_list = Channel.empty() @@ -389,6 +438,9 @@ workflow DB_CHANNEL_SETUP { ch_sulfur_db = Channel.empty() ch_uniref_db = Channel.empty() ch_metals_db = Channel.empty() + ch_antismash_db = Channel.empty() + ch_card_db = Channel.empty() + ch_tcdb_db = Channel.empty() ch_methyl_db = Channel.empty() ch_fegenie_db = Channel.empty() ch_canthyd_hmm_db = Channel.empty() @@ -449,6 +501,23 @@ workflow DB_CHANNEL_SETUP { 
ch_metals_db = file(params.metals_db).exists() ? file(params.metals_db) : error("Error: If using --annotate, you must supply prebuilt databases. METALS database file not found at ${params.metals_db}") } + if (use_antismash) { + ch_antismash_db = file(params.antismash_db).exists() ? file(params.antismash_db) : error("Error: If using --annotate, you must supply prebuilt databases. antismash database file not found at ${params.antismash_db}") + } + + if (use_rgi || use_card) { + ch_card_db = file(params.card_db).exists() ? file(params.card_db) : error("Error: If using --annotate, you must supply prebuilt databases. rgi database file not found at ${params.card_db}") + // rgi software uses the raw fasta, but card search we use the mmseqs database + if (use_card) { + index_mmseqs = true + } + } + + if (use_tcdb) { + ch_tcdb_db = file(params.tcdb_db).exists() ? file(params.tcdb_db) : error("Error: If using --annotate, you must supply prebuilt databases. tcdb database file not found at ${params.tcdb_db}") + index_mmseqs = true + } + if (use_methyl) { ch_methyl_db = file(params.methyl_db).exists() ? file(params.methyl_db) : error("Error: If using --annotate, you must supply prebuilt databases. 
METHYL database file not found at ${params.methyl_db}") index_mmseqs = true @@ -489,6 +558,9 @@ workflow DB_CHANNEL_SETUP { ch_sulfur_db ch_uniref_db ch_metals_db + ch_antismash_db + ch_card_db + ch_tcdb_db ch_methyl_db ch_fegenie_db ch_canthyd_hmm_db diff --git a/subworkflows/local/utils_nfcore_dram_pipeline/main.nf b/subworkflows/local/utils_nfcore_dram_pipeline/main.nf index 58b86c77..c72af0a2 100644 --- a/subworkflows/local/utils_nfcore_dram_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_dram_pipeline/main.nf @@ -67,10 +67,10 @@ workflow PIPELINE_INITIALISATION { if (params.anno_dbs != "") { anno_dbs = params.anno_dbs.tokenize(',').collect { it.trim().toLowerCase() } value_for_all = 'all' - use_kegg = getDBFlag(anno_dbs, 'kegg', value_for_all) - use_kofam = getDBFlag(anno_dbs, 'kofam', value_for_all) - use_fegenie = getDBFlag(anno_dbs, 'fegenie', value_for_all) - use_sulfur = getDBFlag(anno_dbs, 'sulfur', value_for_all) + use_kegg = getDBFlag(anno_dbs, 'kegg', value_for_all, params.kegg_db) + use_kofam = getDBFlag(anno_dbs, 'kofam', value_for_all, params.kofam_db) + use_fegenie = getDBFlag(anno_dbs, 'fegenie', value_for_all, params.fegenie_db) + use_sulfur = getDBFlag(anno_dbs, 'sulfur', value_for_all, params.sulfur_db) // use_pfam = getDBFlag(anno_dbs, 'pfam', value_for_all) // PFAM database is currently disabled in this pipeline due to a bug in the DRAM2 implementation with the PFAM database. It will be re-enabled in a future release. 
} diff --git a/subworkflows/local/utils_pipeline_setup.nf b/subworkflows/local/utils_pipeline_setup.nf index 5e40ab25..e208198d 100644 --- a/subworkflows/local/utils_pipeline_setup.nf +++ b/subworkflows/local/utils_pipeline_setup.nf @@ -8,8 +8,12 @@ include { getWorkflowVersion } from '../nf-core/utils_nfcore_pipeline' */ -def getDBFlag(db_list, db_name, value_for_all) { +def getDBFlag(db_list, db_name, value_for_all, db_path) { if (db_list.contains(value_for_all)) { + if (!file(db_path).exists()) { + log.warn("Database $db_name not found at path $db_path, skipping") + return false + } return true } else if (db_list.contains(db_name)) { return true diff --git a/workflows/dram.nf b/workflows/dram.nf index 53481ba2..1841366d 100644 --- a/workflows/dram.nf +++ b/workflows/dram.nf @@ -73,6 +73,7 @@ workflow DRAM { use_kegg = params.use_kegg use_kofam = params.use_kofam use_dbcan = params.use_dbcan + use_dbcan3 = params.use_dbcan3 use_camper = params.use_camper use_fegenie = params.use_fegenie use_methyl = params.use_methyl @@ -82,26 +83,33 @@ workflow DRAM { use_merops = params.use_merops use_uniref = params.use_uniref use_metals = params.use_metals + use_antismash = params.use_antismash + use_rgi = params.use_rgi + use_card = params.use_card + use_tcdb = params.use_tcdb use_vog = params.use_vog - if (params.anno_dbs != "") { anno_dbs = params.anno_dbs.tokenize(',').collect { it.trim().toLowerCase() } value_for_all = 'all' - use_kegg = getDBFlag(anno_dbs, 'kegg', value_for_all) - use_kofam = getDBFlag(anno_dbs, 'kofam', value_for_all) - use_dbcan = getDBFlag(anno_dbs, 'dbcan', value_for_all) - use_dbcan3 = getDBFlag(anno_dbs, 'dbcan3', value_for_all) - use_camper = getDBFlag(anno_dbs, 'camper', value_for_all) - use_fegenie = getDBFlag(anno_dbs, 'fegenie', value_for_all) - use_methyl = getDBFlag(anno_dbs, 'methyl', value_for_all) - use_canthyd = getDBFlag(anno_dbs, 'canthyd', value_for_all) - use_sulfur = getDBFlag(anno_dbs, 'sulfur', value_for_all) + use_kegg = 
getDBFlag(anno_dbs, 'kegg', value_for_all, params.kegg_db) + use_kofam = getDBFlag(anno_dbs, 'kofam', value_for_all, params.kofam_db) + use_dbcan = getDBFlag(anno_dbs, 'dbcan', value_for_all, params.dbcan_db) + use_dbcan3 = getDBFlag(anno_dbs, 'dbcan3', value_for_all, params.dbcan3_db) + use_camper = getDBFlag(anno_dbs, 'camper', value_for_all, params.camper_hmm_db) + use_fegenie = getDBFlag(anno_dbs, 'fegenie', value_for_all, params.fegenie_db) + use_methyl = getDBFlag(anno_dbs, 'methyl', value_for_all, params.methyl_db) + use_canthyd = getDBFlag(anno_dbs, 'canthyd', value_for_all, params.canthyd_hmm_db) + use_sulfur = getDBFlag(anno_dbs, 'sulfur', value_for_all, params.sulfur_db) // use_pfam = getDBFlag(anno_dbs, 'pfam', value_for_all) // PFAM database is currently disabled in this pipeline due to a bug in the DRAM2 implementation with the PFAM database. It will be re-enabled in a future release. - use_merops = getDBFlag(anno_dbs, 'merops', value_for_all) - use_uniref = getDBFlag(anno_dbs, 'uniref', value_for_all) - use_metals = getDBFlag(anno_dbs, 'metals', value_for_all) - use_vog = getDBFlag(anno_dbs, 'vog', value_for_all) + use_merops = getDBFlag(anno_dbs, 'merops', value_for_all, params.merops_db) + use_uniref = getDBFlag(anno_dbs, 'uniref', value_for_all, params.uniref_db) + use_metals = getDBFlag(anno_dbs, 'metals', value_for_all, params.metals_db) + use_antismash = getDBFlag(anno_dbs, 'antismash', value_for_all, params.antismash_db) + use_rgi = getDBFlag(anno_dbs, 'rgi', value_for_all, params.card_db) + use_card = getDBFlag(anno_dbs, 'card', value_for_all, params.card_db) + use_tcdb = getDBFlag(anno_dbs, 'tcdb', value_for_all, params.tcdb_db) + use_vog = getDBFlag(anno_dbs, 'vog', value_for_all, params.vog_db) } @@ -241,6 +249,10 @@ workflow DRAM { use_merops, use_uniref, use_metals, + use_antismash, + use_rgi, + use_card, + use_tcdb, use_vog ) From e07cd74e8d60fca7513f645c04d0956760c74768 Mon Sep 17 00:00:00 2001 From: Madeline Scyphers Date: Tue, 14 Apr 
2026 23:31:01 -0600 Subject: [PATCH 2/3] feat: Add DRAM DB HMMs = Add DRAM team curated HMM database as new annotation db option. Work in progress and testing database, but can be found on GLOBUS. --- bin/combine_annotations.py | 2 + bin/hmm_parser.py | 26 +++-- bin/hmm_search.py | 18 +++- bin/utils/click_utils.py | 14 ++- modules/local/annotate/hmmsearch.nf | 5 +- nextflow.config | 4 + nextflow_schema.json | 14 +++ subworkflows/local/annotate.nf | 2 + subworkflows/local/db_search.nf | 148 ++++++++++++++++------------ workflows/dram.nf | 3 + 10 files changed, 155 insertions(+), 81 deletions(-) diff --git a/bin/combine_annotations.py b/bin/combine_annotations.py index 0fed20c2..f3707720 100755 --- a/bin/combine_annotations.py +++ b/bin/combine_annotations.py @@ -170,6 +170,8 @@ def combine_annotations(annotations_dir, genes_dir, output, threads): combined_data[FASTA_COLUMN] = combined_data[FASTA_COLUMN].where( mask, other=combined_data[FASTA_COLUMN + "2"] ) + # TODO: fix the merge so it doesn't make this column + combined_data = combined_data.drop(columns=FASTA_COLUMN + "2") combined_data = convert_bit_scores_to_numeric(combined_data) diff --git a/bin/hmm_parser.py b/bin/hmm_parser.py index 73490b43..6b7e910d 100755 --- a/bin/hmm_parser.py +++ b/bin/hmm_parser.py @@ -196,11 +196,6 @@ def main(hmm_domtbl, hmm_info_path, ec_from_info, gene_locs, db_name, output): hits["perc_cov"] = (hits["model_end"] - hits["model_start"] + 1) / hits[ "query_length" ] - hits[f"{db_name}_id"] = hits["query_name"].str.replace(r".hmm", "", regex=True) - all_hits = get_all_hits(hits, db_name) - all_hits.name = f"{db_name}_ids" - hits = hits.merge(all_hits, how="left", left_on="query_id", right_index=True) - hmm_sheet = False if hmm_info_path is not None: hmm_sheet = True @@ -228,8 +223,11 @@ def main(hmm_domtbl, hmm_info_path, ec_from_info, gene_locs, db_name, output): pass elif "definition" in hmm_info.columns: hmm_info = hmm_info.rename(columns={"definition": "description"}) - elif 
pd.api.types.is_string_dtype(hmm_info.iloc[:, -1]): - hmm_info = hmm_info.rename(columns={hmm_info.columns[-1]: "description"}) + elif ( + pd.api.types.is_string_dtype(hmm_info.iloc[:, -1]) + and hmm_info.columns[-1] not in merge_cols + ): # don't need to worry about description in merge cols, cause already checked + hmm_info["description"] = hmm_info[hmm_info.columns[-1]].copy() else: raise_on_ec = True @@ -243,10 +241,13 @@ def main(hmm_domtbl, hmm_info_path, ec_from_info, gene_locs, db_name, output): ) merge_cols = [col for col in merge_cols if col in hmm_info.columns] - + print(hmm_info.columns) + print(hmm_info) hits = hits.merge( hmm_info[merge_cols], how="left", left_on="query_name", right_index=True ) + print(hits.columns) + print(hits) hits_sig = sig_scores_row_by_row(hits, db_name=db_name) drop_cols = [ col @@ -268,6 +269,15 @@ def main(hmm_domtbl, hmm_info_path, ec_from_info, gene_locs, db_name, output): # df.to_csv(output, index=False) return + hits_sig[f"{db_name}_id"] = hits_sig["query_name"].str.replace( + r".hmm", "", regex=True + ) + all_hits_sig = get_all_hits(hits_sig, db_name) + all_hits_sig.name = f"{db_name}_ids" + hits_sig = hits_sig.merge( + all_hits_sig, how="left", left_on="query_id", right_index=True + ) + + # Get the best hit # hits_sig = hits_sig.sort_values(['full_evalue', "domain_ievalue", "perc_cov"], ascending=[True, True, False]).drop_duplicates(subset=["query_id"]) hits_sig = hits_sig.sort_values( diff --git a/bin/hmm_search.py b/bin/hmm_search.py index ed096a37..e48ce7d4 100755 --- a/bin/hmm_search.py +++ b/bin/hmm_search.py @@ -19,18 +19,23 @@ help="Path to the input fasta to search against", ) @click.option("--e_value", type=float, help="e value cutoff for filtering") +@click.option("--t_value", type=float, help="bitscore cutoff for filtering") @click.option( "--output_file", type=click.Path(), help="Path to output file", ) @click.option("--cpus", type=int, help="number of cpu core to run HMMER with") -def main(hmm, input_file, 
e_value, output_file, cpus): +def main(hmm, input_file, e_value, t_value, output_file, cpus): t1 = time.time() hmm = Path(hmm) - - hmm_paths = hmm.parent.glob(hmm.name) + if hmm.is_dir(): # if directory passed, glob all hmms in dir + hmm = hmm / "*.hmm" + if "*" in str(hmm) or "?" in str(hmm): # check if path is glob path + hmm_paths = hmm.parent.glob(hmm.name) + else: + hmm_paths = [hmm] hmms = [] for path in hmm_paths: @@ -38,6 +43,11 @@ def main(hmm, input_file, e_value, output_file, cpus): hmms.extend(hmm_file) print(hmms) + kw = {} + if t_value: + kw["T"] = t_value + elif e_value: + kw["E"] = e_value with open(output_file, "wb") as out_fh: with pyhmmer.easel.SequenceFile( @@ -46,7 +56,7 @@ def main(hmm, input_file, e_value, output_file, cpus): seqs = pyhmmer.easel.DigitalSequenceBlock(alphabet) seqs.extend(sf) first = True - for hits in pyhmmer.hmmer.hmmsearch(hmms, seqs, cpus=cpus, E=e_value): + for hits in pyhmmer.hmmer.hmmsearch(hmms, seqs, cpus=cpus, **kw): hits.write(out_fh, format="domains", header=first) first = False # total = sum(len(hits) for hits in pyhmmer.hmmer.hmmsearch(hmms, seqs, cpus=8, E=1e-15)) diff --git a/bin/utils/click_utils.py b/bin/utils/click_utils.py index e563647e..303e532e 100755 --- a/bin/utils/click_utils.py +++ b/bin/utils/click_utils.py @@ -1,13 +1,21 @@ #!/usr/bin/env python -def validate_comma_separated(ctx, param, value, split=(",", " ")): +def validate_comma_separated(ctx, param, value, split=(",", " "), converter=None): if not value: return [] if isinstance(value, (list, tuple)): s = split if isinstance(split, str) else split[0] value = s.join(value) if isinstance(split, str): + split = [split] return value.split(split) if isinstance(split, (list, tuple)): + sentinel = "|SENTINEL|" for s in split: - value = value.replace(s, ",") - return [val.strip() for val in value.split(",")] + value = value.replace(s, sentinel) + ls = [] + for val in value.split(sentinel): + val = val.strip() + if converter: + val = converter(val) + 
ls.append(val) + return ls diff --git a/modules/local/annotate/hmmsearch.nf b/modules/local/annotate/hmmsearch.nf index 52c9dbd9..1007f28a 100644 --- a/modules/local/annotate/hmmsearch.nf +++ b/modules/local/annotate/hmmsearch.nf @@ -22,12 +22,13 @@ process HMM_SEARCH { script: def args = task.ext.args ?: "" def ec_flag = ec_from_info ? "--ec_from_info" : "" + def cutoff_flag = e_value ? "--e_value ${e_value}" : "" """ hmm_search.py \\ - --hmm ${database_loc}/*.hmm \\ + --hmm ${database_loc} \\ --input_file ${fasta} \\ - --e_value ${e_value} \\ + ${cutoff_flag} \\ --output_file ${input_fasta}_hmmsearch.out \\ --cpus ${task.cpus} diff --git a/nextflow.config b/nextflow.config index 623e4770..3801713f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -45,6 +45,7 @@ params { /* Annotate Options */ // Annotation Database Flags + use_dram_db = false use_kegg = false use_kofam = false use_dbcan = false @@ -103,6 +104,9 @@ params { metals_e_value = "1e-3" // Database locations + // DRAM DB + dram_db = "${launchDir}/databases/dram_db/dram_db.hmm" + dram_db_list = "${launchDir}/databases/dram_db/dram_db_scores.tsv" // KEGG kegg_db = "${launchDir}/databases/kegg/" // Uniref diff --git a/nextflow_schema.json b/nextflow_schema.json index d0d0f99e..689e2e17 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -134,6 +134,10 @@ "type": "string", "description": "Alternative way to specify database list for annotation. Comma separated list of databases to include in the annotation. Use `all` for all. Example: 'kegg,dbcan,kofam,merops,viral,camper,cant_hyd,fegenie,sulfur,methyl,uniref,pfam,vogdb'. When in doubt, use the name after `use_` for each database. This option overrides individual `use_` database flags. (WARNING, this option name may change in the future)" }, + "use_dram_db": { + "type": "boolean", + "description": "Use the DRAM team specialized databases for annotation." 
+ }, "use_camper": { "type": "boolean", "description": "Use the CAMPer database for annotation." @@ -341,6 +345,16 @@ "fa_icon": "fas fa-database", "description": "File paths to databases used in the workflow.", "properties": { + "dram_db": { + "type": "string", + "default": "${launchDir}/databases/dram_db.hmm", + "hidden": true + }, + "dram_db_list": { + "type": "string", + "default": "${launchDir}/databases/dram_db/dram_db_scores.tsv", + "hidden": true + }, "kegg_db": { "type": "string", "default": "${launchDir}/databases/kegg/", diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index 94f48ad3..e8343162 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -32,6 +32,7 @@ workflow ANNOTATE { use_rgi use_card use_tcdb + use_dram_db use_vog main: @@ -148,6 +149,7 @@ workflow ANNOTATE { use_rgi, use_card, use_tcdb, + use_dram_db, use_vog ) ch_combined_annotations = DB_SEARCH.out.ch_combined_annotations diff --git a/subworkflows/local/db_search.nf b/subworkflows/local/db_search.nf index a9a2fe3c..bebfda76 100644 --- a/subworkflows/local/db_search.nf +++ b/subworkflows/local/db_search.nf @@ -36,6 +36,7 @@ include { HMM_SEARCH as HMM_SEARCH_KOFAM } from "../../modules/lo include { HMM_SEARCH as HMM_SEARCH_DBCAN } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_DBCAN3 } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_DBCAN3_SUB } from "../../modules/local/annotate/hmmsearch.nf" +include { HMM_SEARCH as HMM_SEARCH_DRAM_DB } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_VOG } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_CAMPER } from "../../modules/local/annotate/hmmsearch.nf" include { HMM_SEARCH as HMM_SEARCH_CANTHYD } from "../../modules/local/annotate/hmmsearch.nf" @@ -77,6 +78,7 @@ workflow DB_SEARCH { use_rgi use_card use_tcdb + use_dram_db use_vog main: @@ -99,6 
+101,7 @@ workflow DB_SEARCH { use_rgi, use_card, use_tcdb, + use_dram_db, use_vog ) @@ -110,6 +113,7 @@ workflow DB_SEARCH { ch_vog_list = file(params.vog_list) ch_camper_hmm_list = file(params.camper_hmm_list) ch_canthyd_hmm_list = file(params.cant_hyd_hmm_list) + ch_dram_db_hmm_list = file(params.dram_db_list) kegg_name = "kegg" @@ -130,6 +134,7 @@ workflow DB_SEARCH { metals_name = "metals" card_name = "card" tcdb_name = "tcdb" + dram_db_name = "dram_db" def formattedOutputChannels = channel.of() @@ -146,12 +151,11 @@ workflow DB_SEARCH { if (use_kegg) { ch_combined_query_locs_kegg = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_KEGG( ch_combined_query_locs_kegg, DB_CHANNEL_SETUP.out.ch_kegg_db, params.bit_score_threshold, params.rbh_bit_score_threshold, default_sheet, kegg_name ) - ch_kegg_unformatted = MMSEQS_SEARCH_KEGG.out.mmseqs_search_formatted_out + ch_mmseqs_unformatted = MMSEQS_SEARCH_KEGG.out.mmseqs_search_formatted_out - SQL_KEGG(ch_kegg_unformatted, kegg_name, ch_sql_descriptions_db) - ch_kegg_formatted = SQL_KEGG.out.sql_formatted_hits - - formattedOutputChannels = formattedOutputChannels.mix(ch_kegg_formatted) + SQL_KEGG(ch_mmseqs_unformatted, kegg_name, ch_sql_descriptions_db) + ch_mmseqs_formatted = SQL_KEGG.out.sql_formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) } // KOFAM annotation if (use_kofam) { @@ -164,19 +168,18 @@ workflow DB_SEARCH { true, kofam_name ) - ch_kofam_formatted = HMM_SEARCH_KOFAM.out.formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_kofam_formatted) + ch_hmm_formatted = HMM_SEARCH_KOFAM.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) } // PFAM annotation if (use_pfam) { ch_combined_query_locs_pfam = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_PFAM( ch_combined_query_locs_pfam, DB_CHANNEL_SETUP.out.ch_pfam_mmseqs_db, params.bit_score_threshold, params.rbh_bit_score_threshold, default_sheet, pfam_name ) - 
ch_pfam_unformatted = MMSEQS_SEARCH_PFAM.out.mmseqs_search_formatted_out - - SQL_PFAM(ch_pfam_unformatted, pfam_name, ch_sql_descriptions_db) - ch_pfam_formatted = SQL_PFAM.out.sql_formatted_hits + ch_mmseqs_unformatted = MMSEQS_SEARCH_PFAM.out.mmseqs_search_formatted_out - formattedOutputChannels = formattedOutputChannels.mix(ch_pfam_formatted) + SQL_PFAM(ch_mmseqs_unformatted, pfam_name, ch_sql_descriptions_db) + ch_mmseqs_formatted = SQL_PFAM.out.sql_formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) } // dbCAN annotation if (use_dbcan) { @@ -189,10 +192,10 @@ workflow DB_SEARCH { false, dbcan_name ) - ch_dbcan_unformatted = HMM_SEARCH_DBCAN.out.formatted_hits - SQL_DBCAN(ch_dbcan_unformatted, dbcan_name, ch_sql_descriptions_db) - ch_dbcan_formatted = SQL_DBCAN.out.sql_formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_dbcan_formatted) + ch_hmm_unformatted = HMM_SEARCH_DBCAN.out.formatted_hits + SQL_DBCAN(ch_hmm_unformatted, dbcan_name, ch_sql_descriptions_db) + ch_hmm_formatted = SQL_DBCAN.out.sql_formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) } // dbCAN3 annotation if (use_dbcan3) { @@ -205,9 +208,8 @@ workflow DB_SEARCH { false, dbcan3_name ) - ch_dbcan3_formatted = HMM_SEARCH_DBCAN3.out.formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_dbcan3_formatted) - + ch_hmm_formatted = HMM_SEARCH_DBCAN3.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) HMM_SEARCH_DBCAN3_SUB ( ch_combined_proteins_locs, @@ -217,8 +219,8 @@ workflow DB_SEARCH { false, dbcan3_sub_name ) - ch_dbcan3_sub_formatted = HMM_SEARCH_DBCAN3_SUB.out.formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_dbcan3_sub_formatted) + ch_hmm_formatted = HMM_SEARCH_DBCAN3_SUB.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) } // CAMPER annotation if (use_camper) { @@ -232,15 
+234,14 @@ workflow DB_SEARCH { false, camper_name ) - ch_camper_hmm_formatted = HMM_SEARCH_CAMPER.out.formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_camper_hmm_formatted) + ch_hmm_formatted = HMM_SEARCH_CAMPER.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) // MMseqs ch_combined_query_locs_camper = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_CAMPER( ch_combined_query_locs_camper, DB_CHANNEL_SETUP.out.ch_camper_mmseqs_db, params.bit_score_threshold, params.rbh_bit_score_threshold, DB_CHANNEL_SETUP.out.ch_camper_mmseqs_list, camper_name ) - ch_camper_mmseqs_formatted = MMSEQS_SEARCH_CAMPER.out.mmseqs_search_formatted_out - - formattedOutputChannels = formattedOutputChannels.mix(ch_camper_mmseqs_formatted) + ch_mmseqs_formatted = MMSEQS_SEARCH_CAMPER.out.mmseqs_search_formatted_out + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) } // FeGenie annotation if (use_fegenie) { @@ -253,25 +254,23 @@ workflow DB_SEARCH { false, fegenie_name ) - ch_fegenie_formatted = HMM_SEARCH_FEGENIE.out.formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_fegenie_formatted) + ch_hmm_formatted = HMM_SEARCH_FEGENIE.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) } // Methyl annotation if (use_methyl) { ch_combined_query_locs_methyl = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_METHYL( ch_combined_query_locs_methyl, DB_CHANNEL_SETUP.out.ch_methyl_db, params.bit_score_threshold, params.rbh_bit_score_threshold, default_sheet, methyl_name ) - ch_methyl_mmseqs_formatted = MMSEQS_SEARCH_METHYL.out.mmseqs_search_formatted_out - - formattedOutputChannels = formattedOutputChannels.mix(ch_methyl_mmseqs_formatted) + ch_mmseqs_formatted = MMSEQS_SEARCH_METHYL.out.mmseqs_search_formatted_out + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) } // CANT-HYD annotation if (use_canthyd) { // MMseqs 
ch_combined_query_locs_canthyd = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_CANTHYD( ch_combined_query_locs_canthyd, DB_CHANNEL_SETUP.out.ch_canthyd_mmseqs_db, params.bit_score_threshold, params.rbh_bit_score_threshold, DB_CHANNEL_SETUP.out.ch_canthyd_mmseqs_list, canthyd_name ) - ch_canthyd_mmseqs_formatted = MMSEQS_SEARCH_CANTHYD.out.mmseqs_search_formatted_out - - formattedOutputChannels = formattedOutputChannels.mix(ch_canthyd_mmseqs_formatted) + ch_mmseqs_formatted = MMSEQS_SEARCH_CANTHYD.out.mmseqs_search_formatted_out + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) //HMM ch_combined_proteins_locs = ch_called_proteins.join(ch_gene_locs) @@ -283,9 +282,8 @@ workflow DB_SEARCH { false, canthyd_name ) - ch_canthyd_hmm_formatted = HMM_SEARCH_CANTHYD.out.formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_canthyd_hmm_formatted) - + ch_hmm_formatted = HMM_SEARCH_CANTHYD.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) } // Sulfur annotation if (use_sulfur) { @@ -298,30 +296,28 @@ workflow DB_SEARCH { false, sulfur_name ) - ch_sulfur_formatted = HMM_SEARCH_SULFUR.out.formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_sulfur_formatted) + ch_hmm_formatted = HMM_SEARCH_SULFUR.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) } // MEROPS annotation if (use_merops) { ch_combined_query_locs_merops = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_MEROPS( ch_combined_query_locs_merops, DB_CHANNEL_SETUP.out.ch_merops_db, params.bit_score_threshold, params.rbh_bit_score_threshold, default_sheet, merops_name ) - ch_merops_unformatted = MMSEQS_SEARCH_MEROPS.out.mmseqs_search_formatted_out - - SQL_MEROPS(ch_merops_unformatted, merops_name, ch_sql_descriptions_db) - ch_merops_formatted = SQL_MEROPS.out.sql_formatted_hits + ch_mmseqs_unformatted = MMSEQS_SEARCH_MEROPS.out.mmseqs_search_formatted_out - 
formattedOutputChannels = formattedOutputChannels.mix(ch_merops_formatted) + SQL_MEROPS(ch_mmseqs_unformatted, merops_name, ch_sql_descriptions_db) + ch_mmseqs_formatted = SQL_MEROPS.out.sql_formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) } // Uniref annotation if (use_uniref) { ch_combined_query_locs_uniref = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_UNIREF( ch_combined_query_locs_uniref, DB_CHANNEL_SETUP.out.ch_uniref_db, params.bit_score_threshold, params.rbh_bit_score_threshold, default_sheet, uniref_name ) - ch_uniref_unformatted = MMSEQS_SEARCH_UNIREF.out.mmseqs_search_formatted_out + ch_mmseqs_unformatted = MMSEQS_SEARCH_UNIREF.out.mmseqs_search_formatted_out - SQL_UNIREF(ch_uniref_unformatted, uniref_name, ch_sql_descriptions_db) - ch_uniref_formatted = SQL_UNIREF.out.sql_formatted_hits - - formattedOutputChannels = formattedOutputChannels.mix(ch_uniref_formatted) + SQL_UNIREF(ch_mmseqs_unformatted, uniref_name, ch_sql_descriptions_db) + ch_mmseqs_formatted = SQL_UNIREF.out.sql_formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) } // Metals annotation if (use_metals) { @@ -334,8 +330,8 @@ workflow DB_SEARCH { false, metals_name ) - ch_metals_formatted = HMM_SEARCH_METALS.out.formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_metals_formatted) + ch_hmm_formatted = HMM_SEARCH_METALS.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) } // antiSMASH if (use_antismash) { @@ -350,17 +346,29 @@ workflow DB_SEARCH { if (use_card) { ch_combined_query_locs_card = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_CARD( ch_combined_query_locs_card, DB_CHANNEL_SETUP.out.ch_card_db, params.bit_score_threshold, params.rbh_bit_score_threshold, default_sheet, card_name ) - ch_card_mmseqs_formatted = MMSEQS_SEARCH_CARD.out.mmseqs_search_formatted_out - - formattedOutputChannels = 
formattedOutputChannels.mix(ch_card_mmseqs_formatted) + ch_mmseqs_formatted = MMSEQS_SEARCH_CARD.out.mmseqs_search_formatted_out + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) } // TCDB annotation if (use_tcdb) { ch_combined_query_locs_tcdb = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_TCDB( ch_combined_query_locs_tcdb, DB_CHANNEL_SETUP.out.ch_tcdb_db, params.bit_score_threshold, params.rbh_bit_score_threshold, default_sheet, tcdb_name ) - ch_tcdb_mmseqs_formatted = MMSEQS_SEARCH_TCDB.out.mmseqs_search_formatted_out - - formattedOutputChannels = formattedOutputChannels.mix(ch_tcdb_mmseqs_formatted) + ch_mmseqs_formatted = MMSEQS_SEARCH_TCDB.out.mmseqs_search_formatted_out + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) + } + // DRAM DB annotation + if (use_dram_db) { + ch_combined_proteins_locs = ch_called_proteins.join(ch_gene_locs) + HMM_SEARCH_DRAM_DB ( + ch_combined_proteins_locs, + "", // No e value, skip e value flag + DB_CHANNEL_SETUP.out.ch_dram_db, + ch_dram_db_hmm_list, + false, + dram_db_name + ) + ch_hmm_formatted = HMM_SEARCH_DRAM_DB.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) } // VOGdb annotation if (use_vog) { @@ -373,19 +381,18 @@ workflow DB_SEARCH { false, vogdb_name ) - ch_vog_formatted = HMM_SEARCH_VOG.out.formatted_hits - formattedOutputChannels = formattedOutputChannels.mix(ch_vog_formatted) + ch_hmm_formatted = HMM_SEARCH_VOG.out.formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_hmm_formatted) } // Viral annotation if (params.use_viral) { ch_combined_query_locs_viral = ch_mmseqs_query.join(ch_gene_locs) MMSEQS_SEARCH_VIRAL( ch_combined_query_locs_viral, DB_CHANNEL_SETUP.out.ch_viral_db, params.bit_score_threshold, params.rbh_bit_score_threshold,default_sheet, viral_name ) - ch_viral_unformatted = MMSEQS_SEARCH_VIRAL.out.mmseqs_search_formatted_out - - SQL_VIRAL(ch_viral_unformatted, viral_name, 
ch_sql_descriptions_db) - ch_viral_formatted = SQL_VIRAL.out.sql_formatted_hits + ch_mmseqs_unformatted = MMSEQS_SEARCH_VIRAL.out.mmseqs_search_formatted_out - formattedOutputChannels = formattedOutputChannels.mix(ch_viral_formatted) + SQL_VIRAL(ch_mmseqs_unformatted, viral_name, ch_sql_descriptions_db) + ch_mmseqs_formatted = SQL_VIRAL.out.sql_formatted_hits + formattedOutputChannels = formattedOutputChannels.mix(ch_mmseqs_formatted) } fastas = formattedOutputChannels.map { it[1] }.collect() genes = ch_called_proteins.map { it[1] }.collect() @@ -418,6 +425,7 @@ workflow DB_CHANNEL_SETUP { use_rgi use_card use_tcdb + use_dram_db use_vog @@ -441,6 +449,7 @@ workflow DB_CHANNEL_SETUP { ch_antismash_db = Channel.empty() ch_card_db = Channel.empty() ch_tcdb_db = Channel.empty() + ch_dram_db = Channel.empty() ch_methyl_db = Channel.empty() ch_fegenie_db = Channel.empty() ch_canthyd_hmm_db = Channel.empty() @@ -518,6 +527,16 @@ workflow DB_CHANNEL_SETUP { index_mmseqs = true } + if (use_dram_db) { + if (!file(params.dram_db).exists()) { + error("Error: If using --annotate, you must supply prebuilt databases. dram database file not found at ${params.dram_db}") + } + // ch_dram_db = [file("${params.dram_db}/dram_db.hmm")] + // ch_dram_db = [file(params.dram_db)] + ch_dram_db = file(params.dram_db) + // ch_dram_db = file(params.dram_db).exists() ? file(params.dram_db) : error("Error: If using --annotate, you must supply prebuilt databases. dram database file not found at ${params.dram_db}") + } + if (use_methyl) { ch_methyl_db = file(params.methyl_db).exists() ? file(params.methyl_db) : error("Error: If using --annotate, you must supply prebuilt databases. 
METHYL database file not found at ${params.methyl_db}") index_mmseqs = true @@ -561,6 +580,7 @@ workflow DB_CHANNEL_SETUP { ch_antismash_db ch_card_db ch_tcdb_db + ch_dram_db ch_methyl_db ch_fegenie_db ch_canthyd_hmm_db diff --git a/workflows/dram.nf b/workflows/dram.nf index 1841366d..ecf903ea 100644 --- a/workflows/dram.nf +++ b/workflows/dram.nf @@ -87,6 +87,7 @@ workflow DRAM { use_rgi = params.use_rgi use_card = params.use_card use_tcdb = params.use_tcdb + use_dram_db = params.use_dram_db use_vog = params.use_vog if (params.anno_dbs != "") { anno_dbs = params.anno_dbs.tokenize(',').collect { it.trim().toLowerCase() } @@ -109,6 +110,7 @@ workflow DRAM { use_rgi = getDBFlag(anno_dbs, 'rgi', value_for_all, params.card_db) use_card = getDBFlag(anno_dbs, 'card', value_for_all, params.card_db) use_tcdb = getDBFlag(anno_dbs, 'tcdb', value_for_all, params.tcdb_db) + use_dram_db = getDBFlag(anno_dbs, 'dram_db', value_for_all, params.dram_db) use_vog = getDBFlag(anno_dbs, 'vog', value_for_all, params.vog_db) } @@ -253,6 +255,7 @@ workflow DRAM { use_rgi, use_card, use_tcdb, + use_dram_db, use_vog ) From 57efff640819e4cfed27ce8bc1a9111a6687ade4 Mon Sep 17 00:00:00 2001 From: Madeline Scyphers Date: Tue, 14 Apr 2026 23:36:35 -0600 Subject: [PATCH 3/3] Update version and changelog --- CHANGELOG.md | 21 +++++++++++++++++++++ nextflow.config | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32276d2c..7d07b775 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,27 @@ All notable changes to this project will be documented in this file. 
+## 2.0.0-beta28 - 2026-04-15 + +[cdfe210](https://github.com/WrightonLabCSU/DRAM/commit/cdfe210ca64eb95baf6f1acedb62f91b74630181)...[e07cd74](https://github.com/WrightonLabCSU/DRAM/commit/e07cd74e8d60fca7513f645c04d0956760c74768) + +### Features + +- Add antiSMASH, CARD, RGI, TCDB ([8d08d1f](https://github.com/WrightonLabCSU/DRAM/commit/8d08d1f9d54fb139eb53587754e569c4317ddc37)) + + Add antiSMASH nextflow module, right now just collect antismash + raw output while we work on incorporating raw output into + larger pipeline + Add rgi nextflow module, right now like antiSMASH, only + collect raw output while we work on incorporating + ADD CARD db processing with mmseqs + ADD TCDB processing with mmseqs + +- Add DRAM DB HMMs = ([e07cd74](https://github.com/WrightonLabCSU/DRAM/commit/e07cd74e8d60fca7513f645c04d0956760c74768)) + + Add DRAM team curated HMM database as new annotation db option. + Work in progress and testing database, but can be found on GLOBUS. + ## 2.0.0-beta27 - 2026-03-18 [f03804b](https://github.com/WrightonLabCSU/DRAM/commit/f03804bca43b15e55731316c00b1c34ac328c62c)...[7d9a12d](https://github.com/WrightonLabCSU/DRAM/commit/7d9a12d225c577a6b2fb0c4d7b1ba60a5588e1e8) diff --git a/nextflow.config b/nextflow.config index 3801713f..846686e7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -495,7 +495,7 @@ manifest { mainScript = 'main.nf' defaultBranch = 'master' nextflowVersion = '!>=24' - version = '2.0.0-beta27' + version = '2.0.0-beta28' doi = '' }