diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..d4a2c440 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,21 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true +insert_final_newline = true +charset = utf-8 +end_of_line = lf + +[*.bat] +indent_style = tab +end_of_line = crlf + +[LICENSE] +insert_final_newline = false + +[Makefile] +indent_style = tab diff --git a/.gitbook.yaml b/.gitbook.yaml new file mode 100644 index 00000000..afdeba9f --- /dev/null +++ b/.gitbook.yaml @@ -0,0 +1 @@ +root: ./docs diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 00000000..14740fbe --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,15 @@ +* cwl-commandlinetools version: +* Python version: +* Operating System: + +### Description + +Describe what you were trying to get done. +Tell us what happened, what went wrong, and what you expected to happen. + +### What I Did + +``` +Paste the command(s) you ran and the output. +If there was a crash, please include the traceback here. +``` diff --git a/.gitignore b/.gitignore index 76d33366..de23585e 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ # Distribution / packaging .Python +env/ build/ develop-eggs/ dist/ @@ -81,8 +82,10 @@ celerybeat-schedule # SageMath parsed files *.sage.py -# Environments +# dotenv .env + +# virtualenv .venv env/ venv/ @@ -108,7 +111,6 @@ venv.bak/ # vscode .vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json \ No newline at end of file + +# pycharm +.idea diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..c0504869 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,31 @@ +# Config file for automatic testing at travis-ci.org + +language: python +python: + - 3.7 + - 2.7 + +# Command to install dependencies, e.g. 
pip install -r requirements.txt --use-mirrors +install: + - pip install -U tox-travis + - pip install -r requirements.txt + +# Command to run tests, e.g. python setup.py test +script: + - find . -name '*.cwl' | xargs -n 1 cwltool --validate + +# Assuming you have installed the travis-ci CLI tool, after you +# create the Github repo and add it to Travis, run the +# following command to finish PyPI deployment setup: +# $ travis encrypt --add deploy.password +deploy: + provider: pypi + distributions: sdist bdist_wheel + user: __token__ + password: + secure: g9Ja5QDLc1WGu50xpmXl6wcP7qRNzfYZk7i3PEJtQNO6JLPtxEmBgDAb4+RedRxLo9MRmws/n/bFkTOSP837d+tJ91cYN6TFbVu2teWiR6hblDX/Twhbceq/MjdYJyAVsH+KpuORjuJGqzk2I4QLzI+B/0mXuWcE4EPaCZ5mpm0aYYOTLW1Ukxl1j/PoV8wWC2glItLQ02zIvLyr276+en+RAdWYwqW8sY7rn4hI6VaM78OMsc2/cvG27X82SX4rBxJ3/VveslAc3O7Kck02ltOPyOLI3w++HEVvhHAaCK3kDxNEYQCMly1lDYWTfAGm2F5TZ5mgt2adb08AN//0GnWQOfciHh3JUrIt7po7B5Zs8kmZNGGTJFog8o+btU4pAeCDIt61lFyMo7VVpvPzR4ToiGP3zBvGEgnZd7WpTI0H0E4oc821vl9SAN+3aWQhDxDHl+z3VDwpZTA18mgQikFNc7asKDSXCAGoStI/YFWjw3X+tvFGMXR+R6dpmeSplFFSOx9L3TbrtymWProH8MOyxSVNDdQG6Vz41bN9IS47GRI+/1A9jXxwGurKY1ZL7HZDApDx42Fn2RdOFQNyLCeCneI+RUXtkHY56tH3GpBmnnJX6bKPrn4+VIbgd1VCahPrG8INqxx+SE4JojdIQHcxDy924PCL8mS4hakE4Z0= + on: + tags: true + repo: msk-access/cwl-commandlinetools + python: 2.7 + diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..e994ec71 --- /dev/null +++ b/LICENSE @@ -0,0 +1,16 @@ +Apache Software License 2.0 + +Copyright (c) 2019, msk-access + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. + diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..405d5d7e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,9 @@ +include LICENSE +include README.md + +recursive-include tests * +recursive-include *.cwl +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.jpg *.png *.gif diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..4dba77a2 --- /dev/null +++ b/Makefile @@ -0,0 +1,77 @@ +.PHONY: clean clean-test clean-pyc clean-build docs help +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +try: + from urllib import pathname2url +except: + from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . 
-name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + +lint: ## check style with flake8 + flake8 cwl_commandlinetools tests + +test: ## run tests quickly with the default Python + pytest + +test-all: ## run tests on every Python version with tox + tox + +coverage: ## check code coverage quickly with the default Python + coverage run --source cwl_commandlinetools -m pytest + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +release: dist ## package and upload a release + twine upload dist/* + +dist: clean ## builds source and wheel package + python setup.py sdist + python setup.py bdist_wheel + ls -l dist + +install: clean ## install the package to the active Python's site-packages + python setup.py install diff --git a/README.md b/README.md new file mode 100644 index 00000000..ee39a452 --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ +--- +description: Central location for storing common workflow language based command line tools for building workflows +--- + +# Command-line tools built by CCI + +- Free software: Apache Software License 2.0 +* Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/) + +## Features + +Create command line tools in common workflow language to generate workflows. + +## Installation + +Clone the repository: + +``` +git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git +``` + +**Follow the README in respective tool folder for execution of the tool.** + + +## Credits + +- CMO ACCESS Informatics Team +- This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. 
+ - Cookiecutter: https://github.com/audreyr/cookiecutter + - `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage + diff --git a/Workflows/bwa_sort_merge.cwl b/Workflows/bwa_sort_merge.cwl deleted file mode 100644 index 99b25be9..00000000 --- a/Workflows/bwa_sort_merge.cwl +++ /dev/null @@ -1,86 +0,0 @@ -class: Workflow -cwlVersion: v1.0 -id: bwa_sort_merge -label: bwa_sort_merge - -inputs: - - id: reference_sequence - type: File - secondaryFiles: - - .amb - - .ann - - .bwt - - .pac - - .sa - - .fai - - id: read_pair - type: - type: array - items: - items: File - type: array - - id: sample_id - type: string - - id: lane_id - type: 'string[]' - -outputs: - - id: sample_id_output - outputSource: - - bwa_sort/sample_id_output - type: - - string - - type: array - items: string - - id: output_md_metrics - outputSource: - - gatk_markduplicatesgatk/output_md_metrics - type: File - - id: output_md_bam - outputSource: - - gatk_markduplicatesgatk/output_md_bam - type: File - -steps: - - id: samtools_merge - in: - - id: input_bams - source: - - bwa_sort/output_file - out: - - id: output_file - run: ../CommandLineTools/samtools-merge_1.9/samtools-merge_1.9.cwl - - id: bwa_sort - in: - - id: reference_sequence - source: reference_sequence - - id: read_pair - source: - - read_pair - - id: sample_id - source: sample_id - - id: lane_id - source: lane_id - out: - - id: output_file - - id: sample_id_output - - id: lane_id_output - run: ./bwa_sort.cwl - label: bwa_sort - scatter: - - read_pair - - lane_id - scatterMethod: dotproduct - - id: gatk_markduplicatesgatk - in: - - id: input_bam - source: samtools_merge/output_file - out: - - id: output_md_bam - - id: output_md_metrics - run: ../CommandLineTools/mark-duplicates_4.1.0.0/mark-duplicates_4.1.0.0.cwl - label: GATK MarkDuplicates - -requirements: - - class: SubworkflowFeatureRequirement - - class: ScatterFeatureRequirement diff --git a/Workflows/make_bam.cwl b/Workflows/make_bam.cwl deleted file 
mode 100644 index 5d1dfdef..00000000 --- a/Workflows/make_bam.cwl +++ /dev/null @@ -1,107 +0,0 @@ -class: Workflow -cwlVersion: v1.0 -id: make_bam -label: make_bam - -inputs: - - id: read_pairs_normal - type: - type: array - items: - items: File - type: array - - id: lane_ids_normal - type: 'string[]' - - id: reference_sequence - type: File - secondaryFiles: - - .amb - - .ann - - .bwt - - .pac - - .sa - - .fai - - id: sample_id_normal - type: string - - id: sample_id_tumor - type: string - - id: read_pairs_tumor - type: - type: array - items: - items: File - type: array - - id: lane_ids_tumor - type: 'string[]' - -outputs: - - id: sample_id_output_normal - outputSource: - - make_bam_Normal/sample_id_output - type: - - string - - type: array - items: string - - id: normal_bam - outputSource: - - make_bam_Normal/output_md_bam - type: File - - id: sample_id_output_tumor - outputSource: - - make_bam_Tumor/sample_id_output - type: - - string - - type: array - items: string - - id: tumor_bam - outputSource: - - make_bam_Tumor/output_md_bam - type: File - - id: tumor_metrics - outputSource: - - make_bam_Tumor/output_md_metrics - type: File - - id: normal_metrics - outputSource: - - make_bam_Normal/output_md_metrics - type: File - -steps: - - id: make_bam_Normal - in: - - id: reference_sequence - source: reference_sequence - - id: read_pair - source: - - read_pairs_normal - - id: sample_id - source: sample_id_normal - - id: lane_id - source: - - lane_ids_normal - out: - - id: sample_id_output - - id: output_md_metrics - - id: output_md_bam - run: ./bwa_sort_merge.cwl - label: make_bam_Normal - - id: make_bam_Tumor - in: - - id: reference_sequence - source: reference_sequence - - id: read_pair - source: - - read_pairs_tumor - - id: sample_id - source: sample_id_tumor - - id: lane_id - source: - - lane_ids_tumor - out: - - id: sample_id_output - - id: output_md_metrics - - id: output_md_bam - run: ./bwa_sort_merge.cwl - label: make_bam_Tumor -requirements: - - class: 
SubworkflowFeatureRequirement diff --git a/Workflows/msisensor-run-both.cwl b/Workflows/msisensor-run-both.cwl deleted file mode 100644 index a9d2f373..00000000 --- a/Workflows/msisensor-run-both.cwl +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env cwl-runner - -$namespaces: - dct: http://purl.org/dc/terms/ - foaf: http://xmlns.com/foaf/0.1/ - doap: http://usefulinc.com/ns/doap# - -cwlVersion: v1.0 - -class: Workflow -id: msisensor-run-both -requirements: - StepInputExpressionRequirement: {} - MultipleInputFeatureRequirement: {} - ScatterFeatureRequirement: {} - SubworkflowFeatureRequirement: {} - InlineJavascriptRequirement: {} - -inputs: - normal_bam: - type: File - secondaryFiles: [ ".bai" ] - tumor_bam: - type: File - secondaryFiles: [ ".bai" ] - output_prefix: string - msi_file: File - -outputs: - msisensor_0.2_output: - type: File - outputSource: msisensor_0.2/output - - msisensor_0.6_output: - type: File - outputSource: msisensor_0.6/output - -steps: - msisensor_0.2: - run: ../CommandLineTools/msisensor_0.2/msisensor-0.2.cwl - in: - output_prefix: output_prefix - d: msi_file - n: normal_bam - t: tumor_bam - o: - valueFrom: ${ return inputs.output_prefix + "_0.2.txt"; } - out: [ output ] - - msisensor_0.6: - run: ../CommandLineTools/msisensor_0.6/msisensor-0.6.cwl - in: - output_prefix: output_prefix - d: msi_file - n: normal_bam - t: tumor_bam - o: - valueFrom: ${ return inputs.output_prefix + "_0.6.txt"; } - out: [ output ] diff --git a/Workflows/mutect_wf.cwl b/Workflows/mutect_wf.cwl deleted file mode 100644 index c046e18a..00000000 --- a/Workflows/mutect_wf.cwl +++ /dev/null @@ -1,65 +0,0 @@ -class: Workflow -cwlVersion: v1.0 -id: mutect_wf -label: mutect_wf -inputs: - - id: scatter-count - type: int? - - id: output - type: string - - id: intervals - type: File? - - id: reference - type: File? - - id: tumor_sample - type: string? - - id: normal_sample - type: string? - - id: input_normal - type: File? - - id: input_tumor - type: File? 
- -outputs: - - id: output_1 - outputSource: - - mutect2/output - type: 'File[]?' - -steps: - - id: scatterintervals - in: - - id: reference - source: reference - - id: intervals - source: intervals - - id: scatter-count - source: scatter-count - - id: output - source: output - out: - - id: interval_files - run: ../CommandLineTools/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl - label: ScatterIntervals - - id: mutect2 - in: - - id: reference - source: reference - - id: intervals - source: scatterintervals/interval_files - - id: input - source: input_tumor - - id: tumor_sample - source: tumor_sample - - id: input_normal - source: input_normal - - id: normal_sample - source: normal_sample - out: - - id: output - run: ../CommandLineTools/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl - label: Mutect2 - scatter: - - intervals -requirements: - - class: ScatterFeatureRequirement diff --git a/Workflows/snp-pileup-to-facets.cwl b/Workflows/snp-pileup-to-facets.cwl deleted file mode 100644 index 619a8753..00000000 --- a/Workflows/snp-pileup-to-facets.cwl +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env cwl-runner - -class: Workflow -cwlVersion: v1.0 - -requirements: - InlineJavascriptRequirement: {} - StepInputExpressionRequirement: {} - SubworkflowFeatureRequirement: {} - MultipleInputFeatureRequirement: {} - ScatterFeatureRequirement: {} - -inputs: - facets_vcf: - type: File - secondaryFiles: - - .gz - - bam_normal: - type: File - - bam_tumor: - type: File - - tumor_sample_name: - type: string - -outputs: - - snp_pileup_out: - type: File - outputSource: do_snp_pileup/output_file - - facets_png: - type: File[]? - outputSource: do_facets/png_files - - facets_txt_purity: - type: File? - outputSource: do_facets/txt_files_purity - - facets_txt_hisens: - type: File? - outputSource: do_facets/txt_files_hisens - - facets_out_files: - type: File[]? - outputSource: do_facets/out_files - - facets_rdata: - type: File[]? - outputSource: do_facets/rdata_files - - facets_seg: - type: File[]? 
- outputSource: do_facets/seg_files - -steps: - do_snp_pileup: - run: ../CommandLineTools/snp-pileup_0.1.1/htstools-0.1.1.cwl - in: - vcf_file: facets_vcf - bam_normal: bam_normal - bam_tumor: bam_tumor - output_file: - valueFrom: ${ return inputs.bam_normal.basename.replace(".bam", "") + "_" + inputs.bam_tumor.basename.replace(".bam", "") + ".dat.gz"; } - out: [ output_file ] - - do_facets: - run: ../CommandLineTools/facets_1.5.6/facets.doFacets-1.5.6.cwl - in: - genome: - valueFrom: ${ return "hg19"; } - counts_file: do_snp_pileup/output_file - TAG: - valueFrom: ${ return inputs.counts_file.basename.replace(".dat.gz", ""); } - tumor_id: tumor_sample_name - directory: - valueFrom: ${ return "."; } - out: [ png_files, txt_files_purity, txt_files_hisens, out_files, rdata_files, seg_files ] diff --git a/__init__.py b/__init__.py new file mode 100644 index 00000000..d62ed77c --- /dev/null +++ b/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +"""Top-level package for cwl-commandlinetools.""" + +__author__ = """msk-access""" +__email__ = 'msk.access@gmail.com' +__version__ = '1.1.1' diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl index e9ac545e..0f31e4d4 100644 --- a/abra2_2.17/abra2_2.17.cwl +++ b/abra2_2.17/abra2_2.17.cwl @@ -184,7 +184,7 @@ requirements: ramMin: 60000 coresMin: 16 - class: DockerRequirement - dockerPull: 'mskcc/abra2:0.1.0' + dockerPull: 'ghcr.io/msk-access/abra2:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/abra2_2.17/container/Dockerfile b/abra2_2.17/container/Dockerfile index 9f1bc82f..e7c64df9 100644 --- a/abra2_2.17/container/Dockerfile +++ b/abra2_2.17/container/Dockerfile @@ -20,7 +20,7 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.licenses=${LICENSE} \ org.opencontainers.image.version.java=${JAVA_VERSION} \ org.opencontainers.image.version.abra2=${ABRA2_VERSION} \ - 
org.opencontainers.image.source.abra2="https://github.com/mozack/abra2/releases/" + org.opencontainers.image.source="https://github.com/mozack/abra2/releases/" LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}" @@ -37,4 +37,4 @@ RUN apt-get update && \ RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \ chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \ - cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar \ No newline at end of file + cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar diff --git a/abra2_2.19/abra2_2.19.cwl b/abra2_2.19/abra2_2.19.cwl index 8c9be47a..1af3c194 100644 --- a/abra2_2.19/abra2_2.19.cwl +++ b/abra2_2.19/abra2_2.19.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: abra2_2.19 + sbg: 'https://www.sevenbridges.com/' +id: abra2_2_19 baseCommand: - java inputs: @@ -173,7 +174,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/abra2:0.2.0' + dockerPull: 'aphoid/abra2:2.19' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -193,6 +194,3 @@ requirements: - class: 'doap:Version' 'doap:name': abra2 'doap:revision': 2.19 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git 
a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl new file mode 100644 index 00000000..e24cb107 --- /dev/null +++ b/abra2_2.22/abra2_2.22.cwl @@ -0,0 +1,246 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: abra2_2_22 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input_bam + type: + - File + - type: array + items: File + inputBinding: + position: 0 + prefix: '--in' + doc: Required list of input sam or bam file (s) separated by comma + secondaryFiles: + - ^.bai + - id: working_directory + type: string? + doc: Set the temp directory (overrides java.io.tmpdir) + - id: reference_fasta + type: File + inputBinding: + position: 0 + prefix: '--ref' + doc: Genome reference location + secondaryFiles: + - .fai + - id: targets + type: File + inputBinding: + position: 0 + prefix: '--targets' + - id: kmer_size + type: string? + inputBinding: + position: 0 + prefix: '--kmer' + doc: >- + Optional assembly kmer size(delimit with commas if multiple sizes + specified) + - id: maximum_average_depth + type: int? + inputBinding: + position: 0 + prefix: '--mad' + doc: >- + Regions with average depth exceeding this value will be downsampled + (default: 1000) + - id: soft_clip_contig + type: string? + inputBinding: + position: 0 + prefix: '--sc' + doc: >- + Soft clip contig args + [max_contigs,min_base_qual,frac_high_qual_bases,min_soft_clip_len] + (default:16,13,80,15) + - id: maximum_mixmatch_rate + type: float? + inputBinding: + position: 0 + prefix: '--mmr' + doc: >- + Max allowed mismatch rate when mapping reads back to contigs (default: + 0.05) + - id: scoring_gap_alignments + type: string? 
+ inputBinding: + position: 0 + prefix: '--sga' + doc: >- + Scoring used for contig alignments(match, + mismatch_penalty,gap_open_penalty,gap_extend_penalty) (default:8,32,48,1) + - id: contig_anchor + type: string? + inputBinding: + position: 0 + prefix: '--ca' + doc: >- + Contig anchor [M_bases_at_contig_edge,max_mismatches_near_edge] + (default:10,2) + - id: window_size + type: string? + inputBinding: + position: 0 + prefix: '--ws' + doc: |- + Processing window size and overlap + (size,overlap) (default: 400,200) + - id: consensus_sequence + type: boolean? + inputBinding: + position: 0 + prefix: '--cons' + doc: Use positional consensus sequence when aligning high quality soft clipping + - id: output_bams + type: + - string + - type: array + items: string + inputBinding: + position: 0 + prefix: '--out' + doc: Required list of output sam or bam file (s) separated by comma + - id: ignore_bad_assembly + type: boolean? + inputBinding: + position: 0 + prefix: '--ignore-bad-assembly' + doc: Use this option to avoid parsing errors for corrupted assemblies + - id: bam_index + type: boolean? + inputBinding: + position: 0 + prefix: '--index' + doc: >- + Enable BAM index generation when outputting sorted alignments (may require + additional memory) + - id: input_vcf + type: File? + inputBinding: + position: 0 + prefix: '--in-vcf' + doc: >- + VCF containing known (or suspected) variant sites. Very large files + should be avoided. + - id: no_edge_complex_indel + type: boolean? + inputBinding: + position: 0 + prefix: '--no-edge-ci' + doc: Prevent output of complex indels at read start or read end + - id: no_sort + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--nosort' + doc: Do not attempt to sort final output +outputs: + - id: abra_realigned_bam + type: + - 'null' + - File + - type: array + items: File + outputBinding: + glob: |- + ${ + return inputs.output_bams + } + secondaryFiles: + - ^.bai +label: abra2_2.22 +arguments: + - position: 0 + valueFrom: |- + ${ + if (inputs.memory_per_job && inputs.memory_overhead) { + + if (inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" + } + else { + + return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead) { + + if (inputs.memory_per_job % 1000 == 0) { + + return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G" + } + else { + + return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G" + } + } + else if (!inputs.memory_per_job && inputs.memory_overhead) { + + return "-Xmx20G" + } + else { + + return "-Xmx20G" + } + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/abra2.jar + - position: 0 + prefix: '--threads' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } + - position: 0 + prefix: '--tmpdir' + valueFrom: |- + ${ + if(inputs.working_directory) + return inputs.working_directory; + return runtime.tmpdir + } +requirements: + - class: ResourceRequirement + ramMin: 60000 + coresMin: 16 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/abra2:2.22' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan 
Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': abra2 + 'doap:revision': 2.22 diff --git a/abra2_2.22/container/Dockerfile b/abra2_2.22/container/Dockerfile new file mode 100644 index 00000000..0ec95e41 --- /dev/null +++ b/abra2_2.22/container/Dockerfile @@ -0,0 +1,40 @@ +################## BASE IMAGE ###################### + +FROM openjdk:8 + +################## ARGUMENTS######################## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG JAVA_VERSION=8 +ARG ABRA2_VERSION=2.22 + +################## METADATA ######################## + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.java=${JAVA_VERSION} \ + org.opencontainers.image.version.abra2=${ABRA2_VERSION} \ + org.opencontainers.image.source="https://github.com/mozack/abra2/releases/" + +LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}" + +################## INSTALL ########################## + +WORKDIR /usr/src + +RUN apt-get update && \ + apt-get --no-install-recommends install -y \ + wget && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \ + chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \ + cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar diff --git a/abra2_2.22/example_inputs.yaml b/abra2_2.22/example_inputs.yaml new file mode 100644 index 00000000..928aafd8 --- /dev/null +++ b/abra2_2.22/example_inputs.yaml @@ -0,0 +1,30 @@ +bam_index: true +no_edge_complex_indel: true +consensus_sequence: 
+contig_anchor: +ignore_bad_assembly: +input_bam: + class: File + path: "path/to/alignment.bam" +input_vcf: +kmer_size: +maximum_average_depth: +maximum_mixmatch_rate: +memory_overhead: +memory_per_job: +no_sort: +number_of_threads: +output_bam: name_of_realigned_abra.bam +path_to_abra: +reference_fasta: + class: File + path: "/path/to/reference.fasta" +scoring_gap_alignments: +soft_clip_contig: +targets: + class: File + metadata: {} + path: "/path/to/target.bed" + secondaryFiles: [] +window_size: +working_directory: diff --git a/access_utils/0.1.1/README.md b/access_utils/0.1.1/README.md new file mode 100644 index 00000000..45bd9f8b --- /dev/null +++ b/access_utils/0.1.1/README.md @@ -0,0 +1,32 @@ +# CWL and Dockerfile for running merge_fastq + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| access_utils | 0.1.1 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner general_stats_parse.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/general_stats_parse.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/general_stats_parse.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner general_stats_parse.cwl -h +``` diff --git a/access_utils/0.1.1/example_inputs.yaml b/access_utils/0.1.1/example_inputs.yaml new file mode 100644 index 00000000..27fccad3 --- /dev/null +++ b/access_utils/0.1.1/example_inputs.yaml @@ -0,0 +1,2 @@ +dir: /path/to/sample_info_directory +samples-json: /path/to/sample_meta.json diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl new file mode 100644 index 00000000..5116e6f3 --- /dev/null +++ b/access_utils/0.1.1/general_stats_parse.cwl @@ -0,0 +1,67 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: general_stats_parse +baseCommand: + - general_stats_parse.py +arguments: + - --dir + - . +inputs: + - id: directory + type: Directory + doc: Directory containing results. + - id: samples-json + type: File + inputBinding: + prefix: '--samples-json' + doc: Sample JSON file. + - id: config + type: File? + inputBinding: + prefix: '--config' + doc: MultiQC config file. +outputs: + - id: aggregate_parsed_stats + label: aggregate_parsed_stats + type: Directory + outputBinding: + glob: . 
+ outputEval: |- + ${ + self[0].basename = "aggregate_qc_stats"; + return self[0] + } +label: general_stats_parse +requirements: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/access_utils:0.1.1' + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.directory) + writable: true + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/athena/1.4.2/annotate_bed/annotate_bed.cwl b/athena/1.4.2/annotate_bed/annotate_bed.cwl new file mode 100755 index 00000000..78002b99 --- /dev/null +++ b/athena/1.4.2/annotate_bed/annotate_bed.cwl @@ -0,0 +1,98 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: annotate_bed +baseCommand: + - python + - /app/bin/annotate_bed.py +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? 
+ doc: 'worker thread number' + - id: panel_bed + type: File + inputBinding: + position: 0 + prefix: '-p' + doc: >- + Input panel bed file; must have ONLY the following 4 columns chromosome, + start position, end position, gene/transcript + - id: transcript_file + type: File + inputBinding: + position: 0 + prefix: '-t' + doc: >- + Transcript annotation file, contains required gene and exon information. + Must have ONLY the following 6 columns: + + chromosome, start, end, gene, transcript, exon + - id: coverage_file + type: File + inputBinding: + position: 0 + prefix: '-c' + doc: Per base coverage file (output from mosdepth or similar) + - id: chunk_size + type: int? + inputBinding: + position: 999 + prefix: '-s' + - id: output_name + type: string? + inputBinding: + position: 960 + prefix: '-n' + doc: >- + (optional) Prefix for naming output file, if not given will use name from + per base coverage file +outputs: + - id: annotated_bed + label: annotated_bed + type: File + outputBinding: + glob: '*.bed' +label: annotate_bed +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/athena:1.4.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charlk@mskcc.org' + 'foaf:name': Carmelina Charlambous + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charlk@mskcc.org' + 'foaf:name': Carmelina Charlambous + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 
'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/athena/1.4.2/annotate_bed/example_inputs.yaml b/athena/1.4.2/annotate_bed/example_inputs.yaml new file mode 100644 index 00000000..a770b0a5 --- /dev/null +++ b/athena/1.4.2/annotate_bed/example_inputs.yaml @@ -0,0 +1,5 @@ +panel_bed: {class: File, path: path/to/panel_bed.bed} +transcript_file: {class: File, path: path/to/transcript.bed} +coverage_file: {class: File, path: path/to/coverage.per-base.bed.gz} +chunk_size: 10000000 +output_name: 'prefix_name' diff --git a/athena/1.4.2/annotate_bed/example_inputs_juno.yaml b/athena/1.4.2/annotate_bed/example_inputs_juno.yaml new file mode 100644 index 00000000..c65dba0d --- /dev/null +++ b/athena/1.4.2/annotate_bed/example_inputs_juno.yaml @@ -0,0 +1,5 @@ +panel_bed: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/panel_bed_file_athena_CH_nodup.bed} +transcript_file: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/CH_transcript_pad3bp_athena_nodup.bed} +coverage_file: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/Myeloid200-1-05500HJ_P20.per-base.bed.gz} +chunk_size: 10000000 +output_name: 'ex1_prefix' diff --git a/athena/1.4.2/container/README.md b/athena/1.4.2/container/README.md new file mode 100644 index 00000000..f251d8f4 --- /dev/null +++ b/athena/1.4.2/container/README.md @@ -0,0 +1,3 @@ +Dockerfile available via the athena repository: https://github.com/msk-access/athena/blob/master/Dockerfile + +Registry containing image also housed with athena repository: https://github.com/msk-access/athena/pkgs/container/athena diff --git a/athena/1.4.2/coverage_report_single/coverage_report_single.cwl b/athena/1.4.2/coverage_report_single/coverage_report_single.cwl new file mode 100755 index 00000000..9411a023 --- /dev/null +++ b/athena/1.4.2/coverage_report_single/coverage_report_single.cwl @@ -0,0 +1,157 @@ +class: CommandLineTool
+cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: general_stats_parse +baseCommand: + - python + - /app/bin/coverage_report_single.py +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + doc: 'worker thread number' + - id: exon_stats + type: File + inputBinding: + position: 0 + prefix: '-e' + doc: per exon statistics file (from `coverage_stats_single.py`) + - id: gene_stats + type: File + inputBinding: + position: 0 + prefix: '-g' + doc: per gene statistics file (from `coverage_stats_single.py`) + - id: raw_coverage + type: File + inputBinding: + position: 0 + prefix: '-r' + doc: >- + annotated bed file with coverage data (generated from annotate_bed.sh / + bedtools intersect) + - id: per_base_coverage + type: File? + inputBinding: + position: 0 + prefix: '-b' + doc: >- + Per-base coverage bed file from mosdepth. (Optional; if not submitted, + plots displaying global coverage per chromosome will not be displayed) + - id: snps + type: + - 'null' + - File + - type: array + items: File + inputBinding: + position: 999 + prefix: '-s' + doc: 'VCF(s) of known SNPs to check coverage of (optional; i.e. HGMD, ClinVar)' + - id: threshold + type: int? + inputBinding: + position: 0 + prefix: '-t' + doc: >- + threshold value defining sub-optimal coverage (optional; default if not + given: 20) + - id: sample_name + type: string? + inputBinding: + position: 0 + prefix: '-n' + - id: output + type: string? + inputBinding: + position: 0 + prefix: '-o' + doc: name for output report (optional; sample name will be used if not given) + - id: panel + type: File? 
+ inputBinding: + position: 0 + prefix: '-p' + doc: >- + panel bed file used for initial annotation, name will be displayed in + summary of report (optional) + - id: limit + type: int? + inputBinding: + position: 0 + prefix: '-l' + doc: >- + number of genes at which to limit including full gene plots, large numbers + of genes may take a long time to generate the plots (optional) + - id: summary + type: boolean? + inputBinding: + position: 0 + prefix: '-m' + doc: >- + boolean flag to add clinical report summary text in summary section, + includes list of all genes with transcripts (optional; default False) +outputs: + - id: coverage_report_single + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '*_report.html' + } else { + return '*_report.html' + } + } +label: coverage_report_single +arguments: + - position: 0 + prefix: '--cores' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } +requirements: + - class: ResourceRequirement + ramMin: 25000 + coresMin: 6 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/athena:1.4.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charlk@mskcc.org' + 'foaf:name': Carmelina Charlambous + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charlk@mskcc.org' + 'foaf:name': Carmelina Charlambous + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': 
Memorial Sloan Kettering Cancer Center diff --git a/athena/1.4.2/coverage_report_single/example_inputs.yaml b/athena/1.4.2/coverage_report_single/example_inputs.yaml new file mode 100644 index 00000000..202a5647 --- /dev/null +++ b/athena/1.4.2/coverage_report_single/example_inputs.yaml @@ -0,0 +1,14 @@ +raw_coverage: {class: File, path: /path/to/annotated_bed/file.bed} +gene_stats: {class: File, path: /path/to/gene_stats.tsv} +exon_stats: {class: File, path: /path/to/exon_stats.tsv} +per_base_coverage: {class: File, path: /path/to/per_base/mosdepth/file.bed} +snps: + - {class: File, path: /path/to/vcfs/known1.vcf} + - {class: File, path: /path/to/vcfs/known2.vcf} +threshold: 20 +sample_name: 'report1' +output: 'report1' +panel: {class: File, path: path/to/panel_bed.bed} +limit: 20 +summary: true +cores: 4 diff --git a/athena/1.4.2/coverage_report_single/example_inputs_juno.yaml b/athena/1.4.2/coverage_report_single/example_inputs_juno.yaml new file mode 100644 index 00000000..0beae6b4 --- /dev/null +++ b/athena/1.4.2/coverage_report_single/example_inputs_juno.yaml @@ -0,0 +1,11 @@ +raw_coverage: {class: File, path: /work/access/testing/users/buehlere/athena_test/Myeloid200-1-05500HJ_annotated.bed} +gene_stats: {class: File, path: /work/access/testing/users/buehlere/athena_test/Myeloid200-1-05500HJ_gene_stats.tsv} +exon_stats: {class: File, path: /work/access/testing/users/buehlere/athena_test/Myeloid200-1-05500HJ_exon_stats.tsv} +per_base_coverage: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/Myeloid200-1-05500HJ_P20.per-base.bed.gz} +panel: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/panel_bed_file_athena_CH_nodup.bed} +threshold: 20 +sample_name: 'report1' +output: 'report1' +limit: 20 +summary: true +cores: 4 diff --git a/athena/1.4.2/coverage_stats_single/coverage_stats_single.cwl b/athena/1.4.2/coverage_stats_single/coverage_stats_single.cwl new file mode 100755 index 00000000..7be30e2a 
--- /dev/null +++ b/athena/1.4.2/coverage_stats_single/coverage_stats_single.cwl @@ -0,0 +1,119 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: general_stats_parse +baseCommand: + - python + - /app/bin/coverage_stats_single.py +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + doc: 'worker thread number' + - id: file + type: File + inputBinding: + position: 0 + prefix: '--file' + doc: annotated bed file from which to generate the report + - id: build + type: File? + inputBinding: + position: 0 + prefix: '--build' + doc: >- + text file with build number used for alignment, output from mosdepth + (optional) chromosome, start, end, gene, transcript, exon + - id: outfile + type: string? + inputBinding: + position: 0 + prefix: '--outfile' + doc: >- + output file name prefix, if not given the input file name will be used as + the name prefix + - id: thresholds + type: 'int[]?' + inputBinding: + position: 999 + prefix: '--thresholds' + doc: >- + threshold values to calculate coverage for as comma separated integers + (default: 10, 20, 30, 50, 100) + - id: output_name + type: string? + inputBinding: + position: 900 + prefix: '--n' + doc: >- + (optional) Prefix for naming output file, if not given will use name from + per base coverage file + - id: flagstat + type: string?
+ inputBinding: + position: 900 + prefix: '--flagstat' + doc: 'file for sample, required for generating run statistics (in development)' +outputs: + - id: exon_stats_output + label: exon_stats_output + type: File + outputBinding: + glob: '*_exon_stats.tsv' + - id: gene_stats_output + label: gene_stats_output + type: File + outputBinding: + glob: '*_gene_stats.tsv' +label: general_stats_parse +arguments: + - position: 0 + prefix: '--cores' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } +requirements: + - class: ResourceRequirement + ramMin: 25000 + coresMin: 6 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/athena:1.4.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charlk@mskcc.org' + 'foaf:name': Carmelina Charlambous + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charlk@mskcc.org' + 'foaf:name': Carmelina Charlambous + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/athena/1.4.2/coverage_stats_single/example_inputs.yaml b/athena/1.4.2/coverage_stats_single/example_inputs.yaml new file mode 100644 index 00000000..871cf071 --- /dev/null +++ b/athena/1.4.2/coverage_stats_single/example_inputs.yaml @@ -0,0 +1,5 @@ +file: {class: File, path: /path/to/annotated_bed/file.bed} +build: {class: File, path: /path/to/mosdepth/build/file.txt} +flagstat: {class: File, 
path: /path/to/flagstat/file.txt} +thresholds: [10, 20, 30, 50, 100] +cores: 4 diff --git a/athena/1.4.2/coverage_stats_single/example_inputs_juno.yaml b/athena/1.4.2/coverage_stats_single/example_inputs_juno.yaml new file mode 100644 index 00000000..9dc935a3 --- /dev/null +++ b/athena/1.4.2/coverage_stats_single/example_inputs_juno.yaml @@ -0,0 +1,3 @@ +file: {class: File, path: /work/access/testing/users/buehlere/athena_test/Myeloid200-1-05500HJ_annotated.bed} +thresholds: [10, 20, 30, 50, 100] +cores: 4 diff --git a/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl new file mode 100644 index 00000000..60c2c313 --- /dev/null +++ b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl @@ -0,0 +1,82 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: bgzip +baseCommand: + - bgzip +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: stdout + type: boolean + inputBinding: + position: 0 + prefix: '-c' + doc: Standard output for bgzip + - id: input + type: File + inputBinding: + position: 10 + doc: input VCF file + - id: output_file_name + type: string? + doc: Name of the output file +outputs: + - id: zippedVcf + type: File?
+ outputBinding: + glob: |- + ${ + if (inputs.output_file_name) { + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.vcf/, '.vcf.gz') + } + } +label: bgzip +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1' + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.input) + - class: InlineJavascriptRequirement +stdout: |- + ${ + if (inputs.output_file_name) { + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.vcf/, '.vcf.gz') + } + } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': bgzip + 'doap:revision': 1.15.1 diff --git a/bcftools_1.15.1/bcftools_concat_1.15.1.cwl b/bcftools_1.15.1/bcftools_concat_1.15.1.cwl new file mode 100644 index 00000000..f2bfc420 --- /dev/null +++ b/bcftools_1.15.1/bcftools_concat_1.15.1.cwl @@ -0,0 +1,82 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: bcftools_concat +baseCommand: + - bcftools + - concat +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: allow_overlaps + type: boolean? 
+ inputBinding: + position: 0 + prefix: '-a' + doc: >- + First coordinate of the next file can precede last record of the current + file. + - id: output_name + type: string + inputBinding: + position: 0 + prefix: '-o' + doc: Output file name + - id: output_type + type: string? + inputBinding: + position: 99 + prefix: '-O' + doc: >- + compressed BCF (b), uncompressed BCF (u), compressed VCF (z), uncompressed + VCF (v) + - id: input + type: 'File[]' + inputBinding: + position: 100 + secondaryFiles: + - .tbi +outputs: + - id: concatenated_vcf + type: File + outputBinding: + glob: |- + ${ + return inputs.output_name + } +label: bcftools_concat +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': bcftools concat + 'doap:revision': 1.15.1 diff --git a/bcftools_1.15.1/bcftools_norm_1.15.1.cwl b/bcftools_1.15.1/bcftools_norm_1.15.1.cwl new file mode 100644 index 00000000..ecc24425 --- /dev/null +++ b/bcftools_1.15.1/bcftools_norm_1.15.1.cwl @@ -0,0 +1,101 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: norm +baseCommand: + - bcftools + - norm +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? 
+ doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: check_ref + type: string? + inputBinding: + position: 99 + prefix: '--check-ref' + - id: multiallelics + type: string? + inputBinding: + position: 99 + prefix: '-m' + doc: use any + - id: output_type + type: string? + inputBinding: + position: 99 + prefix: '-O' + - id: output_name + type: string? + inputBinding: + position: 99 + prefix: '-o' + - id: input + type: File + inputBinding: + position: 100 + doc: input vcf file + secondaryFiles: + - .tbi + - id: fastaRef + type: File + inputBinding: + position: 99 + prefix: '-f' + secondaryFiles: + - .fai +outputs: + - id: normalized_vcf + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_name) { + return inputs.output_name + } else { + return inputs.input.basename.replace(/.vcf/, '_norm.vcf') + } + } +label: bcftools_norm +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1' + - class: InlineJavascriptRequirement +stdout: |- + ${ + if(inputs.output_name) { + return inputs.output_name + } else { + return inputs.input.basename.replace(/.vcf/, '_norm.vcf') + } + } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': bcftools norm + 'doap:revision': 1.15.1 diff --git a/bcftools_1.15.1/bcftools_sort._1.15.1.cwl b/bcftools_1.15.1/bcftools_sort._1.15.1.cwl new file mode 100644 index 00000000..202fbcd2 --- /dev/null +++ b/bcftools_1.15.1/bcftools_sort._1.15.1.cwl @@ -0,0 
+1,93 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: bcftools_sort +baseCommand: + - bcftools + - sort +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + inputBinding: + position: 0 + prefix: '--threads' + - id: output_name + type: string? + doc: Output file name + - id: output_type + type: string? + inputBinding: + position: 99 + prefix: '-O' + doc: >- + compressed BCF (b), uncompressed BCF (u), compressed VCF (z), uncompressed + VCF (v) + - id: input + type: File + inputBinding: + position: 100 + doc: input vcf files + secondaryFiles: + - .tbi +outputs: + - id: sorted_file + type: File? + outputBinding: + glob: |- + ${ + if(inputs.output_name) { + return inputs.output_name + } else { + return inputs.input.basename.replace(/.vcf/, '.sorted.vcf') + } + } +label: bcftools_sort +arguments: + - position: 0 + prefix: '-o' + valueFrom: |- + ${ + if(inputs.output_name) { + return inputs.output_name + } else { + return inputs.input.basename.replace(/.vcf/, '.sorted.vcf') + } + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1' + - class: InitialWorkDirRequirement + listing: + - $(inputs.input) + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan 
Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': bcftools sort + 'doap:revision': 1.15.1 diff --git a/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl new file mode 100644 index 00000000..40e45df6 --- /dev/null +++ b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl @@ -0,0 +1,66 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: tabix +baseCommand: + - tabix +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: preset + type: string? + inputBinding: + position: 0 + prefix: '-p' + doc: 'input file type can be gff, bed, sam or vcf' + - id: input + type: File + inputBinding: + position: 99 + doc: Input bgziped file +outputs: + - id: tabixIndex + type: File? 
+ outputBinding: + glob: $(inputs.input.basename) + secondaryFiles: + - .tbi +label: tabix +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1' + - class: InitialWorkDirRequirement + listing: + - $(inputs.input) + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': tabix + 'doap:revision': 1.15.1 diff --git a/bcftools_1.15.1/example_input_bgzip.yaml b/bcftools_1.15.1/example_input_bgzip.yaml new file mode 100644 index 00000000..3429f37d --- /dev/null +++ b/bcftools_1.15.1/example_input_bgzip.yaml @@ -0,0 +1,4 @@ +input: + class: File + path: /path/to/input.vcf +stdout: True diff --git a/bcftools_1.15.1/example_input_concat.yaml b/bcftools_1.15.1/example_input_concat.yaml new file mode 100644 index 00000000..36032292 --- /dev/null +++ b/bcftools_1.15.1/example_input_concat.yaml @@ -0,0 +1,10 @@ +input: + - class: File + path: >- + /path/input/vcf.sorted.vcf.gz + - class: File + path: >- + /path/input/vcf.sorted.vcf.gz +output_name: out_merged.vcf.gz +output_type: z +allow_overlaps: True diff --git a/bcftools_1.15.1/example_input_normvcf.yaml b/bcftools_1.15.1/example_input_normvcf.yaml new file mode 100644 index 00000000..8832efcb --- /dev/null +++ b/bcftools_1.15.1/example_input_normvcf.yaml @@ -0,0 +1,12 @@ +check-ref: s +fastaRef: + class: File + path: >- + /juno/work/access/production/resources/reference/current/Homo_sapiens_assembly19.fasta +input: + class: File + path: 
/path/to/input.vcf.gz + secondaryFiles: [] +multiallelics: + +output-name: out_norm.vcf.gz +output-type: z diff --git a/bcftools_1.15.1/example_input_sort.yaml b/bcftools_1.15.1/example_input_sort.yaml new file mode 100644 index 00000000..bc77aeda --- /dev/null +++ b/bcftools_1.15.1/example_input_sort.yaml @@ -0,0 +1,5 @@ +input: + class: File + path: /path/to/input.vcf.gz +output_name: out.sorted.vcf.gz +output_type: z diff --git a/bcftools_1.15.1/example_input_tabix.yaml b/bcftools_1.15.1/example_input_tabix.yaml new file mode 100644 index 00000000..2128d691 --- /dev/null +++ b/bcftools_1.15.1/example_input_tabix.yaml @@ -0,0 +1,3 @@ +class: File + path: /path/to/input.vcf.gz +preset: vcf diff --git a/bcftools_1.6/README.md b/bcftools_1.6/README.md new file mode 100644 index 00000000..770e8751 --- /dev/null +++ b/bcftools_1.6/README.md @@ -0,0 +1,69 @@ +# CWL and Dockerfile for running bcftools v1.6 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| alpine:3.8 base image | 3.8 | - | +| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bcftools_concat_1.6.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir bcftools_toil_log +> toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . 
--writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner bcftools_concat_1.6.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --allow_overlaps First coordinate of the next file can precede last + record of the current file. + --compact_PS Do not output PS tag at each site, only at the start + of a new phase set block. + --ligate Ligate phased VCFs by matching phase at overlapping + haplotypes + --list LIST Read the list of files from a file. + --min_PQ MIN_PQ Break phase set if phasing quality is lower than + [30] + --naive Concatenate BCF files without recompression + (dangerous, use with caution) + --no_version do not append version and command line to the header + --output OUTPUT Write output to a file [standard output] + --output_type OUTPUT_TYPE + b - compressed BCF, u - uncompressed BCF, z + - compressed VCF, v - uncompressed VCF [v] + --regions REGIONS Restrict to comma-separated list of regions + --regions_file REGIONS_FILE + Restrict to regions listed in a file + --remove_duplicates Alias for -d none + --rm_dups RM_DUPS Output duplicate records present in multiple + files only once - + --threads THREADS Number of extra output compression threads [0] + --vcf_files_csi VCF_FILES_CSI + Array of vcf files to be concatenated into one vcf + --vcf_files_tbi VCF_FILES_TBI + Array of vcf files to be concatenated into one vcf + +``` diff --git a/bcftools_1.6/bcftools_concat_1.6.cwl b/bcftools_1.6/bcftools_concat_1.6.cwl new file mode 100644 index 00000000..98f4122c --- /dev/null +++ b/bcftools_1.6/bcftools_concat_1.6.cwl @@ -0,0 +1,180 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: 
http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: bcftools_concat_v1_6 + +baseCommand: + - /usr/bin/bcftools + - concat + +doc: | + concatenate VCF/BCF files from the same set of samples + +inputs: + + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + + memory_overhead: + type: ["null",int] + doc: Memory overhead per job in megabytes + + threads: + type: ["null", string] + doc: Number of extra output compression threads [0] + inputBinding: + prefix: --threads + + compact_PS: + type: ["null", boolean] + default: false + doc: Do not output PS tag at each site, only at the start of a new phase set block. + inputBinding: + prefix: --compact-PS + + remove_duplicates: + type: ["null", boolean] + default: false + doc: Alias for -d none + inputBinding: + prefix: --remove-duplicates + + ligate: + type: ["null", boolean] + default: false + doc: Ligate phased VCFs by matching phase at overlapping haplotypes + inputBinding: + prefix: --ligate + + output_type: + type: ["null", string] + doc: b - compressed BCF, u - uncompressed BCF, z - compressed VCF, v - uncompressed VCF [v] + inputBinding: + prefix: --output-type + + no_version: + type: ["null", boolean] + default: false + doc: do not append version and command line to the header + inputBinding: + prefix: --no-version + + naive: + type: ["null", boolean] + default: false + doc: Concatenate BCF files without recompression (dangerous, use with caution) + inputBinding: + prefix: --naive + + allow_overlaps: + type: ["null", boolean] + default: false + doc: First coordinate of the next file can precede last record of the current file. 
+ inputBinding: + prefix: --allow-overlaps + + min_PQ: + type: ["null", string] + doc: Break phase set if phasing quality is lower than [30] + inputBinding: + prefix: --min-PQ + + regions_file: + type: ["null", string] + doc: Restrict to regions listed in a file + inputBinding: + prefix: --regions-file + + regions: + type: ["null", string] + doc: Restrict to comma-separated list of regions + inputBinding: + prefix: --regions + + rm_dups: + type: ["null", string] + doc: Output duplicate records present in multiple files only once - + inputBinding: + prefix: --rm-dups + + output: + type: string + doc: Write output to a file [standard output] + default: "bcftools_concat.vcf" + inputBinding: + prefix: --output + + list: + type: ['null', string] + doc: Read the list of files from a file. + inputBinding: + prefix: --file-list + + vcf_files_tbi: + type: + - 'null' + - type: array + items: File + secondaryFiles: + - .tbi + doc: Array of vcf files to be concatenated into one vcf + inputBinding: + position: 1 + + vcf_files_csi: + type: + - 'null' + - type: array + items: File + secondaryFiles: + - ^.bcf.csi + doc: Array of vcf files to be concatenated into one vcf + inputBinding: + position: 1 + +outputs: + bcftools_concat_vcf_output_file: + type: File + outputBinding: + glob: |- + ${ + if (inputs.output) + return inputs.output; + return null; + } + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 1 + DockerRequirement: + dockerPull: ghcr.io/msk-access/bcftools:1.6 + + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: bcftools + 
doap:revision: 1.6 diff --git a/bcftools_1.6/example_inputs.yaml b/bcftools_1.6/example_inputs.yaml new file mode 100644 index 00000000..0512753d --- /dev/null +++ b/bcftools_1.6/example_inputs.yaml @@ -0,0 +1,9 @@ +vcf_files_tbi: + class: File + path: /path/to/vcf/and/tbi/files +tumor_sample_name: tumor_sample_name +normal_sample_name: normal_sample_name +allow_overlaps: allow_overlaps_boolean +rm_dups: rm_dups_str +output_type: output_type_str +output: output_file_name diff --git a/bedtools_genomecov_v2.28.0_cv2/README.md b/bedtools_genomecov_v2.28.0_cv2/README.md deleted file mode 100644 index 75de8a57..00000000 --- a/bedtools_genomecov_v2.28.0_cv2/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# CWL and Dockerfile for running Bedtools GenomeCov - -## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -|--- |--- |--- | -| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory 
--logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help - -usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT - --output_file_name OUTPUT_FILE_NAME - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [--option_bedgraph] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT The input file can be in BAM format (Note: BAM must be - sorted by position) - --output_file_name OUTPUT_FILE_NAME - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS - --option_bedgraph option flag parameter to choose output file format. - -bg refers to bedgraph format \ No newline at end of file diff --git a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl index 43f392dd..396931b9 100644 --- a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl +++ b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl @@ -39,7 +39,7 @@ inputs: option flag parameter to choose output file format. 
-bg refers to bedgraph format outputs: - - id: output_file + - id: bedtools_genomecove_bedgraph type: File outputBinding: glob: |- @@ -55,7 +55,7 @@ requirements: ramMin: 20000 coresMin: 1 - class: DockerRequirement - dockerPull: 'biocontainers/bedtools:v2.28.0_cv2' + dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2' - class: InlineJavascriptRequirement stdout: |- ${ diff --git a/bedtools_merge_v2.28.0_cv2/README.md b/bedtools_merge_v2.28.0_cv2/README.md deleted file mode 100644 index 960664db..00000000 --- a/bedtools_merge_v2.28.0_cv2/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# CWL and Dockerfile for running Bedtools Merge - -## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) - -| Tool | Version | Location | -|--- |--- |--- | -| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 | - -[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) -## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > 
file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help - -usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name - OUTPUT_FILE_NAME - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [--distance_between_features DISTANCE_BETWEEN_FEATURES] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT BEDgraph format file generated from Bedtools Genomecov - module - --output_file_name OUTPUT_FILE_NAME - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS - --distance_between_features DISTANCE_BETWEEN_FEATURES - Maximum distance between features allowed for features - to be merged. \ No newline at end of file diff --git a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl index 21e7cc7b..e6259ff3 100644 --- a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl +++ b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl @@ -33,8 +33,8 @@ inputs: shellQuote: false doc: Maximum distance between features allowed for features to be merged. outputs: - - id: output_file - type: File? 
+ - id: bedtools_merge_bed + type: File outputBinding: glob: |- ${ @@ -49,7 +49,7 @@ requirements: ramMin: 20000 coresMin: 1 - class: DockerRequirement - dockerPull: 'biocontainers/bedtools:v2.28.0_cv2' + dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2' - class: InlineJavascriptRequirement stdout: |- ${ diff --git a/bedtools_sortBed_v2.28.0_cv2/bedtools_sortBed_v2.28.0_cv2.cwl b/bedtools_sortBed_v2.28.0_cv2/bedtools_sortBed_v2.28.0_cv2.cwl new file mode 100644 index 00000000..8b6c2ba9 --- /dev/null +++ b/bedtools_sortBed_v2.28.0_cv2/bedtools_sortBed_v2.28.0_cv2.cwl @@ -0,0 +1,77 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: sortbed +baseCommand: + - sortBed +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '-i' + doc: 'input file can be either bed, gff or vcf' + - id: output_file_name + type: string? + doc: Name of the output file +outputs: + - id: sorted_file + type: File? 
+ outputBinding: + glob: |- + ${ + if(inputs.output_file_name) { + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.vcf/, '.sorted.vcf') + } + } +label: sortBed +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2' + - class: InitialWorkDirRequirement + listing: + - $(inputs.input) + - class: InlineJavascriptRequirement +stdout: |- + ${ + if(inputs.output_file_name) { + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.vcf/, '.sorted.vcf') + } + } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': sortbed + 'doap:revision': 2.28.0 diff --git a/bedtools_sortBed_v2.28.0_cv2/example_input.yaml b/bedtools_sortBed_v2.28.0_cv2/example_input.yaml new file mode 100644 index 00000000..e51645b1 --- /dev/null +++ b/bedtools_sortBed_v2.28.0_cv2/example_input.yaml @@ -0,0 +1 @@ +input: /path/to/the/file/to/be/sorted diff --git a/biometrics_extract/0.2.13/biometrics_extract.cwl b/biometrics_extract/0.2.13/biometrics_extract.cwl new file mode 100644 index 00000000..19ee1121 --- /dev/null +++ b/biometrics_extract/0.2.13/biometrics_extract.cwl @@ -0,0 +1,139 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_extract_0_2_13 +baseCommand: + - biometrics + - extract +inputs: + - id: 
sample_bam + type: File + inputBinding: + position: 0 + prefix: '--sample-bam' + doc: BAM file. + secondaryFiles: + - ^.bai + - id: sample_sex + type: string? + inputBinding: + position: 0 + prefix: '--sample-sex' + doc: Expected sample sex (i.e. M or F). + - id: sample_group + type: string? + inputBinding: + position: 0 + prefix: '--sample-group' + doc: The sample group (e.g. the sample patient ID). + - id: sample_name + type: string + inputBinding: + position: 0 + prefix: '--sample-name' + doc: >- + Sample name. If not specified, sample name is automatically figured out + from the BAM file. + - id: fafile + type: File + inputBinding: + position: 0 + prefix: '--fafile' + doc: Path to reference fasta. + secondaryFiles: + - ^.fasta.fai + - id: vcf_file + type: File + inputBinding: + position: 0 + prefix: '--vcf' + doc: VCF file containing the SNPs to be queried. + - id: bed_file + type: File? + inputBinding: + position: 0 + prefix: '--bed' + doc: BED file containing the intervals to be queried. + - id: database + type: string? + inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 1 + id: min_mapping_quality + type: int? + inputBinding: + position: 0 + prefix: '--min-mapping-quality' + doc: Minimum mapping quality of reads to be used for pileup. + - default: 1 + id: min_base_quality + type: int? + inputBinding: + position: 0 + prefix: '--min-base-quality' + doc: Minimum base quality of reads to be used for pileup. + - default: 10 + id: min_coverage + type: int? + inputBinding: + position: 0 + prefix: '--min-coverage' + doc: Minimum coverage to count a site. + - default: 0.1 + id: min_homozygous_thresh + type: float? + inputBinding: + position: 0 + prefix: '--min-homozygous-thresh' + doc: Minimum threshold to define homozygous. + - id: default_genotype + type: string? 
+ inputBinding: + position: 0 + prefix: '--default-genotype' + doc: Default genotype if coverage is too low (options are Het or Hom). +outputs: + - id: biometrics_extract_pickle + type: File + outputBinding: + glob: |- + ${ + if (inputs.database) { + return inputs.database + '/' + inputs.sample_name + '.pickle'; + } else { + return inputs.sample_name + '.pickle'; + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git a/biometrics_extract/0.2.13/example_inputs.yaml b/biometrics_extract/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..566b496a --- /dev/null +++ b/biometrics_extract/0.2.13/example_inputs.yaml @@ -0,0 +1,24 @@ +sample_type: + - "Normal" +sample_sex: + - "M" +sample_name: + - "test" +sample_group: + - "test" +fafile: + class: File + path: /path/to/fasta +sample_bam: + - class: File + path: /path/to/bam +bed_file: null +vcf_file: + class: File + path: /path/to/vcf +database: null +min_mapping_quality: null +min_base_quality: null +min_coverage: null +min_homozygous_thresh: null +default_genotype: null diff --git a/biometrics_extract/0.2.5/biometrics_extract.cwl b/biometrics_extract/0.2.5/biometrics_extract.cwl new file mode 100644 index 00000000..0abf2aec --- /dev/null +++ b/biometrics_extract/0.2.5/biometrics_extract.cwl @@ -0,0 +1,172 @@ +class: 
CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_extract +baseCommand: + - biometrics + - extract +inputs: + - id: sample_bam + type: + - type: array + items: File + inputBinding: + position: 0 + prefix: --sample-bam + secondaryFiles: + - ^.bai + doc: >- + BAM file. + - id: sample_type + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-type + doc: >- + Sample types: Normal or Tumor. + - id: sample_sex + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-sex + doc: >- + Expected sample sex (i.e. M or F). + - id: sample_group + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-group + doc: >- + The sample group (e.g. the sample patient ID). + - id: sample_name + type: + - type: array + items: string + inputBinding: + position: 0 + prefix: --sample-name + doc: >- + Sample name. If not specified, sample name is automatically figured out from the BAM file. + - id: fafile + type: File + inputBinding: + position: 0 + prefix: --fafile + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta. + - id: vcf_file + type: File + inputBinding: + position: 0 + prefix: --vcf + doc: >- + VCF file containing the SNPs to be queried. + - id: bed_file + type: File? + inputBinding: + position: 0 + prefix: --bed + doc: >- + BED file containing the intervals to be queried. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: min_mapping_quality + type: int? + default: 1 + inputBinding: + position: 0 + prefix: --min-mapping-quality + doc: >- + Minimum mapping quality of reads to be used for pileup. + - id: min_base_quality + type: int? 
+ default: 1 + inputBinding: + position: 0 + prefix: --min-base-quality + doc: >- + Minimum base quality of reads to be used for pileup. + - id: min_coverage + type: int? + default: 10 + inputBinding: + position: 0 + prefix: --min-coverage + doc: >- + Minimum coverage to count a site. + - id: min_homozygous_thresh + type: float? + default: 0.1 + inputBinding: + position: 0 + prefix: --min-homozygous-thresh + doc: >- + Minimum threshold to define homozygous. + - id: default_genotype + type: string? + inputBinding: + position: 0 + prefix: --default-genotype + doc: >- + Default genotype if coverage is too low (options are Het or Hom). +outputs: + - id: biometrics_extract_pickle + type: + type: array + items: File + outputBinding: + glob: |- + ${ + return inputs.sample_name.map(val => { + if (inputs.database) { + return inputs.database + '/' + val + '.pk'; + } else { + return val + '.pk'; + } + }); + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_extract/0.2.5/example_inputs.yaml b/biometrics_extract/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..566b496a --- /dev/null +++ b/biometrics_extract/0.2.5/example_inputs.yaml @@ -0,0 +1,24 @@ +sample_type: + - "Normal" +sample_sex: + - "M" +sample_name: + - "test" +sample_group: + - "test" +fafile: + class: File + 
path: /path/to/fasta +sample_bam: + - class: File + path: /path/to/bam +bed_file: null +vcf_file: + class: File + path: /path/to/vcf +database: null +min_mapping_quality: null +min_base_quality: null +min_coverage: null +min_homozygous_thresh: null +default_genotype: null diff --git a/biometrics_extract/README.md b/biometrics_extract/README.md new file mode 100644 index 00000000..5687fef3 --- /dev/null +++ b/biometrics_extract/README.md @@ -0,0 +1,69 @@ +# CWL for running biometrics extract tool. + +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.12 | | + +The python package source code and Docker file are located on GitHub. + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_extract.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_extract.cwl [-h] [--sample_bam SAMPLE_BAM] + [--sample_type SAMPLE_TYPE] + [--sample_sex SAMPLE_SEX] + [--sample_group SAMPLE_GROUP] + [--sample_name SAMPLE_NAME] --fafile + FAFILE --vcf_file VCF_FILE --bed_file + BED_FILE --database DATABASE + [--min_mapping_quality MIN_MAPPING_QUALITY] + [--min_base_quality MIN_BASE_QUALITY] + [--min_coverage MIN_COVERAGE] + [--min_homozygous_thresh MIN_HOMOZYGOUS_THRESH] + [--default_genotype DEFAULT_GENOTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --sample_bam SAMPLE_BAM + BAM file. + --sample_type SAMPLE_TYPE + Sample types: Normal or Tumor. + --sample_sex SAMPLE_SEX + Expected sample sex (i.e. M or F). + --sample_group SAMPLE_GROUP + The sample group (e.g. the sample patient ID). + --sample_name SAMPLE_NAME + Sample name. If not specified, sample name is + automatically figured out from the BAM file. + --fafile FAFILE Path to reference fasta. + --vcf_file VCF_FILE VCF file containing the SNPs to be queried. 
+ --bed_file BED_FILE BED file containing the intervals to be queried. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --min_mapping_quality MIN_MAPPING_QUALITY + Minimum mapping quality of reads to be used for + pileup. + --min_base_quality MIN_BASE_QUALITY + Minimum base quality of reads to be used for pileup. + --min_coverage MIN_COVERAGE + Minimum coverage to count a site. + --min_homozygous_thresh MIN_HOMOZYGOUS_THRESH + Minimum threshold to define homozygous. + --default_genotype DEFAULT_GENOTYPE + Default genotype if coverage is too low (options are + Het or Hom). +``` diff --git a/biometrics_genotype/0.2.13/biometrics_genotype.cwl b/biometrics_genotype/0.2.13/biometrics_genotype.cwl new file mode 100644 index 00000000..d4d1449a --- /dev/null +++ b/biometrics_genotype/0.2.13/biometrics_genotype.cwl @@ -0,0 +1,149 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_genotype_0_2_13 +baseCommand: + - biometrics + - genotype +inputs: + - id: input + type: + type: array + items: File + inputBinding: + position: 0 + prefix: '--input' + doc: >- + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 0.05 + id: discordance_threshold + type: float? 
+ inputBinding: + position: 0 + prefix: '--discordance-threshold' + doc: >- + Discordance values less than this are regarded as matching samples. + (default: 0.05) + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: '--prefix' + doc: Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: '--plot' + doc: Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: '--json' + doc: Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: '--no-db-compare' + doc: >- + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. + - default: 2 + id: threads + type: int? + inputBinding: + position: 0 + prefix: '--threads' + doc: Number of threads to use. +outputs: + - id: biometrics_genotype_comparisons + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_comparison.csv' + } else { + return 'genotype_comparison.csv' + } + } + - id: biometrics_genotype_cluster_input + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_input.csv' + } else { + return 'genotype_clusters_input.csv' + } + } + - id: biometrics_genotype_cluster_input_database + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_database.csv' + } else { + return 'genotype_clusters_database.csv' + } + } + - id: biometrics_genotype_plot_input + type: File? + outputBinding: + glob: |- + ${ + return 'genotype_comparison_input.html' + } + - id: biometrics_genotype_plot_input_database + type: File? 
+ outputBinding: + glob: |- + ${ + return 'genotype_comparison_database.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git a/biometrics_genotype/0.2.13/example_inputs.yaml b/biometrics_genotype/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..0bc68d94 --- /dev/null +++ b/biometrics_genotype/0.2.13/example_inputs.yaml @@ -0,0 +1,12 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +prefix: 'test' +outdir: null +plot: true +no_db_comparison: false +threads: null +discordance_threshold: null diff --git a/biometrics_genotype/0.2.5/biometrics_genotype.cwl b/biometrics_genotype/0.2.5/biometrics_genotype.cwl new file mode 100644 index 00000000..51285328 --- /dev/null +++ b/biometrics_genotype/0.2.5/biometrics_genotype.cwl @@ -0,0 +1,145 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_genotype +baseCommand: + - biometrics + - genotype +inputs: + - id: input + type: + - type: array + items: File + inputBinding: + position: 0 + prefix: --input + doc: >- + Can be one of three types: (1) path to a CSV 
file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: discordance_threshold + type: float? + default: 0.05 + inputBinding: + position: 0 + prefix: --discordance-threshold + doc: >- + Discordance values less than this are regarded as matching samples. (default: 0.05) + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. + - id: threads + type: int? + default: 2 + inputBinding: + position: 0 + prefix: --threads + doc: >- + Number of threads to use. +outputs: + - id: biometrics_genotype_comparisons + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_comparison.csv' + } else { + return 'genotype_comparison.csv' + } + } + - id: biometrics_genotype_cluster_input + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_input.csv' + } else { + return 'genotype_clusters_input.csv' + } + } + - id: biometrics_genotype_cluster_input_database + type: File? 
+ outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_genotype_clusters_database.csv' + } else { + return 'genotype_clusters_database.csv' + } + } + - id: biometrics_genotype_plot_input + type: File? + outputBinding: + glob: |- + ${ + return 'genotype_comparison_input.html' + } + - id: biometrics_genotype_plot_input_database + type: File? + outputBinding: + glob: |- + ${ + return 'genotype_comparison_database.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_genotype/0.2.5/example_inputs.yaml b/biometrics_genotype/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..0bc68d94 --- /dev/null +++ b/biometrics_genotype/0.2.5/example_inputs.yaml @@ -0,0 +1,12 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +prefix: 'test' +outdir: null +plot: true +no_db_comparison: false +threads: null +discordance_threshold: null diff --git a/biometrics_genotype/README.md b/biometrics_genotype/README.md new file mode 100644 index 00000000..fd398b99 --- /dev/null +++ b/biometrics_genotype/README.md @@ -0,0 +1,53 @@ +# CWL for running biometrics genotype tool. 
+ +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.12 | | + +The python package source code and Docker file are located on GitHub. + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_genotype.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_genotype.cwl [-h] --input INPUT [--database DATABASE] + [--discordance_threshold DISCORDANCE_THRESHOLD] + [--prefix PREFIX] [--plot] [--json] + [--no_db_comparison] [--threads THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --discordance_threshold DISCORDANCE_THRESHOLD + Discordance values less than this are regarded as + matching samples. (default: 0.05) + --prefix PREFIX Output file prefix. + --plot Also output plots of the data. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. + --threads THREADS Number of threads to use. 
+``` diff --git a/biometrics_major/0.2.13/biometrics_major.cwl b/biometrics_major/0.2.13/biometrics_major.cwl new file mode 100644 index 00000000..b5a42a9e --- /dev/null +++ b/biometrics_major/0.2.13/biometrics_major.cwl @@ -0,0 +1,123 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_major_0_2_13 +baseCommand: + - biometrics + - major +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: '--input' + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 0.6 + id: major_threshold + type: float? + inputBinding: + position: 0 + prefix: '--major-threshold' + doc: Major contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: '--prefix' + doc: Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: '--plot' + doc: Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: '--json' + doc: Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: '--no-db-compare' + doc: >- + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. 
+outputs: + - id: biometrics_major_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.csv' + } else { + return 'major_contamination.csv' + } + } + - id: biometrics_major_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.json' + } else { + return 'major_contamination.json' + } + } + - id: biometrics_major_plot + type: File? + outputBinding: + glob: |- + ${ + return 'major_contamination.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git a/biometrics_major/0.2.13/example_inputs.yaml b/biometrics_major/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..da03de55 --- /dev/null +++ b/biometrics_major/0.2.13/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +minor_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_major/0.2.5/biometrics_major.cwl b/biometrics_major/0.2.5/biometrics_major.cwl new file mode 100644 index 00000000..217c9d96 --- /dev/null +++ b/biometrics_major/0.2.5/biometrics_major.cwl @@ -0,0 +1,120 @@ +class: CommandLineTool +cwlVersion: 
v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_major +baseCommand: + - biometrics + - major +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: major_threshold + type: float? + default: 0.6 + inputBinding: + position: 0 + prefix: --major-threshold + doc: >- + Major contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. +outputs: + - id: biometrics_major_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.csv' + } else { + return 'major_contamination.csv' + } + } + - id: biometrics_major_json + type: File? 
+ outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_major_contamination.json' + } else { + return 'major_contamination.json' + } + } + - id: biometrics_major_plot + type: File? + outputBinding: + glob: |- + ${ + return 'major_contamination.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_major/0.2.5/example_inputs.yaml b/biometrics_major/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..da03de55 --- /dev/null +++ b/biometrics_major/0.2.5/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +major_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_major/README.md b/biometrics_major/README.md new file mode 100644 index 00000000..10fa476e --- /dev/null +++ b/biometrics_major/README.md @@ -0,0 +1,51 @@ +# CWL for running biometrics major tool. + +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.12 | | + +The python package source code and Docker file are located on GitHub.
+ +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_major.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_major.cwl [-h] --input INPUT [--database DATABASE] + [--major_threshold MAJOR_THRESHOLD] + [--prefix PREFIX] [--plot] [--json] + [--no_db_comparison] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --major_threshold MAJOR_THRESHOLD + Major contamination threshold for bad sample. + --prefix PREFIX Output file prefix. + --plot Also output plots of the data. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. 
+``` diff --git a/biometrics_minor/0.2.13/biometrics_minor.cwl b/biometrics_minor/0.2.13/biometrics_minor.cwl new file mode 100644 index 00000000..2535eb6b --- /dev/null +++ b/biometrics_minor/0.2.13/biometrics_minor.cwl @@ -0,0 +1,130 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_minor_0_2_13 +baseCommand: + - biometrics + - minor +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: '--input' + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 0.002 + id: minor_threshold + type: float? + inputBinding: + position: 0 + prefix: '--minor-threshold' + doc: Minor contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: '--prefix' + doc: Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: '--plot' + doc: Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: '--json' + doc: Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: '--no-db-compare' + doc: >- + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. 
+outputs: + - id: biometrics_minor_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.csv' + } else { + return 'minor_contamination.csv' + } + } + - id: biometrics_minor_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.json' + } else { + return 'minor_contamination.json' + } + } + - id: biometrics_minor_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination.html' + } + - id: biometrics_minor_sites_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination_sites.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git a/biometrics_minor/0.2.13/example_inputs.yaml b/biometrics_minor/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..bddb4c72 --- /dev/null +++ b/biometrics_minor/0.2.13/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +minor_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_minor/0.2.5/biometrics_minor.cwl b/biometrics_minor/0.2.5/biometrics_minor.cwl new file mode 100644 index
00000000..dc0410cb --- /dev/null +++ b/biometrics_minor/0.2.5/biometrics_minor.cwl @@ -0,0 +1,127 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_minor +baseCommand: + - biometrics + - minor +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: minor_threshold + type: float? + default: 0.002 + inputBinding: + position: 0 + prefix: --minor-threshold + doc: >- + Minor contamination threshold for bad sample. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: plot + type: boolean? + inputBinding: + position: 0 + prefix: --plot + doc: >- + Also output plots of the data. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. 
+outputs: + - id: biometrics_minor_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.csv' + } else { + return 'minor_contamination.csv' + } + } + - id: biometrics_minor_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_minor_contamination.json' + } else { + return 'minor_contamination.json' + } + } + - id: biometrics_minor_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination.html' + } + - id: biometrics_minor_sites_plot + type: File? + outputBinding: + glob: |- + ${ + return 'minor_contamination_sites.html' + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_minor/0.2.5/example_inputs.yaml b/biometrics_minor/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..bddb4c72 --- /dev/null +++ b/biometrics_minor/0.2.5/example_inputs.yaml @@ -0,0 +1,11 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +minor_threshold: null +prefix: null +plot: true +json: true +no_db_comparison: null diff --git a/biometrics_minor/README.md b/biometrics_minor/README.md new file mode 100644 index 00000000..af94ea40 --- /dev/null +++
b/biometrics_minor/README.md @@ -0,0 +1,51 @@ +# CWL for running biometrics minor tool. + +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.12 | | + +The python package source code and Docker file are located on GitHub. + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_minor.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_minor.cwl [-h] --input INPUT [--database DATABASE] + [--minor_threshold MINOR_THRESHOLD] + [--prefix PREFIX] [--plot] [--json] + [--no_db_comparison] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --minor_threshold MINOR_THRESHOLD + Minor contamination threshold for bad sample. + --prefix PREFIX Output file prefix. + --plot Also output plots of the data. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. 
+``` diff --git a/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl new file mode 100644 index 00000000..e8755e55 --- /dev/null +++ b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl @@ -0,0 +1,110 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_sexmismatch_0_2_13 +baseCommand: + - biometrics + - sexmismatch +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: '--input' + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample + information (one per line). For example: + sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a + '*.pk' file that was produced by the 'extract' tool. (3) Name of the + sample to analyze; this assumes there is a file named '{sample_name}.pk' + in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: '--database' + doc: >- + Directory to store the intermediate files after running the extraction + step. + - default: 50 + id: coverage_threshold + type: int? + inputBinding: + position: 0 + prefix: '--coverage-threshold' + doc: Samples with Y chromosome above this value will be considered male. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: '--prefix' + doc: Output file prefix. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: '--json' + doc: Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: '--no-db-compare' + doc: >- + Do not compare the sample(s) you provided to all samples in the database, + only compare them with each other. 
+outputs: + - id: biometrics_sexmismatch_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.csv' + } else { + return 'sex_mismatch.csv' + } + } + - id: biometrics_sexmismatch_json + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.json' + } else { + return 'sex_mismatch.json' + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.13 diff --git a/biometrics_sexmismatch/0.2.13/example_inputs.yaml b/biometrics_sexmismatch/0.2.13/example_inputs.yaml new file mode 100644 index 00000000..60832e43 --- /dev/null +++ b/biometrics_sexmismatch/0.2.13/example_inputs.yaml @@ -0,0 +1,10 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +coverage_threshold: null +prefix: null +json: true +no_db_comparison: null diff --git a/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl new file mode 100644 index 00000000..bae28a19 --- /dev/null +++ b/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl @@ -0,0 +1,106 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 
'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: biometrics_sexmismatch +baseCommand: + - biometrics + - sexmismatch +inputs: + - id: input + type: + type: array + items: File + inputBinding: + prefix: --input + inputBinding: + position: 0 + doc: >- + Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once. + - id: database + type: string? + inputBinding: + position: 0 + prefix: --database + doc: >- + Directory to store the intermediate files after running the extraction step. + - id: coverage_threshold + type: int? + default: 50 + inputBinding: + position: 0 + prefix: --coverage-threshold + doc: >- + Samples with Y chromosome above this value will be considered male. + - id: prefix + type: string? + inputBinding: + position: 0 + prefix: --prefix + doc: >- + Output file prefix. + - id: json + type: boolean? + inputBinding: + position: 0 + prefix: --json + doc: >- + Also output data in JSON format. + - id: no_db_comparison + type: boolean? + inputBinding: + position: 0 + prefix: --no-db-compare + doc: >- + Do not compare the sample(s) you provided to all samples in the database, only compare them with each other. +outputs: + - id: biometrics_sexmismatch_csv + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.csv' + } else { + return 'sex_mismatch.csv' + } + } + - id: biometrics_sexmismatch_json + type: File? 
+ outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '_sex_mismatch.json' + } else { + return 'sex_mismatch.json' + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': biometrics + 'doap:revision': 0.2.5 diff --git a/biometrics_sexmismatch/0.2.5/example_inputs.yaml b/biometrics_sexmismatch/0.2.5/example_inputs.yaml new file mode 100644 index 00000000..60832e43 --- /dev/null +++ b/biometrics_sexmismatch/0.2.5/example_inputs.yaml @@ -0,0 +1,10 @@ +input: + - class: File + path: "../biometrics_extract_0.2.5/test.pk" + - class: File + path: "../biometrics_extract_0.2.5/test2.pk" +database: null +coverage_threshold: null +prefix: null +json: true +no_db_comparison: null diff --git a/biometrics_sexmismatch/README.md b/biometrics_sexmismatch/README.md new file mode 100644 index 00000000..81b02145 --- /dev/null +++ b/biometrics_sexmismatch/README.md @@ -0,0 +1,52 @@ +# CWL for running biometrics sexmismatch tool. + +| Tool | Latest version | Location | +|--- |--- |--- | +| biometrics | 0.2.12 | | + +The python package source code and Docker file are located on GitHub. 
+ +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner biometrics_sexmismatch.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: biometrics_sexmismatch.cwl [-h] --input INPUT + [--database DATABASE] + [--coverage_threshold COVERAGE_THRESHOLD] + [--prefix PREFIX] [--json] + [--no_db_comparison] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Can be one of three types: (1) path to a CSV file + containing sample information (one per line). For + example: sample_name,sample_bam,sample_type,sample_sex + ,sample_group. (2) Path to a '*.pk' file that was + produced by the 'extract' tool. (3) Name of the sample + to analyze; this assumes there is a file named + '{sample_name}.pk' in your database directory. Can be + specified more than once. + --database DATABASE Directory to store the intermediate files after + running the extraction step. + --coverage_threshold COVERAGE_THRESHOLD + Samples with Y chromosome above this value will be + considered male. + --prefix PREFIX Output file prefix. + --json Also output data in JSON format. + --no_db_comparison Do not compare the sample(s) you provided to all + samples in the database, only compare them with each + other. 
+``` diff --git a/bwa_mem_0.7.17/README.md b/bwa_mem_0.7.17/README.md new file mode 100644 index 00000000..fd6785d6 --- /dev/null +++ b/bwa_mem_0.7.17/README.md @@ -0,0 +1,136 @@ +# CWL and Dockerfile for running BWA MEM + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| ------ | ------- | ----------------------------------------------- | +| ubuntu | 16.04 | - | +| BWA | 0.7.17 | https://github.com/lh3/bwa/releases/tag/v0.7.17 | + +[![](https://images.microbadger.com/badges/version/mskaccess/bwa_mem_0.7.17.svg)](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskaccess/bwa_mem_0.7.17.svg)](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own image badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bwa_mem_0.7.17.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/bwa_mem_toil.log --jobStore /path/to/bwa_mem_jobStore --batchSystem lsf --workDir /path/to/bwa_mem_toil_log --outdir . 
--writeLogs /path/to/bwa_mem_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml > bwa_mem_toil.stdout 2> bwa_mem_toil.stderr & +``` + +### Usage + +``` +usage: bwa_mem_0.7.17.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --reads + READS --reference REFERENCE [-A A] [-B B] [-C] + [-E E] [-L L] [-M] [-O O] [-P] [-S] [-T T] [-U U] + [-a] [-c C] [-d D] [-k K] [-K K] [--output OUTPUT] + [-p] [-r R] [-v V] [-w W] [-y Y] [-D D] [-W W] + [-m M] [-e] [-x X] [-j J] [--he HE] [-V] [-Y] [-I I] + [-R R] [--sample_id SAMPLE_ID] [--lane_id LANE_ID] + [--platform PLATFORM] + [--platform_unit PLATFORM_UNIT] + [--center_name CENTER_NAME] + [--library_id LIBRARY_ID] + [job_order] + +bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff] +[-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen] +[-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel] +db.prefix reads.fq [mates.fq] Align 70bp-1Mbp query sequences with the BWA-MEM +algorithm. Briefly, the algorithm works by seeding alignments with maximal +exact matches (MEMs) and then extending seeds with the affine-gap Smith- +Waterman algorithm (SW). If mates.fq file is absent and option -p is not set, +this command regards input reads are single-end. If mates.fq is present, this +command assumes the i-th read in reads.fq and the i-th read in mates.fq +constitute a read pair. If -p is used, the command assumes the 2i-th and the +(2i+1)-th read in reads.fq constitute a read pair (such input file is said to +be interleaved). In this case, mates.fq is ignored. In the paired-end mode, +the mem command will infer the read orientation and the insert size +distribution from a batch of reads. The BWA-MEM algorithm performs local +alignment. 
It may produce multiple primary alignments for different part of a +query sequence. This is a crucial feature for long sequences. However, some +tools such as Picard’s markDuplicates does not work with split alignments. One +may consider to use option -M to flag shorter split hits as secondary. + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --reads READS + --reference REFERENCE + -A A score for a sequence match, which scales options + -TdBOELU unless overridden [1] + -B B penalty for a mismatch [4] + -C append FASTA/FASTQ comment to SAM output + -E E gap extension penalty; a gap of size k cost '{-O} + + {-E}*k' [1,1] + -L L penalty for 5'- and 3'-end clipping [5,5] + -M + -O O gap open penalties for deletions and insertions [6,6] + -P skip pairing; mate rescue performed unless -S also in + use + -S skip mate rescue + -T T minimum score to output [30] + -U U penalty for an unpaired read pair [17] + -a output all alignments for SE or unpaired PE + -c C skip seeds with more than INT occurrences [500] + -d D off-diagonal X-dropoff [100] + -k K minimum seed length [19] + -K K process INT input bases in each batch regardless of + nThreads (for reproducibility) [] + --output OUTPUT + -p smart pairing (ignoring in2.fq) + -r R look for internal seeds inside a seed longer than {-k} + * FLOAT [1.5] + -v V verbosity level: 1=error, 2=warning, 3=message, + 4+=debugging [3] + -w W band width for banded alignment [100] + -y Y seed occurrence for the 3rd round seeding [20] + -D D drop chains shorter than FLOAT fraction of the longest + overlapping chain [0.50] + -W W discard a chain if seeded bases shorter than INT [0] + -m M perform at most INT rounds of mate rescues for each + read [50] + -e + -x X read type. 
Setting -x changes multiple parameters + unless overridden [null] pacbio: -k17 -W40 -r10 -A1 + -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d: -k14 -W20 + -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to + ref) intractg: -B9 -O16 -L5 (intra-species contigs to + ref) + -j J treat ALT contigs as part of the primary assembly + (i.e. ignore <idxbase>.alt file) + --he HE if there are <INT hits with score >80% of the max + score, output all in XA [5,200] + -V output the reference FASTA header in the XR tag + -Y use soft clipping for supplementary alignments + -I I + -R R STR read group header line such as '@RG\tID -foo\tSM + -bar' [null] + --sample_id SAMPLE_ID + --lane_id LANE_ID + --platform PLATFORM + --platform_unit PLATFORM_UNIT + --center_name CENTER_NAME + --library_id LIBRARY_ID +``` diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl new file mode 100644 index 00000000..5e7e55dd --- /dev/null +++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl @@ -0,0 +1,351 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - bwa + - mem +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: reads + type: 'File[]' + inputBinding: + position: 3 + - id: reference + type: File + inputBinding: + position: 2 + secondaryFiles: + - .amb + - .ann + - .bwt + - .pac + - .sa + - .fai + - id: A + type: int? + inputBinding: + position: 0 + prefix: '-A' + doc: >- + score for a sequence match, which scales options -TdBOELU unless + overridden [1] + - id: B + type: int? + inputBinding: + position: 0 + prefix: '-B' + doc: 'penalty for a mismatch [4]' + - id: C + type: boolean?
+ inputBinding: + position: 0 + prefix: '-C' + doc: append FASTA/FASTQ comment to SAM output + - id: E + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-E' + itemSeparator: ',' + doc: 'gap extension penalty; a gap of size k cost ''{-O} + {-E}*k'' [1,1]' + - id: L + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-L' + itemSeparator: ',' + doc: 'penalty for 5''- and 3''-end clipping [5,5]' + - id: M + type: boolean? + inputBinding: + position: 0 + prefix: '-M' + - id: O + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-O' + itemSeparator: ',' + doc: 'gap open penalties for deletions and insertions [6,6]' + - id: P + type: boolean? + inputBinding: + position: 0 + prefix: '-P' + doc: skip pairing; mate rescue performed unless -S also in use + - id: S + type: boolean? + inputBinding: + position: 0 + prefix: '-S' + doc: skip mate rescue + - id: T + type: int? + inputBinding: + position: 0 + prefix: '-T' + doc: 'minimum score to output [30]' + - id: U + type: int? + inputBinding: + position: 0 + prefix: '-U' + doc: 'penalty for an unpaired read pair [17]' + - id: a + type: boolean? + inputBinding: + position: 0 + prefix: '-a' + doc: output all alignments for SE or unpaired PE + - id: c + type: int? + inputBinding: + position: 0 + prefix: '-c' + doc: 'skip seeds with more than INT occurrences [500]' + - id: d + type: int? + inputBinding: + position: 0 + prefix: '-d' + doc: 'off-diagonal X-dropoff [100]' + - id: k + type: int? + inputBinding: + position: 0 + prefix: '-k' + doc: 'minimum seed length [19]' + - id: K + type: int? + inputBinding: + position: 0 + prefix: '-K' + doc: >- + process INT input bases in each batch regardless of nThreads (for + reproducibility) [] + - id: output + type: string? + - id: p + type: boolean? + inputBinding: + position: 0 + prefix: '-p' + doc: smart pairing (ignoring in2.fq) + - id: r + type: float? 
+ inputBinding: + position: 0 + prefix: '-r' + doc: 'look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]' + - id: v + type: int? + inputBinding: + position: 0 + prefix: '-v' + doc: 'verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3]' + - id: w + type: int? + inputBinding: + position: 0 + prefix: '-w' + doc: 'band width for banded alignment [100]' + - id: 'y' + type: int? + inputBinding: + position: 0 + prefix: '-y' + doc: 'seed occurrence for the 3rd round seeding [20]' + - id: D + type: float? + inputBinding: + position: 0 + prefix: '-D' + doc: >- + drop chains shorter than FLOAT fraction of the longest overlapping chain + [0.50] + - id: W + type: int? + inputBinding: + position: 0 + prefix: '-W' + doc: 'discard a chain if seeded bases shorter than INT [0]' + - id: m + type: int? + inputBinding: + position: 0 + prefix: '-m' + doc: 'perform at most INT rounds of mate rescues for each read [50]' + - id: e + type: boolean? + inputBinding: + position: 0 + prefix: '-e' + - id: x + type: string? + inputBinding: + position: 0 + prefix: '-x' + doc: >- + read type. Setting -x changes multiple parameters unless overridden [null] + pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d: + -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) + intractg: -B9 -O16 -L5 (intra-species contigs to ref) + - id: H + type: boolean? + inputBinding: + position: 0 + prefix: '-H' + doc: >- + Use hard clipping ’H’ in the SAM output. This option may dramatically + reduce the redundancy of output when mapping long contig or BAC sequences + - id: j + type: File? + inputBinding: + position: 0 + prefix: '-j' + doc: >- + treat ALT contigs as part of the primary assembly (i.e. ignore + .alt file) + - id: he + type: 'int[]?' + inputBinding: + position: 0 + prefix: '-h' + itemSeparator: ',' + doc: >- + if there are 80% of the max score, output all in XA + [5,200] + - id: V + type: boolean? 
+ inputBinding: + position: 0 + prefix: '-V' + doc: output the reference FASTA header in the XR tag + - id: 'Y' + type: boolean? + inputBinding: + position: 0 + prefix: '-Y' + doc: use soft clipping for supplementary alignments + - id: I + type: string? + inputBinding: + position: 0 + prefix: '-M' + - id: R + type: string? + doc: 'STR read group header line such as ''@RG\tID -foo\tSM -bar'' [null]' + - id: sample_id + type: string? + - id: lane_id + type: string? + - id: platform + type: string? + - id: platform_unit + type: string? + - id: center_name + type: string? + - id: library_id + type: string? +outputs: + - id: bwa_mem_output_sam + type: File + outputBinding: + glob: |- + ${ + if (inputs.output) + return inputs.output; + return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); + } +doc: >- + bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff] + [-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen] + [-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel] + db.prefix reads.fq [mates.fq] + + Align 70bp-1Mbp query sequences with the BWA-MEM algorithm. Briefly, the + algorithm works by seeding alignments with maximal exact matches (MEMs) and + then extending seeds with the affine-gap Smith-Waterman algorithm (SW). + + + If mates.fq file is absent and option -p is not set, this command regards + input reads are single-end. If mates.fq is present, this command assumes the + i-th read in reads.fq and the i-th read in mates.fq constitute a read pair. If + -p is used, the command assumes the 2i-th and the (2i+1)-th read in reads.fq + constitute a read pair (such input file is said to be interleaved). In this + case, mates.fq is ignored. In the paired-end mode, the mem command will infer + the read orientation and the insert size distribution from a batch of reads. + + + The BWA-MEM algorithm performs local alignment. 
It may produce multiple + primary alignments for different part of a query sequence. This is a crucial + feature for long sequences. However, some tools such as Picard’s + markDuplicates does not work with split alignments. One may consider to use + option -M to flag shorter split hits as secondary. +label: bwa_mem_0.7.17 +arguments: + - position: 0 + prefix: '-t' + valueFrom: $(runtime.cores) + - position: 0 + prefix: '-R' + valueFrom: |- + ${ + if (inputs.sample_id) { + var rg_id = "@RG\\tID:" + inputs.sample_id + "\\tSM:" + inputs.sample_id; + if (inputs.library_id) { + rg_id += "\\tLB:" + inputs.library_id; + } if (inputs.platform) { + rg_id += "\\tPL:" + inputs.platform; + } if (inputs.platform_unit) { + rg_id += "\\tPU:" + inputs.platform_unit; + } if (inputs.center_name) { + rg_id += "\\tCN:" + inputs.center_name; + } + return rg_id + } else { + return inputs.R + } + } +requirements: + - class: ResourceRequirement + ramMin: 34000 + coresMin: 16 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/bwa:0.7.17' + - class: InlineJavascriptRequirement +stdout: |- + ${ + if (inputs.output) + return inputs.output; + return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam'); + } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': bwa + 'doap:revision': 0.7.17 diff --git a/bwa_mem_0.7.17/container/Dockerfile b/bwa_mem_0.7.17/container/Dockerfile new file mode 100644 index 00000000..49cc6831 --- /dev/null +++ b/bwa_mem_0.7.17/container/Dockerfile @@ -0,0 +1,23 @@ +FROM ubuntu:16.04 + +LABEL maintainer="Ian Johnson 
(johnsoni@mskcc.org)" \ + version.image="0.1.0" \ + version.bwa="0.7.17" \ + version.ubuntu="16.04" \ + source.bwa="https://github.com/lh3/bwa/releases/tag/v0.7.17" + +ENV BWA_VERSION 0.7.17 + +RUN apt-get -y update \ + # install build tools and dependencies + && apt-get -y install build-essential zlib1g-dev wget unzip \ + # download and unzip bwa + && cd /tmp && wget "https://github.com/lh3/bwa/archive/v${BWA_VERSION}.zip" \ + && unzip "v${BWA_VERSION}.zip" \ + # build + && cd "/tmp/bwa-${BWA_VERSION}" \ + && make \ + # move binaries to /usr/bin + && mv "/tmp/bwa-${BWA_VERSION}/bwa" /usr/bin \ + # clean up + && rm -rf /tmp/* diff --git a/bwa_mem_0.7.17/example_inputs.yaml b/bwa_mem_0.7.17/example_inputs.yaml new file mode 100644 index 00000000..74683384 --- /dev/null +++ b/bwa_mem_0.7.17/example_inputs.yaml @@ -0,0 +1,9 @@ +reads: +- class: File + path: "path/to/fastq_R1.fastq" +- class: File + path: "path/to/fastq_R2.fastq" +reference: + class: File + path: "/path/to/reference.fasta" +sample_id: test_sample_id diff --git a/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl b/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl index 8741d4fd..41701c6c 100644 --- a/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl +++ b/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl @@ -4,7 +4,6 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' - sbg: 'https://www.sevenbridges.com/' baseCommand: - bwa - mem diff --git a/cci_utils/0.2.8/example_inputs.yaml b/cci_utils/0.2.8/example_inputs.yaml new file mode 100644 index 00000000..5929933f --- /dev/null +++ b/cci_utils/0.2.8/example_inputs.yaml @@ -0,0 +1,3 @@ +dir: {class: Directory, path: /path/to/sample_info_directory } +samples-json: {class: File, path: /path/to/sample_meta.json } +config: {class: File, path: /path/to/config.yaml } diff --git a/cci_utils/0.2.8/general_stats_parse.cwl b/cci_utils/0.2.8/general_stats_parse.cwl new file mode 100644 index 00000000..339f3013 --- /dev/null +++ 
b/cci_utils/0.2.8/general_stats_parse.cwl @@ -0,0 +1,61 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: general_stats_parse +baseCommand: + - general_stats_parse.py +arguments: + - --dir + - . +inputs: + - id: directory + type: Directory + doc: Directory containing results. + - id: samples-json + type: File + inputBinding: + prefix: '--samples-json' + doc: Sample JSON file. + - id: config + type: File + inputBinding: + prefix: '--config' + doc: MultQC config file. +outputs: + - id: aggregate_parsed_stats + label: aggregate_parsed_stats + type: Directory + outputBinding: + glob: . + outputEval: |- + ${ + self[0].basename = "aggregate_parsed_stats"; + return self[0] + } +label: general_stats_parse +requirements: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/cci_utils:0.2.8' + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.directory) + writable: true + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py new file mode 100644 index 00000000..30f6dd27 --- /dev/null +++ b/cwl_commandlinetools/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +"""Top-level package for cwl-commandlinetools.""" + +__author__ = """msk-access""" +__email__ = 'msk.access@gmail.com' +__version__ = '1.2.0' diff --git a/cwl_commandlinetools/cwl_commandlinetools.py 
b/cwl_commandlinetools/cwl_commandlinetools.py new file mode 100644 index 00000000..7fbbae4f --- /dev/null +++ b/cwl_commandlinetools/cwl_commandlinetools.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +"""Main module.""" diff --git a/delly_0.9.1/README.md b/delly_0.9.1/README.md new file mode 100644 index 00000000..d3f8c542 --- /dev/null +++ b/delly_0.9.1/README.md @@ -0,0 +1,81 @@ +# CWL and Dockerfile for running Delly + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu | 18.04 | - | +| DELLY | 0.9.1 | https://github.com/dellytools/delly | + + +## CWL + +- CWL specification 1.0 +- Use example_input.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner delly_0.9.1.cwl example_input.yml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/delly_0.9.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir .
--writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_0.9.1.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr & +``` + +### Usage + +``` +usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--out_file OUT_FILE] --reference_genome + REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS] + [--vcffile VCFFILE] [--svtype SVTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --out_file OUT_FILE The name to be used for the output bcf file + --reference_genome REFERENCE_GENOME + reference genome fasta file + --exclude_regions EXCLUDE_REGIONS + file with regions to exclude + --vcffile VCFFILE input VCF/BCF file for genotyping + --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL] +``` + +## Disclaimer +Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license: + +Copyright (c) 2012- European Molecular Biology Laboratory (EMBL) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3.
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/delly_0.9.1/container/Dockerfile b/delly_0.9.1/container/Dockerfile new file mode 100644 index 00000000..ce58d263 --- /dev/null +++ b/delly_0.9.1/container/Dockerfile @@ -0,0 +1,65 @@ +# taken from: https://github.com/dellytools/delly/blob/main/Dockerfile +# modify for additional functionality +################## Base Image ########## +FROM --platform=linux/amd64 ubuntu:18.04 +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG DELLY_VERSION="0.9.1" +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Eric Buehlere (buehlere@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.delly=${DELLY_VERSION} \ + org.opencontainers.image.vcs-url="https://github.com/dellytools/delly.git" \ + 
org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses ubuntu:18.04 as the base image to build \ + DELLY version ${DELLY_VERSION}" + + +################## INSTALL ########################## +RUN apt-get update && apt-get install -y \ + autoconf \ + build-essential \ + cmake \ + g++ \ + gfortran \ + git \ + libcurl4-gnutls-dev \ + hdf5-tools \ + libboost-date-time-dev \ + libboost-program-options-dev \ + libboost-system-dev \ + libboost-filesystem-dev \ + libboost-iostreams-dev \ + libbz2-dev \ + libhdf5-dev \ + libncurses-dev \ + liblzma-dev \ + zlib1g-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# set environment +ENV BOOST_ROOT /usr + +# Download delly +RUN cd /opt \ + && git clone --recursive https://github.com/dellytools/delly.git \ + && cd /opt/delly/ \ + && git checkout tags/v${DELLY_VERSION} \ + && make STATIC=1 all \ + && make install + +# Add Delly to PATH +ENV PATH="/opt/delly/bin:${PATH}" + +# by default /bin/sh +CMD ["/bin/sh"] diff --git a/delly_0.9.1/delly_0.9.1.cwl b/delly_0.9.1/delly_0.9.1.cwl new file mode 100644 index 00000000..067768b5 --- /dev/null +++ b/delly_0.9.1/delly_0.9.1.cwl @@ -0,0 +1,131 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - delly +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: out_file + type: string? 
+ inputBinding: + position: 0 + prefix: '-o' + shellQuote: false + doc: The name to be used for the output bcf file + - id: reference_genome + type: File + inputBinding: + position: 0 + prefix: '-g' + shellQuote: false + doc: reference genome fasta file + secondaryFiles: + - ^.fasta.fai + - id: input_bams + type: + - File + - type: array + items: File + inputBinding: + position: 99 + shellQuote: false + doc: >- + an indexed bam tumor file, an indexed bam control file or it can be an + array of indexed bam files + secondaryFiles: + - ^.bai + - id: exclude_regions + type: File? + inputBinding: + position: 0 + prefix: '-x' + shellQuote: false + doc: file with regions to exclude + - id: vcffile + type: File? + inputBinding: + position: 0 + prefix: '-v' + shellQuote: false + doc: input VCF/BCF file for genotyping + - id: svtype + type: string? + inputBinding: + position: 0 + prefix: '-t' + shellQuote: false + doc: 'SV type to compute [DEL, INS, DUP, INV, BND, ALL]' + - id: geno_qual + type: int? + inputBinding: + position: 71 + prefix: '-u' + doc: min. mapping quality for genotyping + - id: dump + type: File? + inputBinding: + position: 0 + prefix: '-d' + doc: gzipped output file for SV-reads (optional) + - id: map_qual + type: int? + inputBinding: + position: 0 + prefix: '-q' + doc: min. paired-end (PE) mapping quality + - id: qual_tra + type: int? + inputBinding: + position: 0 + prefix: '-r' + doc: min. PE quality for translocation + - id: mad_cutoff + type: int? 
+ inputBinding: + position: 0 + prefix: '-s' + doc: 'insert size cutoff, median+s*MAD (deletions only)' +outputs: + - id: bcf_out + type: File + outputBinding: + glob: $(inputs.out_file) +arguments: + - call +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 40000 + coresMin: 4 +hints: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/delly:0.9.1' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': delly + 'doap:revision': 0.9.1 diff --git a/delly_0.9.1/example_input.yml b/delly_0.9.1/example_input.yml new file mode 100644 index 00000000..d84fb73c --- /dev/null +++ b/delly_0.9.1/example_input.yml @@ -0,0 +1,5 @@ +out_file: "name_of_output.bcf" +reference_genome: {class: File, path: path_to_file.fasta} +input_bams: + - {class: File, path: /path/to/file.bam} + - {class: File, path: /path/to/file.bam} diff --git a/delly_1.0.3/README.md b/delly_1.0.3/README.md new file mode 100644 index 00000000..3bd6c5be --- /dev/null +++ b/delly_1.0.3/README.md @@ -0,0 +1,81 @@ +# CWL and Dockerfile for running Delly + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu | 18.04 | - | +| DELLY | 1.0.3 | https://github.com/dellytools/delly | + + +## CWL + +- CWL specification 1.0 +- Use example_input.yml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner delly_1.0.3.cwl example_input.yml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version
3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/delly_1.0.3.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . --writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_1.0.3.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr & +``` + +### Usage + +``` +usage: delly_1.0.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--out_file OUT_FILE] --reference_genome + REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS] + [--vcffile VCFFILE] [--svtype SVTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --out_file OUT_FILE The name to be used for the output bcf file + --reference_genome REFERENCE_GENOME + reference genome fasta file + --exclude_regions EXCLUDE_REGIONS + file with regions to exclude + --vcffile VCFFILE input VCF/BCF file for genotyping + --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL] +``` + +## Disclaimer +Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license: + +Copyright (c) 2012- European Molecular Biology Laboratory (EMBL) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + 1.
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/delly_1.0.3/container/Dockerfile b/delly_1.0.3/container/Dockerfile new file mode 100644 index 00000000..56bd8db0 --- /dev/null +++ b/delly_1.0.3/container/Dockerfile @@ -0,0 +1,65 @@ +# taken from: https://github.com/dellytools/delly/blob/main/Dockerfile +# modify for additional functionality +################## Base Image ########## +FROM --platform=linux/amd64 ubuntu:18.04 +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG DELLY_VERSION="1.0.3" +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Eric Buehler (buehlere@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.delly=${DELLY_VERSION} \ + org.opencontainers.image.vcs-url="https://github.com/dellytools/delly.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses ubuntu:18.04 as the base image to build \ + DELLY version ${DELLY_VERSION}" + + +################## INSTALL ########################## +RUN apt-get update && apt-get install -y \ + autoconf \ + build-essential \ + cmake \ + g++ \ + gfortran \ + git \ + libcurl4-gnutls-dev \ + hdf5-tools \ + libboost-date-time-dev \ + libboost-program-options-dev \ + libboost-system-dev \ + libboost-filesystem-dev \ + libboost-iostreams-dev \ + libbz2-dev \ + libhdf5-dev \ + libncurses-dev \ + liblzma-dev \ + zlib1g-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# set environment +ENV BOOST_ROOT /usr + +# Install delly +RUN cd /opt \ + && git clone --recursive https://github.com/dellytools/delly.git \ + && cd /opt/delly/ \ + && git checkout tags/v${DELLY_VERSION} \ + && make STATIC=1 all \ + && make install + +# Add Delly 
to PATH +ENV PATH="/opt/delly/bin:${PATH}" + +# by default /bin/sh +CMD ["/bin/sh"] diff --git a/delly_1.0.3/delly_1.0.3.cwl b/delly_1.0.3/delly_1.0.3.cwl new file mode 100644 index 00000000..f1525ce7 --- /dev/null +++ b/delly_1.0.3/delly_1.0.3.cwl @@ -0,0 +1,131 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - delly +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: out_file + type: string? + inputBinding: + position: 0 + prefix: '-o' + shellQuote: false + doc: The name to be used for the output bcf file + - id: reference_genome + type: File + inputBinding: + position: 0 + prefix: '-g' + shellQuote: false + doc: reference genome fasta file + secondaryFiles: + - ^.fasta.fai + - id: input_bams + type: + - File + - type: array + items: File + inputBinding: + position: 99 + shellQuote: false + doc: >- + an indexed bam tumor file, an indexed bam control file or it can be an + array of indexed bam files + secondaryFiles: + - ^.bai + - id: exclude_regions + type: File? + inputBinding: + position: 0 + prefix: '-x' + shellQuote: false + doc: file with regions to exclude + - id: vcffile + type: File? + inputBinding: + position: 0 + prefix: '-v' + shellQuote: false + doc: input VCF/BCF file for genotyping + - id: svtype + type: string? + inputBinding: + position: 0 + prefix: '-t' + shellQuote: false + doc: 'SV type to compute [DEL, INS, DUP, INV, BND, ALL]' + - id: geno_qual + type: int? + inputBinding: + position: 71 + prefix: '-u' + doc: min. mapping quality for genotyping + - id: dump + type: File? + inputBinding: + position: 0 + prefix: '-d' + doc: gzipped output file for SV-reads (optional) + - id: map_qual + type: int? 
+ inputBinding: + position: 0 + prefix: '-q' + doc: min. paired-end (PE) mapping quality + - id: qual_tra + type: int? + inputBinding: + position: 0 + prefix: '-r' + doc: min. PE quality for translocation + - id: mad_cutoff + type: int? + inputBinding: + position: 0 + prefix: '-s' + doc: 'insert size cutoff, median+s*MAD (deletions only)' +outputs: + - id: bcf_out + type: File + outputBinding: + glob: $(inputs.out_file) +arguments: + - call +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 40000 + coresMin: 4 +hints: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/delly:1.0.3' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': delly + 'doap:revision': 1.0.3 diff --git a/delly_1.0.3/example_input.yml b/delly_1.0.3/example_input.yml new file mode 100644 index 00000000..d84fb73c --- /dev/null +++ b/delly_1.0.3/example_input.yml @@ -0,0 +1,5 @@ +out_file: "name_of_output.bcf" +reference_genome: {class: File, path: path_to_file.fasta} +input_bams: + - {class: File, path: /path/to/file.bam} + - {class: File, path: /path/to/file.bam} diff --git a/disambiguate_1.0.0/README.md b/disambiguate_1.0.0/README.md deleted file mode 100644 index b1cd50f0..00000000 --- a/disambiguate_1.0.0/README.md +++ /dev/null @@ -1,52 +0,0 @@ - # CWL and Dockerfile for running Disambiguate - -## Version of tools in docker image (/container/Dockerfile) - -Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. 
- -| Tool | Version | Location | Notes | -|--- |--- |--- | - | -| biocontainers | latest | https://hub.docker.com/r/biocontainers/biocontainers/ | base image; "latest" not actually latest version, just tag name on docker hub| -| bamtools | 2.4.0 | https://bioconda.github.io/recipes/bamtools/README.html | - | -| ngs-disambiguate | 2016.11.10 | https://bioconda.github.io/recipes/ngs-disambiguate/README.html | - | - -[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own image badge on microbadger.com") - - -## CWL - -- CWL specification 1.0 -- Use `example_inputs.yaml` to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml -``` - -## Command -``` -USAGE: - - cwltool disambiguate_1.0.0.cwl \ - --prefix \ - --output_dir \ - [--aligner ] \ - - -Where: - - --prefix - (required) Sample ID or name used as prefix. 
Do not include .bam - - --output_dir - (required) Output directory - - --aligner - Aligner option {bwa(default),tophat,hisat2,star} - - - (required) Species A BAM file - - - (required) Species B BAM file -``` diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..7b085810 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,32 @@ +--- +description: >- + Central location for storing common workflow language based command line tools + for building workflows +--- + +# Command-line tools used by CCI + +* Free software: Apache Software License 2.0 +* Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/) + +## Features + +Create command line tools in common workflow language to generate workflows. + +## Installation + +Clone the repository: + +```text +git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git +``` + +**Follow the README in the respective tool folder for execution of the tool.** + +## Credits + +This package was created with Cookiecutter _and the `audreyr/cookiecutter-pypackage`_ project template.
+ +* Cookiecutter: [https://github.com/audreyr/cookiecutter](https://github.com/audreyr/cookiecutter) +* `audreyr/cookiecutter-pypackage`: [https://github.com/audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) + diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md new file mode 100644 index 00000000..a6cc28d1 --- /dev/null +++ b/docs/SUMMARY.md @@ -0,0 +1,120 @@ +# Table of contents + +* [MSK-ACCESS command-line tools](README.md) + +* [ABRA2](abra2/README.md) + * [v2.17](abra2/abra2_2.17.md) + * [v2.22](abra2/abra2_2.22.md) + +* access_utils + * [0.1.1](../access_utils/0.1.1/README.md) + +* [bcftools](bcftools/README.md) + + * [bcftools bgzip v1.15.1](bcftools/bcftools_bgzip_v1.15.1.md) + * [bcftools concat v1.15.1](bcftools/bcftools_concat_1.15.1.md) + * [bcftools norm v1.15.1](bcftools/bcftools_norm_v1.15.1.md) + * [bcftools sort v1.15.1](bcftools/bcftools_sort_v1.15.1.md) + * [bcftools tabix v1.15.1](bcftools/bcftools_tabix_v1.15.1.md) + +* [Bedtools](bedtools/README.md) + * [genomecov v2.28.0\_cv2](bedtools/bedtools_genomecov_v2.28.0_cv2.md) + * [merge v2.28.0\_cv2](bedtools/bedtools_merge_v2.28.0_cv2.md) + * [sortbed v2.28.0\_cv2](bedtools/bedtools_sortbed_v2.28.0_cv2.md) + +* Biometrics + * [extract](../biometrics_extract/README.md) + * [minor](../biometrics_minor/README.md) + * [major](../biometrics_major/README.md) + * [genotype](../biometrics_genotype/README.md) + * [sexmismatch](../biometrics_sexmismatch/README.md) + +* [Delly](delly/README.md) + * [delly call 0.9.1](delly/delly_call_0.9.1.md) + * [delly call 1.0.3](delly/delly_call_1.0.3.md) + +* [Disambiguate](disambiguate/README.md) + * [v1.0.0](disambiguate/disambiguate_1.0.0.md) + +* [Fgbio](fgbio/README.md) + * [CallDuplexConsensusReads v1.2.0](fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md) + * [CollectDuplexSeqMetrics v1.2.0](fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md) + * [GroupReadsByUmi v1.2.0](fgbio/fgbio_group_reads_by_umi_1.2.0.md) + * [FastqToBam 
v1.2.0](fgbio/fgbio_fastq_to_bam_1.2.0.md) + * [FilterConsensusReads v1.2.0](fgbio/fgbio_filter_consensus_reads_1.2.0.md) + * [simplex\_filter v0.1.8](fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md) + +* [GATK](gatk/README.md) + * [ApplyBQSR v4.1.2.0](gatk/gatk_applybqsr_4.1.2.0.md) + * [ApplyBQSR v4.1.8.1](gatk/gatk_apply_bqsr_4.1.8.1.md) + * [BaseRecalibrator v4.1.2.0](gatk/gatk_baserecalibrator_4.1.2.0.md) + * [BaseRecalibrator v4.1.8.1](gatk/gatk_base_recalibrator_4.1.8.1.md) + * [MergeBamAlignment v4.1.8.0](gatk/gatk_merge_bam_alignment_4.1.8.0.md) + * [MergeSamFiles v4.1.8.0](gatk/gatk_merge_sam_files_4.1.8.0.md) + * [SamToFastq v4.1.8.0](gatk/samtofastq-v4.1.8.0.md) + +* GetBaseCountsMultiSample + * [1.2.5](../getbasecountsmultisample/1.2.5/README.md) + +* [Manta](manta/README.md) + * [Manta v1.5.1](manta/manta_1.5.1.md) + +* [Marianas](marianas/README.md) + * [Collapsing First Pass v1.8.1](marianas/marianas_collapsing_first_pass_1.8.1.md) + * [Collapsing Second Pass v1.8.1](marianas/marianas_collapsing_second_pass_1.8.1.md) + * [Process Loop UMI v1.8.1](marianas/marianas_process_loop_umi_1.8.1.md) + * [Seprate BAMs v1.8.1](marianas/marianas_separate_bams_1.8.1.md) + +* [MultiQC](multiqc/README.md) + * [MultiQC v1.10.1.7](multiqc/multiqc_1.10.1.7.md) + * [MultiQC v1.12](multiqc/multiqc_1.12.md) + +* [MuTect](mutect/README.md) + * [MuTect 1.1.5](mutect/mutect_1.1.5.md) + +* [Merge Fastq](merge-fastq/README.md) + * [v0.1.7](merge-fastq/merge_fastq_0.1.7.md) + +* [Mosdepth](mosdepth/README.md) + * [0.3.3](mosdepth/mosdepth_0.3.3.md) + +* [Octopus](octopus/README.md) + * [v0.7.4](octopus/octopus_0.7.4.md) + +* [Picard Tools](picard-tools/README.md) + * [AddOrReplaceReadGroups v1.96](picard-tools/picard_add_or_replace_read_groups_1.96.md) + * [AddOrReplaceReadGroups v2.21.2](picard-tools/picard_add_or_replace_read_groups_2.21.2.md) + * [AddOrReplaceReadGroups v4.1.8.1](picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md) + * 
[CollectAlignmentSummaryMetrics v2.8.1](picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md) + * [CollectAlignmentSummaryMetrics v2.21.2](picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md) + * [CollectMultipleMetrics v2.8.1](picard-tools/picard_collectmultiplemetric_2.8.1.md) + * [CollectMultipleMetrics v2.21.2](picard-tools/picard_collectmultiplemetric_2.21.2.md) + * [FixMateInformation v1.96](picard-tools/picard_fix_mate_information_1.96.md) + * [FixMateInformation v2.21.2](picard-tools/picard_fix_mate_information_2.21.2.md) + * [FixMateInformation v4.1.8.1](picard-tools/picard_fix_mate_information_4.1.8.1.md) + * [HSmetrics v2.8.1](picard-tools/picard_hsmetrics_2.8.1.md) + * [HSmetrics v2.21.2](picard-tools/picard_hsmetrics_2.21.2.md) + * [MarkDuplicates v1.96](picard-tools/picard_mark_duplicates_1.96.md) + * [MarkDuplicates v2.8.1](picard-tools/picard_mark_duplicates_2.8.1.md) + * [MarkDuplicates v2.21.2](picard-tools/picard_mark_duplicates_2.21.2.md) + * [MarkDuplicates v4.1.8.1](picard-tools/picard_mark_duplicates_4.1.8.1.md) +* [Postprocessing variant calls](postprocessing_variant_calls/README.md) + * [vardict_filter_case-control 0.1.3](postprocessing_variant_calls/vardict_filter_case-control_0.1.3.md) + * [vardict_filter_single-sample 0.1.3](postprocessing_variant_calls/vardict_filter_single-sample_0.1.3.md) +* [Trim Galore](trim-galore/README.md) + * [v0.6.2](trim-galore/trim_galore_0.6.2.md) + +* [Ubuntu utilites](ubuntu-utilites/README.md) + * [v18.04](ubuntu-utilites/utilities_ubuntu_18.04.md) + +* [VarDictJava](vardictjava/README.md) + * [v1.8.2](vardictjava/vardictjava_1.8.2.md) + +* [VCF2MAF](vcf2maf/README.md) + * [1.6.21](vcf2maf/vcf2maf_1.6.21.md) + +* [Waltz](waltz/README.md) + * [CountReads v3.1.1](waltz/waltz_count_reads_3.1.1.md) + * [PileupMetrics v3.1.1](waltz/waltz_pileupmatrices_3.1.1.md) + + diff --git a/docs/abra2/README.md b/docs/abra2/README.md new file mode 100644 index 00000000..5cd595c7 --- /dev/null +++ 
b/docs/abra2/README.md @@ -0,0 +1,2 @@ +# ABRA2 + diff --git a/abra2_2.17/README.md b/docs/abra2/abra2_2.17.md similarity index 73% rename from abra2_2.17/README.md rename to docs/abra2/abra2_2.17.md index 89818e32..e2f9457b 100644 --- a/abra2_2.17/README.md +++ b/docs/abra2/abra2_2.17.md @@ -1,25 +1,25 @@ -# CWL and Dockerfile for running ABRA2 +# v2.17 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| openjdk | 8 | - | -| ABRA2 | 2.17 | https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk | 8 | - | +| ABRA2 | 2.17 | [https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar](https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar) | -[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.1.0.svg)](https://microbadger.com/images/mskcc/abra2:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to 
the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner abra2_2.17.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command:** ```bash #Using CWLTOOL @@ -30,9 +30,9 @@ > toil-cwl-runner --singularity --logFile /path/to/abra2_toil_log/cwltoil.log --jobStore /path/to/abra2_jobStore --batchSystem lsf --workDir /path/to/abra2_toil_log --outdir . --writeLogs /path/to/abra2_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/abra2_2.17.cwl /path/to/inputs.yaml > abra2_toil.stdout 2> abra2_toil.stderr & ``` -### Usage +### Usage -``` +```text usage: abra2_2.17.cwl [-h] positional arguments: @@ -85,4 +85,5 @@ optional arguments: VCF containing known (or suspected) variant sites. Very large files should be avoided. 
--no_sort Do not attempt to sort final output - ``` \ No newline at end of file +``` + diff --git a/docs/abra2/abra2_2.22.md b/docs/abra2/abra2_2.22.md new file mode 100644 index 00000000..c23aaada --- /dev/null +++ b/docs/abra2/abra2_2.22.md @@ -0,0 +1,21 @@ +# v2.22 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk | 8 | - | +| ABRA2 | 2.22 | [https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar](https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar) | + +[![](https://images.microbadger.com/badges/version/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/image/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) [![](https://images.microbadger.com/badges/license/mskcc/abra2:0.2.0.svg)](https://microbadger.com/images/mskcc/abra2:0.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner abra2_2.22.cwl example_inputs.yaml +``` + diff --git a/docs/athena/README.md b/docs/athena/README.md new file mode 100644 index 00000000..5f4802c9 --- /dev/null +++ b/docs/athena/README.md @@ -0,0 +1,9 @@ +# athena + +Athena is a tool to generate coverage statistics for NGS data, and combine these into an interactive HTML report. This gives both summary level and in depth information as to the coverage of the data, including various tables and plots to visualise the data. Athena can also optionally include plots visualising per-chromosome level coverage. 
+ +The general workflow for generating the statistics and report is as follows: + ++ Annotate each region of the bed file with the gene, exon and per base coverage data using annotate_bed.cwl ++ Generate per exon and per gene statistics using coverage_stats_single.cwl ++ Generate HTML coverage report with coverage_report_single.cwl diff --git a/docs/athena/annotate_bed.md b/docs/athena/annotate_bed.md new file mode 100644 index 00000000..f1848719 --- /dev/null +++ b/docs/athena/annotate_bed.md @@ -0,0 +1,75 @@ +# annotate_bed_1.4.2 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| python:8 base image | 8 | - | +| Athena | 1.4.2 | https://github.com/msk-access/athena/archive/refs/tags/1.4.2.zip | + +## Explanation + +The annotate_bed.cwl annotates the given bed file with transcript and coverage information required for the next step coverage_stats_single.cwl. Specifically, this is done using BEDtools intersect, with a file containing transcript to gene and exon information, and then the per base coverage data using the mosdepth output (*per_based.bed.gz). Currently, 100% overlap is required between coordinates in the panel bed file and the transcript annotation file, therefore you must ensure any added flank regions etc. are the same. + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner annotate_bed.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/annotate_bed.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir annotate_bed_toil_log +> toil-cwl-runner --singularity --logFile /path/to/annotate_bed_toil_log/cwltoil.log --jobStore /path/to/annotate_bed_jobStore --batchSystem lsf --workDir /path/to/annotate_bed_toil_log --outdir . 
--writeLogs /path/to/annotate_bed_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/annotate_bed.cwl /path/to/inputs.yaml > annotate_bed_toil.stdout 2> annotate_bed_toil.stderr & +``` + +## Usage + +``` +toil-cwl-runner annotate_bed.cwl --help + +usage: annotate_bed.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --panel_bed + PANEL_BED --transcript_file TRANSCRIPT_FILE + --coverage_file COVERAGE_FILE + [--chunk_size CHUNK_SIZE] [--output_name OUTPUT_NAME] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + worker thread number + --panel_bed PANEL_BED + Input panel bed file; must have ONLY the following 4 + columns chromosome, start position, end position, + gene/transcript + --transcript_file TRANSCRIPT_FILE + Transcript annotation file, contains required gene and + exon information. 
Must have ONLY the following 6 + columns: chromosome, start, end, gene, transcript, + exon + --coverage_file COVERAGE_FILE + Per base coverage file (output from mosdepth or + similar) + --chunk_size CHUNK_SIZE + --output_name OUTPUT_NAME + (optional) Prefix for naming output file, if not given + will use name from per base coverage file +``` diff --git a/docs/athena/coverage_report_single.md b/docs/athena/coverage_report_single.md new file mode 100644 index 00000000..c33c461d --- /dev/null +++ b/docs/athena/coverage_report_single.md @@ -0,0 +1,95 @@ +# coverage_report_single_1.4.2 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| python:8 base image | 8 | - | +| Athena | 1.4.2 | https://github.com/msk-access/athena/archive/refs/tags/1.4.2.zip | + +## Explanation +The coverage_report_single.cwl generates a full HTML report on a per sample-level. The user can define the threshold to display. +The output html report contains the following: ++ Summary including per gene coverage chart ++ Table of exons with sub-optimal coverage ++ Interactive plots of exons with sub-optimal coverage ++ A summary table of average coverage across all genes ++ Full gene coverage plots ++ Table of per exon coverage across all genes ++ Coverage of known variants (if specified) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner coverage_report_single.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/coverage_report_single.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir coverage_report_single_toil_log +> toil-cwl-runner --singularity --logFile /path/to/coverage_report_single_toil_log/cwltoil.log --jobStore /path/to/coverage_report_single_jobStore 
--batchSystem lsf --workDir /path/to/coverage_report_single_toil_log --outdir . --writeLogs /path/to/coverage_report_single_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/coverage_report_single.cwl /path/to/inputs.yaml > coverage_report_single_toil.stdout 2> coverage_report_single_toil.stderr & +``` + +## Usage + +``` +usage: coverage_report_single.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --exon_stats EXON_STATS --gene_stats + GENE_STATS --raw_coverage RAW_COVERAGE + [--per_base_coverage PER_BASE_COVERAGE] + [--threshold THRESHOLD] + [--sample_name SAMPLE_NAME] + [--output OUTPUT] [--panel PANEL] + [--limit LIMIT] [--summary] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + worker thread number + --exon_stats EXON_STATS + per exon statistics file (from + `coverage_stats_single.py`) + --gene_stats GENE_STATS + per gene statistics file (from + `coverage_stats_single.py`) + --raw_coverage RAW_COVERAGE + annotated bed file with coverage data (generated from + annotate_bed.sh / bedtools intersect) + --per_base_coverage PER_BASE_COVERAGE + Per-base coverage bed file from mosdepth. 
(Optional; + if not submitted, plots displaying global coverage per + chromosome will not be displayed) + --threshold THRESHOLD + threshold value defining sub-optimal coverage + (optional; default if not given: 20) + --sample_name SAMPLE_NAME + --output OUTPUT name for output report (optional; sample name will be + used if not given) + --panel PANEL panel bed file used for initial annotation, name will + be displayed in summary of report (optional) + --limit LIMIT number of genes at which to limit including full gene + plots, large numbers of genes may take a long time to + generate the plots (optional) + --summary boolean flag to add clinical report summary text in + summary section, includes list of all genes with + transcripts (optional; default False) +``` diff --git a/docs/athena/coverage_stats_single.md b/docs/athena/coverage_stats_single.md new file mode 100644 index 00000000..0f5090c1 --- /dev/null +++ b/docs/athena/coverage_stats_single.md @@ -0,0 +1,71 @@ +# coverage_stats_single_1.4.2 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| python:8 base image | 8 | - | +| Athena | 1.4.2 | https://github.com/msk-access/athena/archive/refs/tags/1.4.2.zip | + +## Explanation +The coverage_stats_single.cwl uses the annotated coverage bed file generated from the previous step, annotate_bed.cwl. The output of coverage_stats_single.cwl are tsv files of per per exon and per gene coverage statistics. This gives a minimum, mean and maxmimum coverage for each region, along with coverage at defined thresholds. The output tsv files are used as input files in the next step, coverage_report_single.cwl. 
+ +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner coverage_stats_single.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/coverage_stats_single.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir coverage_stats_single_toil_log +> toil-cwl-runner --singularity --logFile /path/to/coverage_stats_single_toil_log/cwltoil.log --jobStore /path/to/coverage_stats_single_jobStore --batchSystem lsf --workDir /path/to/coverage_stats_single_toil_log --outdir . --writeLogs /path/to/coverage_stats_single_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/coverage_stats_single.cwl /path/to/inputs.yaml > coverage_stats_single_toil.stdout 2> coverage_stats_single_toil.stderr & +``` + +## Usage + +``` +usage: coverage_stats_single.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --file FILE [--build BUILD] + [--outfile OUTFILE] [--thresholds THRESHOLDS] + [--output_name OUTPUT_NAME] + [--flagstat FLAGSTAT] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + worker thread number + --file FILE annotated bed file on which to generate report from + --build BUILD text file with build number used for alignment, output + from mosdepth (optional) chromosome, start, end, gene, + transcript, exon + --outfile OUTFILE output file name prefix, if not given the input file + name will be used as the name prefix + --thresholds THRESHOLDS + 
threshold values to calculate coverage for as comma + seperated integers (default: 10, 20, 30, 50, 100) + --output_name OUTPUT_NAME + (optional) Prefix for naming output file, if not given + will use name from per base coverage file + --flagstat FLAGSTAT file for sample, required for generating run + statistics (in development) +``` diff --git a/docs/bcftools/README.md b/docs/bcftools/README.md new file mode 100644 index 00000000..a08fa464 --- /dev/null +++ b/docs/bcftools/README.md @@ -0,0 +1 @@ +# BCFTOOLS diff --git a/docs/bcftools/bcftools_bgzip_v1.15.1.md b/docs/bcftools/bcftools_bgzip_v1.15.1.md new file mode 100644 index 00000000..afae2236 --- /dev/null +++ b/docs/bcftools/bcftools_bgzip_v1.15.1.md @@ -0,0 +1,42 @@ +## CWL and Docker for Running bgzip using bcftools v1.15.1 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| -------- | ------- | -------------------------------------------------------- | +| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io/): + +``` +toil-cwl-runner bcftools_bgzip_1.15.1.cwl example_input_bgzip.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```shell +#Using CWLTOOL +cwltool --singularity --non-strict /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +mkdir bcftools_toil_log +toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . 
--writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +```shell +usage: bcftools_bgzip_1.15.1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT input VCF file +``` + diff --git a/docs/bcftools/bcftools_concat_v1.15.1.md b/docs/bcftools/bcftools_concat_v1.15.1.md new file mode 100644 index 00000000..47033dfd --- /dev/null +++ b/docs/bcftools/bcftools_concat_v1.15.1.md @@ -0,0 +1,53 @@ +# CWL and Dockerfile for running bcftools v1.15.1 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash +toil-cwl-runner bcftools_concat_1.15.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +cwltool --singularity --non-strict /path/to/bcftools_concat_1.15.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +mkdir bcftools_toil_log +toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . 
--writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner bcftools_concat_1.15.1.cwl [-h] +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --allow_overlaps First coordinate of the next file can precede last + record of the current file. + --output_name OUTPUT_NAME + Output file name + --output_type OUTPUT_TYPE + compressed BCF (b), uncompressed BCF (u), compressed + VCF (z), uncompressed VCF (v) + --input INPUT + +``` diff --git a/docs/bcftools/bcftools_norm_v1.15.1.md b/docs/bcftools/bcftools_norm_v1.15.1.md new file mode 100644 index 00000000..0268425a --- /dev/null +++ b/docs/bcftools/bcftools_norm_v1.15.1.md @@ -0,0 +1,52 @@ +# CWL and Dockerfile for running bcftools v1.15.1 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| -------- | ------- | -------------------------------------------------------- | +| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash +toil-cwl-runner bcftools_norm_1.15.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +cwltool --singularity --non-strict 
/path/to/bcftools_norm_1.15.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +mkdir bcftools_toil_log +toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_norm_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner bcftools_norm_1.15.1.cwl [-h] +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --check_ref CHECK_REF + --multiallelics MULTIALLELICS + use any + --output_type OUTPUT_TYPE + --output_name OUTPUT_NAME + --input INPUT input vcf file + --fastaRef FASTAREF + +``` diff --git a/docs/bcftools/bcftools_sort_v1.15.1.md b/docs/bcftools/bcftools_sort_v1.15.1.md new file mode 100644 index 00000000..856d292d --- /dev/null +++ b/docs/bcftools/bcftools_sort_v1.15.1.md @@ -0,0 +1,50 @@ +# CWL and Dockerfile for running bcftools v1.15.1 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| -------- | ------- | -------------------------------------------------------- | +| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash +toil-cwl-runner bcftools_sort_1.15.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying 
[lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +cwltool --singularity --non-strict /path/to/bcftools_sort_1.15.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +mkdir bcftools_toil_log +toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_sort_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner bcftools_sort_1.15.1.cwl [-h] +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --output_name OUTPUT_NAME + Output file name + --output_type OUTPUT_TYPE + compressed BCF (b), uncompressed BCF (u), compressed + VCF (z), uncompressed VCF (v) + --input INPUT input vcf files +``` diff --git a/docs/bcftools/bcftools_tabix_v1.15.1.md b/docs/bcftools/bcftools_tabix_v1.15.1.md new file mode 100644 index 00000000..9271454d --- /dev/null +++ b/docs/bcftools/bcftools_tabix_v1.15.1.md @@ -0,0 +1,42 @@ +## CWL and Docker for Running tabs using bcftools v1.15.1 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| -------- | ------- | -------------------------------------------------------- | +| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using 
[toil](https://toil.readthedocs.io/): + +``` +toil-cwl-runner bcftools_tabix_1.15.1.cwl example_input_tabix.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```shell +#Using CWLTOOL +cwltool --singularity --non-strict /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +mkdir bcftools_toil_log +toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr & +``` + +### Usage + +```shell +usage: bcftools_tabix_1.15.1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT input VCF file +``` + diff --git a/docs/bedtools/README.md b/docs/bedtools/README.md new file mode 100644 index 00000000..13a0e51c --- /dev/null +++ b/docs/bedtools/README.md @@ -0,0 +1,2 @@ +# Bedtools + diff --git a/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md new file mode 100644 index 00000000..da41c5b2 --- /dev/null +++ b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# genomecov v2.28.0\_cv2 + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + 
+[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner bedtools\_genomecov\_v2.28.0\_cv2.cwl --help + +usage: bedtools\_genomecov\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--option\_bedgraph\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT The input file can be in BAM format \(Note: BAM must be sorted by position\) --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --option\_bedgraph option flag parameter to choose output file format. 
-bg refers to bedgraph format + diff --git a/docs/bedtools/bedtools_merge_v2.28.0_cv2.md b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md new file mode 100644 index 00000000..507a8994 --- /dev/null +++ b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# merge v2.28.0\_cv2 + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner bedtools\_merge\_v2.28.0\_cv2.cwl --help + +usage: bedtools\_merge\_v2.28.0\_cv2.cwl \[-h\] --input INPUT --output\_file\_name OUTPUT\_FILE\_NAME \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--distance\_between\_features DISTANCE\_BETWEEN\_FEATURES\] \[job\_order\] + +positional arguments: job\_order Job 
input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BEDgraph format file generated from Bedtools Genomecov module --output\_file\_name OUTPUT\_FILE\_NAME --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS --distance\_between\_features DISTANCE\_BETWEEN\_FEATURES Maximum distance between features allowed for features to be merged. + diff --git a/docs/bedtools/bedtools_sortbed_v2.28.0_cv2.md b/docs/bedtools/bedtools_sortbed_v2.28.0_cv2.md new file mode 100644 index 00000000..7b76d73b --- /dev/null +++ b/docs/bedtools/bedtools_sortbed_v2.28.0_cv2.md @@ -0,0 +1,43 @@ +# SortVCF v2.28.0\_cv2 + +## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools) + +| Tool | Version | Location | +| :------- | :----------- | :----------------------------------------------------------- | +| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) | + +[![](https://img.shields.io/badge/version-2.28.0_cv2-blue)](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash +toil-cwl-runner bedtools_sortbed_vcf.cwl example_input.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +cwltool --singularity --non-strict bedtools_sortbed_vcf.cwl inputs.yaml + +#Using toil-cwl-runner +mkdir run_directory +toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_sortbed_vcf.cwl inputs.yaml > file.stdout 2> 
file.stderr & +``` + +## Usage + +```shell +Usage: bedtools_sortbed_vcf.cwl [-h] --input INPUT [job_order] +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT input VCF file +``` + diff --git a/docs/cci_utils/README.md b/docs/cci_utils/README.md new file mode 100644 index 00000000..bafc11f6 --- /dev/null +++ b/docs/cci_utils/README.md @@ -0,0 +1,2 @@ +# CCI_UTILS + diff --git a/docs/cci_utils/general_stats_parse_0.2.7.md b/docs/cci_utils/general_stats_parse_0.2.7.md new file mode 100644 index 00000000..435424b3 --- /dev/null +++ b/docs/cci_utils/general_stats_parse_0.2.7.md @@ -0,0 +1,46 @@ +# CWL and Dockerfile for running general_stats_parse in cci_utils + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| cci_utils | 0.2.8 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner general_stats_parse.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/general_stats_parse.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/general_stats_parse.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner general_stats_parse.cwl -h +usage: general_stats_parse.cwl [-h] --directory DIRECTORY --samples-json + SAMPLES_JSON [--config CONFIG] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --directory DIRECTORY + Directory containing results. + --samples-json SAMPLES_JSON + Sample JSON file. + --config CONFIG MultQC config file. +``` diff --git a/docs/delly/README.md b/docs/delly/README.md new file mode 100644 index 00000000..3cf4e541 --- /dev/null +++ b/docs/delly/README.md @@ -0,0 +1,2 @@ +# Delly + diff --git a/docs/delly/delly_call_0.9.1.md b/docs/delly/delly_call_0.9.1.md new file mode 100644 index 00000000..08dcf390 --- /dev/null +++ b/docs/delly/delly_call_0.9.1.md @@ -0,0 +1,81 @@ +# CWL and Dockerfile for running Delly + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu | 18.04 | - | +| DELLY | 0.9.1 | https://github.com/dellytools/delly | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner delly_0.9.1.cwl example_inputs.yaml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/delly_0.9.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . 
--writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_0.9.1.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr & +``` + +### Usage + +``` +usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--out_file OUT_FILE] --reference_genome + REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS] + [--vcffile VCFFILE] [--svtype SVTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --out_file OUT_FILE The name to be used for the output bcf file + --reference_genome REFERENCE_GENOME + reference genome fasta file + --exclude_regions EXCLUDE_REGIONS + file with regions to exclude + --vcffile VCFFILE input VCF/BCF file for genotyping + --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL] +``` + +## Disclaimer +Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license: + +Copyright (c) 2012- European Molecular Biology Laboratory (EMBL) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/docs/delly/delly_call_1.0.3.md b/docs/delly/delly_call_1.0.3.md new file mode 100644 index 00000000..3bd6c5be --- /dev/null +++ b/docs/delly/delly_call_1.0.3.md @@ -0,0 +1,81 @@ +# CWL and Dockerfile for running Delly + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| ubuntu | 18.04 | - | +| DELLY | 1.0.3 | https://github.com/dellytools/delly | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner delly_1.0.3.cwl example_inputs.yaml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/delly_1.0.3.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir 
/path/to/delly_toil_log --outdir . --writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_1.0.3.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr & +``` + +### Usage + +``` +usage: delly_1.0.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--out_file OUT_FILE] --reference_genome + REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS] + [--vcffile VCFFILE] [--svtype SVTYPE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --out_file OUT_FILE The name to be used for the output bcf file + --reference_genome REFERENCE_GENOME + reference genome fasta file + --exclude_regions EXCLUDE_REGIONS + file with regions to exclude + --vcffile VCFFILE input VCF/BCF file for genotyping + --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL] +``` + +## Disclaimer +Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license: + +Copyright (c) 2012- European Molecular Biology Laboratory (EMBL) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/docs/disambiguate/README.md b/docs/disambiguate/README.md new file mode 100644 index 00000000..4850c2fd --- /dev/null +++ b/docs/disambiguate/README.md @@ -0,0 +1,2 @@ +# Disambiguate + diff --git a/docs/disambiguate/disambiguate_1.0.0.md b/docs/disambiguate/disambiguate_1.0.0.md new file mode 100644 index 00000000..8e065892 --- /dev/null +++ b/docs/disambiguate/disambiguate_1.0.0.md @@ -0,0 +1,53 @@ +# v1.0.0 + +## Version of tools in docker image \(/container/Dockerfile\) + +Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`. 
+ +| Tool | Version | Location | Notes | +| :--- | :--- | :--- | :--- | +| biocontainers | latest | [https://hub.docker.com/r/biocontainers/biocontainers/](https://hub.docker.com/r/biocontainers/biocontainers/) | base image; "latest" not actually latest version, just tag name on docker hub | +| bamtools | 2.4.0 | [https://bioconda.github.io/recipes/bamtools/README.html](https://bioconda.github.io/recipes/bamtools/README.html) | - | +| ngs-disambiguate | 2016.11.10 | [https://bioconda.github.io/recipes/ngs-disambiguate/README.html](https://bioconda.github.io/recipes/ngs-disambiguate/README.html) | - | + +[![](https://images.microbadger.com/badges/version/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) [![](https://images.microbadger.com/badges/image/mskcc/disambiguate:1.0.0.svg)](https://microbadger.com/images/mskcc/disambiguate:1.0.0) + +## CWL + +* CWL specification 1.0 +* Use `example_inputs.yaml` to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml +``` + +## Command + +```text +USAGE: + + cwltool disambiguate_1.0.0.cwl \ + --prefix \ + --output_dir \ + [--aligner ] \ + + +Where: + + --prefix + (required) Sample ID or name used as prefix. 
Do not include .bam + + --output_dir + (required) Output directory + + --aligner + Aligner option {bwa(default),tophat,hisat2,star} + + + (required) Species A BAM file + + + (required) Species B BAM file +``` + diff --git a/docs/fgbio/README.md b/docs/fgbio/README.md new file mode 100644 index 00000000..f1d1ca8d --- /dev/null +++ b/docs/fgbio/README.md @@ -0,0 +1,2 @@ +# Fgbio + diff --git a/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md new file mode 100644 index 00000000..c8d3d73e --- /dev/null +++ b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md @@ -0,0 +1,79 @@ +# CallDuplexConsensusReads v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_call_duplex_consensus_reads_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_call_duplex_consensus_reads_1.2.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--read_name_prefix READ_NAME_PREFIX] + [--read_group_id READ_GROUP_ID] + [--error_rate_pre_umi ERROR_RATE_PRE_UMI] + [--error_rate_post_umi ERROR_RATE_POST_UMI] + [--min_input_base_quality MIN_INPUT_BASE_QUALITY] + [--trim] + [--sort_order SORT_ORDER] + [--min_reads MIN_READS] + [--max_reads_per_strand MAX_READS_PER_STRAND] + [--threads THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads 
NUMBER_OF_THREADS + --input INPUT The input SAM or BAM file. + --output_file_name OUTPUT_FILE_NAME + Output SAM or BAM file to write consensus reads. + --read_name_prefix READ_NAME_PREFIX + The prefix all consensus read names + --read_group_id READ_GROUP_ID + The new read group ID for all the consensus reads. + --error_rate_pre_umi ERROR_RATE_PRE_UMI + The Phred-scaled error rate for an error prior to the + UMIs being integrated. + --error_rate_post_umi ERROR_RATE_POST_UMI + The Phred-scaled error rate for an error post the UMIs + have been integrated. + --min_input_base_quality MIN_INPUT_BASE_QUALITY + Ignore bases in raw reads that have Q below this + value. + --trim If true, quality trim input reads in addition to + masking low Q bases + --sort_order SORT_ORDER + The sort order of the output, if :none: then the same + as the input. + --min_reads MIN_READS + The minimum number of input reads to a consensus read. + --max_reads_per_strand MAX_READS_PER_STRAND + The maximum number of reads to use when building a + single-strand consensus. If more than this many reads + are present in a tag family, the family is randomly + downsampled to exactly max-reads reads. 
+``` + diff --git a/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md new file mode 100644 index 00000000..78812a55 --- /dev/null +++ b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md @@ -0,0 +1,62 @@ +# CollectDuplexSeqMetrics v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_collect_duplex_seq_metrics_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_collect_duplex_seq_metrics_1.2.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT --output_prefix + OUTPUT_PREFIX [--intervals INTERVALS] [--description DESCRIPTION] + [--duplex_umi_counts DUPLEX_UMI_COUNTS] [--min_ab_reads MIN_AB_READS] + [--min_ba_reads MIN_BA_READS] [--umi_tag UMI_TAG] [--mi_tag MI_TAG] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input BAM file generated by GroupReadByUmi. + --output_prefix OUTPUT_PREFIX + Prefix of output files to write. + --intervals INTERVALS + Optional set of intervals over which to restrict + analysis. [Optional]. + --description DESCRIPTION + Description of data set used to label plots. Defaults + to sample/library. [Optional]. + --duplex_umi_counts DUPLEX_UMI_COUNTS + If true, produce the .duplex_umi_counts.txt file with + counts of duplex UMI observations. [Optional]. 
+ --min_ab_reads MIN_AB_READS + Minimum AB reads to call a tag family a 'duplex'. + [Optional]. + --min_ba_reads MIN_BA_READS + Minimum BA reads to call a tag family a 'duplex'. + [Optional]. + --umi_tag UMI_TAG The tag containing the raw UMI. [Optional]. + --mi_tag MI_TAG The output tag for UMI grouping. [Optional]. +``` + diff --git a/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md new file mode 100644 index 00000000..3d4ede7f --- /dev/null +++ b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md @@ -0,0 +1,82 @@ +# FastqToBam v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_fastq_to_bam_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_fastq_to_bam_1.2.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--read-structures READ_STRUCTURES] + [--sort] [--umi-tag UMI_TAG] + [--read-group-id READ_GROUP_ID] + [--sample SAMPLE] [--library LIBRARY] + [--platform PLATFORM] + [--platform-unit PLATFORM_UNIT] + [--platform-model PLATFORM_MODEL] + [--sequencing-center SEQUENCING_CENTER] + [--predicted-insert-size PREDICTED_INSERT_SIZE] + [--description DESCRIPTION] + [--comment COMMENT] [--run-date RUN_DATE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Fastq files corresponding to each sequencing read + (e.g. 
R1, I1, etc.). + --output_file_name OUTPUT_FILE_NAME + The output SAM or BAM file to be written. + --read-structures READ_STRUCTURES + Read structures, one for each of the FASTQs. + https://github.com/fulcrumgenomics/fgbio/wiki/Read- + Structures + --sort If true, queryname sort the BAM file, otherwise + preserve input order. + --umi-tag UMI_TAG Tag in which to store molecular barcodes/UMIs + --read-group-id READ_GROUP_ID + Read group ID to use in the file header. + --sample SAMPLE The name of the sequenced sample. + --library LIBRARY The name/ID of the sequenced library. + --platform PLATFORM Sequencing Platform + --platform-unit PLATFORM_UNIT + Platform unit (e.g. '<flowcell-barcode>.<lane>.<sample-barcode>') + --platform-model PLATFORM_MODEL + Platform model to insert into the group header (ex. + miseq, hiseq2500, hiseqX) + --sequencing-center SEQUENCING_CENTER + The sequencing center from which the data originated + --predicted-insert-size PREDICTED_INSERT_SIZE + Predicted median insert size, to insert into the read + group header + --description DESCRIPTION + Description of the read group. 
+ --comment COMMENT Comment(s) to include in the output file’s header + --run-date RUN_DATE Date the run was produced, to insert into the read + group header +``` + diff --git a/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md new file mode 100644 index 00000000..470166a1 --- /dev/null +++ b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md @@ -0,0 +1,80 @@ +# FilterConsensusReads v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_filter_consensus_reads_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_filter_consensus_reads_1.2.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + --reference_fasta + REFERENCE_FASTA + [--reverse_per_base_tags] + [--min_reads MIN_READS] + [--max_read_error_rate MAX_READ_ERROR_RATE] + [--max_base_error_rate MAX_BASE_ERROR_RATE] + [--min_base_quality MIN_BASE_QUALITY] + [--max_no_call_fraction MAX_NO_CALL_FRACTION] + [--min_mean_base_quality MIN_MEAN_BASE_QUALITY] + [--require_single_strand_agreement] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input SAM or BAM file. + --output_file_name OUTPUT_FILE_NAME + Output SAM or BAM file to write consensus reads. + --reference_fasta REFERENCE_FASTA + Reference fasta file. 
+ --reverse_per_base_tags + Reverse [complement] per base tags on reverse strand + reads. + --min_reads MIN_READS + The minimum number of reads supporting a consensus + base/read. (Max 3 values) + --max_read_error_rate MAX_READ_ERROR_RATE + The maximum raw-read error rate across the entire + consensus read. (Max 3 values) + --max_base_error_rate MAX_BASE_ERROR_RATE + The maximum error rate for a single consensus base. + (Max 3 values) + --min_base_quality MIN_BASE_QUALITY + Mask (make N) consensus bases with quality less than + this threshold. + --max_no_call_fraction MAX_NO_CALL_FRACTION + Maximum fraction of no-calls in the read after + filtering + --min_mean_base_quality MIN_MEAN_BASE_QUALITY + The minimum mean base quality across the consensus + read + --require_single_strand_agreement + Mask (make N) consensus bases where the AB and BA + consensus reads disagree (for duplex-sequencing only). +``` + diff --git a/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md new file mode 100644 index 00000000..87ad2aaa --- /dev/null +++ b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md @@ -0,0 +1,68 @@ +# GroupReadsByUmi v1.2.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_group_reads_by_umi_1.2.0.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_group_reads_by_umi_1.2.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--family_size_histogram FAMILY_SIZE_HISTOGRAM] + [--raw_tag RAW_TAG] + [--assign_tag ASSIGN_TAG] + [--min_map_q MIN_MAP_Q] + [--include_non_pf_reads] + --strategy STRATEGY + 
[--edits EDITS] + [--min_umi_length MIN_UMI_LENGTH] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input BAM file. + --output_file_name OUTPUT_FILE_NAME + The output SAM or BAM file to be written. + --family_size_histogram FAMILY_SIZE_HISTOGRAM + Optional output of tag family size counts. + --raw_tag RAW_TAG The tag containing the raw UMI. + --assign_tag ASSIGN_TAG + The output tag for UMI grouping. + --min_map_q MIN_MAP_Q + Minimum mapping quality. + --include_non_pf_reads + --strategy STRATEGY The UMI assignment strategy. + (identity,edit,adjacency,paired) + --edits EDITS The allowable number of edits between UMIs. + --min_umi_length MIN_UMI_LENGTH + The minimum UMI length. If not specified then all UMIs + must have the same length, otherwise discard reads + with UMIs shorter than this length and allow for + differing UMI lengths. 
+``` + diff --git a/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md new file mode 100644 index 00000000..b12b00a3 --- /dev/null +++ b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md @@ -0,0 +1,44 @@ +# simplex\_filter v0.1.8 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| fgbio\_postprocessing | 0.1.8 | [https://github.com/msk-access/fgbio\_postprocessing](https://github.com/msk-access/fgbio_postprocessing) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner fgbio_postprocessing_simplex_filter_0.1.8.cwl example_inputs.yaml +``` + +## Usage + +```bash +usage: fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input_bam INPUT_BAM + [--output_file_name OUTPUT_FILE_NAME] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input_bam INPUT_BAM + Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). 
+``` + diff --git a/docs/gatk/README.md b/docs/gatk/README.md new file mode 100644 index 00000000..ebad94ae --- /dev/null +++ b/docs/gatk/README.md @@ -0,0 +1,2 @@ +# GATK + diff --git a/docs/gatk/gatk_apply_bqsr_4.1.8.1.md b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md new file mode 100644 index 00000000..c919c1f4 --- /dev/null +++ b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md @@ -0,0 +1,43 @@ +# ApplyBQSR v4.1.8.1 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_apply\_bqsr\_4.1.8.1.cwl --help + +usage: gatk\_apply\_bqsr\_4.1.8.1.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] 
\[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. 
Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS + diff --git a/docs/gatk/gatk_applybqsr_4.1.2.0.md b/docs/gatk/gatk_applybqsr_4.1.2.0.md new file mode 100644 index 00000000..709855a1 --- /dev/null +++ b/docs/gatk/gatk_applybqsr_4.1.2.0.md @@ -0,0 +1,43 @@ +# ApplyBQSR v4.1.2.0 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | 
Location | +| :--- | :--- | :--- | +| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_ApplyBQSR\_4.1.2.0.cwl --help + +usage: gatk\_ApplyBQSR\_4.1.2.0.cwl \[-h\] --reference REFERENCE \[--create\_output\_bam\_index\] --bqsr\_recal\_file BQSR\_RECAL\_FILE --input INPUT \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--emit\_original\_quals\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file 
GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantize\_quals QUANTIZE\_QUALS\] \[--quiet\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--read\_validation\_stringency READ\_VALIDATION\_STRINGENCY\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_jdk\_deflater\] \[--use\_jdk\_inflater\] \[--use\_original\_qualities\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create\_output\_bam\_index --bqsr\_recal\_file BQSR\_RECAL\_FILE Input recalibration table for BQSR. Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output\_file\_name OUTPUT\_FILE\_NAME Output file name. 
Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --cloud\_index\_prefetch\_buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --emit\_original\_quals --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --global\_qscore\_prior GLOBAL\_QSCORE\_PRIOR --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantize\_quals QUANTIZE\_QUALS --quiet --read\_filter READ\_FILTER --read\_index READ\_INDEX --read\_validation\_stringency READ\_VALIDATION\_STRINGENCY --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_jdk\_deflater --use\_jdk\_inflater --use\_original\_qualities --memory\_overhead MEMORY\_OVERHEAD --memory\_per\_job MEMORY\_PER\_JOB --number\_of\_threads NUMBER\_OF\_THREADS + diff --git a/docs/gatk/gatk_base_recalibrator_4.1.8.1.md b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md new file mode 100644 index 00000000..9b90a39f --- /dev/null +++ b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md @@ -0,0 +1,43 @@ +# BaseRecalibrator v4.1.8.1 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.8.1 | 
[https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_base_recalibrator_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_base\_recalibrator\_4.1.8.1.cwl --help + +usage: gatk\_base\_recalibrator\_4.1.8.1.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] 
\[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. 
Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2 + diff --git a/docs/gatk/gatk_baserecalibrator_4.1.2.0.md b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md new file mode 
100644 index 00000000..41f341b4 --- /dev/null +++ b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md @@ -0,0 +1,43 @@ +# BaseRecalibrator v4.1.2.0 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) | + +[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +\`\`\`bash + +> toil-cwl-runner gatk\_baserecalibrator\_4.1.2.0.cwl --help + +usage: gatk\_baserecalibrator\_4.1.2.0.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer 
CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] \[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\] + +positional arguments: job\_order Job input json file + +optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used 
to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES --sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead 
MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2 + diff --git a/docs/gatk/gatk_downsamplesam_4.1.8.1.md b/docs/gatk/gatk_downsamplesam_4.1.8.1.md new file mode 100644 index 00000000..b0f093d9 --- /dev/null +++ b/docs/gatk/gatk_downsamplesam_4.1.8.1.md @@ -0,0 +1,113 @@ +# DownsampleSam v4.1.8.1 + +## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) | + +[![](https://img.shields.io/badge/version-4.1.8.1-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl example_inputs.yml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict gatk_downsamplesam_4.1.8.1.cwl inputs.yaml + +#Using toil-cwl-runner +> mkdir run_directory +> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_downsamplesam_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr & +``` + +## Usage + +```bash + +> toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl --help + +usage: gatk_downsamplesam_4.1.8.1.cwl [-h] --input INPUT --reference REFERENCE + [--output_file_name OUTPUT_FILE_NAME] + [--output_file_name_metrics OUTPUT_FILE_NAME_METRICS] + [--probability PROBABILITY] + [--random_seed RANDOM_SEED] + [--strategy STRATEGY] + [--arguments_file ARGUMENTS_FILE] + [--cloud-index-prefetch-buffer 
CLOUD_INDEX_PREFETCH_BUFFER] + [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] + [--create_output_bam_index] + [--create_output_bam_md5] + [--disable_bam_index_caching] + [--disable_read_filter DISABLE_READ_FILTER] + [--disable_sequence_dictionary_validation] + [--exclude_intervals EXCLUDE_INTERVALS] + [--gatk_config_file GATK_CONFIG_FILE] + [--gcs_max_retries GCS_MAX_RETRIES] + [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] + [--QUIET] [--read_filter READ_FILTER] + [--read_index READ_INDEX] + [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] + [--lenient] + [--number_of_threads NUMBER_OF_THREADS] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT BAM/SAM file containing reads + --reference REFERENCE + Reference sequence file + --output_file_name OUTPUT_FILE_NAME + Output file name. Not Required + --output_file_name_metrics OUTPUT_FILE_NAME_METRICS + Output file name for metrics file. Not Required + --probability PROBABILITY + The probability of keeping any individual read, + between 0 and 1. + --random_seed RANDOM_SEED + Random seed used for deterministic results. Setting to + null will cause multiple invocations to produce + different results. 
+ --strategy STRATEGY The --STRATEGY argument is an enumerated type + (Strategy), which can have one of the following + values: HighAccuracy ConstantMemory Chained default + Strategy ConstantMemory + --arguments_file ARGUMENTS_FILE + --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER + --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER + --create_output_bam_index + --create_output_bam_md5 + --disable_bam_index_caching + --disable_read_filter DISABLE_READ_FILTER + Read filters to be disabled before analysis + --disable_sequence_dictionary_validation + --exclude_intervals EXCLUDE_INTERVALS + --gatk_config_file GATK_CONFIG_FILE + --gcs_max_retries GCS_MAX_RETRIES + --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS + --QUIET + --read_filter READ_FILTER + --read_index READ_INDEX + --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES + --lenient + --number_of_threads NUMBER_OF_THREADS + --memory_per_job MEMORY_PER_JOB + --memory_overhead MEMORY_OVERHEAD + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. 
+ + diff --git a/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md new file mode 100644 index 00000000..8a593757 --- /dev/null +++ b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md @@ -0,0 +1,245 @@ +# MergeBamAlignment v4.1.8.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_merge_bam_alignment_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_merge_bam_alignment_4.1.8.0.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --unmapped_bam UNMAPPED_BAM + --reference REFERENCE + [--output_file_name OUTPUT_FILE_NAME] + [--add_mate_cigar] + [--add_pg_tag_to_reads] + [--aligned_bam ALIGNED_BAM] + [--aligned_reads_only] + [--aligner_proper_pair_flags] + [--attributes_to_remove ATTRIBUTES_TO_REMOVE] + [--attributes_to_retain ATTRIBUTES_TO_RETAIN] + [--attributes_to_reverse ATTRIBUTES_TO_REVERSE] + [--attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT] + [--clip_adapters] + [--clip_overlapping_reads] + [--expected_orientations EXPECTED_ORIENTATIONS] + [--hard_clip_overlapping_reads] + [--include_secondary_alignments] + [--is_bisulfite_sequence] + [--jump_size JUMP_SIZE] + [--matching_dictionary_tags MATCHING_DICTIONARY_TAGS] + [--max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS] + [--min_unclipped_bases MIN_UNCLIPPED_BASES] + [--paired_run] + [--primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY] + [--read1_aligned_bam READ1_ALIGNED_BAM] + [--read1_trim READ1_TRIM] + [--read2_aligned_bam READ2_ALIGNED_BAM] + [--read2_trim READ2_TRIM] + [--sort_order SORT_ORDER] + [--unmap_contaminant_reads] + [--unmapped_read_strategy 
UNMAPPED_READ_STRATEGY] + [--validation_stringency VALIDATION_STRINGENCY] + [--create_index] + [--create_md5_file] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --unmapped_bam UNMAPPED_BAM + Original SAM or BAM file of unmapped reads, which must + be in queryname order. Reads MUST be unmapped. + Required. + --reference REFERENCE + Reference sequence file. Required. + --output_file_name OUTPUT_FILE_NAME + Merged SAM or BAM file to write to. Required. + --add_mate_cigar Adds the mate CIGAR tag (MC) if true, does not if + false. Default value: true. Possible values: {true, + false} + --add_pg_tag_to_reads + Add PG tag to each read in a SAM or BAM. Default value: + true. Possible values: {true, false} + --aligned_bam ALIGNED_BAM + SAM or BAM file(s) with alignment data. This argument + may be specified 0 or more times. Default value: null. + Cannot be used in conjunction with argument(s) + READ1_ALIGNED_BAM (R1_ALIGNED) READ2_ALIGNED_BAM + (R2_ALIGNED) + --aligned_reads_only Whether to output only aligned reads. Default value: + false. Possible values: {true, false} + --aligner_proper_pair_flags + Use the aligner's idea of what a proper pair is rather + than computing in this program. Default value: false. + Possible values: {true, false} + --attributes_to_remove ATTRIBUTES_TO_REMOVE + Attributes from the alignment record that should be + removed when merging. This overrides + ATTRIBUTES_TO_RETAIN if they share common tags. This + argument may be specified 0 or more times. Default + value: null. + --attributes_to_retain ATTRIBUTES_TO_RETAIN + Reserved alignment attributes (tags starting with X, + Y, or Z) that should be brought over from the + alignment data when merging. 
This argument may be + specified 0 or more times. Default value: null. + --attributes_to_reverse ATTRIBUTES_TO_REVERSE + Attributes on negative strand reads that need to be + reversed. This argument may be specified 0 or more + times. Default value: [OQ, U2]. + --attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT + Attributes on negative strand reads that need to be + reverse complemented. This argument may be specified 0 + or more times. Default value: [E2, SQ]. + --clip_adapters Whether to clip adapters where identified. Default + value: true. Possible values: {true, false} + --clip_overlapping_reads + For paired reads, clip the 3' end of each read if + necessary so that it does not extend past the 5' end + of its mate. Clipping will be either soft or hard + clipping, depending on CLIP_OVERLAPPING_READS_OPERATOR + setting. Hard clipped bases and their qualities will + be stored in the XB and XQ tags respectively. Default + value: true. Possible values: {true, false} + --expected_orientations EXPECTED_ORIENTATIONS + The expected orientation of proper read pairs. + Replaces JUMP_SIZE This argument may be specified 0 or + more times. Default value: null. Possible values: {FR, + RF, TANDEM} Cannot be used in conjunction with + argument(s) JUMP_SIZE (JUMP) + --hard_clip_overlapping_reads + If true, hard clipping will be applied to overlapping + reads. By default, soft clipping is used. Default + value: false. Possible values: {true, false} + --include_secondary_alignments + If false, do not write secondary alignments to output. + Default value: true. Possible values: {true, false} + --is_bisulfite_sequence + Whether the lane is bisulfite sequence (used when + calculating the NM tag). Default value: false. + Possible values: {true, false} + --jump_size JUMP_SIZE + The expected jump size (required if this is a jumping + library). Deprecated. Use EXPECTED_ORIENTATIONS + instead Default value: null. 
Cannot be used in + conjunction with argument(s) EXPECTED_ORIENTATIONS + (ORIENTATIONS) + --matching_dictionary_tags MATCHING_DICTIONARY_TAGS + List of Sequence Records tags that must be equal (if + present) in the reference dictionary and in the + aligned file. Mismatching tags will cause an error if + in this list, and a warning otherwise. This argument + may be specified 0 or more times. Default value: [M5, + LN]. + --max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS + The maximum number of insertions or deletions + permitted for an alignment to be included. Alignments + with more than this many insertions or deletions will + be ignored. Set to -1 to allow any number of + insertions or deletions. Default value: 1. + --min_unclipped_bases MIN_UNCLIPPED_BASES + If UNMAP_CONTAMINANT_READS is set, require this many + unclipped bases or else the read will be marked as + contaminant. Default value: 32. + --paired_run DEPRECATED. This argument is ignored and will be + removed. Default value: true. Possible values: {true, + false} + --primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY + Strategy for selecting primary alignment when the + aligner has provided more than one alignment for a + pair or fragment, and none are marked as primary, more + than one is marked as primary, or the primary + alignment is filtered out for some reason. For all + strategies, ties are resolved arbitrarily. Default + value: BestMapq. BestMapq (Expects that multiple + alignments will be correlated with HI tag, and prefers + the pair of alignments with the largest MAPQ, in the + absence of a primary selected by the aligner.) + EarliestFragment (Prefers the alignment which maps the + earliest base in the read. Note that EarliestFragment + may not be used for paired reads.) BestEndMapq + (Appropriate for cases in which the aligner is not + pair-aware, and does not output the HI tag. 
It simply + picks the alignment for each end with the highest + MAPQ, and makes those alignments primary, regardless + of whether the two alignments make sense together.) + MostDistant (Appropriate for a non-pair-aware aligner. + Picks the alignment pair with the largest insert size. + If all alignments would be chimeric, it picks the + alignments for each end with the best MAPQ.) + --read1_aligned_bam READ1_ALIGNED_BAM + SAM or BAM file(s) with alignment data from the first + read of a pair. This argument may be specified 0 or + more times. Default value: null. Cannot be used in + conjunction with argument(s) ALIGNED_BAM (ALIGNED) + --read1_trim READ1_TRIM + The number of bases trimmed from the beginning of read + 1 prior to alignment Default value: 0. + --read2_aligned_bam READ2_ALIGNED_BAM + SAM or BAM file(s) with alignment data from the second + read of a pair. This argument may be specified 0 or + more times. Default value: null. Cannot be used in + conjunction with argument(s) ALIGNED_BAM (ALIGNED) + --read2_trim READ2_TRIM + The number of bases trimmed from the beginning of read + 2 prior to alignment Default value: 0. + --sort_order SORT_ORDER + The order in which the merged reads should be output. + Default value: coordinate. Possible values: {unsorted, + queryname, coordinate, duplicate, unknown} + --unmap_contaminant_reads + Detect reads originating from foreign organisms (e.g. + bacterial DNA in a non-bacterial sample),and unmap + + label those reads accordingly. Default value: false. + Possible values: {true, false} + --unmapped_read_strategy UNMAPPED_READ_STRATEGY + How to deal with alignment information in reads that + are being unmapped (e.g. due to cross-species + contamination.) Currently ignored unless + UNMAP_CONTAMINANT_READS = true. Note that the + DO_NOT_CHANGE strategy will actually reset the cigar + and set the mapping quality on unmapped reads since + otherwisethe result will be an invalid record. 
To + force no change use the DO_NOT_CHANGE_INVALID + strategy. Default value: DO_NOT_CHANGE. Possible + values: {COPY_TO_TAG, DO_NOT_CHANGE, + DO_NOT_CHANGE_INVALID, MOVE_TO_TAG} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} +``` + diff --git a/docs/gatk/gatk_merge_sam_files_4.1.8.0.md b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md new file mode 100644 index 00000000..0602f9be --- /dev/null +++ b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md @@ -0,0 +1,98 @@ +# MergeSamFiles v4.1.8.0 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_merge_sam_files_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_merge_sam_files_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--assume_sorted] [--comment COMMENT] + [--create_index] [--create_md5_file] + [--intervals INTERVALS] + [--merge_sequence_dictionaries] + [--reference_sequence REFERENCE_SEQUENCE] + [--sort_order SORT_ORDER] + 
[--use_threading] + [--validation_stringency VALIDATION_STRINGENCY] + [--verbosity VERBOSITY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT SAM or BAM input file This argument must be specified + at least once. Required. + --output_file_name OUTPUT_FILE_NAME + SAM or BAM file to write merged result to Required. + --assume_sorted If true, assume that the input files are in the same + sort order as the requested output sort order, even if + their headers say otherwise. Default value: false. + Possible values: {true, false} + --comment COMMENT Comment(s) to include in the merged output files + header. This argument may be specified 0 or more + times. Default value: null. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --intervals INTERVALS + An interval list file that contains the locations of + the positions to merge. Assume bam are sorted and + indexed. The resulting file will contain alignments + that may overlap with genomic regions outside the + requested region. Unmapped reads are discarded. + Default value: null. + --merge_sequence_dictionaries + Merge the sequence dictionaries Default value: false. + Possible values: {true, false} + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Default value: null. + --sort_order SORT_ORDER + Sort order of output file Default value: coordinate. 
+ Possible values: {unsorted, queryname, coordinate, + duplicate, unknown} + --use_threading Option to create a background thread to encode, + compress and write to disk the output file. The + threaded version uses about 20% more CPU and decreases + runtime by ~20% when writing out a compressed BAM + file. Default value: false. Possible values: {true, + false} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + Possible values: {STRICT, LENIENT, SILENT} + --verbosity VERBOSITY + Control verbosity of logging. Default value: INFO. + Possible values: {ERROR, WARNING, INFO, DEBUG} +``` + diff --git a/docs/gatk/samtofastq-v4.1.8.0.md b/docs/gatk/samtofastq-v4.1.8.0.md new file mode 100644 index 00000000..b2b44b12 --- /dev/null +++ b/docs/gatk/samtofastq-v4.1.8.0.md @@ -0,0 +1,2 @@ +# SamToFastq v4.1.8.0 + diff --git a/docs/manta/README.md b/docs/manta/README.md new file mode 100644 index 00000000..1849796d --- /dev/null +++ b/docs/manta/README.md @@ -0,0 +1,2 @@ +# Manta + diff --git a/docs/manta/manta_1.5.1.md b/docs/manta/manta_1.5.1.md new file mode 100644 index 00000000..9b285335 --- /dev/null +++ b/docs/manta/manta_1.5.1.md @@ -0,0 +1,70 @@ +# Manta v1.5.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| ubuntu base image | 16.04 | - | +| manta | 1.5.1 | [https://github.com/Illumina/manta/releases/download/](https://github.com/Illumina/manta/releases/download/) | +| samtools | 1.9 | [https://github.com/samtools/samtools/releases/download/](https://github.com/samtools/samtools/releases/download/) | +| htslib | 1.9 | [https://github.com/samtools/htslib/releases/download/](https://github.com/samtools/htslib/releases/download/) | + 
+[![](https://images.microbadger.com/badges/image/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2)[![](https://images.microbadger.com/badges/version/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2) [![](https://images.microbadger.com/badges/commit/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2) [![](https://images.microbadger.com/badges/license/mskaccess/manta:0.0.2.svg)](https://microbadger.com/images/mskaccess/manta:0.0.2) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner manta_1.5.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/manta_1.5.1/manta_1.5.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir toil_log +> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/toil_log --outdir . 
--writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/manta_1.5.1/manta_1.5.1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner manta_1.5.1.cwl --help +usage: manta_1.5.1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --call_regions CALL_REGIONS + bgzip-compressed, tabix-indexed BED file specifying + regions to which variant analysis will be restricted + --non_wgs toggles on settings for WES + --normal_bam NORMAL_BAM + Normal sample BAM or CRAM file. May be specified more + than once, multiple inputs will be treated as each BAM + file representing a different sample. [optional] (no + default) + --output_contigs if true, outputs assembled contig sequences in final + VCF files, in the INFO field CONTIG + --reference_fasta REFERENCE_FASTA + samtools-indexed reference fasta file [required] + --tumor_bam TUMOR_BAM + Tumor sample BAM or CRAM file. Only up to one tumor + bam file accepted. 
+ --generateEvidenceBam + Generate a bam of supporting reads for all SVs +``` + diff --git a/docs/marianas/README.md b/docs/marianas/README.md new file mode 100644 index 00000000..67939d60 --- /dev/null +++ b/docs/marianas/README.md @@ -0,0 +1,2 @@ +# Marianas + diff --git a/docs/marianas/marianas_collapsing_first_pass_1.8.1.md b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md new file mode 100644 index 00000000..f0b24afe --- /dev/null +++ b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md @@ -0,0 +1,19 @@ +# Collapsing First Pass v1.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` + diff --git a/docs/marianas/marianas_collapsing_second_pass_1.8.1.md b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md new file mode 100644 index 00000000..7117bec5 --- /dev/null +++ b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md @@ -0,0 +1,19 @@ +# Collapsing Second Pass v1.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl 
test_inputs_second_pass.yaml +``` + diff --git a/docs/marianas/marianas_process_loop_umi_1.8.1.md b/docs/marianas/marianas_process_loop_umi_1.8.1.md new file mode 100644 index 00000000..7c1efc78 --- /dev/null +++ b/docs/marianas/marianas_process_loop_umi_1.8.1.md @@ -0,0 +1,19 @@ +# Process Loop UMI v1.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml +``` + diff --git a/docs/marianas/marianas_separate_bams_1.8.1.md b/docs/marianas/marianas_separate_bams_1.8.1.md new file mode 100644 index 00000000..232c89da --- /dev/null +++ b/docs/marianas/marianas_separate_bams_1.8.1.md @@ -0,0 +1,33 @@ +# Separate BAMs v1.8.1 + +## Version of tools in docker image \(../marianas\_process\_loop\_umi\_1.8.1/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner marianas_separate_bams_1.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl + [-h] --input_bam INPUT_BAM [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, 
--help show this help message and exit + --input_bam INPUT_BAM +``` + diff --git a/docs/merge-fastq/README.md b/docs/merge-fastq/README.md new file mode 100644 index 00000000..19544a3d --- /dev/null +++ b/docs/merge-fastq/README.md @@ -0,0 +1,2 @@ +# Merge Fastq + diff --git a/docs/merge-fastq/merge_fastq_0.1.7.md b/docs/merge-fastq/merge_fastq_0.1.7.md new file mode 100644 index 00000000..ec1c9213 --- /dev/null +++ b/docs/merge-fastq/merge_fastq_0.1.7.md @@ -0,0 +1,67 @@ +# v0.1.7 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| miniconda | 3 | [https://hub.docker.com/r/continuumio/miniconda3](https://hub.docker.com/r/continuumio/miniconda3) | +| merge\_fastq | 0.1.7 | [https://pypi.org/project/merge-fastq/](https://pypi.org/project/merge-fastq/) | + +[![](https://images.microbadger.com/badges/version/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [![](https://images.microbadger.com/badges/image/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [![](https://images.microbadger.com/badges/commit/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [![](https://images.microbadger.com/badges/license/mskaccess/merge_fastq:0.6.1.svg)](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner merge_fastq_0.1.7.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore 
/path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner merge_fastq_0.1.7.cwl --help +usage: merge_fastq_0.1.7.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --fastq1 FASTQ1 Full path to gzipped READ1 fastq files, can be + specified multiple times for example: --fastq1 + test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz + [required] + --fastq2 FASTQ2 Full path to gzipped READ2 fastq files, can be + specified multiple times for example: --fastq2 + test_part1_R2.fastq.gz --fastq2 test_part2_R2.fastq.gz + [required] + --output_path OUTPUT_PATH + Full path to write the output files (default: Current + working directory) + --out_fastq1_name OUT_FASTQ1_NAME + Name of the merged output READ1 fastq file (default: + merged_fastq_R1.fastq.gz) + --out_fastq2_name OUT_FASTQ2_NAME + Name of the merged output READ2 fastq file (default: + merged_fastq_R2.fastq.gz) +``` + diff --git a/docs/mosdepth/README.md b/docs/mosdepth/README.md new file mode 100644 index 00000000..d576ad29 --- /dev/null +++ b/docs/mosdepth/README.md @@ -0,0 +1,2 @@ +# Mosdepth + diff --git a/docs/mosdepth/mosdepth_0.3.3.md b/docs/mosdepth/mosdepth_0.3.3.md new file mode 100644 index 00000000..44080bde --- /dev/null +++ b/docs/mosdepth/mosdepth_0.3.3.md @@ -0,0 +1,68 @@ +Mosdepth: fast BAM/CRAM depth calculation for **WGS**, **exome**, or **targeted sequencing**. 
+ +`mosdepth` can output: ++ per-base depth about 2x as fast as `samtools depth`--about 25 minutes of CPU time for a 30X genome. ++ mean per-window depth given a window size--as would be used for CNV calling. ++ the mean per-region given a BED file of regions. ++ the mean or median per-region cumulative coverage histogram given a window size ++ a distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. ++ quantized output that merges adjacent bases as long as they fall in the same coverage bins e.g. (10-20) ++ threshold output to indicate how many bases in each region are covered at the given thresholds. ++ A summary of mean depths per chromosome and within specified regions per chromosome. + +# CWL for running Mosdepth - Coverage tool +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| mosdepth | 0.3.3 | [https://hub.docker.com/r/brentp/mosdepth/tags](https://hub.docker.com/r/brentp/mosdepth/tags) [https://github.com/brentp/mosdepth/releases/tag/v0.3.3](https://github.com/brentp/mosdepth/releases/tag/v0.3.3) | + +[![](https://img.shields.io/badge/version-0.3.3-blue)](https://github.com/brentp/mosdepth/releases/tag/v0.3.3) + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner mosdepth_0.3.3.cwl example_inputs.yaml +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> nohup toil-cwl-runner --singularity --outdir /path/to/output/folder /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml & +``` + +### Usage + +```bash +usage: mosdepth_0.3.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] [--bed BED] + [--chrom CHROM] [--prefix PREFIX] [--flag FLAG] + [--mapq MAPQ] + [job_order] + +fast 
BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --bed BED optional BED file or (integer) window-sizes. + --chrom CHROM chromosome to restrict depth calculation. + --prefix PREFIX Prefix for the output files + --flag FLAG exclude reads with any of the bits in FLAG set + --mapq MAPQ mapping quality threshold. reads with a mapping + quality less than this are ignored +``` diff --git a/docs/multiqc/README.md b/docs/multiqc/README.md new file mode 100644 index 00000000..a3dcea74 --- /dev/null +++ b/docs/multiqc/README.md @@ -0,0 +1,2 @@ +# MultiQC + diff --git a/docs/multiqc/multiqc_1.10.1.7.md b/docs/multiqc/multiqc_1.10.1.7.md new file mode 100644 index 00000000..02fffc9c --- /dev/null +++ b/docs/multiqc/multiqc_1.10.1.7.md @@ -0,0 +1,57 @@ +# CWL and Dockerfile for running multiqc + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| multiqc | 1.10.1.7 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner multiqc.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/multiqc_1.10.1.7/multiqc.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.10.1/multiqc_1.10.1.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner multiqc.cwl --help +usage: multiqc_1.10.1.cwl [-h] + [--qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY] + [--qc_files_dir QC_FILES_DIR] + [--qc_list_of_dirs QC_LIST_OF_DIRS] + [--report_name REPORT_NAME] [--config CONFIG] + [job_order] + +Run multiqc on log files from supported bioinformatic tools. + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY + qc files which shall be part of the multiqc summary; + optional, only one of qc_files_array or + qc_files_array_of_array must be provided + --qc_files_dir QC_FILES_DIR + qc files in a Directory + --qc_list_of_dirs QC_LIST_OF_DIRS + qc files in multiple directories + --report_name REPORT_NAME + name used for the html report + --config CONFIG +``` diff --git a/docs/multiqc/multiqc_1.12.md b/docs/multiqc/multiqc_1.12.md new file mode 100644 index 00000000..411afef8 --- /dev/null +++ b/docs/multiqc/multiqc_1.12.md @@ -0,0 +1,48 @@ +# CWL and Dockerfile for running multiqc + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| multiqc | 1.12 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner multiqc.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/multiqc_1.12/multiqc_1.12.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore 
--batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.12/multiqc_1.12.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +usage: multiqc_1.12.cwl [-h] [--qc_files_dir QC_FILES_DIR] + [--report_name REPORT_NAME] [--config CONFIG] + [job_order] + +Run multiqc on log files from supported bioinformatic tools. + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --qc_files_dir QC_FILES_DIR + qc files in a Directory + --report_name REPORT_NAME + name used for the html report and the corresponding + zip file + --config CONFIG +``` \ No newline at end of file diff --git a/docs/mutect/README.md b/docs/mutect/README.md new file mode 100644 index 00000000..639d595b --- /dev/null +++ b/docs/mutect/README.md @@ -0,0 +1,2 @@ +# MuTect + diff --git a/docs/mutect/mutect_1.1.5.md b/docs/mutect/mutect_1.1.5.md new file mode 100644 index 00000000..f5a70ddb --- /dev/null +++ b/docs/mutect/mutect_1.1.5.md @@ -0,0 +1,273 @@ +# MuTect 1.1.5 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| openjdk:7 base image | 7 | - | +| muTect | 1.1.5 | [https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip](https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner mutect_1.1.5.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml + +#Using 
toil-cwl-runner +> mkdir mutect_toil_log +> toil-cwl-runner --singularity --logFile /path/to/mutect_toil_log/cwltoil.log --jobStore /path/to/mutect_jobStore --batchSystem lsf --workDir /path/to/mutect_toil_log --outdir . --writeLogs /path/to/mutect_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml > mutect_toil.stdout 2> mutect_toil.stderr & +``` + +### Usage + +```text +usage: toil-cwl-runner mutect_1.1.5.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --BQSR BQSR The input covariates table file which enables on-the- + fly base quality score recalibration + --absolute_copy_number_data ABSOLUTE_COPY_NUMBER_DATA + Absolute Copy Number Data, as defined by Absolute, to + use in power calculations + --arg_file ARG_FILE Reads arguments from the specified file + --bam_tumor_sample_name BAM_TUMOR_SAMPLE_NAME + if the tumor bam contains multiple samples, only use + read groups with SM equal to this value + --baq BAQ Type of BAQ calculation to apply in the engine + (OFF|CALCULATE_AS_NECESSARY| RECALCULATE) + --baqGapOpenPenalty BAQGAPOPENPENALTY + BAQ gap open penalty (Phred Scaled). Default value is + 40. 
30 is perhaps better for whole genome call sets + --clipping_bias_pvalue_threshold CLIPPING_BIAS_PVALUE_THRESHOLD + pvalue threshold for fishers exact test of clipping + bias in mutant reads vs ref reads + --cosmic COSMIC VCF file of COSMIC sites + --coverage_20_q20_file COVERAGE_20_Q20_FILE + write out 20x of Q20 coverage in WIGGLE format to this + file + --coverage_file COVERAGE_FILE + write out coverage in WIGGLE format to this file + --dbsnp DBSNP VCF file of DBSNP information + --dbsnp_normal_lod DBSNP_NORMAL_LOD + LOD threshold for calling normal non-variant at dbsnp + sites + --defaultBaseQualities DEFAULTBASEQUALITIES + If reads are missing some or all base quality scores, + this value will be used for all base quality scores + --disableRandomization + Completely eliminates randomization from + nondeterministic methods. To be used mostly in the + testing framework where dynamic parallelism can result + in differing numbers of calls to the generator. + --disable_indel_quals + If true, disables printing of base insertion and base + deletion tags (with -BQSR) + --downsample_to_coverage DOWNSAMPLE_TO_COVERAGE + Target coverage threshold for downsampling to coverage + --downsampling_type DOWNSAMPLING_TYPE + Type of reads downsampling to employ at a given locus. + Reads will be selected randomly to be removed from the + pile based on the method described here + (NONE|ALL_READS| BY_SAMPLE) given locus; note that + downsampled reads are randomly selected from all + possible reads at a locus + --emit_original_quals + If true, enables printing of the OQ tag with the + original base qualities (with -BQSR) + --enable_extended_output + --excludeIntervals EXCLUDEINTERVALS + One or more genomic intervals to exclude from + processing. Can be explicitly specified on the command + line or in a file (including a rod file) + --filter_mismatching_base_and_quals + if a read has mismatching number of bases and base + qualities, filter out the read instead of blowing up. 
+ --force_alleles force output for all alleles at each site + --force_output force output for each site + --fraction_contamination FRACTION_CONTAMINATION + estimate of fraction (0-1) of physical contamination + with other unrelated samples + --fraction_mapq0_threshold FRACTION_MAPQ0_THRESHOLD + threshold for determining if there is relatedness + between the alt and ref allele read piles + --gap_events_threshold GAP_EVENTS_THRESHOLD + how many gapped events (ins/del) are allowed in + proximity to this candidate + --gatk_key GATK_KEY GATK Key file. Required if running with -et NO_ET. + Please see -phone-home-and-how-does-it-affect- + me#latest for details. + --heavily_clipped_read_fraction HEAVILY_CLIPPED_READ_FRACTION + if this fraction or more of the bases in a read are + soft/hard clipped, do not use this read for mutation + calling + --initial_tumor_lod INITIAL_TUMOR_LOD + Initial LOD threshold for calling tumor variant + --input_file_normal INPUT_FILE_NORMAL + SAM or BAM file(s) + --input_file_tumor INPUT_FILE_TUMOR + SAM or BAM file(s) + --interval_merging INTERVAL_MERGING + Indicates the interval merging rule we should use for + abutting intervals (ALL| OVERLAPPING_ONLY) + --interval_padding INTERVAL_PADDING + Indicates how many basepairs of padding to include + around each of the intervals specified with the -L/ + --interval_set_rule INTERVAL_SET_RULE + Indicates the set merging approach the interval parser + should use to combine the various -L or -XL inputs + (UNION| INTERSECTION) + --java_7 JAVA_7 + --keep_program_records + Should we override the Walkers default and keep + program records from the SAM header + --log_to_file LOG_TO_FILE + Set the logging location + --logging_level LOGGING_LEVEL + Set the minimum level of logging, i.e. setting INFO + gets you INFO up to FATAL, setting ERROR gets you + ERROR and FATAL level logging. 
+ --maxRuntime MAXRUNTIME + If provided, that GATK will stop execution cleanly as + soon after maxRuntime has been exceeded, truncating + the run but not exiting with a failure. By default the + value is interpreted in minutes, but this can be + changed by maxRuntimeUnits + --maxRuntimeUnits MAXRUNTIMEUNITS + The TimeUnit for maxRuntime (NANOSECONDS| + MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS) + --max_alt_allele_in_normal_fraction MAX_ALT_ALLELE_IN_NORMAL_FRACTION + threshold for maximum alternate allele fraction in + normal + --max_alt_alleles_in_normal_count MAX_ALT_ALLELES_IN_NORMAL_COUNT + threshold for maximum alternate allele counts in + normal + --max_alt_alleles_in_normal_qscore_sum MAX_ALT_ALLELES_IN_NORMAL_QSCORE_SUM + threshold for maximum alternate allele quality score + sum in normal + --min_qscore MIN_QSCORE + threshold for minimum base quality score + --minimum_mutation_cell_fraction MINIMUM_MUTATION_CELL_FRACTION + minimum fraction of cells which are presumed to have a + mutation, used to handle non-clonality and + contamination + --minimum_normal_allele_fraction MINIMUM_NORMAL_ALLELE_FRACTION + minimum allele fraction to be considered in normal, + useful for normal sample contaminated with tumor + --monitorThreadEfficiency + Enable GATK threading efficiency monitoring + --mutect MUTECT + --nonDeterministicRandomSeed + Makes the GATK behave non deterministically, that is, + the random numbers generated will be different in + every run + --noop used for debugging, basically exit as soon as we get + the reads + --normal_depth_file NORMAL_DEPTH_FILE + write out normal read depth in WIGGLE format to this + file + --normal_lod NORMAL_LOD + LOD threshold for calling normal non-germline + --normal_sample_name NORMAL_SAMPLE_NAME + name to use for normal in output files + --num_bam_file_handles NUM_BAM_FILE_HANDLES + The total number of BAM file handles to keep open + simultaneously + --num_cpu_threads_per_data_thread 
NUM_CPU_THREADS_PER_DATA_THREAD + How many CPU threads should be allocated per data + thread to running this analysis? + --num_threads NUM_THREADS + How many data threads should be allocated to running + this analysis. + --only_passing_calls only emit passing calls + --pedigree PEDIGREE Pedigree files for samples + --pedigreeString PEDIGREESTRING + Pedigree string for samples + --pedigreeValidationType PEDIGREEVALIDATIONTYPE + How strict should we be in validating the pedigree + information? (STRICT|SILENT) + --performanceLog PERFORMANCELOG + If provided, a GATK runtime performance log will be + written to this file + --phone_home PHONE_HOME + What kind of GATK run report should we generate? + STANDARD is the default, can be NO_ET so nothing is + posted to the run repository. Please see -phone-home- + and-how-does-it-affect-me#latest for details. + (NO_ET|STANDARD|STDOUT) + --pir_mad_threshold PIR_MAD_THRESHOLD + threshold for clustered read position artifact MAD + --pir_median_threshold PIR_MEDIAN_THRESHOLD + threshold for clustered read position artifact median + --power_constant_af POWER_CONSTANT_AF + Allelic fraction constant to use in power calculations + --power_constant_qscore POWER_CONSTANT_QSCORE + Phred scale quality score constant to use in power + calculations + --power_file POWER_FILE + write out power in WIGGLE format to this file + --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN + Bases with quality scores less than this threshold + wont be recalibrated (with -BQSR) + --read_buffer_size READ_BUFFER_SIZE + Number of reads per SAM file to buffer in memory + --read_filter READ_FILTER + Specify filtration criteria to apply to each read + individually + --read_group_black_list READ_GROUP_BLACK_LIST + Filters out read groups matching - or a + .txt file containing the filter strings one per line. 
+ --reference_sequence REFERENCE_SEQUENCE + --remove_program_records + Should we override the Walkers default and remove + program records from the SAM header + --required_maximum_alt_allele_mapping_quality_score + required minimum value for + + tumor alt allele maximum mapping quality score + --somatic_classification_normal_power_threshold + Power threshold for normal to + + determine germline vs variant + --tag TAG Arbitrary tag string to identify this GATK run as part + of a group of runs, for later analysis + --tumor_depth_file TUMOR_DEPTH_FILE + write out tumor read depth in WIGGLE format to this + file + --tumor_f_pretest TUMOR_F_PRETEST + for computational efficiency, reject sites with + allelic fraction below this threshold + --tumor_lod TUMOR_LOD + LOD threshold for calling tumor variant + --tumor_sample_name TUMOR_SAMPLE_NAME + name to use for tumor in output files + --unsafe UNSAFE If set, enables unsafe operations - nothing will be + checked at runtime. For expert users only who know + what they are doing. We do not support usage of this + argument. 
(ALLOW_UNINDEXED_BAM| + ALLOW_UNSET_BAM_SORT_ORDER| + NO_READ_ORDER_VERIFICATION| + ALLOW_SEQ_DICT_INCOMPATIBILITY| + LENIENT_VCF_PROCESSING|ALL) + --useOriginalQualities + If set, use the original base quality scores from the + OQ tag when present instead of the standard scores + --validation_strictness VALIDATION_STRICTNESS + How strict should we be with validation + (STRICT|LENIENT|SILENT) + --vcf VCF VCF output of mutation candidates +``` + diff --git a/docs/octopus/README.md b/docs/octopus/README.md new file mode 100644 index 00000000..c19f208c --- /dev/null +++ b/docs/octopus/README.md @@ -0,0 +1,2 @@ +# Octopus + diff --git a/docs/octopus/octopus_0.7.4.md b/docs/octopus/octopus_0.7.4.md new file mode 100644 index 00000000..2967f783 --- /dev/null +++ b/docs/octopus/octopus_0.7.4.md @@ -0,0 +1,74 @@ +## CWL and Docker for Running Octopus + +## Version of tools in [docker image](https://hub.docker.com/r/dancooke/octopus/tags) + +| Tool | Version | Location | +| ------- | ------- | ---------------------------------------------------------- | +| Octopus | v0.7.4 | https://github.com/luntergroup/octopus/releases/tag/v0.7.4 | + +### CWL + +CWL specification 1.0 +Use example_input.yaml to see the inputs to the cwl +Example Command using [toil](https://toil.readthedocs.io/): +`toil-cwl-runner octopus_0-7-4.cwl example_input.yaml` + +If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing type==X86_64 && you can use the following command + +### Using CWLTOOL + +``` +cwltool --singularity --non-strict /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml +``` + +### Using toil-cwl-runner + +```shell +mkdir octopus_toil_log +toil-cwl-runner --singularity --logFile /path/to/octopus_toil_log/cwltoil.log --jobStore /path/to/octopus_jobStore --batchSystem lsf --workDir /path/to/octopus_toil_log --outdir . 
--writeLogs /path/to/octopus_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml > octopus_toil.stdout 2> octopus_toil.stderr & +``` + +### Usage + +```shell +usage: octopus_0-7-4.cwl [-h] --input INPUT [--normalId NORMALID] + [--tumorOnlySample] [--somaticOnlyCalls] + [--targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY] + [--skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY] + [--targettedCalling_file TARGETTEDCALLING_FILE] + [--skipRegions_file SKIPREGIONS_FILE] + [--error_models ERROR_MODELS] --reference REFERENCE + --output_file_name OUTPUT_FILE_NAME + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT Tumor and normal bam files with .bai + --normalId NORMALID add the name of the normal sample + --tumorOnlySample mention this parameter if it is tumor only sample. + --somaticOnlyCalls if somatics only call is required. Use this with -f ON + parameter + --targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY + list of regions to call variants from. eg 1. chr1: all + of chr1. 2. chr2:10,000,000: the single position + 10000000 in chr2. 3. chr3:5,000,000-: everything from + chr3:5,000,000 onwards. 4. + chr4:100,000,000-200,000,000: everything between + chr4:100,000,000 and chr4:200,000,000. The interval is + half open so position chr4:200,000,000 is not + included.
+ --skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY + to skip a set of regions + --targettedCalling_file TARGETTEDCALLING_FILE + regions in a text or bed file + --skipRegions_file SKIPREGIONS_FILE + regions in text or bed file format + --error_models ERROR_MODELS + error model will be in the format - [library + preparation]<.sequencer> eg: PCR.NOVASEQ + --reference REFERENCE + --output_file_name OUTPUT_FILE_NAME +``` diff --git a/docs/picard-tools/README.md b/docs/picard-tools/README.md new file mode 100644 index 00000000..094001ac --- /dev/null +++ b/docs/picard-tools/README.md @@ -0,0 +1,2 @@ +# Picard Tools + diff --git a/picard_add_or_replace_read_groups_1.96/README.md b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md similarity index 78% rename from picard_add_or_replace_read_groups_1.96/README.md rename to docs/picard-tools/picard_add_or_replace_read_groups_1.96.md index b07355a4..e5a70249 100644 --- a/picard_add_or_replace_read_groups_1.96/README.md +++ b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md @@ -1,26 +1,26 @@ -# CWL and Dockerfile for running Picard - AddOrReplaceReadGroups +# AddOrReplaceReadGroups v1.96 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for opnejdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on 
microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_add_or_replace_read_groups_1.96.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL diff --git a/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md new file mode 100644 index 00000000..70eb1246 --- /dev/null +++ b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md @@ -0,0 +1,90 @@ +# AddOrReplaceReadGroups v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | 
[https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardAddOrReplaceReadGroup_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir . --writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl --help +usage: picard_add_or_replace_read_groups_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in.
If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --read_group_identifier READ_GROUP_IDENTIFIER + Read Group ID Default value: 1. This option can be set + to 'null' to clear the default value Required + --read_group_sequnecing_center READ_GROUP_SEQUNECING_CENTER + Read Group sequencing center name Default value: null. + Required + --read_group_library READ_GROUP_LIBRARY + Read Group Library. Required + --read_group_platform_unit READ_GROUP_PLATFORM_UNIT + Read Group platform unit (eg. run barcode) Required. + --read_group_sample_name READ_GROUP_SAMPLE_NAME + Read Group sample name. Required + --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM + Read Group platform (e.g. illumina, solid) Required. + --read_group_description READ_GROUP_DESCRIPTION + Read Group description Default value: null. + --read_group_run_date READ_GROUP_RUN_DATE + Read Group run date Default value: null. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +``` + diff --git a/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md new file mode 100644 index 00000000..707df7f2 --- /dev/null +++ b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md @@ -0,0 +1,95 @@ +# AddOrReplaceReadGroups v4.1.8.1 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardAddOrReplaceReadGroup_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir .
--writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl --help +usage: picard_add_or_replace_read_groups_4.1.8.1.cwl + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file ( sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --read_group_identifier READ_GROUP_IDENTIFIER + Read Group ID Default value: 1. This option can be set + to 'null' to clear the default value Required + --read_group_sequencing_center READ_GROUP_SEQUENCING_CENTER + Read Group sequencing center name Default value: null. + Required + --read_group_library READ_GROUP_LIBRARY + Read Group Library. Required + --read_group_platform_unit READ_GROUP_PLATFORM_UNIT + Read Group platform unit (eg. run barcode) Required. + --read_group_sample_name READ_GROUP_SAMPLE_NAME + Read Group sample name. Required + --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM + Read Group platform (e.g. illumina, solid) Required. + --read_group_description READ_GROUP_DESCRIPTION + Read Group description Default value: null. + --read_group_run_date READ_GROUP_RUN_DATE + Read Group run date Default value: null. 
+ --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for + writing compressed output + --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for + reading compressed input. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value.
Possible values:{true, false} +``` + diff --git a/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md new file mode 100644 index 00000000..b88b626f --- /dev/null +++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md @@ -0,0 +1,78 @@ +# CollectAlignmentSummaryMetrics v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collect_alignment_summary_metrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_collect_alignment_summary_metrics_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. 
This option can be set + to 'null' to clear the default value. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Note that while this argument + isn't required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+``` + diff --git a/picard_collect_alignment_summary_metrics_2.8.1/README.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md similarity index 85% rename from picard_collect_alignment_summary_metrics_2.8.1/README.md rename to docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md index 16d0b11b..1a318d60 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/README.md +++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md @@ -1,19 +1,18 @@ -# CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics +# CollectAlignmentSummaryMetrics v2.8.1 -## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 3.3.3 | r-base for opnejdk:8 | +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for openjdk:8 | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_collect_alignment_summary_metrics_2.8.1.cwl example_inputs.yaml @@ -78,3 +77,4 @@ optional arguments: Default value: 0. This option can be set to 'null' to clear the default value.
``` + diff --git a/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md new file mode 100644 index 00000000..14b0eaee --- /dev/null +++ b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md @@ -0,0 +1,78 @@ +# CollectMultipleMetrics v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collectmultiplemetrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_collectmultiplemetrics_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. This option can be set + to 'null' to clear the default value. 
+ --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Note that while this argument + isn't required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+``` + diff --git a/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md new file mode 100644 index 00000000..259b84d5 --- /dev/null +++ b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md @@ -0,0 +1,80 @@ +# CollectMultipleMetrics v2.8.1 + +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for opnejdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_collectmultiplemetrics_2-8-1.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_collectmultiplemetrics_2-8-1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. 
This option can be set + to 'null' to clear the default value. + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Note that while this argument + isn't required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. 
+``` + diff --git a/picard_fix_mate_information_1.96/README.md b/docs/picard-tools/picard_fix_mate_information_1.96.md similarity index 71% rename from picard_fix_mate_information_1.96/README.md rename to docs/picard-tools/picard_fix_mate_information_1.96.md index 567a78e3..22a9cd50 100644 --- a/picard_fix_mate_information_1.96/README.md +++ b/docs/picard-tools/picard_fix_mate_information_1.96.md @@ -1,26 +1,26 @@ -# CWL and Dockerfile for running Picard - FixMateInformation +# FixMateInformation v1.96 -## Version of tools in docker image (../picard_add_or_replace_read_groups_1.96/container/Dockerfile) +## Version of tools in docker image \(../picard\_add\_or\_replace\_read\_groups\_1.96/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for opnejdk:8 | -[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com") +[![](https://images.microbadger.com/badges/image/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) 
[![](https://images.microbadger.com/badges/version/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [![](https://images.microbadger.com/badges/license/mskcc/picard_1.96:0.1.0.svg)](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner picard_fix_mate_information_1.96.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -33,7 +33,7 @@ ### Usage -``` +```text usage: picard_fix_mate_information_1.96.cwl [-h] positional arguments: @@ -72,4 +72,5 @@ optional arguments: coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. 
Possible values:{true, false} -``` \ No newline at end of file +``` + diff --git a/docs/picard-tools/picard_fix_mate_information_2.21.2.md b/docs/picard-tools/picard_fix_mate_information_2.21.2.md new file mode 100644 index 00000000..659bf4b1 --- /dev/null +++ b/docs/picard-tools/picard_fix_mate_information_2.21.2.md @@ -0,0 +1,72 @@ +# FixMateInformation v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_fix_mate_information_2.21.2.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardFixMate_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir .
--writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +```text +usage: picard_fix_mate_information_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix. This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +``` + diff --git a/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md new file mode 100644 index 00000000..7039d4d4 --- /dev/null +++ b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md @@ -0,0 +1,77 @@ +# FixMateInformation v4.1.8.1 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_fix_mate_information_4.1.8.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir picardFixMate_toil_log +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +```text +usage: picard_fix_mate_information_4.1.8.1.cwl + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix.
This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for + writing compressed output + --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for + reading compressed input. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value.
Possible values:{true, false} +``` + diff --git a/docs/picard-tools/picard_hsmetrics_2.21.2.md b/docs/picard-tools/picard_hsmetrics_2.21.2.md new file mode 100644 index 00000000..8c43151b --- /dev/null +++ b/docs/picard-tools/picard_hsmetrics_2.21.2.md @@ -0,0 +1,86 @@ +# HSmetrics v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash +> toil-cwl-runner picard_hsmetrics_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_hsmetrics_2.21.2.cwl [-h] + +optional arguments: + -h, --help show this help message and exit + --bait_intervals BAIT_INTERVALS + An interval list file that contains the locations of + the baits used. Default value: null. This option must + be specified at least 1 times. + --bait_set_name BAIT_SET_NAME + Bait set name. If not provided it is inferred from the + filename of the bait intervals. Default value: null + --minimum_mapping_quality MINIMUM_MAPPING_QUALITY + Minimum mapping quality for a read to contribute + coverage. Default value: 20. This option can be set to + 'null' to clear the default value. + --minimum_base_quality MINIMUM_BASE_QUALITY + Minimum base quality for a base to contribute + coverage. Default value: 20. This option can be set to + 'null' to clear the default value. + --clip_overlapping_reads + True if we are to clip overlapping reads, false + otherwise. Default value: true. This option can be set + to 'null' to clear the default value. Possible values: + {true, false} + --target_intervals TARGET_INTERVALS + An interval list file that contains the locations of + the targets. Default value: null.
This option must be + specified at least 1 times. + --input INPUT An aligned SAM or BAM file. Required. + --output_file_name OUTPUT_FILE_NAME + The output file to write the metrics to. Required. + --metric_accumulation_level METRIC_ACCUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --per_target_coverage PER_TARGET_COVERAGE + An optional file to output per target coverage + information to. Default value: null. + --per_base_coverage PER_BASE_COVERAGE + An optional file to output per base coverage + information to. The per-base file contains one line + per target base and can grow very large. It is not + recommended for use with large target sets. Default + value: null. + --near_distance NEAR_DISTANCE + The maximum distance between a read and the nearest + probe/bait/amplicon for the read to be considered + 'near probe' and included in percent selected. Default + value: 250. This option can be set to 'null' to clear + the default value. + --coverage_cap COVERAGE_CAP + Parameter to set a max coverage limit for Theoretical + Sensitivity calculations. Default is 200. Default + value: 200. This option can be set to 'null' to clear + the default value. + --sample_size SAMPLE_SIZE + Sample Size used for Theoretical Het Sensitivity + sampling. Default is 10000. Default value: 10000. This + option can be set to 'null' to clear the default + value. 
+``` + diff --git a/docs/picard-tools/picard_hsmetrics_2.8.1.md b/docs/picard-tools/picard_hsmetrics_2.8.1.md new file mode 100644 index 00000000..627d3286 --- /dev/null +++ b/docs/picard-tools/picard_hsmetrics_2.8.1.md @@ -0,0 +1,26 @@ +# HSmetrics v2.8.1 + +## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for openjdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash +> toil-cwl-runner picard_hsmetrics_2.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +> usage: picard_hsmetrics_2.8.1.cwl [-h] +``` + diff --git a/docs/picard-tools/picard_mark_duplicates_1.96.md b/docs/picard-tools/picard_mark_duplicates_1.96.md new file mode 100644 index 00000000..dd13d5ed --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_1.96.md @@ -0,0 +1,20 @@ +# MarkDuplicates v1.96 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) | +| R | 3.3.3 | r-base for openjdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml +``` + diff --git a/docs/picard-tools/picard_mark_duplicates_2.21.2.md b/docs/picard-tools/picard_mark_duplicates_2.21.2.md new file mode
100644 index 00000000..f978a4ba --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_2.21.2.md @@ -0,0 +1,77 @@ +# MarkDuplicates v2.21.2 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.21.2.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: picard_mark_duplicates_2.21.2.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --duplication_metrics DUPLICATION_METRICS + File to write duplication metrics to Required. + --assume_sort_order ASSUME_SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. 
Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY + The scoring strategy for choosing the non-duplicate + among candidates. Default value:SUM_OF_BASE_QUALITIES. + This option can be set to 'null' to clear the default + value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE + The maximum offset between two duplicate clusters in + order to consider them optical duplicates. The default + is appropriate for unpatterned versions of the + Illumina platform. For the patterned flowcell models, + 2500 is moreappropriate. For other platforms and + models, users should experiment to find what works + best. Default value: 100. This option can be set to + 'null' to clear the default value. 
+``` + diff --git a/docs/picard-tools/picard_mark_duplicates_2.8.1.md b/docs/picard-tools/picard_mark_duplicates_2.8.1.md new file mode 100644 index 00000000..cfb0fc92 --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_2.8.1.md @@ -0,0 +1,20 @@ +# MarkDuplicates v2.8.1 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) | +| R | 3.3.3 | r-base for openjdk:8 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml +``` + diff --git a/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md new file mode 100644 index 00000000..fe6c11f1 --- /dev/null +++ b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md @@ -0,0 +1,115 @@ +# MarkDuplicates v4.1.8.1 + +## Version of tools in docker image + +| Tool | Version | Location | +| :--- | :--- | :--- | +| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_4.1.8.1.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: picard_mark_duplicates_4.1.8.1.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --duplication_metrics DUPLICATION_METRICS + File to write duplication metrics to Required. + --assume_sort_order ASSUME_SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --read_name_regex READ_NAME_REGEX + MarkDuplicates can use the tile and cluster positions + to estimate the rate of optical duplication in + addition to the dominant source of duplication, PCR, + to provide a more accurate estimation of library size. + By default (with no READ_NAME_REGEX specified), + MarkDuplicates will attempt to extract coordinates + using a split on ':' (see Note below). Set + READ_NAME_REGEX to 'null' to disable optical duplicate + detection. Note that without optical duplicate counts, + library size estimation will be less accurate. 
If the + read name does not follow a standard Illumina colon- + separation convention, but does contain tile and x,y + coordinates, a regular expression can be specified to + extract three variables: tile/region, x coordinate and + y coordinate from a read name. The regular expression + must contain three capture groups for the three + variables, in order. It must match the entire read + name. e.g. if field names were separated by semi-colon + (';') this example regex could be specified + (?:.*;)?([0-9]+)[^;]*;([0-9]+)[^;]*;([0-9]+)[^;]*$ + Note that if no READ_NAME_REGEX is specified, the read + name is split on ':'. For 5 element names, the 3rd, + 4th and 5th elements are assumed to be tile, x and y + values. For 7 element names (CASAVA 1.8), the 5th, + 6th, and 7th elements are assumed to be tile, x and y + values. + --sorting_collection_size_ratio SORTING_COLLECTION_SIZE_RATIO + This number, plus the maximum RAM available to the + JVM, determine the memory footprint used by some of + the sorting collections. If you are running out of + memory, try reducing this number. + --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for + writing compressed output + --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for + reading compressed input. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY + The scoring strategy for choosing the non-duplicate + among candidates. Default value:SUM_OF_BASE_QUALITIES. + This option can be set to 'null' to clear the default + value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE + The maximum offset between two duplicate clusters in + order to consider them optical duplicates. The default + is appropriate for unpatterned versions of the + Illumina platform.
For the patterned flowcell models, + 2500 is moreappropriate. For other platforms and + models, users should experiment to find what works + best. Default value: 100. This option can be set to + 'null' to clear the default value. +``` + diff --git a/docs/postprocessing_variant_calls/README.md b/docs/postprocessing_variant_calls/README.md new file mode 100644 index 00000000..a4cf29de --- /dev/null +++ b/docs/postprocessing_variant_calls/README.md @@ -0,0 +1 @@ +# postprocessing variant calls (pv) diff --git a/docs/postprocessing_variant_calls/vardict_filter_case-control_0.1.4.md b/docs/postprocessing_variant_calls/vardict_filter_case-control_0.1.4.md new file mode 100644 index 00000000..167be1fb --- /dev/null +++ b/docs/postprocessing_variant_calls/vardict_filter_case-control_0.1.4.md @@ -0,0 +1,47 @@ +## CWL pv_vardict_case-control_filter.cwl + +- CWL specification 1.0 +- Use example_inputs_case-control_filter.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner pv_vardict_case-control_filter.cwl example_inputs_case-control_filter.yaml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/pv_vardict_case-control_filter.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/pv_vardict_case-control_filter_toil.log --jobStore /path/to/pv_vardict_case-control_filter_jobStore --batchSystem lsf --workDir /path/to/pv_vardict_case-control_filter_toil_log --outdir . 
--writeLogs /path/to/pv_vardict_case-control_filter_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/pv_vardict_case-control_filter.cwl /path/to/inputs.yaml > pv_vardict_case-control_filter_toil.stdout 2> pv_vardict_case-control_filter_toil.stderr & +``` + +### Usage: + +``` +Usage: pv vardict case-control filter [OPTIONS] + + This tool helps to filter vardict version 1.4.6 VCFs for case control + calling + +Options: + -i, --inputVcf FILE Input vcf generated by vardict which needs + to be processed [required] + --tsampleName TEXT Name of the tumor Sample [required] + -dp, --totalDepth INTEGER RANGE + Tumor total depth threshold [default: 20; + x>=20] + -ad, --alleledepth INTEGER RANGE + [x>=1] + -tnr, --tnRatio INTEGER Tumor-Normal variant fraction ratio + threshold [default: 1] + -vf, --variantFraction FLOAT Tumor variant fraction threshold [default: + 5e-05] + -mq, --minQual INTEGER Minimum variant call quality [default: 0] + -fg, --filterGermline Whether to remove calls without 'somatic' + status + -o, --outDir TEXT Full Path to the output dir + --help Show this message and exit. 
+``` + diff --git a/docs/postprocessing_variant_calls/vardict_filter_single-sample_0.1.4.md b/docs/postprocessing_variant_calls/vardict_filter_single-sample_0.1.4.md new file mode 100644 index 00000000..163beaf3 --- /dev/null +++ b/docs/postprocessing_variant_calls/vardict_filter_single-sample_0.1.4.md @@ -0,0 +1,44 @@ +## CWL pv_vardict_single_filter.cwl +- CWL specification 1.0 +- Use example_inputs_single_filter.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner pv_vardict_single_filter.cwl example_inputs_single_filter.yaml +``` +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/pv_vardict_single_filter.cwl /path/to/inputs.yaml +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/pv_vardict_single_filter_toil.log --jobStore /path/to/pv_vardict_single_filter_jobStore --batchSystem lsf --workDir /path/to/pv_vardict_single_filter_toil_log --outdir . 
--writeLogs /path/to/pv_vardict_single_filter_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/pv_vardict_single_filter.cwl /path/to/inputs.yaml > pv_vardict_single_filter_toil.stdout 2> pv_vardict_single_filter_toil.stderr & +``` + +### Usage + +``` +Usage: pv vardict single filter [OPTIONS] + + This tool helps to filter vardict version 1.4.6 VCFs for single sample + calling + +Options: + -i, --inputVcf FILE Input vcf generated by vardict which needs + to be processed [required] + --tsampleName TEXT Name of the tumor Sample [required] + -dp, --totalDepth INTEGER RANGE + Tumor total depth threshold [default: 20; + x>=20] + -ad, --alleledepth INTEGER RANGE + [x>=1] + -tnr, --tnRatio INTEGER Tumor-Normal variant fraction ratio + threshold [default: 1] + -vf, --variantFraction FLOAT Tumor variant fraction threshold [default: + 5e-05] + -mq, --minQual INTEGER Minimum variant call quality [default: 0] + -fg, --filterGermline Whether to remove calls without 'somatic' + status + -o, --outDir TEXT Full Path to the output dir + --help Show this message and exit.
+``` diff --git a/docs/trim-galore/README.md b/docs/trim-galore/README.md new file mode 100644 index 00000000..2f5f0270 --- /dev/null +++ b/docs/trim-galore/README.md @@ -0,0 +1,2 @@ +# Trim Galore + diff --git a/trim_galore_0.6.2/README.md b/docs/trim-galore/trim_galore_0.6.2.md similarity index 83% rename from trim_galore_0.6.2/README.md rename to docs/trim-galore/trim_galore_0.6.2.md index 3727924e..4ef02a4d 100644 --- a/trim_galore_0.6.2/README.md +++ b/docs/trim-galore/trim_galore_0.6.2.md @@ -1,25 +1,25 @@ -# CWL and Dockerfile for running Trim Galore +# v0.6.2 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| Ubuntu base image | 18.04 | - | -| cutadapt | 2.3 | https://pypi.org/project/cutadapt/ | -| FASTQC | 0.11.8 | https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip | -| Trim Galore | 0.6.2 | https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz | +| Tool | Version | Location | +| :--- | :--- | :--- | +| Ubuntu base image | 18.04 | - | +| cutadapt | 2.3 | [https://pypi.org/project/cutadapt/](https://pypi.org/project/cutadapt/) | +| FASTQC | 0.11.8 | [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc\_v0.11.8.zip](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip) | +| Trim Galore | 0.6.2 | [https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz](https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz) | ## CWL -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner trim_galore_0.6.2.cwl example_inputs.yaml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and 
manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -32,7 +32,7 @@ ### Usage -``` +```text usage: trim_galore_0.6.2.cwl [-h] positional arguments: @@ -92,4 +92,5 @@ optional arguments: --error_rate ERROR_RATE Maximum allowed error rate (no. of errors divided by the length of the matching region) (default: 0.1) -``` \ No newline at end of file +``` + diff --git a/docs/ubuntu-utilites/README.md b/docs/ubuntu-utilites/README.md new file mode 100644 index 00000000..5d69693e --- /dev/null +++ b/docs/ubuntu-utilites/README.md @@ -0,0 +1,2 @@ +# Ubuntu utilities + diff --git a/docs/ubuntu-utilites/utilities_ubuntu_18.04.md b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md new file mode 100644 index 00000000..ba695d67 --- /dev/null +++ b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md @@ -0,0 +1,26 @@ +# v18.04 + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| Ubuntu base image | 18.04 | - | + +## Available tools + +| Tool | Description | +| :--- | :--- | +| sort.cwl | sort lines of text files | +| gzip.cwl | compress or expand files | +| mv.cwl | move \(rename\) files | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs\_toolname.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml +``` + diff --git a/docs/vardictjava/README.md b/docs/vardictjava/README.md new file mode 100644 index 00000000..5cf8e57f --- /dev/null +++ b/docs/vardictjava/README.md @@ -0,0 +1,3 @@ +# VardictJava + + diff --git a/docs/vardictjava/vardictjava_1.8.2.md b/docs/vardictjava/vardictjava_1.8.2.md new file mode 100644 index 00000000..21b4ac81 --- /dev/null +++
b/docs/vardictjava/vardictjava_1.8.2.md @@ -0,0 +1,73 @@ +# Vardict v1.8.2 - Single sample mode +To run VarDictJava in single sample mode vardict_workflow_single_sample.cwl should be run. vardict_workflow_single_sample.cwl will run 3 workflows to implement the example command in the original documentation as explained here: +https://github.com/AstraZeneca-NGS/VarDictJava#single-sample-mode + + + +## Version of tools in docker image \(/container/Dockerfile\) + +| Tool | Version | Location | +| :--- | :--- | :--- | +| ubuntu base image (alpine) | 3.8 | - | +| vardict | 1.8.2 | [https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2](https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2) | +| perl | 5.26.2-r1 | [https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl](https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl) | +| r | 3.5.1 | [https://pkgs.alpinelinux.org/package/edge/community/x86/R](https://pkgs.alpinelinux.org/package/edge/community/x86/R) | + +## CWL + +* CWL specification 1.0 +* Use example\_inputs.yaml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vardict_single_sample.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vardict_1.8.2/vardict_single_sample.cwl /path/to/inputs.yaml + +#Using Toil-cwl-runner +toil-cwl-runner --singularity vardict_single_sample.cwl example_inputs.yaml +``` + +### Usage + +```bash +> toil-cwl-runner vardict_single_sample.cwl --help +usage: vardict_single_sample.cwl [-h] [--bedfile BEDFILE] + [--input_bam_control INPUT_BAM_CONTROL] + --input_bam_case INPUT_BAM_CASE + --reference_fasta REFERENCE_FASTA + [--sample_name SAMPLE_NAME] + [--bed_file_column_for_region_start BED_FILE_COLUMN_FOR_REGION_START] + [--bed_file_column_for_region_end BED_FILE_COLUMN_FOR_REGION_END] +
[--bed_file_column_for_gene_name BED_FILE_COLUMN_FOR_GENE_NAME] + [--bed_file_column_for_chromsome BED_FILE_COLUMN_FOR_CHROMSOME] + --control_sample_name CONTROL_SAMPLE_NAME + [--filter_variants] + [--minimum_allele_frequency MINIMUM_ALLELE_FREQUENCY] + --output_vcf OUTPUT_VCF + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --bedfile BEDFILE + --input_bam_control INPUT_BAM_CONTROL + --input_bam_case INPUT_BAM_CASE + --reference_fasta REFERENCE_FASTA + --sample_name SAMPLE_NAME + --bed_file_column_for_region_start BED_FILE_COLUMN_FOR_REGION_START + --bed_file_column_for_region_end BED_FILE_COLUMN_FOR_REGION_END + --bed_file_column_for_gene_name BED_FILE_COLUMN_FOR_GENE_NAME + --bed_file_column_for_chromsome BED_FILE_COLUMN_FOR_CHROMSOME + --control_sample_name CONTROL_SAMPLE_NAME + --filter_variants + --minimum_allele_frequency MINIMUM_ALLELE_FREQUENCY + --output_vcf OUTPUT_VCF +``` diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md new file mode 100644 index 00000000..e349a6cd --- /dev/null +++ b/docs/vcf2maf/README.md @@ -0,0 +1,3 @@ +# VCF2MAF + + diff --git a/docs/vcf2maf/vcf2maf_1.6.21.md b/docs/vcf2maf/vcf2maf_1.6.21.md new file mode 100644 index 00000000..6d8cd35d --- /dev/null +++ b/docs/vcf2maf/vcf2maf_1.6.21.md @@ -0,0 +1,76 @@ +# CWL and Dockerfile for running vcf2maf v1.6.21 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | - | +|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh +|BCFTOOLS_VERSION | 1.10.2 | - | - | +|SAMTOOLS_VERSION | 1.10 | - | - | +|VCF2MAF_VERSION | 1.6.21 | - | - | +|HTSLIB_VERSION | 1.10.2 | - | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs 
to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +Usage: + perl vcf2maf.pl --help + perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID + +--input-vcf Path to input file in VCF format +--output-maf Path to output MAF file +--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF] +--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR] +--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL] +--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id] +--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id] +--custom-enst List of custom ENST IDs that override canonical selection +--vep-path Folder containing the vep script [~/miniconda3/bin] +--vep-data VEP's base cache/plugin directory [~/.vep] +--vep-forks Number of forked processes to use when running VEP [4] +--vep-custom String to pass into VEP's --custom option [] +--vep-config Config file to pass into VEP's --config
option [] +--vep-overwrite Allow VEP to overwrite output VCF if it exists +--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000] +--any-allele When reporting co-located variants, allow mismatched variant alleles too +--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found +--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events) +--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz] +--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004] +--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens] +--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37] +--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version] +--maf-center Variant calling center to report in MAF [.] +--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF [] +--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] +--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF [] +--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7] +--remap-chain Chain file to remap variants to a different assembly before running VEP +--verbose Print more things to log progress +--help Print a brief help message and quit +--man Print the detailed manual +``` diff --git a/docs/waltz/README.md b/docs/waltz/README.md new file mode 100644 index 00000000..0d43eaff --- /dev/null +++ b/docs/waltz/README.md @@ -0,0 +1,2 @@ +# Waltz + diff --git a/waltz_count_reads_3.1.1/README.md b/docs/waltz/waltz_count_reads_3.1.1.md similarity index 70% rename from waltz_count_reads_3.1.1/README.md rename to docs/waltz/waltz_count_reads_3.1.1.md index 
8ad83443..e3131e9f 100644 --- a/waltz_count_reads_3.1.1/README.md +++ b/docs/waltz/waltz_count_reads_3.1.1.md @@ -1,24 +1,25 @@ -# CWL and Dockerfile for running Waltz - Count Reads +# CountReads v3.1.1 -## Version of tools in docker image (/container/Dockerfile) +## Version of tools in docker image \(/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) + ## CWL -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_count_reads_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -55,3 +56,4 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` + diff --git a/waltz_pileupmatrices_3.1.1/README.md b/docs/waltz/waltz_pileupmatrices_3.1.1.md similarity index 70% rename from waltz_pileupmatrices_3.1.1/README.md rename to docs/waltz/waltz_pileupmatrices_3.1.1.md index b5aed666..cc432392 100644 --- a/waltz_pileupmatrices_3.1.1/README.md 
+++ b/docs/waltz/waltz_pileupmatrices_3.1.1.md @@ -1,24 +1,25 @@ -# CWL and Dockerfile for running Waltz - PileupMetrics +# PileupMetrics v3.1.1 -## Version of tools in docker image (../waltz_count_reads_3.1.1/container/Dockerfile) +## Version of tools in docker image \(../waltz\_count\_reads\_3.1.1/container/Dockerfile\) -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar | +| Tool | Version | Location | +| :--- | :--- | :--- | +| java base image | 8 | - | +| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) | [![](https://img.shields.io/badge/version-3.1.1-blue)](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1) + ## CWL -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): +* CWL specification 1.0 +* Use example\_inputs.yml to see the inputs to the cwl +* Example Command using [toil](https://toil.readthedocs.io): ```bash > toil-cwl-runner waltz_pileupmatrices_3.1.1.cwl example_inputs.yml ``` -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** +**If at MSK, using the JUNO cluster you can use the following command** ```bash #Using CWLTOOL @@ -56,3 +57,4 @@ optional arguments: --number_of_threads NUMBER_OF_THREADS --bed_file BED_FILE ``` + diff --git a/expression_tools/README.md b/expression_tools/README.md new file mode 100644 index 00000000..f0c79bf1 --- /dev/null +++ b/expression_tools/README.md @@ -0,0 +1,17 @@ +# CWL Expression tools + +## Available tools + +| Tool | Description | +| -------- | ------------------------ 
| +| put_in_dir.cwl | put the list of files into the same directory | + +## CWL + +- CWL specification 1.0 +- Use example_inputs_toolname.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner put_in_dir.cwl example_inputs_gzip.yaml +``` diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl new file mode 100644 index 00000000..382b64e5 --- /dev/null +++ b/expression_tools/put_in_dir.cwl @@ -0,0 +1,103 @@ +#!/usr/bin/env cwl-runner +# originally from https://github.com/mskcc/pluto-cwl + +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +class: ExpressionTool +id: put-in-dir + +inputs: + output_directory_name: + type: string + doc: >- + Put all `files` in a directory called `output_directory_name`. + output_subdirectory_name: + type: string? + doc: >- + If specified, nest all `files` within a directory called `output_subdirectory_name`, which itself is within `output_directory_name`.
+ files: + type: + type: array + items: + - File + - type: array + items: + - File + - Directory + - 'null' + +outputs: + directory: + type: Directory + +# This tool returns a Directory object, +# which holds all output files from the list +# of supplied input files +expression: | + ${ + var output_files = []; + var input_files = inputs.files.filter(function(single_file) { + return single_file !== null && single_file !== undefined; + }); + + for (var i = 0; i < input_files.length; i++) { + // Handle list of list of files + if (input_files[i] && input_files[i].length) { + for (var ii = 0; ii < input_files[i].length; ii++) { + output_files.push(input_files[i][ii]); + } + // Handle list of files + } else if (input_files[i]) { + output_files.push(input_files[i]); + } + } + + if (inputs.output_subdirectory_name) { + return { + 'directory': { + 'class': 'Directory', + 'basename': inputs.output_directory_name, + 'listing': [ + { + 'class': 'Directory', + 'basename': inputs.output_subdirectory_name, + 'listing': output_files + } + ] + } + }; + } else { + return { + 'directory': { + 'class': 'Directory', + 'basename': inputs.output_directory_name, + 'listing': output_files + } + }; + } + + } + +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/fastp_0.20.1/README.md b/fastp_0.20.1/README.md new file mode 100644 index 00000000..10e9866a --- /dev/null +++ b/fastp_0.20.1/README.md @@ -0,0 +1,84 @@ +# CWL and Dockerfile for running Fastp + +## Version of tools in docker image + +|
Tool | Version | Location | +|--- |--- |--- | +| fastp | 0.20.1 | quay.io/biocontainers/fastp:0.20.1--h8b12597_0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner ./fastp_0.20.1.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool ./fastp_0.20.1.cwl example_inputs.yaml + +#Using toil-cwl-runner +> mkdir toil_log +> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/toil_log --outdir . --writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/fastp_0.20.1/fastp_0.20.1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr & +``` + +### Usage +``` +usage: fastp_0.20.1.cwl [-h] --read1_input READ1_INPUT --read1_output_path + READ1_OUTPUT_PATH [--read2_input READ2_INPUT] + [--read2_output_path READ2_OUTPUT_PATH] + [--unpaired1_path UNPAIRED1_PATH] + [--unpaired2_path UNPAIRED2_PATH] + [--failed_reads_path FAILED_READS_PATH] + [--read1_adapter_sequence READ1_ADAPTER_SEQUENCE] + [--read2_adapter_sequence READ2_ADAPTER_SEQUENCE] + [--minimum_read_length MINIMUM_READ_LENGTH] + --json_output_path JSON_OUTPUT_PATH --html_output_path + HTML_OUTPUT_PATH + [job_order] + +Setup and execute Fastp + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --read1_input READ1_INPUT + read1 input file name + --read1_output_path READ1_OUTPUT_PATH + read1 output file name + --read2_input READ2_INPUT + read2 input file name, for PE data + --read2_output_path
READ2_OUTPUT_PATH + read2 output file name + --unpaired1_path UNPAIRED1_PATH + for PE input, if read1 passed QC but read2 not, it + will be written to unpaired1. + --unpaired2_path UNPAIRED2_PATH + for PE input, if read2 passed QC but read1 not, it + will be written to unpaired2. + --failed_reads_path FAILED_READS_PATH + specify the file to store reads that cannot pass the + filters. + --read1_adapter_sequence READ1_ADAPTER_SEQUENCE + the adapter for read1. For SE data, if not specified, + the adapter will be auto-detected. For PE data, this + is used if R1/R2 are found not overlapped. + --read2_adapter_sequence READ2_ADAPTER_SEQUENCE + the adapter for read2. For PE data, this is used if + R1/R2 are found not overlapped. + --minimum_read_length MINIMUM_READ_LENGTH + reads shorter than length_required will be discarded, + default is 15. + --json_output_path JSON_OUTPUT_PATH + the json format report file name + --html_output_path HTML_OUTPUT_PATH + the html format report file name +``` diff --git a/fastp_0.20.1/example_inputs.yaml b/fastp_0.20.1/example_inputs.yaml new file mode 100644 index 00000000..5d3af3bf --- /dev/null +++ b/fastp_0.20.1/example_inputs.yaml @@ -0,0 +1,13 @@ +read1_input: + class: File + path: "./test_data/R1.fq" +read2_input: + class: File + path: "./test_data/R2.fq" +read1_output_path: "./R1.output" +read2_output_path: "./R2.output" +read1_adapter_sequence: "GATCGGAAGAGC" +read2_adapter_sequence: "AGATCGGAAGAGC" +minimum_read_length: 25 +json_output_path: "sample_name.json" +html_output_path: "sample_name.html" diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl new file mode 100644 index 00000000..f6356a6a --- /dev/null +++ b/fastp_0.20.1/fastp_0.20.1.cwl @@ -0,0 +1,225 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fastp_0_20_1 +baseCommand: + - fastp 
+inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + doc: 'worker thread number, default is 2 (int [=2])' + - id: read1_input + type: File + inputBinding: + position: 0 + prefix: '--in1' + doc: | + read1 input file name + - id: read1_output_path + type: string + inputBinding: + position: 0 + prefix: '--out1' + doc: | + read1 output file name + - id: read2_input + type: File? + inputBinding: + position: 0 + prefix: '--in2' + doc: | + read2 input file name, for PE data + - id: read2_output_path + type: string? + inputBinding: + position: 0 + prefix: '--out2' + doc: | + read2 output file name + - id: unpaired1_path + type: string? + inputBinding: + position: 0 + prefix: '--unpaired1' + doc: > + for PE input, if read1 passed QC but read2 not, it will be written to + unpaired1. + - id: unpaired2_path + type: string? + inputBinding: + position: 0 + prefix: '--unpaired2' + doc: > + for PE input, if read2 passed QC but read1 not, it will be written to + unpaired2. + - id: failed_reads_path + type: string? + inputBinding: + position: 0 + prefix: '--failed_out' + doc: | + specify the file to store reads that cannot pass the filters. + - id: read1_adapter_sequence + type: string? + inputBinding: + position: 0 + prefix: '--adapter_sequence' + doc: > + the adapter for read1. For SE data, if not specified, the adapter will be + auto-detected. For PE data, this is used if R1/R2 are found not + overlapped. + - id: read2_adapter_sequence + type: string? + inputBinding: + position: 0 + prefix: '--adapter_sequence_r2' + doc: > + the adapter for read2. For PE data, this is used if R1/R2 are found not + overlapped. + - id: minimum_read_length + type: int? + inputBinding: + position: 0 + prefix: '--length_required' + doc: | + reads shorter than length_required will be discarded, default is 15. + - id: maximum_read_length + type: int? 
+ inputBinding: + position: 0 + prefix: '--length_limit' + doc: > + reads longer than length_limit will be discarded, default 0 means no + limitation. + - id: max_len_read1 + type: int? + inputBinding: + position: 0 + prefix: '--max_len1' + doc: >- + if read1 is longer than max_len1, then trim read1 at its tail to make it + as long as max_len1. Default 0 means no limitation + - id: max_len_read2 + type: int? + inputBinding: + position: 0 + prefix: '--max_len2' + doc: >- + if read2 is longer than max_len2, then trim read2 at its tail to make it + as long as max_len2. Default 0 means no limitation. If it's not specified, + it will follow read1's settings + - default: fastp.json + id: json_output_path + type: string + inputBinding: + position: 0 + prefix: '--json' + doc: | + the json format report file name + - default: fastp.html + id: html_output_path + type: string + inputBinding: + position: 0 + prefix: '--html' + doc: | + the html format report file name + - id: disable_quality_filtering + type: boolean? + inputBinding: + position: 0 + prefix: '--disable_quality_filtering' + doc: >- + quality filtering is enabled by default. If this option is specified, + quality filtering is disabled + - id: disable_trim_poly_g + type: boolean? + inputBinding: + position: 0 + prefix: '--disable_trim_poly_g' + doc: >- + disable polyG tail trimming, by default trimming is automatically enabled + for Illumina NextSeq/NovaSeq data + - id: verbose + type: boolean? + inputBinding: + position: 0 + prefix: '--verbose' + doc: output verbose log information (i.e. when every 1M reads are processed) +outputs: + - id: fastp_json_output + type: File + outputBinding: + glob: $(inputs.json_output_path) + - id: fastp_html_output + type: File + outputBinding: + glob: $(inputs.html_output_path) + - id: fastp_read1_output + type: File + outputBinding: + glob: $(inputs.read1_output_path) + - id: fastp_read2_output + type: File?
+ outputBinding: + glob: $(inputs.read2_output_path) + - id: fastp_unpaired1_output + type: File? + outputBinding: + glob: $(inputs.unpaired1_path) + - id: fastp_unpaired2_output + type: File? + outputBinding: + glob: $(inputs.unpaired2_path) +doc: Setup and execute Fastp +label: fastp_0.20.1 +arguments: + - position: 0 + prefix: '--thread' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 4 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fastp:0.20.1--h8b12597_0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:fraihaa@mskcc.org' + 'foaf:name': Adrian Fraiha + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fastp + 'doap:revision': 0.20.1 diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..59eaa165 --- /dev/null +++ b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml @@ -0,0 +1,17 @@ +error_rate_post_umi: null +error_rate_pre_umi: null +input: /path/to/bam_file +max_reads_per_strand: null +memory_overhead: null +memory_per_job: null +min_input_base_quality: null +min_reads: + - 1 + - 1 + - 0 +number_of_threads: null +output_file_name: null +read_group_id: null +read_name_prefix: null +sort_order: null +trim: null diff --git 
a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl new file mode 100644 index 00000000..96211f57 --- /dev/null +++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl @@ -0,0 +1,226 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_call_duplex_consensus_reads_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 2 + prefix: '--input' + shellQuote: false + doc: The input SAM or BAM file. + - id: output_file_name + type: string? + doc: Output SAM or BAM file to write consensus reads. + - id: read_name_prefix + type: string? + inputBinding: + position: 2 + prefix: '--read-name-prefix' + doc: The prefix for all consensus read names + - id: read_group_id + type: string? + inputBinding: + position: 2 + prefix: '--read-group-id' + doc: The new read group ID for all the consensus reads. + - id: error_rate_pre_umi + type: int? + inputBinding: + position: 2 + prefix: '--error-rate-pre-umi' + doc: >- + The Phred-scaled error rate for an error prior to the UMIs being + integrated. + - id: error_rate_post_umi + type: int? + inputBinding: + position: 2 + prefix: '--error-rate-post-umi' + doc: >- + The Phred-scaled error rate for an error post the UMIs have been + integrated. + - id: min_input_base_quality + type: int? + inputBinding: + position: 2 + prefix: '--min-input-base-quality' + doc: Ignore bases in raw reads that have Q below this value. + - id: trim + type: boolean?
+ inputBinding: + position: 2 + prefix: '--trim' + doc: 'If true, quality trim input reads in addition to masking low Q bases' + - id: sort_order + type: string? + inputBinding: + position: 2 + prefix: '--sort-order' + doc: 'The sort order of the output, if :none: then the same as the input.' + - id: min_reads + type: 'int[]' + inputBinding: + position: 2 + prefix: '--min-reads' + itemSeparator: ' ' + shellQuote: false + doc: The minimum number of input reads to a consensus read. + - id: max_reads_per_strand + type: int? + inputBinding: + position: 2 + prefix: '--max-reads-per-strand' + doc: >- + The maximum number of reads to use when building a single-strand + consensus. If more than this many reads are present in a tag family, the + family is randomly downsampled to exactly max-reads reads. + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_call_duplex_consensus_reads_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/\.bam$/,'_cons.bam'); + } +doc: >- + Calls duplex consensus sequences from reads generated from the same + double-stranded source molecule. Prior to running this tool, read must have + been grouped with GroupReadsByUmi using the paired strategy. Doing so will + apply (by default) MI tags to all reads of the form */A and */B where the /A + and /B suffixes with the same identifier denote reads that are derived from + opposite strands of the same source duplex molecule. + + + Reads from the same unique molecule are first partitioned by source strand and + assembled into single strand consensus molecules as described by + CallMolecularConsensusReads.
Subsequently, for molecules that have at least + one observation of each strand, duplex consensus reads are assembled by + combining the evidence from the two single strand consensus reads. + + + Because of the nature of duplex sequencing, this tool does not support + fragment reads - if found in the input they are ignored. Similarly, read pairs + for which consensus reads cannot be generated for one or other read (R1 or R2) + are omitted from the output. + + + Consensus reads have a number of additional optional tags set in the resulting + BAM file. The tag names follow a pattern where the first letter (a, b or c) + denotes that the tag applies to the first single strand consensus (a), second + single-strand consensus (b) or the final duplex consensus (c). The second + letter is intended to capture the meaning of the tag (e.g. d=depth, m=min + depth, e=errors/error-rate) and is upper case for values that are one per read + and lower case for values that are one per base. +label: fgbio_call_duplex_consensus_reads_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx10G" + } + else { + return "-Xmx10G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: CallDuplexConsensusReads + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return
runtime.tmpdir + } + - position: 2 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/\.bam$/,'_cons.bam'); + } + - position: 2 + prefix: '--threads' + valueFrom: |- + ${ + if(inputs.number_of_threads) + return inputs.number_of_threads + return runtime.cores + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 20000 + coresMin: 16 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio CallDuplexConsensusReads + 'doap:revision': 1.2.0 diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..38dd911b --- /dev/null +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml @@ -0,0 +1,16 @@ +input: + class: File + metadata: {} + path: /path/to/bam +output_prefix: prefix +intervals: + class: File
+ metadata: {} + path: /path/to/intervals +description: null +duplex_umi_counts: null +min_ab_reads: null +min_ba_reads: null +number_of_threads: null +umi_tag: null +mi_tag: null diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl new file mode 100644 index 00000000..28697020 --- /dev/null +++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl @@ -0,0 +1,294 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_collect_duplex_seq_metrics_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 2 + prefix: '--input' + doc: Input BAM file generated by GroupReadByUmi. + - id: output_prefix + type: string? + doc: Prefix of output files to write. + - id: intervals + type: File? + inputBinding: + position: 2 + prefix: '--intervals' + doc: 'Optional set of intervals over which to restrict analysis. [Optional].' + - id: description + type: string? + inputBinding: + position: 2 + prefix: '--description' + doc: >- + Description of data set used to label plots. Defaults to sample/library. + [Optional]. + - id: duplex_umi_counts + type: boolean? + inputBinding: + position: 2 + prefix: '--duplex-umi-counts' + doc: >- + If true, produce the .duplex_umi_counts.txt file with counts of duplex UMI + observations. [Optional]. + - id: min_ab_reads + type: int? + inputBinding: + position: 2 + prefix: '--min-ab-reads' + doc: 'Minimum AB reads to call a tag family a ''duplex''. [Optional].' + - id: min_ba_reads + type: int? 
+ inputBinding: + position: 2 + prefix: '--min-ba-reads' + doc: 'Minimum BA reads to call a tag family a ''duplex''. [Optional].' + - id: umi_tag + type: string? + inputBinding: + position: 2 + prefix: '--umi-tag' + doc: 'The tag containing the raw UMI. [Optional].' + - id: mi_tag + type: string? + inputBinding: + position: 2 + prefix: '--mi-tag' + doc: 'The output tag for UMI grouping. [Optional].' + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_collect_duplex_seq_metrics_family_size + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.family_sizes.txt' + } + else{ + return inputs.input.basename.replace('.bam','.family_sizes.txt') + } + } + - id: fgbio_collect_duplex_seq_metrics_duplex_family_size + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_family_sizes.txt' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_family_sizes.txt') + } + } + - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_yield_metrics.txt' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_yield_metrics.txt') + } + } + - id: fgbio_collect_duplex_seq_metrics_umi_counts + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.umi_counts.txt' + } + else{ + return inputs.input.basename.replace('.bam','.umi_counts.txt') + } + } + - id: fgbio_collect_duplex_seq_metrics_duplex_qc + type: File? 
+ outputBinding: + glob: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + '.duplex_qc.pdf' + } + else{ + return inputs.input.basename.replace('.bam','.duplex_qc.pdf') + } + } + - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts + type: File? + outputBinding: + glob: |- + ${ + if (inputs.output_prefix) { + return inputs.output_prefix + '.duplex_umi_counts.txt' + } else { + return inputs.input.basename.replace('.bam','.duplex_umi_counts.txt') + } + } +doc: >- + Collects a suite of metrics to QC duplex sequencing data. + + Inputs ------ + + The input to this tool must be a BAM file that is either: + + 1. The exact BAM output by the 'GroupReadsByUmi' tool (in the sort-order it + was produced in) 2. A BAM file that has MI tags present on all reads (usually + set by 'GroupReadsByUmi') and has been sorted with + 'SortBam' into 'TemplateCoordinate' order. + + Calculation of metrics may be restricted to a set of regions using the + '--intervals' parameter. This can significantly affect results as off-target + reads in duplex sequencing experiments often have very different properties + than on-target reads due to the lack of enrichment. + + Several metrics are calculated related to the fraction of tag families that + have duplex coverage. The definition of "duplex" is controlled by the + '--min-ab-reads' and '--min-ba-reads' parameters. The default is to treat any + tag family with at least one observation of each strand as a duplex, but this + could be made more stringent, e.g. by setting '--min-ab-reads=3 + --min-ba-reads=3'. If different thresholds are used then '--min-ab-reads' must + be the higher value. + + Outputs ------- + + The following output files are produced: + + 1. .family_sizes.txt: metrics on the frequency of different types of + families of different sizes 2. .duplex_family_sizes.txt: metrics on + the frequency of duplex tag families by the number of observations + from each strand + 3.
.duplex_yield_metrics.txt: summary QC metrics produced using 5%, + 10%, 15%...100% of the data 4. .umi_counts.txt: metrics on the + frequency of observations of UMIs within reads and tag families 5. + .duplex_qc.pdf: a series of plots generated from the preceding metrics + files for visualization 6. .duplex_umi_counts.txt: (optional) metrics + on the frequency of observations of duplex UMIs within reads + and tag families. This file is only produced if the '--duplex-umi-counts' option is used as it requires significantly + more memory to track all pairs of UMIs seen when a large number of UMI sequences are present. + + Within the metrics files the prefixes 'CS', 'SS' and 'DS' are used to mean: + + * CS: tag families where membership is defined solely on matching genome + coordinates and strand * SS: single-stranded tag families where membership is + defined by genome coordinates, strand and UMI; i.e. 50/A and + 50/B are considered different tag families. + * DS: double-stranded tag families where membership is collapsed across + single-stranded tag families from the same + double-stranded source molecule; i.e. 50/A and 50/B become one family + + Requirements ------------ + + For plots to be generated R must be installed and the ggplot2 package + installed with suggested dependencies.
Successfully executing the following in + R will ensure a working installation: + + install.packages("ggplot2", repos="http://cran.us.r-project.org", + dependencies=TRUE) +label: fgbio_collect_duplex_seq_metrics_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } + else { + return "-Xmx12G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: CollectDuplexSeqMetrics + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_prefix){ + return inputs.output_prefix + } + else{ + return inputs.input.basename.replace(/.bam/,'') + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering 
Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio CollectDuplexSeqMetrics + 'doap:revision': 1.2.0 diff --git a/fgbio_fastq_to_bam_1.2.0/example_inputs.yaml b/fgbio_fastq_to_bam_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..f30727a2 --- /dev/null +++ b/fgbio_fastq_to_bam_1.2.0/example_inputs.yaml @@ -0,0 +1,27 @@ +comment: null +description: null +input: + - class: File + path: >- + /Users/shahr2/Documents/test_reference/test_fastq_to_bam/fastq/test_R1_001.fastq.gz + - class: File + path: >- + /Users/shahr2/Documents/test_reference/test_fastq_to_bam/fastq/test_R2_001.fastq.gz +library: test +memory_overhead: null +memory_per_job: null +number_of_threads: null +output_file_name: null +platform: Illumina +platform-model: novaseq +platform-unit: . +predicted-insert-size: null +read-group-id: test +read-structures: + - 3M2S+T + - 3M2S+T +run-date: null +sample: test +sequencing-center: mskcc +sort: null +umi-tag: null diff --git a/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl new file mode 100644 index 00000000..638e8449 --- /dev/null +++ b/fgbio_fastq_to_bam_1.2.0/fgbio_fastq_to_bam_1.2.0.cwl @@ -0,0 +1,255 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_fastq_to_bam_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: 'File[]' + inputBinding: + position: 2 + prefix: '--input' + itemSeparator: ' ' + shellQuote: false + label: PathToFastq + doc: 'Fastq files corresponding to each sequencing read (e.g. R1, I1, etc.).' + - id: output_file_name + type: string? 
+ doc: The output SAM or BAM file to be written. + - id: read-structures + type: 'string[]?' + inputBinding: + position: 2 + prefix: '--read-structures' + itemSeparator: ' ' + shellQuote: false + doc: >- + Read structures, one for each of the FASTQs. + https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures + - id: sort + type: boolean? + inputBinding: + position: 2 + prefix: '--sort' + shellQuote: false + doc: 'If true, queryname sort the BAM file, otherwise preserve input order.' + - id: umi-tag + type: string? + inputBinding: + position: 2 + prefix: '--umi-tag' + shellQuote: false + doc: Tag in which to store molecular barcodes/UMIs + - id: read-group-id + type: string? + inputBinding: + position: 2 + prefix: '--read-group-id' + shellQuote: false + doc: Read group ID to use in the file header. + - id: sample + type: string? + inputBinding: + position: 2 + prefix: '--sample' + shellQuote: false + doc: The name of the sequenced sample. + - id: library + type: string? + inputBinding: + position: 2 + prefix: '--library' + shellQuote: false + doc: The name/ID of the sequenced library. + - id: platform + type: string? + inputBinding: + position: 2 + prefix: '--platform' + shellQuote: false + doc: Sequencing Platform + - id: platform-unit + type: string? + inputBinding: + position: 2 + prefix: '--platform-unit' + shellQuote: false + doc: Platform unit (e.g. '<flowcell-barcode>.<lane>.<sample-barcode>') + - id: platform-model + type: string? + inputBinding: + position: 2 + prefix: '--platform-model' + shellQuote: false + doc: >- + Platform model to insert into the group header (ex. miseq, hiseq2500, + hiseqX) + - id: sequencing-center + type: string? + inputBinding: + position: 2 + prefix: '--sequencing-center' + shellQuote: false + doc: The sequencing center from which the data originated + - id: predicted-insert-size + type: int?
+ inputBinding: + position: 2 + prefix: '--predicted-insert-size' + shellQuote: false + doc: 'Predicted median insert size, to insert into the read group header' + - id: description + type: string? + inputBinding: + position: 2 + prefix: '--description' + doc: Description of the read group. + - id: comment + type: string? + inputBinding: + position: 2 + prefix: '--comment' + doc: Comment(s) to include in the output file’s header + - id: run-date + type: string? + inputBinding: + position: 2 + prefix: '--run-date' + shellQuote: false + doc: 'Date the run was produced, to insert into the read group header' + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_fastq_to_bam_ubam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input[0].basename.replace(/.fastq.gz/,'_ubam.bam'); + } +doc: >- + Generates an unmapped BAM (or SAM or CRAM) file from fastq files. Takes in one + or more fastq files (optionally gzipped), each representing a different + sequencing read (e.g. R1, R2, I1 or I2) and can use a set of read structures + to allocate bases in those reads to template reads, sample indices, unique + molecular indices, or to designate bases to be skipped over. + + + Read structures are made up of <number><operator> pairs much like the CIGAR + string in BAM files. Four kinds of operators are recognized: + + + 1. T identifies a template read + + 2. B identifies a sample barcode read + + 3. M identifies a unique molecular index read + + 4. S identifies a set of bases that should be skipped or ignored + + + The last <number><operator> pair may be specified using a + sign instead of a + number to denote “all remaining bases”.
This is useful if, e.g., fastqs have + been trimmed and contain reads of varying length. For example to convert a + paired-end run with an index read and where the first 5 bases of R1 are a UMI + and the second five bases are monotemplate you might specify: --input r1.fq r2.fq i1.fq --read-structures 5M5S+T +T +B +label: fgbio_fastq_to_bam_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } + else { + return "-Xmx12G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: FastqToBam + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input[0].basename.replace(/.fastq.gz/,'_ubam.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' +
'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio FastqToBam + 'doap:revision': 1.2.0 diff --git a/fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml b/fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..afe8426d --- /dev/null +++ b/fgbio_filter_consensus_reads_1.2.0/example_inputs.yaml @@ -0,0 +1,17 @@ +input: /path/to/bam_file +max_base_error_rate: null +max_no_call_fraction: null +max_read_error_rate: null +memory_overhead: null +memory_per_job: null +min_base_quality: null +min_mean_base_quality: null +min_reads: + - 2 + - 2 + - 1 +number_of_threads: null +output_file_name: test.bam +reference_fasta: /path/to/reference_fasta +require_single_strand_agreement: true +reverse_per_base_tags: null diff --git a/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl new file mode 100644 index 00000000..91687479 --- /dev/null +++ b/fgbio_filter_consensus_reads_1.2.0/fgbio_filter_consensus_reads_1.2.0.cwl @@ -0,0 +1,238 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_filter_consensus_reads_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 2 + prefix: '--input' + shellQuote: false + doc: The input SAM or BAM file. + - id: output_file_name + type: string? + doc: Output SAM or BAM file to write consensus reads. 
+ - id: reference_fasta + type: File + inputBinding: + position: 2 + prefix: '--ref' + doc: Reference fasta file. + secondaryFiles: + - .fai + - ^.dict + - id: reverse_per_base_tags + type: boolean? + inputBinding: + position: 2 + prefix: '--reverse-per-base-tags' + doc: 'Reverse [complement] per base tags on reverse strand reads.' + - id: min_reads + type: 'int[]?' + inputBinding: + position: 2 + prefix: '--min-reads' + itemSeparator: ' ' + shellQuote: false + doc: >- + The minimum number of reads supporting a consensus base/read. (Max 3 + values) + - id: max_read_error_rate + type: 'float[]?' + inputBinding: + position: 2 + prefix: '--max-read-error-rate' + itemSeparator: ' ' + doc: >- + The maximum raw-read error rate across the entire consensus read. (Max 3 + values) + - id: max_base_error_rate + type: 'float[]?' + inputBinding: + position: 2 + prefix: '--max-base-error-rate' + itemSeparator: ' ' + doc: The maximum error rate for a single consensus base. (Max 3 values) + - id: min_base_quality + type: int + inputBinding: + position: 2 + prefix: '--min-base-quality' + doc: Mask (make N) consensus bases with quality less than this threshold. + - id: max_no_call_fraction + type: float? + inputBinding: + position: 2 + prefix: '--max-no-call-fraction' + doc: Maximum fraction of no-calls in the read after filtering + - id: min_mean_base_quality + type: int? + inputBinding: + position: 2 + prefix: '--min-mean-base-quality' + doc: The minimum mean base quality across the consensus read + - id: require_single_strand_agreement + type: boolean? + inputBinding: + position: 2 + prefix: '--require-single-strand-agreement' + doc: >- + Mask (make N) consensus bases where the AB and BA consensus reads disagree + (for duplex-sequencing only). + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. 
for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_filter_consensus_reads_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_filtered.bam'); + } + secondaryFiles: + - ^.bai +doc: >- + Filters consensus reads generated by CallMolecularConsensusReads or + CallDuplexConsensusReads. Two kinds of filtering are performed: + + + 1. Masking/filtering of individual bases in reads + + 2. Filtering out of reads (i.e. not writing them to the output file) + + + Base-level filtering/masking is only applied if per-base tags are present (see + CallDuplexConsensusReads and CallMolecularConsensusReads for descriptions of + these tags). Read-level filtering is always applied. When filtering reads, + secondary alignments and supplementary records may be removed independently if + they fail one or more filters; if either R1 or R2 primary alignments fail a + filter then all records for the template will be filtered out. + + + The filters applied are as follows: + + + 1. Reads with fewer than min-reads contributing reads are filtered out + + 2. Reads with an average consensus error rate higher than max-read-error-rate + are filtered out + + 3. Reads with mean base quality of the consensus read, prior to any masking, + less than min-mean-base-quality are filtered out (if specified) + + 4. Bases with quality scores below min-base-quality are masked to Ns + + 5. Bases with fewer than min-reads contributing raw reads are masked to Ns + + 6. Bases with a consensus error rate (defined as the fraction of contributing + reads that voted for a different base than the consensus call) higher than + max-base-error-rate are masked to Ns + + 7. For duplex reads, if require-single-strand-agreement is provided, masks to + Ns any bases where the base was observed in both single-strand consensus reads + and the two reads did not agree + + 8. 
Reads with a proportion of Ns higher than max-no-call-fraction after + per-base filtering are filtered out +label: fgbio_filter_consensus_reads_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } + else { + return "-Xmx12G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: FilterConsensusReads + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_filtered.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center 
+'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio FilterConsensusReads + 'doap:revision': 1.2.0 diff --git a/fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml b/fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml new file mode 100644 index 00000000..9fe66421 --- /dev/null +++ b/fgbio_group_reads_by_umi_1.2.0/example_inputs.yaml @@ -0,0 +1,13 @@ +assign_tag: null +edits: null +family_size_histogram: null +include_non_pf_reads: null +input: /path/to/bam_file +memory_overhead: null +memory_per_job: null +min_map_q: null +min_umi_length: null +number_of_threads: null +output_file_name: null +raw_tag: null +strategy: paired diff --git a/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl new file mode 100644 index 00000000..b30a4078 --- /dev/null +++ b/fgbio_group_reads_by_umi_1.2.0/fgbio_group_reads_by_umi_1.2.0.cwl @@ -0,0 +1,257 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_group_reads_by_umi_1_2_0 +baseCommand: + - fgbio +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 2 + prefix: '--input' + shellQuote: false + doc: The input BAM file. + - id: output_file_name + type: string? + doc: The output SAM or BAM file to be written. + - id: family_size_histogram + type: string? + inputBinding: + position: 2 + prefix: '--family-size-histogram' + doc: Optional output of tag family size counts. + - id: raw_tag + type: string? + inputBinding: + position: 2 + prefix: '--raw-tag' + doc: The tag containing the raw UMI. + - id: assign_tag + type: string? 
+ inputBinding: + position: 2 + prefix: '--assign-tag' + doc: The output tag for UMI grouping. + - id: min_map_q + type: int? + inputBinding: + position: 2 + prefix: '--min-map-q' + doc: Minimum mapping quality. + - id: include_non_pf_reads + type: boolean? + inputBinding: + position: 2 + prefix: '--include-non-pf-reads' + - id: strategy + type: string + inputBinding: + position: 2 + prefix: '--strategy' + doc: 'The UMI assignment strategy. (identity,edit,adjacency,paired)' + - id: edits + type: int? + inputBinding: + position: 2 + prefix: '--edits' + doc: The allowable number of edits between UMIs. + - id: min_umi_length + type: int? + inputBinding: + position: 2 + prefix: '--min-umi-length' + doc: >- + The minimum UMI length. If not specified then all UMIs must have the same + length, otherwise discard reads with UMIs shorter than this length and + allow for differing UMI lengths. + - id: temporary_directory + type: string? + doc: 'Default value: null.' + - id: async_io + type: string? + inputBinding: + position: 0 + separate: false + prefix: '--async-io=' + doc: >- + 'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].' +outputs: + - id: fgbio_group_reads_by_umi_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.bam/,'_group.bam'); + } + - id: fgbio_group_reads_by_umi_histogram + type: File? + outputBinding: + glob: |- + ${ + if(inputs.family_size_histogram) + return inputs.family_size_histogram + } +doc: >- + Groups reads together that appear to have come from the same original + molecule. Reads are grouped by template, and then templates are sorted by the + 5’ mapping positions of the reads from the template, used from earliest + mapping position to latest. Reads that have the same end positions are then + sub-grouped by UMI sequence. 
+ + + Accepts reads in any order (including unsorted) and outputs reads sorted by: + + + The lower genome coordinate of the two outer ends of the templates + + The sequencing library + + The assigned UMI tag + + Read Name + + Reads are aggressively filtered out so that only high quality reads/mappings + are taken forward. Single-end reads must have mapping quality >= min-map-q. + Paired-end reads must have both reads mapped to the same chromosome with both + reads having mapping quality >= min-map-q. (Note: the MQ tag is required on + reads with mapped mates). + + + This is done with the expectation that the next step is building consensus + reads, where it is undesirable to either: + + + Assign reads together that are really from different source molecules + + Build two groups from reads that are really from the same molecule + + Errors in mapping reads could lead to both and therefore are minimized. + + + Grouping of UMIs is performed by one of four strategies: + + + 1. identity: only reads with identical UMI sequences are grouped together. + This strategy may be useful for evaluating data, but should generally be + avoided as it will generate multiple UMI groups per original molecule in the + presence of errors. + + 2. edit: reads are clustered into groups such that each read within a group + has at least one other read in the group with <= edits differences and there + are no inter-group pairings with <= edits differences. Effective when there are + small numbers of reads per UMI, but breaks down at very high coverage of UMIs. + + 3. adjacency: a version of the directed adjacency method described in + umi_tools that allows for errors between UMIs but only when there is a count + gradient. + + 4. paired: similar to adjacency but for methods that produce template with a + pair of UMIs such that a read with A-B is related to but not identical to a + read with B-A. Expects the pair of UMIs to be stored in a single tag, + separated by a hyphen (e.g. ACGT-CCGG).
The molecular IDs produced have more + structure than for single UMI strategies, and are of the form {base}/{AB|BA}. + E.g. two UMI pairs would be mapped as follows AAAA-GGGG -> 1/AB, GGGG-AAAA -> + 1/BA. + + edit, adjacency and paired make use of the --edits parameter to control the + matching of non-identical UMIs. + + + By default, all UMIs must be the same length. If --min-umi-length=len is + specified then reads that have a UMI shorter than len will be discarded, and + when comparing UMIs of different lengths, the first len bases will be + compared, where len is the length of the shortest UMI. The UMI length is the + number of [ACGT] bases in the UMI (i.e. does not count dashes and other + non-ACGT characters). This option is not implemented for reads with UMI pairs + (i.e. using the paired assigner). +label: fgbio_group_reads_by_umi_1.2.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } + else { + return "-Xmx12G" + } + } + - position: 0 + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 1 + valueFrom: GroupReadsByUmi + - position: 0 + prefix: '--tmp-dir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + shellQuote: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return 
inputs.input.basename.replace(/.bam/,'_group.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio GroupReadsByUmi + 'doap:revision': 1.2.0 diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/example_inputs.yaml b/fgbio_postprocessing_simplex_filter_0.1.8/example_inputs.yaml new file mode 100644 index 00000000..96445c8c --- /dev/null +++ b/fgbio_postprocessing_simplex_filter_0.1.8/example_inputs.yaml @@ -0,0 +1,2 @@ +input_bam: /path/to/simplex_duplex_fgbio.bam +output_filename: simplex_filtered.bam \ No newline at end of file diff --git a/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl new file mode 100644 index 00000000..2599e4af --- /dev/null +++ b/fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl @@ -0,0 +1,71 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: fgbio_postprocessing_simplex_filter_0.1.8 +baseCommand: + - simplex_filter +inputs: + - id: input_bam + type: File + inputBinding: + position: 0 + prefix: '--input_bam' + doc: Input file (bam or sam). Required. 
+ secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + inputBinding: + position: 0 + prefix: '--output_filename' + doc: Output file (bam or sam). + - id: min_simplex_reads + type: int? + inputBinding: + position: 0 + prefix: '--min_simplex_reads' + doc: Minimum number of simplex reads to pass filter for consensus reads +outputs: + - id: fgbio_postprocessing_simplex_bam + type: File + outputBinding: + glob: |- + ${ + if (inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input_bam.basename.replace(/.bam$/,'_simplex.bam') + } + } + secondaryFiles: + - ^.bai +label: fgbio_postprocessing_simplex_filter_0.1.8 +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/fgbio_postprocessing:0.2.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': fgbio_postprocessing + 'doap:revision': 0.1.8 diff --git a/gatk_ApplyBQSR_4.1.2.0/README.md b/gatk_ApplyBQSR_4.1.2.0/README.md deleted file mode 100644 index 6dd376f7..00000000 --- a/gatk_ApplyBQSR_4.1.2.0/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Apply BQSR - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | - -[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) 
-## CWL - -- CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl --help - -usage: gatk_ApplyBQSR_4.1.2.0.cwl [-h] --reference REFERENCE - [--create_output_bam_index] - --bqsr_recal_file BQSR_RECAL_FILE --input - INPUT [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] - [--create_output_bam_md5] - [--create_output_variant_index] - [--create_output_variant_md5] - [--disable_bam_index_caching] - [--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--emit_original_quals] - [--exclude_intervals EXCLUDE_INTERVALS] - [--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--global_qscore_prior GLOBAL_QSCORE_PRIOR] - 
[--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantize_quals QUANTIZE_QUALS] [--quiet] - [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--read_validation_stringency READ_VALIDATION_STRINGENCY] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_jdk_deflater] [--use_jdk_inflater] - [--use_original_qualities] - [--memory_overhead MEMORY_OVERHEAD] - [--memory_per_job MEMORY_PER_JOB] - [--number_of_threads NUMBER_OF_THREADS] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --reference REFERENCE - Reference sequence - --create_output_bam_index - --bqsr_recal_file BQSR_RECAL_FILE - Input recalibration table for BQSR. Only run ApplyBQSR - with the covariates table created from the input BAM - --input INPUT A BAM file containing input read data - --output_file_name OUTPUT_FILE_NAME - Output file name. 
Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --emit_original_quals - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --global_qscore_prior GLOBAL_QSCORE_PRIOR - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantize_quals QUANTIZE_QUALS - --quiet - --read_filter READ_FILTER - --read_index READ_INDEX - --read_validation_stringency READ_VALIDATION_STRINGENCY - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_jdk_deflater - --use_jdk_inflater - --use_original_qualities - --memory_overhead MEMORY_OVERHEAD - --memory_per_job MEMORY_PER_JOB - --number_of_threads NUMBER_OF_THREADS \ No newline at end of file diff --git a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl index 5fda71e8..14f9ac57 100644 --- a/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl +++ b/gatk_ApplyBQSR_4.1.2.0/gatk_ApplyBQSR_4.1.2.0.cwl @@ -3,8 +3,9 @@ cwlVersion: v1.0 $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' - edam: http://edamontology.org/ + sbg: 'https://www.sevenbridges.com/' id: 
gatk_apply_bqsr_4_1_2_0 baseCommand: - gatk @@ -174,10 +175,14 @@ inputs: position: 6 prefix: '--QUIET' - id: read_filter - type: string? + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' inputBinding: position: 6 - prefix: '--read-filter' - id: read_index type: string? inputBinding: @@ -225,7 +230,7 @@ inputs: - id: number_of_threads type: int? outputs: - - id: output + - id: gatk_apply_bqsr_bam type: File? outputBinding: glob: |- @@ -264,8 +269,6 @@ requirements: - class: ResourceRequirement ramMin: 10000 coresMin: 8 -# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" -# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement dockerPull: 'broadinstitute/gatk:4.1.2.0' - class: InlineJavascriptRequirement @@ -286,4 +289,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': gatk4 - 'doap:revision': 4.1.2.0 \ No newline at end of file + 'doap:revision': 4.1.2.0 diff --git a/gatk_BaseRecalibrator_4.1.2.0/README.md b/gatk_BaseRecalibrator_4.1.2.0/README.md deleted file mode 100644 index 005acb15..00000000 --- a/gatk_BaseRecalibrator_4.1.2.0/README.md +++ /dev/null @@ -1,142 +0,0 @@ -# CWL and Dockerfile for running GATK4 - Base Recalibrator - -## Version of tools in [docker image ](https://hub.docker.com/r/broadinstitute/gatk) - -| Tool | Version | Location | -|--- |--- |--- | -| GATK | 4.1.2.0 | https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0 | - -[![](https://img.shields.io/badge/version-4.1.2.0-blue)](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) -## CWL - -- 
CWL specification 1.0 -- Use example_inputs.yml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml -``` - -**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** - -```bash -#Using CWLTOOL -> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml - -#Using toil-cwl-runner -> mkdir run_directory -> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr & -``` - -## Usage - -```bash -> toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl --help - -usage: gatk_baserecalibrator_4.1.2.0.cwl [-h] --input INPUT --known_sites_1 - KNOWN_SITES_1 --reference REFERENCE - [--output_file_name OUTPUT_FILE_NAME] - [--add_output_sam_program_record] - [--add_output_vcf_command_line] - [--arguments_file ARGUMENTS_FILE] - [--binary_tag_name BINARY_TAG_NAME] - [--bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY] - [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER] - [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] - [--create_output_bam_index] - [--create_output_bam_md5] - [--create_output_variant_index] - [--create_output_variant_md5] - [--default_base_qualities DEFAULT_BASE_QUALITIES] - [--deletions_default_quality DELETIONS_DEFAULT_QUALITY] - [--disable_bam_index_caching] - [--disable_read_filter DISABLE_READ_FILTER] - [--disable_sequence_dictionary_validation] - [--exclude_intervals EXCLUDE_INTERVALS] - 
[--gatk_config_file GATK_CONFIG_FILE] - [--gcs_max_retries GCS_MAX_RETRIES] - [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] - [--indels_context_size INDELS_CONTEXT_SIZE] - [--insertions_default_quality INSERTIONS_DEFAULT_QUALITY] - [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] - [--interval_merging_rule INTERVAL_MERGING_RULE] - [--interval_padding INTERVAL_PADDING] - [--interval_set_rule INTERVAL_SET_RULE] - [--intervals INTERVALS] [--lenient] - [--low_quality_tail LOW_QUALITY_TAIL] - [--maximum_cycle_value MAXIMUM_CYCLE_VALUE] - [--mismatches_context_size MISMATCHES_CONTEXT_SIZE] - [--mismatches_default_quality MISMATCHES_DEFAULT_QUALITY] - [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] - [--quantizing_levels QUANTIZING_LEVELS] - [--QUIET] [--read_filter READ_FILTER] - [--read_index READ_INDEX] - [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] - [--sequence_dictionary SEQUENCE_DICTIONARY] - [--sites_only_vcf_output] - [--use_original_qualities] - [--number_of_threads NUMBER_OF_THREADS] - [--memory_per_job MEMORY_PER_JOB] - [--memory_overhead MEMORY_OVERHEAD] - [--known_sites_2 KNOWN_SITES_2] - [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input INPUT BAM/SAM file containing reads - --known_sites_1 KNOWN_SITES_1 - One or more databases of known polymorphic sites used - to exclude regions around known polymorphisms from - analysis - --reference REFERENCE - Reference sequence file - --output_file_name OUTPUT_FILE_NAME - Output file name. 
Not Required - --add_output_sam_program_record - --add_output_vcf_command_line - --arguments_file ARGUMENTS_FILE - --binary_tag_name BINARY_TAG_NAME - --bqsr_baq_gap_open_penalty BQSR_BAQ_GAP_OPEN_PENALTY - --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER - --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER - --create_output_bam_index - --create_output_bam_md5 - --create_output_variant_index - --create_output_variant_md5 - --default_base_qualities DEFAULT_BASE_QUALITIES - --deletions_default_quality DELETIONS_DEFAULT_QUALITY - --disable_bam_index_caching - --disable_read_filter DISABLE_READ_FILTER - --disable_sequence_dictionary_validation - --exclude_intervals EXCLUDE_INTERVALS - --gatk_config_file GATK_CONFIG_FILE - --gcs_max_retries GCS_MAX_RETRIES - --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS - --indels_context_size INDELS_CONTEXT_SIZE - --insertions_default_quality INSERTIONS_DEFAULT_QUALITY - --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING - --interval_merging_rule INTERVAL_MERGING_RULE - --interval_padding INTERVAL_PADDING - --interval_set_rule INTERVAL_SET_RULE - --intervals INTERVALS - --lenient - --low_quality_tail LOW_QUALITY_TAIL - --maximum_cycle_value MAXIMUM_CYCLE_VALUE - --mismatches_context_size MISMATCHES_CONTEXT_SIZE - --mismatches_default_quality MISMATCHES_DEFAULT_QUALITY - --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN - --quantizing_levels QUANTIZING_LEVELS - --QUIET - --read_filter READ_FILTER - --read_index READ_INDEX - --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES - --sequence_dictionary SEQUENCE_DICTIONARY - --sites_only_vcf_output - --use_original_qualities - --number_of_threads NUMBER_OF_THREADS - --memory_per_job MEMORY_PER_JOB - --memory_overhead MEMORY_OVERHEAD - --known_sites_2 KNOWN_SITES_2 \ No newline at end of file diff --git a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl index 
cf12e785..ba59e2d0 100644 --- a/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl +++ b/gatk_BaseRecalibrator_4.1.2.0/gatk_baserecalibrator_4.1.2.0.cwl @@ -266,7 +266,7 @@ inputs: secondaryFiles: - .idx outputs: - - id: output + - id: gatk_base_recalibrator_output type: File outputBinding: glob: |- diff --git a/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl b/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl index 1753956b..f288b782 100644 --- a/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl +++ b/gatk_apply_bqsr_4.1.0.0/gatk_apply_bqsr_4.1.0.0.cwl @@ -204,7 +204,7 @@ inputs: position: 0 prefix: '--use-original-qualities' outputs: - - id: output + - id: gatk_apply_bqsr_bam type: File? outputBinding: glob: '$(inputs.input.basename.replace(''.bam'', '''')).recal.bam' diff --git a/gatk_apply_bqsr_4.1.8.1/example_inputs.yml b/gatk_apply_bqsr_4.1.8.1/example_inputs.yml new file mode 100644 index 00000000..094a1bc1 --- /dev/null +++ b/gatk_apply_bqsr_4.1.8.1/example_inputs.yml @@ -0,0 +1,48 @@ +reference: + class: File + path: chr14_chr16.fasta +input: + class: File + path: SeraCare_0-5_14.bam +bqsr_recal_file: + class: File + path: SeraCare_0-5_14.recal.table +add_output_sam_program_record: +add_output_vcf_command_line: +arguments_file: +cloud_index_prefetch_buffer: +cloud_prefetch_buffer: +create_output_bam_index: +create_output_bam_md5: +create_output_variant_index: +create_output_variant_md5: +disable_bam_index_caching: +disable_read_filter: +disable_sequence_dictionary_validation: +emit_original_quals: +exclude_intervals: +gatk_config_file: +gcs_max_retries: +gcs_project_for_requester_pays: +global_qscore_prior: +interval_exclusion_padding: +interval_merging_rule: +interval_padding: +interval_set_rule: +intervals: +lenient: +memory_overhead: +memory_per_job: +number_of_threads: +preserve_qscores_less_than: +quantize_quals: +quiet: +read_filter: +read_index: +read_validation_stringency: +seconds_between_progress_updates: 
+sequence_dictionary: +sites_only_vcf_output: +use_jdk_deflater: +use_jdk_inflater: +use_original_qualities: diff --git a/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl new file mode 100644 index 00000000..efe37787 --- /dev/null +++ b/gatk_apply_bqsr_4.1.8.1/gatk_apply_bqsr_4.1.8.1.cwl @@ -0,0 +1,326 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_apply_bqsr_4_1_8_1 +baseCommand: + - gatk + - ApplyBQSR +inputs: + - id: reference + type: File + inputBinding: + position: 4 + prefix: '--reference' + doc: Reference sequence + secondaryFiles: + - .fai + - ^.dict + - id: create_output_bam_index + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-bam-index' + - id: bqsr_recal_file + type: File + inputBinding: + position: 4 + prefix: '--bqsr-recal-file' + doc: >- + Input recalibration table for BQSR. Only run ApplyBQSR with the covariates + table created from the input BAM + - id: input + type: File + inputBinding: + position: 4 + prefix: '--input' + doc: A BAM file containing input read data + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name. Not Required + - id: add_output_sam_program_record + type: boolean? + inputBinding: + position: 6 + prefix: '--add-output-sam-program-record' + - id: add_output_vcf_command_line + type: boolean? + inputBinding: + position: 6 + prefix: '--add-output-vcf-command-line' + - id: arguments_file + type: File? + inputBinding: + position: 6 + prefix: '--arguments_file' + - id: cloud_index_prefetch_buffer + type: int? + inputBinding: + position: 6 + prefix: '--cloud-index-prefetch-buffer' + - id: cloud_prefetch_buffer + type: int? + inputBinding: + position: 6 + prefix: '--cloud-prefetch-buffer' + - id: create_output_bam_md5 + type: boolean? 
+ inputBinding: + position: 6 + prefix: '--create-output-bam-md5' + - id: create_output_variant_index + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-variant-index' + - id: create_output_variant_md5 + type: boolean? + inputBinding: + position: 6 + prefix: '--create-output-variant-md5' + - id: disable_bam_index_caching + type: boolean? + inputBinding: + position: 6 + prefix: '--disable-bam-index-caching' + - id: disable_read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--disable-read-filter' + inputBinding: + position: 6 + doc: Read filters to be disabled before analysis + - id: disable_sequence_dictionary_validation + type: boolean? + inputBinding: + position: 6 + prefix: '--disable-sequence-dictionary-validation' + - default: true + id: emit_original_quals + type: boolean? + inputBinding: + position: 6 + prefix: '--emit-original-quals' + - id: exclude_intervals + type: string? + inputBinding: + position: 6 + prefix: '--exclude-intervals' + - id: gatk_config_file + type: File? + inputBinding: + position: 6 + prefix: '--gatk-config-file' + - id: gcs_max_retries + type: int? + inputBinding: + position: 6 + prefix: '--gcs-max-retries' + - id: gcs_project_for_requester_pays + type: string? + inputBinding: + position: 6 + prefix: '--gcs-project-for-requester-pays' + - id: global_qscore_prior + type: float? + inputBinding: + position: 6 + prefix: '--global-qscore-prior' + - id: interval_exclusion_padding + type: int? + inputBinding: + position: 6 + prefix: '--interval-exclusion-padding' + - id: interval_merging_rule + type: string? + inputBinding: + position: 6 + prefix: '--interval-merging-rule' + - id: interval_padding + type: int? + inputBinding: + position: 6 + prefix: '--interval-padding' + - id: interval_set_rule + type: string? + inputBinding: + position: 6 + prefix: '--interval-set-rule' + - id: intervals + type: string? 
+ inputBinding: + position: 6 + prefix: '--intervals' + - id: lenient + type: boolean? + inputBinding: + position: 6 + prefix: '--lenient' + - id: preserve_qscores_less_than + type: int? + inputBinding: + position: 6 + prefix: '--preserve-qscores-less-than' + - id: quantize_quals + type: int? + inputBinding: + position: 6 + prefix: '--quantize-quals' + - id: quiet + type: boolean? + inputBinding: + position: 6 + prefix: '--QUIET' + - id: read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' + inputBinding: + position: 6 + - id: read_index + type: string? + inputBinding: + position: 6 + prefix: '--read-index' + - id: read_validation_stringency + type: string? + inputBinding: + position: 6 + prefix: '--read-validation-stringency' + - id: seconds_between_progress_updates + type: float? + inputBinding: + position: 6 + prefix: '--seconds-between-progress-updates' + - id: sequence_dictionary + type: File? + inputBinding: + position: 6 + prefix: '--sequence-dictionary' + - id: sites_only_vcf_output + type: boolean? + inputBinding: + position: 6 + prefix: '--sites-only-vcf-output' + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 6 + prefix: '--use-jdk-deflater' + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 6 + prefix: '--use-jdk-inflater' + - id: use_original_qualities + type: boolean? + inputBinding: + position: 6 + prefix: '--use-original-qualities' + - id: memory_overhead + type: int? + - id: memory_per_job + type: int? + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: gatk_apply_bqsr_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_bqsr.bam') /* rewrite only the trailing .bam extension; keep in sync with the --output argument */ + } + } + secondaryFiles: + - ^.bai +label: gatk_apply_bqsr_4.1.8.1 +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0){ + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } else { + return "-Xmx12G" + } + } + - position: 2 + prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_bqsr.bam') /* same default name the outputs glob collects */ + } + } +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 4 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 
'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.1 diff --git a/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl b/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl index d923ebe4..fac760ef 100644 --- a/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl +++ b/gatk_base_recalibrator_4.1.0.0/gatk_base_recalibrator_4.1.0.0.cwl @@ -234,7 +234,7 @@ inputs: position: 0 prefix: '--use-original-qualities' outputs: - - id: output + - id: gatk_base_recalibrator_output type: File? outputBinding: glob: '$(inputs.input.basename.replace(''.bam'', '''')).recal.table' diff --git a/gatk_base_recalibrator_4.1.8.1/example_inputs.yml b/gatk_base_recalibrator_4.1.8.1/example_inputs.yml new file mode 100644 index 00000000..01b02cbb --- /dev/null +++ b/gatk_base_recalibrator_4.1.8.1/example_inputs.yml @@ -0,0 +1,57 @@ +input: + class: File + metadata: {} + path: input.bam +reference: + class: File + metadata: {} + path: ref.fasta +known_sites: + - class: File + path: dbsnp_137_14_16.b37.vcf + - class: File + path: Mills_and_1000G_gold_standard-14_16.indels.b37.vcf +QUIET: +add_output_sam_program_record: +add_output_vcf_command_line: +arguments_file: +binary_tag_name: +bqsr_baq_gap_open_penalty: +cloud-index-prefetch-buffer: +cloud_prefetch_buffer: +create_output_bam_index: +create_output_bam_md5: +create_output_variant_index: +create_output_variant_md5: +default_base_qualities: +deletions_default_quality: +disable_bam_index_caching: +disable_read_filter: +disable_sequence_dictionary_validation: +exclude_intervals: +gatk_config_file: +gcs_max_retries: +gcs_project_for_requester_pays: +indels_context_size: +insertions_default_quality: +interval_exclusion_padding: +interval_merging_rule: +interval_padding: 
+interval_set_rule: +intervals: +lenient: +low_quality_tail: +maximum_cycle_value: +memory_overhead: +memory_per_job: +mismatches_context_size: +mismatches_default_quality: +number_of_threads: +preserve_qscores_less_than: +quantizing_levels: +read_filter: +read_index: +seconds_between_progress_updates: +sequence_dictionary: +sites_only_vcf_output: +use_original_qualities: diff --git a/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl new file mode 100644 index 00000000..a38cddb6 --- /dev/null +++ b/gatk_base_recalibrator_4.1.8.1/gatk_base_recalibrator_4.1.8.1.cwl @@ -0,0 +1,352 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_base_recalibrator_4_1_8_1 +baseCommand: + - gatk + - BaseRecalibrator +inputs: + - id: input + type: File + inputBinding: + position: 3 + prefix: '--input' + doc: BAM/SAM file containing reads + secondaryFiles: + - ^.bai + - id: known_sites + type: + type: array + items: File + inputBinding: + prefix: '--known-sites' + inputBinding: + position: 3 + doc: >- + One or more databases of known polymorphic sites used to exclude regions + around known polymorphisms from analysis + secondaryFiles: + - .idx + - id: reference + type: File + inputBinding: + position: 3 + prefix: '--reference' + doc: Reference sequence file + secondaryFiles: + - .fai + - ^.dict + - id: output_file_name + type: string? + doc: Output file name. Not Required + - id: add_output_sam_program_record + type: boolean? + inputBinding: + position: 10 + prefix: '--add-output-sam-program-record' + - id: add_output_vcf_command_line + type: boolean? 
+ inputBinding: + position: 10 + prefix: '--add-output-vcf-command-line' + - id: arguments_file + type: + - 'null' + - type: array + items: File + inputBinding: + position: 0 + prefix: '--arguments_file' + - id: binary_tag_name + type: string? + inputBinding: + position: 10 + prefix: '--binary-tag-name' + - id: bqsr_baq_gap_open_penalty + type: float? + inputBinding: + position: 10 + prefix: '--bqsr-baq-gap-open-penalty' + - id: cloud-index-prefetch-buffer + type: int? + inputBinding: + position: 10 + prefix: '--cloud-index-prefetch-buffer' + - id: cloud_prefetch_buffer + type: int? + inputBinding: + position: 10 + prefix: '--cloud-prefetch-buffer' + - id: create_output_bam_index + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-bam-index' + - id: create_output_bam_md5 + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-bam-md5' + - id: create_output_variant_index + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-variant-index' + - id: create_output_variant_md5 + type: boolean? + inputBinding: + position: 10 + prefix: '--create-output-variant-md5' + - id: default_base_qualities + type: int? + inputBinding: + position: 10 + prefix: '--default-base-qualities' + - id: deletions_default_quality + type: int? + inputBinding: + position: 10 + prefix: '--deletions-default-quality' + - id: disable_bam_index_caching + type: boolean? + inputBinding: + position: 10 + prefix: '--disable-bam-index-caching' + - id: disable_read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--disable-read-filter' + inputBinding: + position: 10 + doc: Read filters to be disabled before analysis + - id: disable_sequence_dictionary_validation + type: boolean? + inputBinding: + position: 10 + prefix: '--disable-sequence-dictionary-validation' + - id: exclude_intervals + type: string? + inputBinding: + position: 10 + prefix: '--exclude-intervals' + - id: gatk_config_file + type: File? 
+ inputBinding: + position: 10 + prefix: '--gatk-config-file' + - id: gcs_max_retries + type: int? + inputBinding: + position: 10 + prefix: '--gcs-max-retries' + - id: gcs_project_for_requester_pays + type: string? + inputBinding: + position: 10 + prefix: '--gcs-project-for-requester-pays' + - id: indels_context_size + type: int? + inputBinding: + position: 10 + prefix: '--indels-context-size' + - id: insertions_default_quality + type: int? + inputBinding: + position: 10 + prefix: '--insertions-default-quality' + - id: interval_exclusion_padding + type: int? + inputBinding: + position: 10 + prefix: '--interval-exclusion-padding' + - id: interval_merging_rule + type: string? + inputBinding: + position: 10 + prefix: '--interval-merging-rule' + - id: interval_padding + type: int? + inputBinding: + position: 10 + prefix: '--interval-padding' + - id: interval_set_rule + type: string? + inputBinding: + position: 10 + prefix: '--interval-set-rule' + - id: intervals + type: string? + inputBinding: + position: 10 + prefix: '--intervals' + - id: lenient + type: boolean? + inputBinding: + position: 10 + prefix: '--lenient' + - id: low_quality_tail + type: int? + inputBinding: + position: 10 + prefix: '--low-quality-tail' + - id: maximum_cycle_value + type: int? + inputBinding: + position: 10 + prefix: '--maximum-cycle-value' + - id: mismatches_context_size + type: int? + inputBinding: + position: 10 + prefix: '--mismatches-context-size' + - id: mismatches_default_quality + type: int? + inputBinding: + position: 10 + prefix: '--mismatches-default-quality' + - id: preserve_qscores_less_than + type: int? + inputBinding: + position: 10 + prefix: '--preserve-qscores-less-than' + - id: quantizing_levels + type: int? + inputBinding: + position: 10 + prefix: '--quantizing-levels' + - id: QUIET + type: boolean? 
+ inputBinding: + position: 10 + prefix: '--QUIET' + - id: read_filter + type: + - 'null' + - type: array + items: string + inputBinding: + prefix: '--read-filter' + inputBinding: + position: 10 + - id: read_index + type: string? + inputBinding: + position: 10 + prefix: '--read-index' + - id: seconds_between_progress_updates + type: float? + inputBinding: + position: 10 + prefix: '--seconds-between-progress-updates' + - id: sequence_dictionary + type: File? + inputBinding: + position: 10 + prefix: '--sequence-dictionary' + - id: sites_only_vcf_output + type: boolean? + inputBinding: + position: 10 + prefix: '--sites-only-vcf-output' + - id: use_original_qualities + type: boolean? + inputBinding: + position: 10 + prefix: '--use-original-qualities' + - id: number_of_threads + type: int? + - id: memory_per_job + type: int? + - id: memory_overhead + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_base_recalibrator_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.table') + } + } +label: gatk_base_recalibrator_4.1.8.1 +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0){ + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } else { + return "-Xmx12G" + } + } + - position: 2 + 
prefix: '--tmp-dir' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--output' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_bqsr.table') + } + } +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 8 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/README.md b/gatk_collect_alignment_summary_metrics_4.1.8.0/README.md new file mode 100644 index 00000000..d1c1901d --- /dev/null +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/README.md @@ -0,0 +1,99 @@ +# CWL for running GATK - CollectAlignmentSummaryMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_collect_alignment_summary_metrics_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: 
./GitHub/cwl-commandlinetools/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + --output_file_name OUTPUT_FILE_NAME [--reference REFERENCE] + [--adaptor_sequence ADAPTOR_SEQUENCE] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + [--expected_pair_orientations EXPECTED_PAIR_ORIENTATIONS] + [--is_bisulfite_sequenced] [--max_insert_size MAX_INSERT_SIZE] + [--validation_stringency VALIDATION_STRINGENCY] [--assume_sorted] + [--stop_after STOP_AFTER] [--create_index] [--create_md5_file] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + File to write the output to. Required. + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --adaptor_sequence ADAPTOR_SEQUENCE + List of adapter sequences to use when processing the + alignment metrics. This argument may be specified 0 or + more times. Default value: [AATGATACGGCGACCACCGAGATCTA + CACTCTTTCCCTACACGACGCTCTTCCGATCT, ...] (full list in the CWL doc). + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list.
+ --expected_pair_orientations EXPECTED_PAIR_ORIENTATIONS + Paired-end reads that do not have this expected + orientation will be considered chimeric. This argument + may be specified 0 or more times. Default value: [FR]. + Possible values: {FR, RF, TANDEM} + --is_bisulfite_sequenced + Whether the SAM or BAM file consists of bisulfite + sequenced reads. Default value: false. Possible + values: {true, false} + --max_insert_size MAX_INSERT_SIZE + Paired-end reads above this insert size will be + considered chimeric along with inter-chromosomal + pairs. Default value: 100000. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --assume_sorted If true (default), then the sort order in the header + file will be ignored. Default value: true. This option + can be set to 'null' to clear the default value. + Possible values: {true, false} + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. 
Possible values: + {true, false} +``` diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml b/gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..665cef4b --- /dev/null +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/example_inputs.yaml @@ -0,0 +1,28 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +reference: + class: File + metadata: {} + path: "/path/to/fasta" + secondaryFiles: + - class: File + path: "/path/to/reference.dict" +output_file_name: "alignment_summary_metrics.txt" +adaptor_sequence: null +metrics_acciumulation_level: null +expected_pair_orientations: null +is_bisulfite_sequenced: false +max_insert_size: null +validation_stringency: null +assume_sorted: null +stop_after: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl new file mode 100644 index 00000000..13a024dd --- /dev/null +++ b/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl @@ -0,0 +1,240 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_collect_alignment_summary_metrics_4.1.8.0 +baseCommand: + - gatk + - CollectAlignmentSummaryMetrics +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: Input file (bam or sam). Required.
+ - id: output_file_name + type: string? + doc: File to write the output to. Required. + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.fasta.fai + - ^.dict + - id: adaptor_sequence + type: string? + inputBinding: + position: 0 + prefix: '--ADAPTER_SEQUENCE' + doc: >- + List of adapter sequences to use when processing the alignment metrics. + This argument may be specified 0 or more times. Default value: + [AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, + AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, + AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, + AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, + AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, + AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG]. + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: '--METRIC_ACCUMULATION_LEVEL' + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: expected_pair_orientations + type: string? + inputBinding: + position: 0 + prefix: '--EXPECTED_PAIR_ORIENTATIONS' + doc: >- + Paired-end reads that do not have this expected orientation will be + considered chimeric. This argument may be specified 0 or more times. + Default value: [FR]. Possible values: {FR, RF, TANDEM} + - id: is_bisulfite_sequenced + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--IS_BISULFITE_SEQUENCED' + doc: >- + Whether the SAM or BAM file consists of bisulfite sequenced reads. + Default value: false. Possible values: {true, false} + - id: max_insert_size + type: int? + inputBinding: + position: 0 + prefix: '--MAX_INSERT_SIZE' + doc: >- + Paired-end reads above this insert size will be considered chimeric along + with inter-chromosomal pairs. Default value: 100000. + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: '--ASSUME_SORTED' + doc: >- + If true (default), then the sort order in the header file will be + ignored. Default value: true. This option can be set to 'null' to clear + the default value. Possible values: {true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: '--STOP_AFTER' + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: use_jdk_deflater + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_collect_alignment_summary_metrics_txt + type: File + outputBinding: + glob: |- + ${ + if (inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt') + } + } +label: GATK-CollectAlignmentSummaryMetrics +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt') + } + } +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 1 + - class: DockerRequirement + 
dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_collect_hs_metrics_4.1.8.0/README.md b/gatk_collect_hs_metrics_4.1.8.0/README.md new file mode 100644 index 00000000..2f0ab1b8 --- /dev/null +++ b/gatk_collect_hs_metrics_4.1.8.0/README.md @@ -0,0 +1,136 @@ +# CWL for running GATK - CollectHsMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_collect_hs_metrics_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: ./GitHub/cwl-commandlinetools/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl + [-h] --input INPUT --bait_intervals BAIT_INTERVALS --target_intervals + TARGET_INTERVALS --output_file_name OUTPUT_FILE_NAME + [--per_base_coverage PER_BASE_COVERAGE] + [--per_target_coverage PER_TARGET_COVERAGE] + [--theoretical_sensitivity_output THEORETICAL_SENSITIVITY_OUTPUT] + [--allele_fraction ALLELE_FRACTION] [--bait_set_name BAIT_SET_NAME] + [--clip_overlapping_reads] [--coverage_cap COVERAGE_CAP] + [--include_indels] [--minimum_base_quality MINIMUM_BASE_QUALITY] + [--minimum_mapping_quality MINIMUM_MAPPING_QUALITY] + [--near_distance NEAR_DISTANCE] [--sample_size SAMPLE_SIZE] + 
[--reference REFERENCE] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + [--validation_stringency VALIDATION_STRINGENCY] [--create_index] + [--create_md5_file] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT An aligned SAM or BAM file. Required. + --bait_intervals BAIT_INTERVALS + An interval list file that contains the locations of + the baits used. This argument must be specified at + least once. Required. + --target_intervals TARGET_INTERVALS + An interval list file that contains the locations of + the targets. This argument must be specified at least + once. Required. + --output_file_name OUTPUT_FILE_NAME + The output file to write the metrics to. Required. + --per_base_coverage PER_BASE_COVERAGE + An optional file to output per base coverage + information to. The per-base file contains one line + per target base and can grow very large. It is not + recommended for use with large target sets. Default + value: null. + --per_target_coverage PER_TARGET_COVERAGE + An optional file to output per target coverage + information to. Default value: null. + --theoretical_sensitivity_output THEORETICAL_SENSITIVITY_OUTPUT + Output for Theoretical Sensitivity metrics where the + allele fractions are provided by the ALLELE_FRACTION + argument. Default value: null. + --allele_fraction ALLELE_FRACTION + Allele fraction for which to calculate theoretical + sensitivity. This argument may be specified 0 or more + times. Default value: [0.001, 0.005, 0.01, 0.02, 0.05, + 0.1, 0.2, 0.3, 0.5]. + --bait_set_name BAIT_SET_NAME + Bait set name. If not provided it is inferred from the + filename of the bait intervals. Default value: null. + --clip_overlapping_reads + True if we are to clip overlapping reads, false + otherwise. Default value: true. 
Possible values: + {true, false} + --coverage_cap COVERAGE_CAP + Parameter to set a max coverage limit for Theoretical + Sensitivity calculations. Default is 200. Default + value: 200. + --include_indels If true count inserted bases as on target and deleted + bases as covered by a read. Default value: false. + Possible values: {true, false} + --minimum_base_quality MINIMUM_BASE_QUALITY + Minimum base quality for a base to contribute + coverage. Default value: 20. + --minimum_mapping_quality MINIMUM_MAPPING_QUALITY + Minimum mapping quality for a read to contribute + coverage. Default value: 20. + --near_distance NEAR_DISTANCE + The maximum distance between a read and the nearest + probe/bait/amplicon for the read to be considered + 'near probe' and included in percent selected. Default + value: 250. + --sample_size SAMPLE_SIZE + Sample Size used for Theoretical Het Sensitivity + sampling. Default is 10000. Default value: 10000. + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. 
Possible values: {STRICT,LENIENT, SILENT} + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS +``` diff --git a/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl new file mode 100644 index 00000000..44a42fc1 --- /dev/null +++ b/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl @@ -0,0 +1,318 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_collect_hs_metrics_4_1_8_0 +baseCommand: + - gatk + - CollectHsMetrics +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: An aligned SAM or BAM file. Required. + - id: bait_intervals + type: File + inputBinding: + position: 0 + prefix: '--BAIT_INTERVALS' + doc: >- + An interval list file that contains the locations of the baits used. This + argument must be specified at least once. Required. + - id: target_intervals + type: File + inputBinding: + position: 0 + prefix: '--TARGET_INTERVALS' + doc: >- + An interval list file that contains the locations of the targets. This + argument must be specified at least once. Required. + - id: output_file_name + type: string? + doc: The output file to write the metrics to. Required. + - id: per_base_coverage + type: string? + doc: >- + An optional file to output per base coverage information to. The per-base + file contains one line per target base and can grow very large. 
It is not + recommended for use with large target sets. Default value: null. + - id: per_target_coverage + type: string? + doc: >- + An optional file to output per target coverage information to. Default + value: null. + - id: theoretical_sensitivity_output + type: string? + inputBinding: + position: 0 + prefix: '--THEORETICAL_SENSITIVITY_OUTPUT' + doc: >- + Output for Theoretical Sensitivity metrics where the allele fractions are + provided by the ALLELE_FRACTION argument. Default value: null. + - id: allele_fraction + type: float? + inputBinding: + position: 0 + prefix: '--ALLELE_FRACTION' + doc: >- + Allele fraction for which to calculate theoretical sensitivity. This + argument may be specified 0 or more times. Default value: [0.001, 0.005, + 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5]. + - id: bait_set_name + type: string? + inputBinding: + position: 0 + prefix: '--BAIT_SET_NAME' + doc: >- + Bait set name. If not provided it is inferred from the filename of the + bait intervals. Default value: null. + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--CLIP_OVERLAPPING_READS' + doc: >- + True if we are to clip overlapping reads, false otherwise. Default value: + true. Possible values: {true, false} + - id: coverage_cap + type: int? + inputBinding: + position: 0 + prefix: '--COVERAGE_CAP' + doc: >- + Parameter to set a max coverage limit for Theoretical Sensitivity + calculations. Default is 200. Default value: 200. + - id: include_indels + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_INDELS' + doc: >- + If true count inserted bases as on target and deleted bases as covered by + a read. Default value: false. Possible values: {true, false} + - id: minimum_base_quality + type: int? + inputBinding: + position: 0 + prefix: '--MINIMUM_BASE_QUALITY' + doc: >- + Minimum base quality for a base to contribute coverage. Default value: + 20. + - id: minimum_mapping_quality + type: int? 
+ inputBinding: + position: 0 + prefix: '--MINIMUM_MAPPING_QUALITY' + doc: >- + Minimum mapping quality for a read to contribute coverage. Default value: + 20. + - id: near_distance + type: int? + inputBinding: + position: 0 + prefix: '--NEAR_DISTANCE' + doc: >- + The maximum distance between a read and the nearest probe/bait/amplicon + for the read to be considered 'near probe' and included in percent + selected. Default value: 250. + - id: sample_size + type: int? + inputBinding: + position: 0 + prefix: '--SAMPLE_SIZE' + doc: >- + Sample Size used for Theoretical Het Sensitivity sampling. Default is + 10000. Default value: 10000. + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.fasta.fai + - ^.dict + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: '--METRIC_ACCUMULATION_LEVEL' + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. 
Possible values: {STRICT,LENIENT, SILENT} + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_collect_hs_metrics_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt') + } + } + - id: gatk_collect_hs_metrics_per_base_coverage_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.per_base_coverage){ + return inputs.per_base_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt') + } + } + - id: gatk_collect_hs_metrics_per_target_coverage_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.per_target_coverage){ + return inputs.per_target_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt') + } + } +label: GATK-CollectHsMetrics +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && 
!inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt') + } + } + - position: 0 + prefix: '--PER_TARGET_COVERAGE' + valueFrom: |- + ${ + if(inputs.per_target_coverage){ + return inputs.per_target_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt') + } + } + - position: 0 + prefix: '--PER_BASE_COVERAGE' + valueFrom: |- + ${ + if(inputs.per_base_coverage){ + return inputs.per_base_coverage + } else { + return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt') + } + } +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/README.md 
b/gatk_collect_insert_size_metrics_4.1.8.0/README.md new file mode 100644 index 00000000..b0ea21bf --- /dev/null +++ b/gatk_collect_insert_size_metrics_4.1.8.0/README.md @@ -0,0 +1,105 @@ +# CWL for running GATK - CollectInsertSizeMetrics + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_collect_insert_size_metrics_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: ./GitHub/cwl-commandlinetools/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + --output_file_name OUTPUT_FILE_NAME --histogram_file HISTOGRAM_FILE + [--deviations DEVIATIONS] [--histogram_width HISTOGRAM_WIDTH] + [--minimum_pct MINIMUM_PCT] + [--metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL] + [--include_duplicates] [--validation_stringency VALIDATION_STRINGENCY] + [--assume_sorted] [--stop_after STOP_AFTER] [--create_index] + [--create_md5_file] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + File to write the output to. Required. + --histogram_file HISTOGRAM_FILE + File to write insert size Histogram chart to. + Required. + --deviations DEVIATIONS + Generate mean, sd and plots by trimming the data down + to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. 
This + is done because insert size data typically includes + enough anomalous values from chimeras and other + artifacts to make the mean and sd grossly misleading + regarding the real distribution. Default value: 10.0. + This option can be set to 'null' to clear the default + value. + --histogram_width HISTOGRAM_WIDTH + Explicitly sets the Histogram width, overriding + automatic truncation of Histogram tail. Also, when + calculating mean and standard deviation, only bins <= + Histogram_WIDTH will be included. Default value: null. + --minimum_pct MINIMUM_PCT + When generating the Histogram, discard any data + categories (out of FR, TANDEM, RF) that have fewer + than this percentage of overall reads. (Range: 0 to + 1). Default value: 0.05. This option can be set to + 'null' to clear the default value. + --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL + The level(s) at which to accumulate metrics. Default + value: [ALL_READS]. This option can be set to 'null' + to clear the default value. Possible values: + {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option + may be specified 0 or more times. This option can be + set to 'null' to clear the default list. + --include_duplicates If true, also include reads marked as duplicates in + the insert size histogram. Default value: false. This + option can be set to 'null' to clear the default + value. Possible values: {true, false} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --assume_sorted If true (default), then the sort order in the header + file will be ignored. Default value: true. 
This option + can be set to 'null' to clear the default value. + Possible values: {true, false} + --stop_after STOP_AFTER + Stop after processing N reads, mainly for debugging. + Default value: 0. This option can be set to 'null' to + clear the default value. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} +``` diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/example_inputs.yaml b/gatk_collect_insert_size_metrics_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..59a104cd --- /dev/null +++ b/gatk_collect_insert_size_metrics_4.1.8.0/example_inputs.yaml @@ -0,0 +1,22 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +output_file_name: "insert_size_metrics.txt" +histogram_file: "histogram.pdf" +deviations: 10.0 +histogram_width: 800 +minimum_pct: 0 +metrics_acciumulation_level: null +include_duplicates: null +validation_stringency: null +assume_sorted: null +stop_after: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl new file mode 100644 index 00000000..bc83b149 --- /dev/null +++ b/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl @@ -0,0 +1,252 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_collect_insert_size_metrics_4_1_8_0 +baseCommand: + - gatk + - CollectInsertSizeMetrics 
+inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: File to write the output to. Required. + - id: histogram_file + type: string? + doc: File to write insert size Histogram chart to. Required. + - id: deviations + type: float? + inputBinding: + position: 0 + prefix: '--DEVIATIONS' + doc: >- + Generate mean, sd and plots by trimming the data down to MEDIAN + + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size + data typically includes enough anomalous values from chimeras and other + artifacts to make the mean and sd grossly misleading regarding the real + distribution. Default value: 10.0. This option can be set to 'null' to + clear the default value. + - id: histogram_width + type: int? + inputBinding: + position: 0 + prefix: '--HISTOGRAM_WIDTH' + doc: >- + Explicitly sets the Histogram width, overriding automatic truncation of + Histogram tail. Also, when calculating mean and standard deviation, only + bins <= Histogram_WIDTH will be included. Default value: null. + - id: minimum_pct + type: float? + inputBinding: + position: 0 + prefix: '--MINIMUM_PCT' + doc: >- + When generating the Histogram, discard any data categories (out of FR, + TANDEM, RF) that have fewer than this percentage of overall reads. (Range: + 0 to 1). Default value: 0.05. This option can be set to 'null' to clear + the default value. + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: '--METRIC_ACCUMULATION_LEVEL' + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. 
Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: include_duplicates + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_DUPLICATES' + doc: >- + If true, also include reads marked as duplicates in the insert size + histogram. Default value: false. This option can be set to 'null' to + clear the default value. Possible values: {true, false} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: '--ASSUME_SORTED' + doc: >- + If true (default), then the sort order in the header file will be + ignored. Default value: true. This option can be set to 'null' to clear + the default value. Possible values: {true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: '--STOP_AFTER' + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. 
Possible values: {true, false} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_collect_insert_size_metrics_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_insert_size_metrics.txt') + } + } + - id: gatk_collect_insert_size_metrics_histogram_pdf + type: File + outputBinding: + glob: |- + ${ + if(inputs.histogram_file){ + return inputs.histogram_file + } else { + return inputs.input.basename.replace(/\.bam$/, '_histogram.pdf') + } + } +label: GATK-CollectInsertSizeMetrics +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '-O' +
valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_insert_size_metrics.txt') + } + } + - position: 2 + prefix: '-H' + valueFrom: |- + ${ + if(inputs.histogram_file){ + return inputs.histogram_file + } else { + return inputs.input.basename.replace(/\.bam$/, '_histogram.pdf') + } + } +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_downsamplesam_4.1.8.1/example_inputs.yml b/gatk_downsamplesam_4.1.8.1/example_inputs.yml new file mode 100644 index 00000000..8ea893b7 --- /dev/null +++ b/gatk_downsamplesam_4.1.8.1/example_inputs.yml @@ -0,0 +1,21 @@ +QUIET: null +arguments_file: null +create_output_bam_index: null +create_output_bam_md5: null +input: + class: File + path: input.bam +lenient: null +memory_overhead: null +memory_per_job: null +number_of_threads: null +output_file_name: null +output_metrics_file_name: null +probability: null +random_seed: null +reference: + class: File + path: reference.fasta +strategy: null +temporary_directory: null + diff --git a/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl new file mode 100644 index 00000000..b4078135 --- /dev/null +++ b/gatk_downsamplesam_4.1.8.1/gatk_downsamplesam_4.1.8.1.cwl @@ -0,0
+1,201 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_downsamplesam_4_1_8_1 +baseCommand: + - gatk + - DownsampleSam +inputs: + - id: input + type: File + inputBinding: + position: 3 + prefix: '--INPUT' + doc: BAM/SAM file containing reads + secondaryFiles: + - ^.bai + - id: reference + type: File + inputBinding: + position: 3 + prefix: '--REFERENCE_SEQUENCE' + doc: Reference sequence file + secondaryFiles: + - .fai + - ^.dict + - id: output_file_name + type: string? + doc: Output file name. Not Required + - id: output_metrics_file_name + type: string? + doc: Output file name for metrics file. Not Required + - id: probability + type: float? + inputBinding: + position: 4 + prefix: '--PROBABILITY' + doc: 'The probability of keeping any individual read, between 0 and 1.' + - id: random_seed + type: int? + inputBinding: + position: 4 + prefix: '--RANDOM_SEED' + doc: >- + Random seed used for deterministic results. Setting to null will cause + multiple invocations to produce different results. + - id: strategy + type: string? + inputBinding: + position: 4 + prefix: '--STRATEGY' + doc: >- + The --STRATEGY argument is an enumerated type (Strategy), which can have + one of the following values: + + + HighAccuracy + + ConstantMemory + + Chained + + default Strategy ConstantMemory + - id: arguments_file + type: + - 'null' + - type: array + items: File + inputBinding: + position: 0 + prefix: '--arguments_file' + - id: create_output_bam_index + type: boolean? + inputBinding: + position: 10 + prefix: '--CREATE_INDEX' + - id: create_output_bam_md5 + type: boolean? + inputBinding: + position: 10 + prefix: '--CREATE_MD5_FILE' + - id: QUIET + type: boolean? + inputBinding: + position: 10 + prefix: '--QUIET' + - id: "validation_stringency" + type: string? 
+ inputBinding: + position: 6 + prefix: '--VALIDATION_STRINGENCY' + - id: number_of_threads + type: int? + - id: memory_per_job + type: int? + - id: memory_overhead + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null.' +outputs: + - id: gatk_downsamplesam_output_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_downsamplesam.bam') + } + } + secondaryFiles: + - ^.bai + - id: gatk_downsamplesam_output_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_metrics_file_name){ + return inputs.output_metrics_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_downsamplesam.metrics') + } + } +label: gatk_downsample_sam_4.1.8.1 +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0){ + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx12G" + } else { + return "-Xmx12G" + } + } + - position: 2 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '--OUTPUT' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_downsamplesam.bam') + } + } + - position: 2 + prefix: '--METRICS_FILE' + valueFrom: |- + ${ + if(inputs.output_metrics_file_name){
+ return inputs.output_metrics_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_downsamplesam.metrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 20000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl b/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl index 636dc04a..f474b88d 100644 --- a/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl +++ b/gatk_mark_duplicates_4.1.0.0/gatk_mark_duplicates_4.1.0.0.cwl @@ -137,14 +137,14 @@ inputs: position: 0 prefix: '--TAGGING_POLICY' outputs: - - id: output_md_bam + - id: gatk_mark_duplicates_bam doc: Output marked duplicate bam type: File outputBinding: glob: '$(inputs.input.basename.replace(''md.bam'', ''bam''))' secondaryFiles: - ^.bai - - id: output_md_metrics + - id: gatk_mark_duplicates_metrics doc: Output marked duplicate metrics type: File outputBinding: diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml b/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..11eb7e0d --- /dev/null +++ b/gatk_mean_quality_by_cycle/4.1.8.0/example_inputs.yaml @@ -0,0 +1,18 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" +output_file_name: null +chart_output: null +validation_stringency: null +assume_sorted: null +pf_reads_only: null +reference: + class: File + metadata: {} + path: ref.fasta +create_index: null +create_md5_file: null
+memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl new file mode 100644 index 00000000..14044a27 --- /dev/null +++ b/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl @@ -0,0 +1,197 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_mean_quality_by_cycle_4_1_8_0 +baseCommand: + - gatk + - MeanQualityByCycle +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: An aligned SAM or BAM file. Required. + - id: output_file_name + type: string? + doc: The output file to write the metrics to. + - id: chart_output + type: string? + doc: A file (with .pdf extension) to write the chart to. + - id: assume_sorted + type: boolean? + inputBinding: + position: 1 + prefix: '--ASSUME_SORTED' + doc: | + If true (default), then the sort order in the header file will be ignored. + - id: pf_reads_only + type: boolean? + inputBinding: + position: 1 + prefix: '--PF_READS_ONLY' + doc: | + If set to true calculate mean quality over PF reads only. Default value: false. Possible values: {true, false} + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.fasta.fai + - ^.dict + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. 
Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: >- + Directory with space available to be used by this program for temporary storage of working files. 
+outputs: + - id: gatk_mean_quality_by_cycle_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_mean_quality_by_cycle.txt') + } + } + - id: gatk_mean_quality_by_cycle_chart_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.chart_output){ + return inputs.chart_output + } else { + return inputs.input.basename.replace(/\.bam$/, '_mean_quality_by_cycle.pdf') + } + } +label: GATK-MeanQualityByCycle +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx14G" + } + else { + return "-Xmx14G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) { + return inputs.temporary_directory; + } + return runtime.tmpdir; + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/\.bam$/, '_mean_quality_by_cycle.txt') + } + } + - position: 0 + prefix: '--CHART_OUTPUT' + valueFrom: |- + ${ + if(inputs.chart_output){ + return inputs.chart_output + } else { + return inputs.input.basename.replace(/\.bam$/, '_mean_quality_by_cycle.pdf') + } + } + +requirements: + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull:
'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_mean_quality_by_cycle/README.md b/gatk_mean_quality_by_cycle/README.md new file mode 100644 index 00000000..53b29701 --- /dev/null +++ b/gatk_mean_quality_by_cycle/README.md @@ -0,0 +1,76 @@ +# CWL for running GATK - MeanQualityByCycle + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_mean_quality_by_cycle_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_mean_quality_by_cycle_4.1.8.0.cwl + [-h] --input INPUT [--output_file_name OUTPUT_FILE_NAME] + [--chart_output CHART_OUTPUT] [--assume_sorted] [--pf_reads_only] + [--reference REFERENCE] [--validation_stringency VALIDATION_STRINGENCY] + [--create_index] [--create_md5_file] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT An aligned SAM or BAM file. Required. + --output_file_name OUTPUT_FILE_NAME + The output file to write the metrics to. 
+ --chart_output CHART_OUTPUT + A file (with .pdf extension) to write the chart to. + --assume_sorted If true (default), then the sort order in the header + file will be ignored. + --pf_reads_only If set to true calculate mean quality over PF reads + only. Default value: false. Possible values: {true, + false} + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false} + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. This option may be specified 0 or + more times. 
+``` diff --git a/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml b/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..004d24ec --- /dev/null +++ b/gatk_merge_bam_alignment_4.1.8.0/example_inputs.yaml @@ -0,0 +1,44 @@ +unmapped_bam: + class: File + path: "/path/to/bam" +reference: + class: File + path: "/path/to/reference.fasta" +aligned_bam: + - class: File + path: "/path/to/bam" +output_file_name: null +add_mate_cigar: null +add_pg_tag_to_reads: null +aligned_reads_only: null +include_duplicates: null +aligner_proper_pair_flags: null +attributes_to_remove: null +attributes_to_retain: null +attributes_to_reverse: null +attributes_to_reverse_complement: null +clip_adapters: null +clip_overlapping_reads: null +expected_orientations: null +hard_clip_overlapping_reads: null +include_secondary_alignments: null +is_bisulfite_sequence: null +jump_size: null +matching_dictionary_tags: null +max_insertions_or_deletions: null +min_unclipped_bases: null +paired_run: null +primary_alignment_strategy: null +read1_aligned_bam: null +read1_trim: null +read2_aligned_bam: null +read2_trim: null +sort_order: null +unmap_contaminant_reads: null +unmapped_read_strategy: null +validation_stringency: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl new file mode 100644 index 00000000..1dd7658b --- /dev/null +++ b/gatk_merge_bam_alignment_4.1.8.0/gatk_merge_bam_alignment_4.1.8.0.cwl @@ -0,0 +1,557 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_merge_bam_alignment_4_1_8_0 +baseCommand: + - gatk + - MergeBamAlignment +inputs: + - id: memory_per_job + type: 
int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: unmapped_bam + type: File + inputBinding: + position: 0 + prefix: '--UNMAPPED_BAM' + doc: > + Original SAM or BAM file of unmapped reads, which must be in queryname + order. Reads MUST + + be unmapped. Required. + - id: reference + type: File + inputBinding: + position: 0 + prefix: '--REFERENCE_SEQUENCE' + doc: | + Reference sequence file. Required. + secondaryFiles: + - ^.dict + - id: output_file_name + type: string? + doc: | + Merged SAM or BAM file to write to. Required. + - id: add_mate_cigar + type: boolean? + inputBinding: + position: 0 + prefix: '--ADD_MATE_CIGAR' + doc: > + Adds the mate CIGAR tag (MC) if true, does not if false. Default value: + true. Possible + + values: {true, false} + - id: add_pg_tag_to_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--ADD_PG_TAG_TO_READS' + doc: > + Add PG tag to each read in a SAM or BAM Default value: true. Possible + values: {true, + + false} + - id: aligned_bam + type: + - 'null' + - type: array + items: File + inputBinding: + prefix: '--ALIGNED_BAM' + inputBinding: + position: 1 + doc: > + SAM or BAM file(s) with alignment data. This argument may be specified 0 + or more times. + + Default value: null. Cannot be used in conjunction with argument(s) + READ1_ALIGNED_BAM + + (R1_ALIGNED) READ2_ALIGNED_BAM (R2_ALIGNED) + - id: aligned_reads_only + type: boolean? + inputBinding: + position: 0 + prefix: '--ALIGNED_READS_ONLY' + doc: > + Whether to output only aligned reads. Default value: false. Possible + values: {true, + + false} + - id: aligner_proper_pair_flags + type: boolean? + inputBinding: + position: 0 + prefix: '--ALIGNER_PROPER_PAIR_FLAGS' + doc: > + Use the aligners idea of what a proper pair is rather than computing in + this program. + + Default value: false. 
Possible values: {true, false} + - id: attributes_to_remove + type: string? + inputBinding: + position: 0 + prefix: '--ATTRIBUTES_TO_REMOVE' + doc: > + Attributes from the alignment record that should be removed when merging. + This overrides + + ATTRIBUTES_TO_RETAIN if they share common tags. This argument may be + specified 0 or more + + times. Default value: null. + - id: attributes_to_retain + type: string? + inputBinding: + position: 0 + prefix: '--ATTRIBUTES_TO_RETAIN' + doc: > + Reserved alignment attributes (tags starting with X, Y, or Z) that should + be brought over + + from the alignment data when merging. This argument may be specified 0 or + more times. + + Default value: null. + - id: attributes_to_reverse + type: string? + inputBinding: + position: 0 + prefix: '--ATTRIBUTES_TO_REVERSE' + doc: > + Attributes on negative strand reads that need to be reversed. This + argument may be + + specified 0 or more times. Default value: [OQ, U2]. + - id: attributes_to_reverse_complement + type: string? + inputBinding: + position: 0 + prefix: '--ATTRIBUTES_TO_REVERSE_COMPLEMENT' + doc: > + Attributes on negative strand reads that need to be reverse complemented. + This argument + + may be specified 0 or more times. Default value: [E2, SQ]. + - id: clip_adapters + type: boolean? + inputBinding: + position: 0 + prefix: '--CLIP_ADAPTERS' + doc: > + Whether to clip adapters where identified. Default value: true. Possible + values: {true, + + false} + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--CLIP_OVERLAPPING_READS' + doc: > + For paired reads, clip the 3' end of each read if necessary so that it + does not extend + + past the 5' end of its mate. Clipping will be either soft or hard + clipping, depending on + + CLIP_OVERLAPPING_READS_OPERATOR setting. Hard clipped bases and their + qualities will be + + stored in the XB and XQ tags respectively. Default value: true. 
Possible + values: {true, + + false} + - id: expected_orientations + type: string? + inputBinding: + position: 0 + prefix: '--EXPECTED_ORIENTATIONS' + doc: > + The expected orientation of proper read pairs. Replaces JUMP_SIZE This + argument may be + + specified 0 or more times. Default value: null. Possible values: {FR, RF, + TANDEM} Cannot + + be used in conjunction with argument(s) JUMP_SIZE (JUMP) + - id: hard_clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--HARD_CLIP_OVERLAPPING_READS' + doc: > + If true, hard clipping will be applied to overlapping reads. By default, + soft clipping is + + used. Default value: false. Possible values: {true, false} + - id: include_secondary_alignments + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_SECONDARY_ALIGNMENTS' + doc: > + If false, do not write secondary alignments to output. Default value: + true. Possible + + values: {true, false} + - id: is_bisulfite_sequence + type: boolean? + inputBinding: + position: 0 + prefix: '--IS_BISULFITE_SEQUENCE' + doc: > + Whether the lane is bisulfite sequence (used when calculating the NM + tag). Default value: + + false. Possible values: {true, false} + - id: jump_size + type: int? + inputBinding: + position: 0 + prefix: '--JUMP_SIZE' + doc: > + The expected jump size (required if this is a jumping library). + Deprecated. Use + + EXPECTED_ORIENTATIONS instead Default value: null. Cannot be used in + conjunction with + + argument(s) EXPECTED_ORIENTATIONS (ORIENTATIONS) + - id: matching_dictionary_tags + type: string? + inputBinding: + position: 0 + prefix: '--MATCHING_DICTIONARY_TAGS' + doc: > + List of Sequence Records tags that must be equal (if present) in the + reference dictionary + + and in the aligned file. Mismatching tags will cause an error if in this + list, and a + + warning otherwise. This argument may be specified 0 or more times. + Default value: [M5, + + LN]. + - id: max_insertions_or_deletions + type: int? 
+ inputBinding: + position: 0 + prefix: '--MAX_INSERTIONS_OR_DELETIONS' + doc: > + The maximum number of insertions or deletions permitted for an alignment + to be included. + + Alignments with more than this many insertions or deletions will be + ignored. Set to -1 to + + allow any number of insertions or deletions. Default value: 1. + - id: min_unclipped_bases + type: int? + inputBinding: + position: 0 + prefix: '--MIN_UNCLIPPED_BASES' + doc: > + If UNMAP_CONTAMINANT_READS is set, require this many unclipped bases or + else the read will + + be marked as contaminant. Default value: 32. + - id: paired_run + type: boolean? + inputBinding: + position: 0 + prefix: '--PAIRED_RUN' + doc: > + DEPRECATED. This argument is ignored and will be removed. Default value: + true. Possible + + values: {true, false} + - id: primary_alignment_strategy + type: string? + inputBinding: + position: 0 + prefix: '--PRIMARY_ALIGNMENT_STRATEGY' + doc: > + Strategy for selecting primary alignment when the aligner has provided + more than one + + alignment for a pair or fragment, and none are marked as primary, more + than one is marked + + as primary, or the primary alignment is filtered out for some reason. For + all strategies, + + ties are resolved arbitrarily. Default value: BestMapq. BestMapq (Expects + that multiple + + alignments will be correlated with HI tag, and prefers the pair of + alignments with the + + largest MAPQ, in the absence of a primary selected by the aligner.) + + EarliestFragment (Prefers the alignment which maps the earliest base in + the read. Note + + that EarliestFragment may not be used for paired reads.) + + BestEndMapq (Appropriate for cases in which the aligner is not pair-aware, + and does not + + output the HI tag. It simply picks the alignment for each end with the + highest MAPQ, and + + makes those alignments primary, regardless of whether the two alignments + make sense + + together.) + + MostDistant (Appropriate for a non-pair-aware aligner. 
Picks the alignment + pair with the + + largest insert size. If all alignments would be chimeric, it picks the + alignments for each + + end with the best MAPQ.) + - id: read1_aligned_bam + type: + - 'null' + - type: array + items: File + inputBinding: + prefix: '--READ1_ALIGNED_BAM' + inputBinding: + position: 1 + doc: > + SAM or BAM file(s) with alignment data from the first read of a pair. + This argument may + + be specified 0 or more times. Default value: null. Cannot be used in + conjunction with + + argument(s) ALIGNED_BAM (ALIGNED) + - id: read1_trim + type: int? + inputBinding: + position: 0 + prefix: '--READ1_TRIM' + doc: > + The number of bases trimmed from the beginning of read 1 prior to + alignment Default + + value: 0. + - id: read2_aligned_bam + type: + - 'null' + - type: array + items: File + inputBinding: + prefix: '--READ2_ALIGNED_BAM' + inputBinding: + position: 1 + doc: > + SAM or BAM file(s) with alignment data from the second read of a pair. + This argument may + + be specified 0 or more times. Default value: null. Cannot be used in + conjunction with + + argument(s) ALIGNED_BAM (ALIGNED) + - id: read2_trim + type: int? + inputBinding: + position: 0 + prefix: '--READ2_TRIM' + doc: > + The number of bases trimmed from the beginning of read 2 prior to + alignment Default + + value: 0. + - id: sort_order + type: string? + inputBinding: + position: 1 + prefix: '--SORT_ORDER' + doc: > + The order in which the merged reads should be output. Default value: + coordinate. Possible + + values: {unsorted, queryname, coordinate, duplicate, unknown} + - id: unmap_contaminant_reads + type: boolean? + inputBinding: + position: 1 + prefix: '--UNMAP_CONTAMINANT_READS' + doc: > + Detect reads originating from foreign organisms (e.g. bacterial DNA in a + non-bacterial + + sample),and unmap + label those reads accordingly. Default value: false. + Possible values: + + {true, false} + - id: unmapped_read_strategy + type: string? 
+ inputBinding: + position: 1 + prefix: '--UNMAPPED_READ_STRATEGY' + doc: > + How to deal with alignment information in reads that are being unmapped + (e.g. due to + + cross-species contamination.) Currently ignored unless + UNMAP_CONTAMINANT_READS = true. + + Note that the DO_NOT_CHANGE strategy will actually reset the cigar and set + the mapping + + quality on unmapped reads since otherwise the result will be an invalid + record. To force no + + change use the DO_NOT_CHANGE_INVALID strategy. Default value: + DO_NOT_CHANGE. Possible + + values: {COPY_TO_TAG, DO_NOT_CHANGE, DO_NOT_CHANGE_INVALID, MOVE_TO_TAG} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean?
+ inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_merge_bam_alignment_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.unmapped_bam.basename.replace(/\.(bam|sam)$/, '_merged.bam') + } + } + secondaryFiles: + - ^.bai +label: GATK-MergeBamAlignment +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 1 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.unmapped_bam.basename.replace(/\.(bam|sam)$/, '_merged.bam') + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' +
'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_merge_sam_files_4.1.8.0/example_inputs.yaml b/gatk_merge_sam_files_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..7fcf6550 --- /dev/null +++ b/gatk_merge_sam_files_4.1.8.0/example_inputs.yaml @@ -0,0 +1,21 @@ +input: + - class: File + path: "/path/to/bam" + - class: File + path: "/path/to/bam" +output_file_name: null +assume_sorted: null +comment: null +create_index: null +create_md5_file: null +intervals: null +merge_sequence_dictionaries: null +reference_sequence: + class: File + path: "/path/to/reference.fasta" +sort_order: null +validation_stringency: null +verbosity: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl new file mode 100644 index 00000000..c38ae9d9 --- /dev/null +++ b/gatk_merge_sam_files_4.1.8.0/gatk_merge_sam_files_4.1.8.0.cwl @@ -0,0 +1,261 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_merge_sam_files_4_1_8_0 +baseCommand: + - gatk + - MergeSamFiles +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? 
+ - id: input + type: + type: array + items: File + inputBinding: + prefix: '-I' + inputBinding: + position: 1 + doc: > + SAM or BAM input file This argument must be specified at least once. + Required. + - id: output_file_name + type: string? + doc: SAM or BAM file to write merged result to Required. + - id: assume_sorted + type: boolean? + inputBinding: + position: 1 + prefix: '--ASSUME_SORTED' + doc: > + If true, assume that the input files are in the same sort order as the + requested output + + sort order, even if their headers say otherwise. Default value: false. + Possible values: + + {true, false} + - id: comment + type: string? + inputBinding: + position: 1 + prefix: '--COMMENT' + doc: > + Comment(s) to include in the merged output files header. This argument + may be specified + + 0 or more times. Default value: null. + - id: create_index + type: boolean? + inputBinding: + position: 1 + prefix: '--CREATE_INDEX' + doc: > + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: + + false. Possible values: {true, false} + - id: create_md5_file + type: boolean? + inputBinding: + position: 1 + prefix: '--CREATE_MD5_FILE' + doc: > + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: + + false. Possible values: {true, false} + - id: intervals + type: File? + inputBinding: + position: 1 + prefix: '--INTERVALS' + doc: > + An interval list file that contains the locations of the positions to + merge. Assume bam + + are sorted and indexed. The resulting file will contain alignments that + may overlap with + + genomic regions outside the requested region. Unmapped reads are + discarded. Default + + value: null. + - id: merge_sequence_dictionaries + type: boolean? + inputBinding: + position: 1 + prefix: '--MERGE_SEQUENCE_DICTIONARIES' + doc: > + Merge the sequence dictionaries Default value: false. Possible values: + {true, false} + - id: reference_sequence + type: File? 
+ inputBinding: + position: 1 + prefix: '--REFERENCE_SEQUENCE' + doc: | + Reference sequence file. Default value: null. + - id: sort_order + type: string? + inputBinding: + position: 1 + prefix: '--SORT_ORDER' + doc: > + Sort order of output file Default value: coordinate. Possible values: + {unsorted, + + queryname, coordinate, duplicate, unknown} + - id: use_threading + type: boolean? + inputBinding: + position: 1 + prefix: '--USE_THREADING' + doc: > + Option to create a background thread to encode, compress and write to disk + the output + + file. The threaded version uses about 20% more CPU and decreases runtime + by ~20% when + + writing out a compressed BAM file. Default value: false. Possible values: + {true, false} + - id: validation_stringency + type: string? + inputBinding: + position: 1 + prefix: '--VALIDATION_STRINGENCY' + doc: > + Validation stringency for all SAM files read by this program. Setting + stringency to + + SILENT can improve performance when processing a BAM file in which + variable-length data + + (read, qualities, tags) do not otherwise need to be decoded. Default + value: STRICT. + + Possible values: {STRICT, LENIENT, SILENT} + - id: verbosity + type: string? + inputBinding: + position: 1 + prefix: '--VERBOSITY' + doc: > + Control verbosity of logging. Default value: INFO. Possible values: + {ERROR, WARNING, + + INFO, DEBUG} + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: gatk_merge_sam_files_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return 'merged.bam' + } + } +label: GATK-MergeSamFiles +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 2 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return 'merged.bam' + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git 
a/gatk_revert_sam/4.1.8.0/example_inputs.yaml b/gatk_revert_sam/4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..b195dfa8 --- /dev/null +++ b/gatk_revert_sam/4.1.8.0/example_inputs.yaml @@ -0,0 +1,26 @@ +input: + class: File + path: "/path" +output: null +output_map: null +attribute_to_clear: null +max_discard_fraction: null +library_name: null +max_records_in_ram: null +output_by_readgroup: null +output_by_readgroup_file_format: null +remove_alignment_information: 'false' +remove_duplicate_information: 'true' +restore_hardclips: 'false' +restore_original_qualities: 'false' +sample_alias: null +sanitize: null +sort_order: 'unsorted' +reference: null +validation_stringency: 'SILENT' +compression_level: null +create_index: null +create_md5_file: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl b/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl new file mode 100644 index 00000000..bde0f2de --- /dev/null +++ b/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl @@ -0,0 +1,310 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_revert_sam_4_1_8_0 +baseCommand: + - gatk + - RevertSam +inputs: + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: An aligned SAM or BAM file. Required. + - id: output + type: string? + doc: >- + The output SAM/BAM file to create, or an output directory if OUTPUT_BY_READGROUP is true. Required. Cannot be used in conjunction with argument(s) OUTPUT_MAP (OM) + - id: output_map + type: string? + doc: >- + Tab separated file with two columns, READ_GROUP_ID and OUTPUT, providing file mapping only used if OUTPUT_BY_READGROUP is true. Required.
Cannot be used in conjunction with argument(s) OUTPUT (O) + - id: attribute_to_clear + type: + - "null" + - type: array + items: string + inputBinding: + position: 0 + prefix: '--ATTRIBUTE_TO_CLEAR' + doc: >- + When removing alignment information, the set of optional tags to remove. This may be specified 0 or more times. Default value: [NM, UQ, PG, MD, MQ, SA, MC, AS]. + - id: max_discard_fraction + type: float? + inputBinding: + position: 0 + prefix: '--MAX_DISCARD_FRACTION' + doc: >- + If SANITIZE=true and higher than MAX_DISCARD_FRACTION reads are discarded due to + sanitization then the program will exit with an Exception instead of exiting cleanly. + Output BAM will still be valid. Default value: 0.01. + - id: library_name + type: string? + inputBinding: + position: 0 + prefix: '--LIBRARY_NAME' + doc: >- + The library name to use in the reverted output file. This will override the existing + library name in the file and is used only if all the read groups in the input file have + the same library name. Default value: null. + - id: max_records_in_ram + type: int? + inputBinding: + position: 0 + prefix: '--MAX_RECORDS_IN_RAM' + doc: >- + When writing files that need to be sorted, this will specify the number of records stored + in RAM before spilling to disk. Increasing this number reduces the number of file handles + needed to sort the file, and increases the amount of RAM needed. Default value: 500000. + - id: output_by_readgroup + type: string? + default: 'false' + inputBinding: + position: 0 + prefix: '--OUTPUT_BY_READGROUP' + doc: >- + When true, outputs each read group in a separate file. Default value: false. Possible values: {true, false} + - id: output_by_readgroup_file_format + type: string? + inputBinding: + position: 0 + prefix: '--OUTPUT_BY_READGROUP_FILE_FORMAT' + doc: >- + When using OUTPUT_BY_READGROUP, the output file format can be set to a certain format. + Default value: dynamic. sam (Generate SAM files.) + bam (Generate BAM files.)
+ cram (Generate CRAM files.) + dynamic (Generate files based on the extension of INPUT.) + - id: remove_alignment_information + type: string? + default: 'true' + inputBinding: + position: 0 + prefix: '--REMOVE_ALIGNMENT_INFORMATION' + doc: >- + Remove all alignment information from the file. Default value: true. Possible values: {true, false} + - id: remove_duplicate_information + type: string? + default: 'true' + inputBinding: + position: 1 + prefix: '--REMOVE_DUPLICATE_INFORMATION' + doc: | + Remove duplicate read flags from all reads. Note that if this is false and + REMOVE_ALIGNMENT_INFORMATION==true, the output may have the unusual but sometimes + desirable trait of having unmapped reads that are marked as duplicates. Default value: + true. Possible values: {true, false} + - id: restore_hardclips + type: string? + default: 'true' + inputBinding: + position: 0 + prefix: '--RESTORE_HARDCLIPS' + doc: >- + When true, restores reads and qualities of records with hard-clips containing XB and XQ tags. Default value: true. Possible values: {true, false} + - id: restore_original_qualities + type: string? + default: 'true' + inputBinding: + position: 1 + prefix: '--RESTORE_ORIGINAL_QUALITIES' + doc: | + True to restore original qualities from the OQ field to the QUAL field if available. Default value: true. Possible values: {true, false} + - id: sample_alias + type: string? + inputBinding: + position: 1 + prefix: '--SAMPLE_ALIAS' + doc: | + The sample alias to use in the reverted output file. This will override the existing + sample alias in the file and is used only if all the read groups in the input file have + the same sample alias. Default value: null. + - id: sanitize + type: string? + default: 'false' + inputBinding: + position: 1 + prefix: '--SANITIZE' + doc: | + WARNING: This option is potentially destructive. If enabled will discard reads in order to + produce a consistent output BAM.
Reads discarded include (but are not limited to) paired + reads with missing mates, duplicated records, records with mismatches in length of bases + and qualities. This option can only be enabled if the output sort order is queryname and + will always cause sorting to occur. Default value: false. Possible values: {true, false} + - id: sort_order + type: string? + inputBinding: + position: 1 + prefix: '--SORT_ORDER' + doc: | + The sort order to create the reverted output file with. Default value: queryname. Possible values: {unsorted, queryname, coordinate, duplicate, unknown} + - id: reference + type: File? + inputBinding: + position: 0 + prefix: '-R' + doc: >- + Reference sequence file. Note that while this argument is not required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.fasta.fai + - ^.dict + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: compression_level + type: int? + inputBinding: + position: 0 + prefix: '--COMPRESSION_LEVEL' + doc: >- + Compression level for all compressed files created (e.g. BAM and VCF). Default value: 2. + - id: create_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: create_md5_file + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false} + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_revert_sam_output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output){ + return inputs.output + } else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_revertsam.bam') + } + } + - id: gatk_revert_sam_output_map + type: File? + outputBinding: + glob: |- + ${ + if(inputs.output_map){ + return inputs.output_map + } else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_revertsam.tsv') + } + } +label: GATK-RevertSam +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output){ + return inputs.output; + } else if (inputs.output_map) { + return null; + }
else { + return inputs.input.basename.replace(/\.(bam|sam)$/, '_revertsam.bam'); + } + } + - position: 0 + prefix: '-OM' + valueFrom: |- + ${ + if(inputs.output_map){ + return inputs.output_map; + } else { + return null; + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/gatk_revert_sam/README.md b/gatk_revert_sam/README.md new file mode 100644 index 00000000..90ea2bfa --- /dev/null +++ b/gatk_revert_sam/README.md @@ -0,0 +1,164 @@ +# CWL for running GATK - RevertSam + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_revert_sam_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_revert_sam_4.1.8.0.cwl [-h] --input INPUT [--output OUTPUT] + [--output_map OUTPUT_MAP] + [--attribute_to_clear ATTRIBUTE_TO_CLEAR] + [--max_discard_fraction MAX_DISCARD_FRACTION] + [--library_name LIBRARY_NAME] + [--max_records_in_ram MAX_RECORDS_IN_RAM] + [--output_by_readgroup OUTPUT_BY_READGROUP] + [--output_by_readgroup_file_format OUTPUT_BY_READGROUP_FILE_FORMAT] + [--remove_alignment_information
REMOVE_ALIGNMENT_INFORMATION] + [--remove_duplicate_information REMOVE_DUPLICATE_INFORMATION] + [--restore_hardclips RESTORE_HARDCLIPS] + [--restore_original_qualities RESTORE_ORIGINAL_QUALITIES] + [--sample_alias SAMPLE_ALIAS] + [--sanitize SANITIZE] + [--sort_order SORT_ORDER] + [--reference REFERENCE] + [--validation_stringency VALIDATION_STRINGENCY] + [--compression_level COMPRESSION_LEVEL] + [--create_index] [--create_md5_file] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --input INPUT An aligned SAM or BAM file. Required. + --output OUTPUT The output SAM/BAM file to create, or an output + directory if OUTPUT_BY_READGROUP is true. Required. + Cannot be used in conjunction with argument(s) + OUTPUT_MAP (OM) + --output_map OUTPUT_MAP + Tab separated file with two columns, READ_GROUP_ID and + OUTPUT, providing file mapping only used if + OUTPUT_BY_READGROUP is true. Required. Cannot be used + in conjunction with argument(s) OUTPUT (O) + --attribute_to_clear ATTRIBUTE_TO_CLEAR + When removing alignment information, the set of + optional tags to remove. This may be specified 0 or + more times. Default value: [NM, UQ, PG, MD, MQ, SA, + MC, AS]. + --max_discard_fraction MAX_DISCARD_FRACTION + If SANITIZE=true and higher than MAX_DISCARD_FRACTION + reads are discarded due to sanitization then the + program will exit with an Exception instead of exiting + cleanly. Output BAM will still be valid. Default + value: 0.01. + --library_name LIBRARY_NAME + The library name to use in the reverted output file. + This will override the existing library name in the + file and is used only if all the read groups in the + input file have the same library name. Default value: + null.
+ --max_records_in_ram MAX_RECORDS_IN_RAM + When writing files that need to be sorted, this will + specify the number of records stored in RAM before + spilling to disk. Increasing this number reduces the + number of file handles needed to sort the file, and + increases the amount of RAM needed. Default value: + 500000. + --output_by_readgroup OUTPUT_BY_READGROUP + When true, outputs each read group in a separate file. + Default value: false. Possible values: {true, false} + --output_by_readgroup_file_format OUTPUT_BY_READGROUP_FILE_FORMAT + When using OUTPUT_BY_READGROUP, the output file format + can be set to a certain format. Default value: + dynamic. sam (Generate SAM files.) bam (Generate BAM + files.) cram (Generate CRAM files.) dynamic (Generate + files based on the extension of INPUT.) + --remove_alignment_information REMOVE_ALIGNMENT_INFORMATION + Remove all alignment information from the file. + Default value: true. Possible values: {true, false} + --remove_duplicate_information REMOVE_DUPLICATE_INFORMATION + Remove duplicate read flags from all reads. Note that + if this is false and + REMOVE_ALIGNMENT_INFORMATION==true, the output may + have the unusual but sometimes desirable trait of + having unmapped reads that are marked as duplicates. + Default value: true. Possible values: {true, false} + --restore_hardclips RESTORE_HARDCLIPS + When true, restores reads and qualities of records + with hard-clips containing XB and XQ tags. Default + value: true. Possible values: {true, false} + --restore_original_qualities RESTORE_ORIGINAL_QUALITIES + True to restore original qualities from the OQ field + to the QUAL field if available. Default value: true. + Possible values: {true, false} + --sample_alias SAMPLE_ALIAS + The sample alias to use in the reverted output file. + This will override the existing sample alias in the + file and is used only if all the read groups in the + input file have the same sample alias. Default value: + null.
+ --sanitize SANITIZE WARNING: This option is potentially destructive. If + enabled will discard reads in order to produce a + consistent output BAM. Reads discarded include (but + are not limited to) paired reads with missing mates, + duplicated records, records with mismatches in length + of bases and qualities. This option can only be + enabled if the output sort order is queryname and will + always cause sorting to occur. Default value: false. + Possible values: {true, false} + --sort_order SORT_ORDER + The sort order to create the reverted output file + with. Default value: queryname. Possible values: + {unsorted, queryname, coordinate, duplicate, unknown} + --reference REFERENCE + Reference sequence file. Note that while this argument + is not required, without it only a small subset of the + metrics will be calculated. Note also that if a + reference sequence is provided, it must be accompanied + by a sequence dictionary. Default value: null. + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --compression_level COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and VCF). Default value: 2. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value: false. + Possible values: {true, false} + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. 
Possible values: + {true, false} + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. This option may be specified 0 or + more times. +``` diff --git a/gatk_sam_to_fastq_4.1.8.0/README.md b/gatk_sam_to_fastq_4.1.8.0/README.md new file mode 100644 index 00000000..491eac17 --- /dev/null +++ b/gatk_sam_to_fastq_4.1.8.0/README.md @@ -0,0 +1,175 @@ +# CWL for running GATK - SamToFastq + +## Version of tools in docker image + +| Tool | Version | Location | +|--- |--- |--- | +| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 | + + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner gatk_sam_to_fastq_4.1.8.0.cwl example_inputs.yaml +``` + +### Usage + +```bash +usage: gatk_sam_to_fastq_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --input INPUT [--fastq FASTQ] + [--clipping_action CLIPPING_ACTION] + [--clipping_attribute CLIPPING_ATTRIBUTE] + [--clipping_min_length CLIPPING_MIN_LENGTH] + [--compress_outputs_per_rg] + [--compression_level COMPRESSION_LEVEL] + [--create_index] [--include_non_pf_reads] + [--include_non_primary_alignments] + [--interleave] + [--max_records_in_ram MAX_RECORDS_IN_RAM] + [--output_dir OUTPUT_DIR] + [--create_md5_file] [--output_per_rg] + [--quality QUALITY] [--re_reverse] + [--read1_max_bases_to_write READ1_MAX_BASES_TO_WRITE] + [--read1_trim READ1_TRIM] + [--read2_max_bases_to_write READ2_MAX_BASES_TO_WRITE] + [--read2_trim READ2_TRIM] + [--reference_sequence REFERENCE_SEQUENCE] + [--rg_tag RG_TAG] + [--second_end_fastq SECOND_END_FASTQ] + [--unpaired_fastq UNPAIRED_FASTQ] + [--validation_stringency VALIDATION_STRINGENCY] + [job_order] + 
+positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input SAM/BAM file to extract reads from Required. + --fastq FASTQ Output FASTQ file (single-end fastq or, if paired, + first end of the pair FASTQ). Required. Cannot be used + in conjunction with argument(s) OUTPUT_PER_RG (OPRG) + COMPRESS_OUTPUTS_PER_RG (GZOPRG) OUTPUT_DIR (ODIR) + --clipping_action CLIPPING_ACTION + The action that should be taken with clipped reads: + 'X' means the reads and qualities should be trimmed at + the clipped position; 'N' means the bases should be + changed to Ns in the clipped region; and any integer + means that the base qualities should be set to that + value in the clipped region. Default value: null. + --clipping_attribute CLIPPING_ATTRIBUTE + The attribute that stores the position at which the + SAM record should be clipped Default value: null. + --clipping_min_length CLIPPING_MIN_LENGTH + When performing clipping with the CLIPPING_ATTRIBUTE + and CLIPPING_ACTION parameters, ensure that the + resulting reads after clipping are at least + CLIPPING_MIN_LENGTH bases long. If the original read + is shorter than CLIPPING_MIN_LENGTH then the original + read length will be maintained. Default value: 0. + --compress_outputs_per_rg + Compress output FASTQ files per read group using gzip + and append a .gz extension to the file names. Default + value: false. Possible values: {true, false} Cannot be + used in conjunction with argument(s) FASTQ (F) + SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + --compression_level COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and VCF). Default value: 2. + --create_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. 
Default value: false. + Possible values: {true, false} + --include_non_pf_reads + Include non-PF reads from the SAM file into the output + FASTQ files. PF means 'passes filtering'. Reads whose + 'not passing quality controls' flag is set are non-PF + reads. See GATK Dictionary for more info. Default + value: false. Possible values: {true, false} + --include_non_primary_alignments + If true, include non-primary alignments in the output. + Support of non-primary alignments in SamToFastq is not + comprehensive, so there may be exceptions if this is + set to true and there are paired reads with non- + primary alignments. Default value: false. Possible + values: {true, false} + --interleave Will generate an interleaved fastq if paired, each + line will have /1 or /2 to describe which end it came + from Default value: false. Possible values: {true, + false} + --max_records_in_ram MAX_RECORDS_IN_RAM + When writing files that need to be sorted, this will + specify the number of records stored in RAM before + spilling to disk. Increasing this number reduces the + number of file handles needed to sort the file, and + increases the amount of RAM needed. Default value: + 500000. + --output_dir OUTPUT_DIR + Directory in which to output the FASTQ file(s). Used + only when OUTPUT_PER_RG is true. Default value: null. + Cannot be used in conjunction with argument(s) FASTQ + (F). + --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ + files created. Default value: false. Possible values: + {true, false}. + --output_per_rg Output a FASTQ file per read group (two FASTQ files + per read group if the group is paired). Default value: + false. Possible values: {true, false} Cannot be used + in conjunction with argument(s) FASTQ (F) + SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + --quality QUALITY End-trim reads using the phred/bwa quality trimming + algorithm and this quality. Default value: null. 
+ --re_reverse Re-reverse bases and qualities of reads with negative + strand flag set before writing them to FASTQ Default + value: true. Possible values: {true, false} + --read1_max_bases_to_write READ1_MAX_BASES_TO_WRITE + The maximum number of bases to write from read 1 after + trimming. If there are fewer than this many bases left + after trimming, all will be written. If this value is + null then all bases left after trimming will be + written. Default value: null. + --read1_trim READ1_TRIM + The number of bases to trim from the beginning of read + 1. Default value: 0. + --read2_max_bases_to_write READ2_MAX_BASES_TO_WRITE + The maximum number of bases to write from read 2 after + trimming. If there are fewer than this many bases left + after trimming, all will be written. If this value is + null then all bases left after trimming will be + written. Default value: null. + --read2_trim READ2_TRIM + The number of bases to trim from the beginning of read + 2. Default value: 0. + --reference_sequence REFERENCE_SEQUENCE + Reference sequence file. Default value: null. + --rg_tag RG_TAG The read group tag (PU or ID) to be used to output a + FASTQ file per read group. Default value: PU. + --second_end_fastq SECOND_END_FASTQ + Output FASTQ file (if paired, second end of the pair + FASTQ). Default value: null. Cannot be used in + conjunction with argument(s) OUTPUT_PER_RG (OPRG) + COMPRESS_OUTPUTS_PER_RG (GZOPRG) + --unpaired_fastq UNPAIRED_FASTQ + Output FASTQ file for unpaired reads; may only be + provided in paired-FASTQ mode Default value: null. + Cannot be used in conjunction with argument(s) + OUTPUT_PER_RG (OPRG) COMPRESS_OUTPUTS_PER_RG (GZOPRG) + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. 
+ Possible values: {STRICT, LENIENT, SILENT} +``` diff --git a/gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml b/gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml new file mode 100644 index 00000000..aa1c7e46 --- /dev/null +++ b/gatk_sam_to_fastq_4.1.8.0/example_inputs.yaml @@ -0,0 +1,32 @@ +input: + class: File + metadata: {} + path: "/path/to/bam" +fastq: null +clipping_action: null +clipping_attribute: null +clipping_min_length: null +compress_outputs_per_rg: null +compression_level: null +create_index: null +include_non_pf_reads: null +include_non_primary_alignments: null +interleave: null +max_records_in_ram: null +output_dir: null +create_md5_file: null +output_per_rg: null +quality: null +re_reverse: null +read1_max_bases_to_write: null +read1_trim: null +read2_max_bases_to_write: null +read2_trim: null +reference_sequence: null +rg_tag: null +second_end_fastq: null +unpaired_fastq: null +validation_stringency: null +memory_overhead: null +memory_per_job: null +number_of_threads: null diff --git a/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl new file mode 100644 index 00000000..6516f711 --- /dev/null +++ b/gatk_sam_to_fastq_4.1.8.0/gatk_sam_to_fastq_4.1.8.0.cwl @@ -0,0 +1,360 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: gatk_sam_to_fastq_4_1_8_0 +baseCommand: + - gatk + - SamToFastq +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: fastq + type: string? 
+ doc: >- + Output FASTQ file (single-end fastq or, if paired, first end of the pair + FASTQ) + - id: input + type: File + inputBinding: + position: 0 + prefix: '--INPUT' + doc: Input SAM/BAM file to extract reads from Required. + - id: clipping_action + type: string? + inputBinding: + position: 0 + prefix: '--CLIPPING_ACTION' + doc: >- + The action that should be taken with clipped reads: 'X' means the reads + and qualities should be trimmed at the clipped position; 'N' means the + bases should be changed to Ns in the clipped region; and any integer means + that the base qualities should be set to that value in the clipped + region. Default value: null. + - id: clipping_attribute + type: string? + inputBinding: + position: 0 + prefix: '--CLIPPING_ATTRIBUTE' + doc: >- + The attribute that stores the position at which the SAM record should be + clipped Default value: null. + - id: clipping_min_length + type: int? + inputBinding: + position: 0 + prefix: '--CLIPPING_MIN_LENGTH' + doc: >- + When performing clipping with the CLIPPING_ATTRIBUTE and CLIPPING_ACTION + parameters, ensure that the resulting reads after clipping are at least + CLIPPING_MIN_LENGTH bases long. If the original read is shorter than + CLIPPING_MIN_LENGTH then the original read length will be maintained. + Default value: 0. + - id: compress_outputs_per_rg + type: boolean? + inputBinding: + position: 0 + prefix: '--COMPRESS_OUTPUTS_PER_RG' + doc: >- + Compress output FASTQ files per read group using gzip and append a .gz + extension to the file names. Default value: false. Possible values: + {true, false} Cannot be used in conjunction with argument(s) FASTQ (F) + SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) + - id: compression_level + type: int? + inputBinding: + position: 0 + prefix: '--COMPRESSION_LEVEL' + doc: >- + Compression level for all compressed files created (e.g. BAM and VCF). + Default value: 2. + - id: create_index + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value: false. Possible values: {true, false} + - id: include_non_pf_reads + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_NON_PF_READS' + doc: >- + Include non-PF reads from the SAM file into the output FASTQ files. PF + means 'passes filtering'. Reads whose 'not passing quality controls' flag + is set are non-PF reads. See GATK Dictionary for more info. Default + value: false. Possible values: {true, false} + - id: include_non_primary_alignments + type: boolean? + inputBinding: + position: 0 + prefix: '--INCLUDE_NON_PRIMARY_ALIGNMENTS' + doc: >- + If true, include non-primary alignments in the output. Support of + non-primary alignments in SamToFastq is not comprehensive, so there may be + exceptions if this is set to true and there are paired reads with + non-primary alignments. Default value: false. Possible values: {true, + false} + - id: interleave + type: boolean? + inputBinding: + position: 0 + prefix: '--INTERLEAVE' + doc: >- + Will generate an interleaved fastq if paired, each line will have /1 or /2 + to describe which end it came from Default value: false. Possible values: + {true, false} + - default: 50000 + id: max_records_in_ram + type: int? + inputBinding: + position: 0 + prefix: '--MAX_RECORDS_IN_RAM' + doc: >- + When writing files that need to be sorted, this will specify the number of + records stored in RAM before spilling to disk. Increasing this number + reduces the number of file handles needed to sort the file, and increases + the amount of RAM needed. Default value: 500000. + - id: output_dir + type: string? + inputBinding: + position: 0 + prefix: '--OUTPUT_DIR' + doc: >- + Directory in which to output the FASTQ file(s). Used only when + OUTPUT_PER_RG is true. Default value: null. Cannot be used in conjunction + with argument(s) FASTQ (F). 
+ - id: create_md5_file + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_MD5_FILE' + doc: >- + Whether to create an MD5 digest for any BAM or FASTQ files created. + Default value: false. Possible values: {true, false}. + - id: output_per_rg + type: boolean? + inputBinding: + position: 0 + prefix: '--OUTPUT_PER_RG' + doc: >- + Output a FASTQ file per read group (two FASTQ files per read group if the + group is paired). Default value: false. Possible values: {true, false} + Cannot be used in conjunction with argument(s) FASTQ (F) SECOND_END_FASTQ + (F2) UNPAIRED_FASTQ (FU) + - id: quality + type: int? + inputBinding: + position: 0 + prefix: '--QUALITY' + doc: >- + End-trim reads using the phred/bwa quality trimming algorithm and this + quality. Default value: null. + - id: re_reverse + type: boolean? + inputBinding: + position: 0 + prefix: '--RE_REVERSE' + doc: >- + Re-reverse bases and qualities of reads with negative strand flag set + before writing them to FASTQ Default value: true. Possible values: {true, + false} + - id: read1_max_bases_to_write + type: int? + inputBinding: + position: 0 + prefix: '--READ1_MAX_BASES_TO_WRITE' + doc: >- + The maximum number of bases to write from read 1 after trimming. If there + are fewer than this many bases left after trimming, all will be written. + If this value is null then all bases left after trimming will be written. + Default value: null. + - id: read1_trim + type: int? + inputBinding: + position: 0 + prefix: '--READ1_TRIM' + doc: >- + The number of bases to trim from the beginning of read 1. Default value: + 0. + - id: read2_max_bases_to_write + type: int? + inputBinding: + position: 0 + prefix: '--READ2_MAX_BASES_TO_WRITE' + doc: >- + The maximum number of bases to write from read 2 after trimming. If there + are fewer than this many bases left after trimming, all will be written. + If this value is null then all bases left after trimming will be written. + Default value: null. 
+ - id: read2_trim + type: int? + inputBinding: + position: 0 + prefix: '--READ2_TRIM' + doc: >- + The number of bases to trim from the beginning of read 2. Default value: + 0. + - id: reference_sequence + type: File? + inputBinding: + position: 0 + prefix: '--REFERENCE_SEQUENCE' + doc: 'Reference sequence file. Default value: null.' + - id: rg_tag + type: string? + inputBinding: + position: 0 + prefix: '--RG_TAG' + doc: >- + The read group tag (PU or ID) to be used to output a FASTQ file per read + group. Default value: PU. + - id: second_end_fastq + type: string? + inputBinding: + position: 0 + prefix: '--SECOND_END_FASTQ' + doc: >- + Output FASTQ file (if paired, second end of the pair FASTQ). Default + value: null. Cannot be used in conjunction with argument(s) OUTPUT_PER_RG + (OPRG) COMPRESS_OUTPUTS_PER_RG (GZOPRG) + - id: unpaired_fastq + type: string? + inputBinding: + position: 0 + prefix: '--UNPAIRED_FASTQ' + doc: >- + Output FASTQ file for unpaired reads; may only be provided in paired-FASTQ + mode Default value: null. Cannot be used in conjunction with argument(s) + OUTPUT_PER_RG (OPRG) COMPRESS_OUTPUTS_PER_RG (GZOPRG) + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. Possible values: {STRICT, LENIENT, + SILENT} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' +outputs: + - id: gatk_sam_to_fastq_fastq + type: File + outputBinding: + glob: |- + ${ + if(inputs.fastq){ + return inputs.fastq + } else { + return inputs.input.basename.replace(/.bam|.sam/, '_R1.fastq') + } + } + - id: gatk_sam_to_fastq_unpaired_fastq + type: File? 
+ outputBinding: + glob: |- + ${ + if(inputs.unpaired_fastq){ + return inputs.unpaired_fastq + } else { + return inputs.input.basename.replace(/.bam|.sam/, '_unpaired.fastq') + } + } + - id: gatk_sam_to_fastq_second_end_fastq + type: File? + outputBinding: + glob: |- + ${ + if(inputs.second_end_fastq){ + return inputs.second_end_fastq + } else { + return inputs.input.basename.replace(/.bam|.sam/, '_R2.fastq') + } + } +label: GATK-SamToFastq +arguments: + - position: 0 + prefix: '--java-options' + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '--FASTQ' + valueFrom: |- + ${ + if(inputs.fastq){ + return inputs.fastq + } else { + return inputs.input.basename.replace(/.bam|.sam/, '_R1.fastq') + } + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 
'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charles Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': gatk4 + 'doap:revision': 4.1.8.0 diff --git a/getbasecountsmultisample/1.2.5/README.md b/getbasecountsmultisample/1.2.5/README.md new file mode 100644 index 00000000..e44235f8 --- /dev/null +++ b/getbasecountsmultisample/1.2.5/README.md @@ -0,0 +1,89 @@ +# CWL and Dockerfile for running GetBaseCountsMultiSample + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +| ------ | ------- | ----------------------------------------------- | +| ubuntu | 16.04 | - | +| GetBaseCountsMultiSample | 1.2.5 | https://github.com/msk-access/GetBaseCountsMultiSample/releases/tag/1.2.5 | + + +## CWL + +- CWL specification 1.0 +- Use example_input.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner getbasecountsmultisample_1.2.5.cwl example_input.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/getbasecountsmultisample_1.2.5.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/gbcms_toil.log --jobStore /path/to/gbcms_jobStore --batchSystem lsf --workDir /path/to/gbcms_toil_log --outdir .
--writeLogs /path/to/gbcms_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/getbasecountsmultisample_1.2.5.cwl /path/to/inputs.yaml > gbcms_toil.stdout 2> gbcms_toil.stderr & +``` + +### Usage + +``` +usage: getbasecountsmultisample_1.2.5.cwl [-h] + [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] + --genotyping_bams GENOTYPING_BAMS + --genotyping_bams_ids + GENOTYPING_BAMS_IDS + --filter_duplicate FILTER_DUPLICATE + --fragment_count FRAGMENT_COUNT + --maf MAF [--maq MAQ] [--omaf] + --output OUTPUT --ref_fasta + REF_FASTA [--vcf VCF] + [--generic_counting GENERIC_COUNTING] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --genotyping_bams GENOTYPING_BAMS + Input bam file + --genotyping_bams_ids GENOTYPING_BAMS_IDS + Input bam, sample identifier to be used for "Tumor + Sample Barcode" for maf or Sample name in the header + for vcf + --filter_duplicate FILTER_DUPLICATE + Whether to filter reads that are marked as duplicate. + 0=off, 1=on. Default 1 + --fragment_count FRAGMENT_COUNT + Whether to output fragment read counts DPF/RDF/ADF. + 0=off, 1=on. Default 0 + --maf MAF Input variant file in TCGA maf format. --maf or --vcf + need to be specified at least once. But --maf and + --vcf are mutually exclusive + --maq MAQ Mapping quality threshold. Default 20 + --omaf Output the result in maf format + --output OUTPUT Filename for output of raw fillout data in MAF/VCF + format + --ref_fasta REF_FASTA + Input reference sequence file + --vcf VCF Input variant file in vcf-like format(the first 5 + columns are used). --maf or --vcf need to be specified + at least once.
But --maf and --vcf are mutually + exclusive + --generic_counting GENERIC_COUNTING + Use the newly implemented generic counting algorithm. + Works better for complex variants. You may get + different allele count result from the default + counting algorithm +``` diff --git a/getbasecountsmultisample/1.2.5/container/Dockerfile b/getbasecountsmultisample/1.2.5/container/Dockerfile new file mode 100644 index 00000000..d8c10a6a --- /dev/null +++ b/getbasecountsmultisample/1.2.5/container/Dockerfile @@ -0,0 +1,50 @@ +################## BASE IMAGE ###################### + +FROM ubuntu:16.04 + +################## ARGUMENTS######################## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG LICENSE="Apache-2.0" +ARG GBCMS_VERSION=1.2.5 +ARG VCS_REF +################## METADATA ######################## + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.revision=${VCS_REF} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.gbcms=${GBCMS_VERSION} \ + org.opencontainers.image.source="https://github.com/msk-access/GetBaseCountsMultiSample/releases/" + +LABEL org.opencontainers.image.description="This container uses Ubuntu 16.04 as the base image to build GetBaseCountsMultiSample version ${GBCMS_VERSION}" + +################## INSTALL ########################## + +WORKDIR /usr/src + +RUN apt-get update && \ + apt-get --no-install-recommends install -y \ + wget ca-certificates openssl gcc g++ make zlib1g-dev cmake libjsoncpp-dev && \ + apt-get clean autoclean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +RUN wget --no-check-certificate "https://github.com/msk-access/GetBaseCountsMultiSample/archive/refs/tags/${GBCMS_VERSION}.tar.gz" && \ + tar xzvf /usr/src/${GBCMS_VERSION}.tar.gz && \ + cd
/usr/src/GetBaseCountsMultiSample-${GBCMS_VERSION}/bamtools-master && \ + rm -r build/ && \ + mkdir build && \ + cd build/ && \ + cmake -DCMAKE_CXX_FLAGS=-std=c++03 .. && \ + make && \ + make install && \ + cp ../lib/libbamtools.so.2.3.0 /usr/lib/ && \ + cd /usr/src/GetBaseCountsMultiSample-${GBCMS_VERSION}/ && \ + make && \ + cp GetBaseCountsMultiSample /usr/local/bin/ + diff --git a/getbasecountsmultisample/1.2.5/example_input.yaml b/getbasecountsmultisample/1.2.5/example_input.yaml new file mode 100644 index 00000000..5aa7ca49 --- /dev/null +++ b/getbasecountsmultisample/1.2.5/example_input.yaml @@ -0,0 +1,27 @@ +filter_duplicate: 0 +fragment_count: 1 +generic_counting: true +genotyping_bams: + - class: File + path: /Users/shahr2/Documents/test_reference/bam/duplex/SeraCare_0-5.bam + - class: File + path: /Users/shahr2/Documents/test_reference/bam/SeraCare_0-5.bam +genotyping_bams_ids: + - test1 + - test2 +maf: + class: File + path: >- + /Users/shahr2/Downloads/SeraCare_0-5.F22.combined-variants.vep_keptrmv_taggedHotspots.maf +maq: null +memory_overhead: null +memory_per_job: null +number_of_threads: null +omaf: true +output: test.maf +ref_fasta: + class: File + path: >- + /Users/shahr2/Documents/test_reference/reference/versions/hg19/Homo_sapiens_assembly19.fasta +vcf: null + diff --git a/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl new file mode 100644 index 00000000..1a073fac --- /dev/null +++ b/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl @@ -0,0 +1,173 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: getbasecountsmultisample_1_2_5 +baseCommand: + - GetBaseCountsMultiSample +inputs: + - id: memory_per_job + type: int? 
+ doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: genotyping_bams + type: + - File + - type: array + items: File + doc: Input bam file + - id: genotyping_bams_ids + type: + - string + - type: array + items: string + doc: >- + Input bam, sample identifier to be used for "Tumor Sample Barcode" for maf + or Sample name in the header for vcf + - id: filter_duplicate + type: int + inputBinding: + position: 0 + prefix: '--filter_duplicate' + doc: >- + Whether to filter reads that are marked as duplicate. 0=off, 1=on. Default + 1 + - id: fragment_count + type: int + inputBinding: + position: 0 + prefix: '--fragment_count' + doc: 'Whether to output fragment read counts DPF/RDF/ADF. 0=off, 1=on. Default 0' + - id: maf + type: File + inputBinding: + position: 0 + prefix: '--maf' + doc: >- + Input variant file in TCGA maf format. --maf or --vcf need to be specified + at least once. But --maf and --vcf are mutually exclusive + - id: maq + type: int? + inputBinding: + position: 0 + prefix: '--maq' + doc: Mapping quality threshold. Default 20 + - id: omaf + type: boolean? + inputBinding: + position: 0 + prefix: '--omaf' + doc: Output the result in maf format + - id: output + type: string? + inputBinding: + position: 0 + prefix: '--output' + valueFrom: |- + ${ + if (inputs.output) { + return inputs.output + } else if (inputs.genotyping_bams.length) { + return inputs.maf.basename.replace('.maf', '_fillout.maf') + } else { + return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf') + } + } + doc: Filename for output of raw fillout data in MAF/VCF format + - id: ref_fasta + type: File + inputBinding: + position: 0 + prefix: '--fasta' + doc: Input reference sequence file + - id: vcf + type: File? + inputBinding: + position: 0 + prefix: '--vcf' + doc: >- + Input variant file in vcf-like format(the first 5 columns are used). 
--maf + or --vcf need to be specified at least once. But --maf and --vcf are + mutually exclusive + - id: generic_counting + type: boolean? + inputBinding: + position: 0 + prefix: '--generic_counting' + doc: >- + Use the newly implemented generic counting algorithm. Works better for + complex variants. You may get different allele count result from the + default counting algorithm +outputs: + - id: fillout + type: File + outputBinding: + glob: |- + ${ + if (inputs.output) { + return inputs.output + } else if (inputs.genotyping_bams.length) { + return inputs.maf.basename.replace('.maf', '_fillout.maf') + } else { + return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf') + } + } +label: getbasecountsmultisample_1.2.5 +arguments: + - position: 0 + prefix: '' + shellQuote: false + valueFrom: | + $('--bam_fof bam_fof.tsv') + - position: 0 + prefix: '--thread' + valueFrom: $(runtime.cores) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 16000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gbcms:1.2.5' + - class: InitialWorkDirRequirement + listing: + - entryname: bam_fof.tsv + entry: |- + ${ + if (typeof(inputs.genotyping_bams_ids) == 'object') { + return inputs.genotyping_bams_ids.map(function(sid, i) { + return sid + "\t" + + inputs.genotyping_bams[i].path + }).join("\n") + } else { + return inputs.genotyping_bams_ids + "\t" + inputs.genotyping_bams.path + "\n" + } + } + writable: false + - class: InlineJavascriptRequirement + - class: StepInputExpressionRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer 
Center +'doap:release': + - class: 'doap:Version' + 'doap:name': GetBaseCountsMultiSample + 'doap:revision': 1.2.5 diff --git a/manta_1.5.1/container/Dockerfile b/manta_1.5.1/container/Dockerfile new file mode 100644 index 00000000..f1171f34 --- /dev/null +++ b/manta_1.5.1/container/Dockerfile @@ -0,0 +1,83 @@ +################## BASE IMAGE ###################### + +FROM ubuntu:16.04 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG VCS_REF +ARG UBUNTU_VERSION=16.04 +ARG LICENSE="Apache-2.0" +ARG MANTA_VERSION=1.5.1 +ARG SAMTOOLS_VERSION=1.9 +ARG HTSLIB_VERSION=1.9 + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.label-schema.vcs-ref=${VCS_REF} \ + org.label-schema.vcs-url="https://github.com/msk-access/cwl-commandlinetools" \ + org.opencontainers.image.version.manta=${MANTA_VERSION} \ + org.opencontainers.image.version.samtools=${SAMTOOLS_VERSION} \ + org.opencontainers.image.version.htslib=${HTSLIB_VERSION} \ + org.opencontainers.image.version.ubuntu=${UBUNTU_VERSION} \ + org.opencontainers.image.source.manta="https://github.com/Illumina/manta/releases/download/v${MANTA_VERSION}/manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2" \ + org.opencontainers.image.source.samtools="https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2" \ + org.opencontainers.image.source.htslib="https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2" + +LABEL org.opencontainers.image.description="This container uses ubuntu ${UBUNTU_VERSION} as the base image to build \ + manta version ${MANTA_VERSION}, \ + samtools version ${SAMTOOLS_VERSION} and 
\ + htslib version ${HTSLIB_VERSION}" + +################## INSTALL ########################## + + +#UBUNTU +RUN apt-get update \ + && apt-get install -y g++ libbz2-dev liblzma-dev make ncurses-dev wget zlib1g-dev libcurl4-openssl-dev wget python bzip2 \ + && apt-get clean \ + && apt-get purge \ + && apt-get remove --yes --purge build-essential \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +#MANTA +RUN apt-get update \ + && wget https://github.com/Illumina/manta/releases/download/v${MANTA_VERSION}/manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2 \ + && tar -jxvf manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2 \ + && rm -rf /manta-${MANTA_VERSION}.centos6_x86_64.tar.bz2 \ + && rm -rf /manta-${MANTA_VERSION}.centos6_x86_64/share/demo \ + && ln -s $PWD/manta-${MANTA_VERSION}.centos6_x86_64 /usr/local/bin/manta \ + && cp $PWD/manta-${MANTA_VERSION}.centos6_x86_64/libexec/convertInversion.py /usr/local/bin \ + && chmod +x /usr/local/bin/* \ + && apt-get clean \ + && apt-get purge \ + && apt-get remove --yes --purge build-essential \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +#SAMTOOLS +RUN apt-get update && apt-get install -y \ + && wget https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 \ + && wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && tar xvjf htslib-${HTSLIB_VERSION}.tar.bz2 \ + && cd htslib-${HTSLIB_VERSION} \ + && ./configure --enable-libcurl \ + && make \ + && cd ../.. 
\ + && tar xvjf samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && cd samtools-${SAMTOOLS_VERSION} \ + && ./configure --enable-libcurl \ + && make \ + && rm -r /htslib-${HTSLIB_VERSION}/test \ + && cp -r /htslib-${HTSLIB_VERSION}/* /usr/local/bin \ + && rm -r /samtools-${SAMTOOLS_VERSION}/test \ + && cp -r /samtools-${SAMTOOLS_VERSION}/* /usr/local/bin \ + && apt-get clean \ + && apt-get purge \ + && apt-get remove --yes --purge build-essential \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* diff --git a/manta_1.5.1/example_inputs.yaml b/manta_1.5.1/example_inputs.yaml new file mode 100644 index 00000000..bd82b9b3 --- /dev/null +++ b/manta_1.5.1/example_inputs.yaml @@ -0,0 +1,10 @@ +call_regions: null +generateEvidenceBam: true +memory_overhead: null +memory_per_job: null +non_wgs: true +normal_bam: /path/to/normal_bam +number_of_threads: null +output_contigs: true +reference_fasta: /path/to/reference_fasta +tumor_bam: /path/to/tumor_bam diff --git a/manta_1.5.1/manta_1.5.1.cwl b/manta_1.5.1/manta_1.5.1.cwl new file mode 100644 index 00000000..9c07140f --- /dev/null +++ b/manta_1.5.1/manta_1.5.1.cwl @@ -0,0 +1,177 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: manta_1_5_1 +baseCommand: + - /usr/bin/python + - /usr/local/bin/manta/bin/configManta.py +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: call_regions + type: File? + inputBinding: + position: -5 + prefix: '--callRegions' + doc: >- + bgzip-compressed, tabix-indexed BED file specifying regions to which + variant analysis will be restricted + secondaryFiles: + - .tbi + - id: non_wgs + type: boolean? 
+ inputBinding: + position: -6 + prefix: '--exome' + doc: toggles on settings for WES + - id: normal_bam + type: File? + inputBinding: + position: -2 + prefix: '--normalBam' + doc: >- + Normal sample BAM or CRAM file. May be specified more than once, multiple + inputs will be treated as each BAM file representing a different sample. + [optional] (no default) + secondaryFiles: + - |- + ${ + if (self.nameext === ".bam") { + return self.basename + ".bai" + } + else { + return self.basename + ".crai" + } + } + - id: output_contigs + type: boolean? + inputBinding: + position: -7 + prefix: '--outputContig' + doc: >- + if true, outputs assembled contig sequences in final VCF files, in the + INFO field CONTIG + - id: reference_fasta + type: File + inputBinding: + position: -4 + prefix: '--referenceFasta' + doc: 'samtools-indexed reference fasta file [required]' + secondaryFiles: + - .fai + - id: tumor_bam + type: File + inputBinding: + position: -3 + prefix: '--tumorBam' + doc: Tumor sample BAM or CRAM file. Only up to one tumor bam file accepted. + secondaryFiles: + - |- + ${ + if (self.nameext === ".bam") { + return self.basename + ".bai" + } + else { + return self.basename + ".crai" + } + } + - id: generateEvidenceBam + type: boolean? + inputBinding: + position: -8 + prefix: '--generateEvidenceBam' + separate: false + doc: Generate a bam of supporting reads for all SVs +outputs: + - id: manta_all_candidates_vcf_gz + type: File + outputBinding: + glob: results/variants/candidateSV.vcf.gz + secondaryFiles: + - .tbi + - id: manta_diploid_variants_vcf_gz + type: File? + outputBinding: + glob: results/variants/diploidSV.vcf.gz + secondaryFiles: + - .tbi + - id: manta_small_candidates_vcf_gz + type: File + outputBinding: + glob: results/variants/candidateSmallIndels.vcf.gz + secondaryFiles: + - .tbi + - id: manta_somatic_variants_vcf_gz + type: File? 
+ outputBinding: + glob: results/variants/somaticSV.vcf.gz + secondaryFiles: + - .tbi + - id: manta_tumor_only_variants_vcf_gz + type: File? + outputBinding: + glob: results/variants/tumorSV.vcf.gz + secondaryFiles: + - .tbi + - id: manta_evidence_bams + type: 'File[]?' + outputBinding: + glob: |- + ${ if(inputs.generateEvidenceBam){ + return 'results/evidence/*.bam' + } + } + secondaryFiles: + - .bai +doc: Setup and execute Manta 1.5.1 +label: manta_1.5.1 +arguments: + - position: -1 + prefix: '--runDir' + valueFrom: $(runtime.outdir) + - position: 0 + shellQuote: false + valueFrom: '&&' + - /usr/bin/python + - runWorkflow.py + - '-m' + - local + - position: 1 + prefix: '-j' + valueFrom: $(runtime.cores) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 24000 + coresMin: 12 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/manta:1.5.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': manta + 'doap:revision': 1.5.1 diff --git a/marianas_collapsing_first_pass_1.8.1/README.md b/marianas_collapsing_first_pass_1.8.1/README.md deleted file mode 100644 index eba9e12c..00000000 --- a/marianas_collapsing_first_pass_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - DuplexUMIBamToCollapsedFastqFirstPass - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | 
https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl index df5a19f0..024f9b53 100644 --- a/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl +++ b/marianas_collapsing_first_pass_1.8.1/marianas_first_pass.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: marianas_collapsing_first_pass_cwl baseCommand: - java @@ -44,19 +45,19 @@ inputs: - .fai - ^.dict outputs: - - id: first_pass_output_file + - id: marianas_first_pass_output_file type: File outputBinding: glob: first-pass.txt - - id: alt_allele_file + - id: marianas_first_pass_alt_allele_file type: File outputBinding: glob: first-pass-alt-alleles.txt - - id: first_pass_insertions + - id: marianas_first_pass_insertions type: File outputBinding: glob: first-pass-insertions.txt - - id: first_pass_output_dir + - id: marianas_first_pass_output_dir type: Directory outputBinding: glob: . 
@@ -123,21 +124,9 @@ arguments: requirements: - class: ResourceRequirement ramMin: 20000 -# ramMin: |- -# ${ -# if (inputs.memory_per_job && inputs.memory_overhead) { -# return inputs.memory_per_job + inputs.memory_overhead -# } else if (inputs.memory_per_job && !inputs.memory_overhead) { -# return inputs.memory_per_job + 2000 -# } else if (!inputs.memory_per_job && inputs.memory_overhead) { -# return 20000 + inputs.memory_overhead -# } else { -# return 20000 -# } -# } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_collapsing_second_pass_1.8.1/README.md b/marianas_collapsing_second_pass_1.8.1/README.md deleted file mode 100644 index b3cdf8d7..00000000 --- a/marianas_collapsing_second_pass_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - DuplexUMIToCollapsedFastqSecondPass - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml -``` \ No newline at end of file diff --git a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl index 7975c3c2..4ec6b3ac 100644 --- a/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl +++ b/marianas_collapsing_second_pass_1.8.1/marianas_second_pass.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 
'https://www.sevenbridges.com/' id: marianas_collapsing_second_pass_cwl baseCommand: - java @@ -48,19 +49,19 @@ inputs: inputBinding: position: 9 outputs: - - id: collapsed_fastq_1 + - id: marianas_second_pass_collapsed_fastq_1 type: File outputBinding: glob: collapsed_R1_.fastq - - id: collapsed_fastq_2 + - id: marianas_second_pass_collapsed_fastq_2 type: File outputBinding: glob: collapsed_R2_.fastq - - id: second_pass_alt_alleles + - id: marianas_second_pass_alt_alleles type: File outputBinding: glob: second-pass-alt-alleles.txt - - id: second_pass_insertions + - id: marianas_second_pass_insertions type: File outputBinding: glob: second-pass-insertions.txt @@ -127,21 +128,9 @@ arguments: requirements: - class: ResourceRequirement ramMin: 20000 -# ramMin: |- -# ${ -# if (inputs.memory_per_job && inputs.memory_overhead) { -# return inputs.memory_per_job + inputs.memory_overhead -# } else if (inputs.memory_per_job && !inputs.memory_overhead) { -# return inputs.memory_per_job + 2000 -# } else if (!inputs.memory_per_job && inputs.memory_overhead) { -# return 20000 + inputs.memory_overhead -# } else { -# return 20000 -# } -# } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_process_loop_umi_1.8.1/README.md b/marianas_process_loop_umi_1.8.1/README.md deleted file mode 100644 index a5b4e900..00000000 --- a/marianas_process_loop_umi_1.8.1/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# CWL and Dockerfile for running Marianas - ProcessLoopUMIFastq - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- 
Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/marianas_process_loop_umi_1.8.1/container/Dockerfile b/marianas_process_loop_umi_1.8.1/container/Dockerfile index 343ec6a6..ef4d2eff 100644 --- a/marianas_process_loop_umi_1.8.1/container/Dockerfile +++ b/marianas_process_loop_umi_1.8.1/container/Dockerfile @@ -20,7 +20,7 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \ org.opencontainers.image.licenses=${LICENSE} \ org.opencontainers.image.version.java=${JAVA_VERSION} \ org.opencontainers.image.version.marianas=${MARIANAS_VERSION} \ - org.opencontainers.image.source.marianas="https://github.com/mskcc/Marianas/releases/" + org.opencontainers.image.source="https://github.com/msk-access/cwl_commandlinetools" LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to use Marianas version ${MARIANAS_VERSION}" @@ -30,5 +30,5 @@ ENV MARIANAS_VERSION 1.8.1 WORKDIR /usr/src/ -RUN wget https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar -RUN cp -s /usr/src/Marianas-1.8.1.jar /usr/local/bin/Marianas-1.8.1.jar +RUN wget https://github.com/mskcc/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar +RUN cp -s /usr/src/Marianas-1.8.1.jar /usr/local/bin/Marianas-1.8.1.jar \ No newline at end of file diff --git a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl index 616ddb01..018aff9a 100644 --- a/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl +++ b/marianas_process_loop_umi_1.8.1/marianas_process_loop_umi.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: marianas_process_loop_umi_cwl baseCommand: - java @@ -30,19 +31,19 @@ inputs: inputBinding: 
position: 0 outputs: - - id: processed_fastq_1 + - id: marianas_process_loop_umi_processed_fastq_1 type: File outputBinding: glob: '$(inputs.fastq1.basename.replace(''.fastq.gz'', ''_umi-clipped.fastq.gz''))' - - id: processed_fastq_2 + - id: marianas_process_loop_umi_processed_fastq_2 type: File outputBinding: glob: '$(inputs.fastq2.basename.replace(''.fastq.gz'', ''_umi-clipped.fastq.gz''))' - - id: clipping_info + - id: marianas_process_loop_umi_clipping_info type: File outputBinding: glob: info.txt - - id: composite_umi_frequencies + - id: marianas_process_loop_umi_composite_umi_frequencies type: File outputBinding: glob: composite-umi-frequencies.txt @@ -110,21 +111,9 @@ arguments: requirements: - class: ResourceRequirement ramMin: 20000 -# ramMin: |- -# ${ -# if (inputs.memory_per_job && inputs.memory_overhead) { -# return inputs.memory_per_job + inputs.memory_overhead -# } else if (inputs.memory_per_job && !inputs.memory_overhead) { -# return inputs.memory_per_job + 2000 -# } else if (!inputs.memory_per_job && inputs.memory_overhead) { -# return 8000 + inputs.memory_overhead -# } else { -# return 10000 -# } -# } coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/marianas_separate_bams_1.8.1/README.md b/marianas_separate_bams_1.8.1/README.md deleted file mode 100644 index 998ce5c6..00000000 --- a/marianas_separate_bams_1.8.1/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# CWL and Dockerfile for running Marianas - SeparateBams - -## Version of tools in docker image (../marianas_process_loop_umi_1.8.1/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| Marianas | 1.8.1 | https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the 
cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner marianas_seprate_bams_1.8.1.cwl example_inputs.yaml -``` - -### Usage - -```bash -usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl - [-h] --input_bam INPUT_BAM [job_order] - -positional arguments: - job_order Job input json file - -optional arguments: - -h, --help show this help message and exit - --input_bam INPUT_BAM -``` diff --git a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl index 64dd863f..4488cf91 100644 --- a/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl +++ b/marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: marianas_separate_bams_1.8.1 + sbg: 'https://www.sevenbridges.com/' +id: marianas_separate_bams_1_8_1 baseCommand: - java inputs: @@ -13,13 +14,13 @@ inputs: inputBinding: position: 1 outputs: - - id: duplex-bam + - id: marianas_separate_bams_duplex_bam type: File outputBinding: glob: '*duplex.bam' secondaryFiles: - ^.bai - - id: simplex-bam + - id: marianas_separate_bams_simplex_bam type: File outputBinding: glob: '*simplex.bam' @@ -91,7 +92,7 @@ requirements: ramMin: 30000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/marianas:0.1.0' + dockerPull: 'ghcr.io/msk-access/marianas:1.8.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/merge_fastq_0.1.7/container/Dockerfile b/merge_fastq_0.1.7/container/Dockerfile new file mode 100644 index 00000000..86cb1d51 --- /dev/null +++ b/merge_fastq_0.1.7/container/Dockerfile @@ -0,0 +1,39 @@ +################## BASE IMAGE ###################### + +FROM continuumio/miniconda3:4.7.12 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG 
MINICONDA3_VERSION=4.7.12 +ARG LICENSE="Apache-2.0" +ARG MERGE_FASTQ_VERSION=0.1.7 +ARG VCS_REF +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.miniconda3=${MINICONDA3_VERSION} \ + org.opencontainers.image.version.merge_fastq=${MERGE_FASTQ_VERSION} \ + org.opencontainers.image.source.merge_fastq="https://pypi.org/project/merge-fastq/" \ + org.opencontainers.image.vcs-url="https://github.com/msk-access/cwl-commandlinetools.git" \ + org.opencontainers.image.vcs-ref=${VCS_REF} + +LABEL org.opencontainers.image.description="This container uses miniconda ${MINICONDA3_VERSION} as the base image to build \ + merge fastq version ${MERGE_FASTQ_VERSION}" + +################## INSTALL ########################## + +#Ubuntu +RUN conda create --name merge_fastq python=3.7 && \ + echo "source activate merge_fastq" > ~/.bashrc + +#Set the path of environment as default +ENV PATH /opt/conda/envs/merge_fastq/bin:$PATH + +#MERGE_FASTQ +RUN pip install --upgrade merge-fastq==${MERGE_FASTQ_VERSION} diff --git a/merge_fastq_0.1.7/example_inputs.yaml b/merge_fastq_0.1.7/example_inputs.yaml new file mode 100644 index 00000000..31e86cce --- /dev/null +++ b/merge_fastq_0.1.7/example_inputs.yaml @@ -0,0 +1,20 @@ +fastq1: + - class: File + path: >- + /path/to/R1_001.fastq.gz + - class: File + path: >- + /path/to/R1_002.fastq.gz +fastq2: + - class: File + path: >- + /path/to/R2_001.fastq.gz + - class: File + path: >- + /path/to/R2_002.fastq.gz +memory_overhead: null +memory_per_job: null +number_of_threads: null +out_fastq1_name: null +out_fastq2_name: null +output_path: null diff --git a/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl new 
file mode 100644 index 00000000..cad51a26 --- /dev/null +++ b/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl @@ -0,0 +1,113 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: merge_fastq_0_1_7 +baseCommand: + - merge_fastq +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: fastq1 + type: + type: array + items: File + inputBinding: + prefix: '--fastq1' + inputBinding: + position: 0 + doc: >- + Full path to gzipped READ1 fastq files, can be specified multiple times for + example: --fastq1 test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz + [required] + - id: fastq2 + type: + type: array + items: File + inputBinding: + prefix: '--fastq2' + inputBinding: + position: 1 + doc: >- + Full path to gzipped READ2 fastq files, can be specified multiple times for + example: --fastq2 test_part1_R2.fastq.gz --fastq2 test_part2_R2.fastq.gz + [required] + - id: output_path + type: string? + inputBinding: + position: 2 + prefix: '--output-path' + doc: 'Full path to write the output files (default: Current working directory)' + - id: out_fastq1_name + type: string? + inputBinding: + position: 2 + prefix: '--out-fastq1' + doc: >- + Name of the merged output READ1 fastq file(default: + merged_fastq_R1.fastq.gz) + - id: out_fastq2_name + type: string? 
+ inputBinding: + position: 2 + prefix: '--out-fastq2' + doc: >- + Name of the merged output READ2 fastq file(default: + merged_fastq_R2.fastq.gz) +outputs: + - id: merge_fastq_1 + type: File + outputBinding: + glob: |- + ${ + if(inputs.out_fastq1_name){ + return inputs.out_fastq1_name + } else { + return 'merged_fastq_R1.fastq.gz' + } + } + - id: merge_fastq_2 + type: File + outputBinding: + glob: |- + ${ + if(inputs.out_fastq2_name){ + return inputs.out_fastq2_name + } else { + return 'merged_fastq_R2.fastq.gz' + } + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/merge_fastq:0.1.7' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': merge_fastq + 'doap:revision': 0.1.7 diff --git a/mosdepth_0.3.3/example_inputs.yml b/mosdepth_0.3.3/example_inputs.yml new file mode 100644 index 00000000..540ecf3c --- /dev/null +++ b/mosdepth_0.3.3/example_inputs.yml @@ -0,0 +1,10 @@ +bed: + class: File + path: "/path/to/bed" +input_bam: +- class: File + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam/index" +prefix: "sample_prefix" \ No newline at end of file diff --git a/mosdepth_0.3.3/mosdepth_0.3.3.cwl b/mosdepth_0.3.3/mosdepth_0.3.3.cwl new file mode 100644 index 00000000..7d46c539 --- /dev/null +++ b/mosdepth_0.3.3/mosdepth_0.3.3.cwl @@ -0,0 +1,137 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 
'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: mosdepth_0_3_3 +baseCommand: + - mosdepth +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: bed + type: File? + inputBinding: + position: 0 + prefix: '-b' + shellQuote: false + doc: optional BED file or (integer) window-sizes. + - id: chrom + type: File? + inputBinding: + position: 0 + prefix: '-c' + shellQuote: false + doc: chromosome to restrict depth calculation. + - id: input_bam + type: + - File + - type: array + items: File + doc: Required list of input bam file (s) separated by comma + secondaryFiles: + - ^.bai + - id: prefix + type: string? + doc: Prefix for the output files + - id: flag + type: int? + inputBinding: + position: 0 + prefix: '-F' + doc: exclude reads with any of the bits in FLAG set + - id: mapq + type: int? + inputBinding: + position: 0 + prefix: '-Q' + doc: mapping quality threshold. reads with a mapping quality less than this are ignored +outputs: + - id: per_base_bed + type: File + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.per-base.bed.gz' + } else { + return 'per-base.bed.gz' + } + } + - id: per_region_bed + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.regions.bed.gz' + } else { + return 'regions.bed.gz' + } + } + - id: global_distribution + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.mosdepth.global.dist.txt' + } else { + return 'mosdepth.global.dist.txt' + } + } + - id: region_distribution + type: File? + outputBinding: + glob: |- + ${ + if (inputs.prefix) { + return inputs.prefix + '.mosdepth.region.dist.txt' + } else { + return 'mosdepth.region.dist.txt' + } + } +doc: 'fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing.' 
+label: mosdepth_0.3.3 +arguments: + - position: 0 + prefix: '-t' + valueFrom: $(runtime.cores) + - position: 99 + valueFrom: $(inputs.prefix) + - position: 100 + valueFrom: $(inputs.input_bam) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/mosdepth:0.3.3' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charalk@mskcc.org' + 'foaf:name': Carmelina + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': mosdepth + 'doap:revision': 0.3.3 diff --git a/msisensor_0.2/msisensor-0.2.cwl b/msisensor_0.2/msisensor-0.2.cwl index 14757a55..d7edc89e 100644 --- a/msisensor_0.2/msisensor-0.2.cwl +++ b/msisensor_0.2/msisensor-0.2.cwl @@ -164,7 +164,7 @@ inputs: prefix: -y outputs: - output: + msisensor_output: type: File outputBinding: glob: |- diff --git a/msisensor_0.6/msisensor-0.6.cwl b/msisensor_0.6/msisensor-0.6.cwl index d0436482..e2f26ac6 100644 --- a/msisensor_0.6/msisensor-0.6.cwl +++ b/msisensor_0.6/msisensor-0.6.cwl @@ -164,7 +164,7 @@ inputs: prefix: -y outputs: - output: + msisensor_output: type: File outputBinding: glob: |- diff --git a/multiqc/1.10.1.7/README.md b/multiqc/1.10.1.7/README.md new file mode 100644 index 00000000..18ffe64f --- /dev/null +++ b/multiqc/1.10.1.7/README.md @@ -0,0 +1,56 @@ +# CWL and Dockerfile for running multiqc + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| multiqc | 1.12 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +- 
Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner multiqc.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/multiqc_1.10.1.7/multiqc.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.10.1/multiqc_1.10.1.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +> toil-cwl-runner multiqc.cwl --helpusage: multiqc_1.10.1.cwl [-h] + [--qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY] + [--qc_files_dir QC_FILES_DIR] + [--qc_list_of_dirs QC_LIST_OF_DIRS] + [--report_name REPORT_NAME] [--config CONFIG] + [job_order] + +Run multiqc on log files from supported bioinformatic tools. 
+ +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY + qc files which shall be part of the multiqc summary; + optional, only one of qc_files_array or + qc_files_array_of_array must be provided + --qc_files_dir QC_FILES_DIR + qc files in a Directory + --qc_list_of_dirs QC_LIST_OF_DIRS + qc files in multiple directories + --report_name REPORT_NAME + name used for the html report + --config CONFIG +``` diff --git a/multiqc/1.10.1.7/example_inputs.json b/multiqc/1.10.1.7/example_inputs.json new file mode 100644 index 00000000..87e4b59e --- /dev/null +++ b/multiqc/1.10.1.7/example_inputs.json @@ -0,0 +1,20 @@ +{ + "config": { + "class": "File", + "path": "/path_to/config.yaml" + }, + "qc_files_array": null, + "qc_files_array_of_array": [], + "qc_files_dir": null, + "qc_list_of_dirs": [ + { + "class": "Directory", + "path": "/test/picard/collapsed_bam_duplex_metrics_pool_a" + }, + { + "class": "Directory", + "path": "/test/picard/collapsed_bam_duplex_metrics_pool_b" + } + ], + "report_name": null +} diff --git a/multiqc/1.10.1.7/multiqc.cwl b/multiqc/1.10.1.7/multiqc.cwl new file mode 100644 index 00000000..1f308ad2 --- /dev/null +++ b/multiqc/1.10.1.7/multiqc.cwl @@ -0,0 +1,70 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - multiqc +arguments: + - valueFrom: . + position: 999 +inputs: + - id: qc_files_dir + type: Directory? + doc: | + qc files in a Directory + - default: multiqc_1.10.1.7 + id: report_name + type: string + inputBinding: + position: 0 + prefix: '--filename' + doc: name used for the html report and the corresponding zip file + - id: config + type: File? + inputBinding: + position: 0 + prefix: '--config' +outputs: + - id: multiqc_output_dir + type: Directory + outputBinding: + glob: . 
+ outputEval: |- + ${ + self[0].basename = inputs.report_name.replace('.html', ''); + return self[0] + } + - id: multiqc_html + type: File + outputBinding: + glob: |- + ${ + if (inputs.report_name) { + return inputs.report_name + ".html" + } else { + return "multiqc_1.10.1.7.html" + } + } + - id: multiqc_zip + type: File? + outputBinding: + glob: $(inputs.report_name)_data.zip +doc: | + Run multiqc on log files from supported bioinformatic tools. +hints: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/multiqc:v1.10.1.7' +requirements: + - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.qc_files_dir) + writable: true + - class: InlineJavascriptRequirement diff --git a/multiqc/1.12/README.md b/multiqc/1.12/README.md new file mode 100644 index 00000000..411afef8 --- /dev/null +++ b/multiqc/1.12/README.md @@ -0,0 +1,48 @@ +# CWL and Dockerfile for running multiqc + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| multiqc | 1.12 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.json to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner multiqc_1.12.cwl example_inputs.json +``` + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/multiqc_1.12/multiqc_1.12.cwl /path/to/example_inputs.json + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir .
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.12/multiqc_1.12.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +usage: multiqc_1.12.cwl [-h] [--qc_files_dir QC_FILES_DIR] + [--report_name REPORT_NAME] [--config CONFIG] + [job_order] + +Run multiqc on log files from supported bioinformatic tools. + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --qc_files_dir QC_FILES_DIR + qc files in a Directory + --report_name REPORT_NAME + name used for the html report and the corresponding + zip file + --config CONFIG +``` \ No newline at end of file diff --git a/multiqc/1.12/example_inputs.json b/multiqc/1.12/example_inputs.json new file mode 100644 index 00000000..a502f1f1 --- /dev/null +++ b/multiqc/1.12/example_inputs.json @@ -0,0 +1,16 @@ +{ + "config": { + "class": "File", + "path": "/path_to/config.yaml" + }, + "qc_files_array": null, + "qc_files_array_of_array": [], + "qc_files_dir": null, + "qc_list_of_dirs": [ + { + "class": "Directory", + "path": "/test/picard/collapsed_bam_duplex_metrics" + } + ], + "report_name": null +} diff --git a/multiqc/1.12/multiqc_1.12.cwl b/multiqc/1.12/multiqc_1.12.cwl new file mode 100644 index 00000000..f0e60341 --- /dev/null +++ b/multiqc/1.12/multiqc_1.12.cwl @@ -0,0 +1,87 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - multiqc +arguments: + - valueFrom: . + position: 999 +inputs: + - id: qc_files_dir + type: Directory? 
+ doc: | + qc files in a Directory + - default: multiqc_1.12 + id: report_name + type: string + inputBinding: + position: 0 + prefix: '--filename' + doc: name used for the html report and the corresponding zip file + - id: config + type: File? + inputBinding: + position: 0 + prefix: '--config' +outputs: + - id: multiqc_output_dir + type: Directory + outputBinding: + glob: . + outputEval: |- + ${ + self[0].basename = inputs.report_name.replace('.html', ''); + return self[0] + } + - id: multiqc_html + type: File + outputBinding: + glob: |- + ${ + if (inputs.report_name) { + return inputs.report_name + ".html" + } else { + return "multiqc_1.12.html" + } + } + - id: multiqc_zip + type: File? + outputBinding: + glob: $(inputs.report_name)_data.zip +doc: | + Run multiqc on log files from supported bioinformatic tools. +hints: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/multiqc:v1.12' +requirements: + - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 + - class: InitialWorkDirRequirement + listing: + - entry: $(inputs.qc_files_dir) + writable: true + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charalk@mskcc.org' + 'foaf:name': Carmelina Charalambous + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:charalk@mskcc.org' + 'foaf:name': Carmelina Charalambous + 'foaf:name': Memorial Sloan Kettering Cancer Center diff --git a/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl b/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl index a1838139..abcd85ac 100644 --- a/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl +++ b/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl @@ -42,7 +42,7 @@ inputs: position: 4 prefix: '--normal-sample' outputs: - - id: output + - id: 
mutect2_vcf_gz type: File? outputBinding: glob: >- diff --git a/mutect_1.1.5/container/Dockerfile b/mutect_1.1.5/container/Dockerfile new file mode 100644 index 00000000..6a87b817 --- /dev/null +++ b/mutect_1.1.5/container/Dockerfile @@ -0,0 +1,31 @@ +################## BASE IMAGE ###################### +FROM openjdk:7 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION=0.1.0 +ARG MUTECT_VERSION=1.1.5 + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Yu Hu" + +LABEL org.opencontainers.image.created=${BUILD_DATE} +LABEL org.opencontainers.image.version=${BUILD_VERSION} +LABEL org.opencontainers.image.version.mutect=${MUTECT_VERSION} +LABEL org.opencontainers.image.source="https://github.com/broadinstitute/mutect/releases/download/${MUTECT_VERSION}/muTect-${MUTECT_VERSION}-bin.zip" + +LABEL org.opencontainers.image.description="This container uses java7 as the base image to build mutect version ${MUTECT_VERSION}" + +################## INSTALL ########################## + +ENV MUTECT_VERSION=${MUTECT_VERSION} + +#Get Mutect +RUN wget https://github.com/broadinstitute/mutect/releases/download/${MUTECT_VERSION}/muTect-${MUTECT_VERSION}-bin.zip +RUN unzip muTect-${MUTECT_VERSION}-bin.zip +RUN rm muTect-${MUTECT_VERSION}-bin.zip +RUN chmod a+x muTect-${MUTECT_VERSION}.jar +RUN mv muTect-${MUTECT_VERSION}.jar /usr/local/bin/muTect.jar + diff --git a/mutect_1.1.5/example_inputs.yaml b/mutect_1.1.5/example_inputs.yaml new file mode 100644 index 00000000..cd5ea18b --- /dev/null +++ b/mutect_1.1.5/example_inputs.yaml @@ -0,0 +1,32 @@ +input_file_normal: + class: File + path: /path/to/normal/bam/file +input_file_tumor: + class: File + path: /path/to/tumor/bam/file +normal_sample_name: normalsamplename +tumor_sample_name: tumorsamplename + +read_filter: BadCigar +downsample_to_coverage: 50000 +fraction_contamination: 0.0005 
+minimum_mutation_cell_fraction: 0.0005 + +cosmic: + class: File + path: /path/to/cosmic/coding/mutations/file +dbsnp: + class: File + path: /path/to/dbsnp/vcf/file +intervals: + class: File + path: /path/to/canonicaltargets/bed/file +reference_sequence: + class: File + path: /path/to/genomic/reference/file + +out: output.mutect.txt +vcf: output.mutect.vcf + +tmp_dir: /path/to/tmp/dir + diff --git a/mutect_1.1.5/mutect_1.1.5.cwl b/mutect_1.1.5/mutect_1.1.5.cwl new file mode 100644 index 00000000..456c328a --- /dev/null +++ b/mutect_1.1.5/mutect_1.1.5.cwl @@ -0,0 +1,739 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: mutect_v1_1_5 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: BQSR + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--BQSR' + doc: >- + The input covariates table file which enables on-the-fly base quality + score recalibration + - id: absolute_copy_number_data + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--absolute_copy_number_data' + doc: >- + Absolute Copy Number Data, as defined by Absolute, to use in power + calculations + - id: arg_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--arg_file' + doc: Reads arguments from the specified file + - id: bam_tumor_sample_name + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--bam_tumor_sample_name' + doc: >- + if the tumor bam contains multiple samples, only use read groups with SM + equal to this value + - id: baq + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--baq' + doc: >- + Type of BAQ calculation to apply in the engine + (OFF|CALCULATE_AS_NECESSARY| RECALCULATE) + - id: baqGapOpenPenalty + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--baqGapOpenPenalty' + doc: >- + BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps + better for whole genome call sets + - id: clipping_bias_pvalue_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--clipping_bias_pvalue_threshold' + doc: >- + pvalue threshold for fishers exact test of clipping bias in mutant reads + vs ref reads + - id: cosmic + type: File? + inputBinding: + position: 0 + prefix: '--cosmic' + doc: VCF file of COSMIC sites + secondaryFiles: + - ^.vcf.idx + - id: coverage_20_q20_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--coverage_20_q20_file' + doc: write out 20x of Q20 coverage in WIGGLE format to this file + - id: coverage_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--coverage_file' + doc: write out coverage in WIGGLE format to this file + - id: dbsnp + type: File? + inputBinding: + position: 0 + prefix: '--dbsnp' + doc: VCF file of DBSNP information + secondaryFiles: + - ^.vcf.idx + - id: dbsnp_normal_lod + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--dbsnp_normal_lod' + doc: LOD threshold for calling normal non-variant at dbsnp sites + - id: defaultBaseQualities + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--defaultBaseQualities' + doc: >- + If reads are missing some or all base quality scores, this value will be + used for all base quality scores + - default: false + id: disableRandomization + type: boolean? + inputBinding: + position: 0 + prefix: '--disableRandomization' + doc: >- + Completely eliminates randomization from nondeterministic methods. To be + used mostly in the testing framework where dynamic parallelism can result + in differing numbers of calls to the generator. 
+ - default: false + id: disable_indel_quals + type: boolean? + inputBinding: + position: 0 + prefix: '--disable_indel_quals' + doc: >- + If true, disables printing of base insertion and base deletion tags (with + -BQSR) + - id: downsample_to_coverage + type: int? + inputBinding: + position: 0 + prefix: '--downsample_to_coverage' + doc: Target coverage threshold for downsampling to coverage + - default: NONE + id: downsampling_type + type: string? + inputBinding: + position: 0 + prefix: '--downsampling_type' + doc: >- + Type of reads downsampling to employ at a given locus. Reads will be + selected randomly to be removed from the pile based on the method + described here (NONE|ALL_READS| BY_SAMPLE) given locus; note that + downsampled reads are randomly selected from all possible reads at a locus + - default: false + id: emit_original_quals + type: boolean? + inputBinding: + position: 0 + prefix: '--emit_original_quals' + doc: >- + If true, enables printing of the OQ tag with the original base qualities + (with -BQSR) + - default: true + id: enable_extended_output + type: boolean + inputBinding: + position: 0 + prefix: '--enable_extended_output' + - id: excludeIntervals + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--excludeIntervals' + doc: >- + One or more genomic intervals to exclude from processing. Can be + explicitly specified on the command line or in a file (including a rod + file) + - default: false + id: filter_mismatching_base_and_quals + type: boolean? + inputBinding: + position: 0 + prefix: '--filter_mismatching_base_and_quals' + doc: >- + if a read has mismatching number of bases and base qualities, filter out + the read instead of blowing up. + - default: false + id: force_alleles + type: boolean? + inputBinding: + position: 0 + prefix: '--force_alleles' + doc: force output for all alleles at each site + - default: false + id: force_output + type: boolean? 
+ inputBinding: + position: 0 + prefix: '--force_output' + doc: force output for each site + - id: fraction_contamination + type: float? + inputBinding: + position: 0 + prefix: '--fraction_contamination' + doc: >- + estimate of fraction (0-1) of physical contamination with other unrelated + samples + - id: fraction_mapq0_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--fraction_mapq0_threshold' + doc: >- + threshold for determining if there is relatedness between the alt and ref + allele read piles + - id: gap_events_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--gap_events_threshold' + doc: >- + how many gapped events (ins/del) are allowed in proximity to this + candidate + - id: gatk_key + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--gatk_key' + doc: >- + GATK Key file. Required if running with -et NO_ET. Please see + -phone-home-and-how-does-it-affect-me#latest for details. + - id: heavily_clipped_read_fraction + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--heavily_clipped_read_fraction' + doc: >- + if this fraction or more of the bases in a read are soft/hard clipped, do + not use this read for mutation calling + - id: initial_tumor_lod + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--initial_tumor_lod' + doc: Initial LOD threshold for calling tumor variant + - id: input_file_normal + type: File? + inputBinding: + position: 0 + prefix: '--input_file:normal' + doc: SAM or BAM file(s) + secondaryFiles: + - ^.bai + - id: input_file_tumor + type: File? + inputBinding: + position: 0 + prefix: '--input_file:tumor' + doc: SAM or BAM file(s) + secondaryFiles: + - ^.bai + - id: interval_merging + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--interval_merging' + doc: >- + Indicates the interval merging rule we should use for abutting intervals + (ALL| OVERLAPPING_ONLY) + - id: interval_padding + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--interval_padding' + doc: >- + Indicates how many basepairs of padding to include around each of the + intervals specified with the -L/ + - id: interval_set_rule + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--interval_set_rule' + doc: >- + Indicates the set merging approach the interval parser should use to + combine the various -L or -XL inputs (UNION| INTERSECTION) + - id: intervals + type: + - string + - File + - 'null' + inputBinding: + position: 0 + prefix: '--intervals' + doc: >- + One or more genomic intervals over which to operate. Can be explicitly + specified on the command line or in a file (including a rod file) + - default: false + id: keep_program_records + type: boolean? + inputBinding: + position: 0 + prefix: '--keep_program_records' + doc: >- + Should we override the Walkers default and keep program records from the + SAM header + - id: log_to_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--log_to_file' + doc: Set the logging location + - id: logging_level + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--logging_level' + doc: >- + Set the minimum level of logging, i.e. setting INFO gets you INFO up to + FATAL, setting ERROR gets you ERROR and FATAL level logging. + - id: maxRuntime + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--maxRuntime' + doc: >- + If provided, that GATK will stop execution cleanly as soon after + maxRuntime has been exceeded, truncating the run but not exiting with a + failure. By default the value is interpreted in minutes, but this can be + changed by maxRuntimeUnits + - id: maxRuntimeUnits + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--maxRuntimeUnits' + doc: >- + The TimeUnit for maxRuntime (NANOSECONDS| + MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS) + - id: max_alt_allele_in_normal_fraction + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--max_alt_allele_in_normal_fraction' + doc: threshold for maximum alternate allele fraction in normal + - id: max_alt_alleles_in_normal_count + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--max_alt_alleles_in_normal_count' + doc: threshold for maximum alternate allele counts in normal + - id: max_alt_alleles_in_normal_qscore_sum + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--max_alt_alleles_in_normal_qscore_sum' + doc: threshold for maximum alternate allele quality score sum in normal + - id: min_qscore + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--min_qscore' + doc: threshold for minimum base quality score + - id: minimum_mutation_cell_fraction + type: float? + inputBinding: + position: 0 + prefix: '--minimum_mutation_cell_fraction' + doc: >- + minimum fraction of cells which are presumed to have a mutation, used to + handle non-clonality and contamination + - id: minimum_normal_allele_fraction + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--minimum_normal_allele_fraction' + doc: >- + minimum allele fraction to be considered in normal, useful for normal + sample contaminated with tumor + - default: false + id: monitorThreadEfficiency + type: boolean? + inputBinding: + position: 0 + prefix: '--monitorThreadEfficiency' + doc: Enable GATK threading efficiency monitoring + - default: false + id: nonDeterministicRandomSeed + type: boolean? + inputBinding: + position: 0 + prefix: '--nonDeterministicRandomSeed' + doc: >- + Makes the GATK behave non deterministically, that is, the random numbers + generated will be different in every run + - default: false + id: noop + type: boolean? + inputBinding: + position: 0 + prefix: '--noop' + doc: 'used for debugging, basically exit as soon as we get the reads' + - id: normal_depth_file + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--normal_depth_file' + doc: write out normal read depth in WIGGLE format to this file + - id: normal_lod + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--normal_lod' + doc: LOD threshold for calling normal non-germline + - id: normal_sample_name + type: string + inputBinding: + position: 0 + prefix: '--normal_sample_name' + doc: name to use for normal in output files + - id: num_bam_file_handles + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--num_bam_file_handles' + doc: The total number of BAM file handles to keep open simultaneously + - id: num_cpu_threads_per_data_thread + type: string? + inputBinding: + position: 0 + prefix: '--num_cpu_threads_per_data_thread' + doc: >- + How many CPU threads should be allocated per data thread to running this + analysis? + - id: num_threads + type: string? + inputBinding: + position: 0 + prefix: '--num_threads' + doc: How many data threads should be allocated to running this analysis. + - default: false + id: only_passing_calls + type: boolean? + inputBinding: + position: 0 + prefix: '--only_passing_calls' + doc: only emit passing calls + - default: mutect_variants.txt + id: out + type: string? + inputBinding: + position: 0 + prefix: '--out' + doc: Call-stats output + - id: pedigree + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pedigree' + doc: Pedigree files for samples + - id: pedigreeString + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pedigreeString' + doc: Pedigree string for samples + - id: pedigreeValidationType + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pedigreeValidationType' + doc: >- + How strict should we be in validating the pedigree information? + (STRICT|SILENT) + - id: performanceLog + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--performanceLog' + doc: 'If provided, a GATK runtime performance log will be written to this file' + - id: phone_home + type: 'string[]?' 
+ inputBinding: + position: 0 + prefix: '--phone_home' + doc: >- + What kind of GATK run report should we generate? STANDARD is the default, + can be NO_ET so nothing is posted to the run repository. Please see + -phone-home-and-how-does-it-affect-me#latest for details. + (NO_ET|STANDARD|STDOUT) + - id: pir_mad_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pir_mad_threshold' + doc: threshold for clustered read position artifact MAD + - id: pir_median_threshold + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--pir_median_threshold' + doc: threshold for clustered read position artifact median + - id: power_constant_af + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--power_constant_af' + doc: Allelic fraction constant to use in power calculations + - id: power_constant_qscore + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--power_constant_qscore' + doc: Phred scale quality score constant to use in power calculations + - id: power_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--power_file' + doc: write out power in WIGGLE format to this file + - id: preserve_qscores_less_than + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--preserve_qscores_less_than' + doc: >- + Bases with quality scores less than this threshold wont be recalibrated + (with -BQSR) + - id: read_buffer_size + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--read_buffer_size' + doc: Number of reads per SAM file to buffer in memory + - id: read_filter + type: string? + inputBinding: + position: 0 + prefix: '--read_filter' + doc: Specify filtration criteria to apply to each read individually + - id: read_group_black_list + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--read_group_black_list' + doc: >- + Filters out read groups matching - or a .txt file containing + the filter strings one per line. 
+ - id: reference_sequence + type: File + inputBinding: + position: 0 + prefix: '--reference_sequence' + secondaryFiles: + - .fai + - ^.dict + - default: false + id: remove_program_records + type: boolean? + inputBinding: + position: 0 + prefix: '--remove_program_records' + doc: >- + Should we override the Walkers default and remove program records from the + SAM header + - default: false + id: required_maximum_alt_allele_mapping_quality_score + type: boolean? + inputBinding: + position: 0 + prefix: '--required_maximum_alt_allele_mapping_quality_score' + doc: >- + required minimum value for + tumor alt allele + maximum mapping quality score + - default: false + id: somatic_classification_normal_power_threshold + type: boolean? + inputBinding: + position: 0 + prefix: '--somatic_classification_normal_power_threshold' + doc: >- + Power threshold for normal to + determine germline vs + variant + - id: tag + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--tag' + doc: >- + Arbitrary tag string to identify this GATK run as part of a group of runs, + for later analysis + - id: tumor_depth_file + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--tumor_depth_file' + doc: write out tumor read depth in WIGGLE format to this file + - id: tumor_f_pretest + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--tumor_f_pretest' + doc: >- + for computational efficiency, reject sites with allelic fraction below + this threshold + - id: tumor_lod + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--tumor_lod' + doc: LOD threshold for calling tumor variant + - id: tumor_sample_name + type: string + inputBinding: + position: 0 + prefix: '--tumor_sample_name' + doc: name to use for tumor in output files + - id: unsafe + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--unsafe' + doc: >- + If set, enables unsafe operations - nothing will be checked at runtime. + For expert users only who know what they are doing. 
We do not support + usage of this argument. (ALLOW_UNINDEXED_BAM| ALLOW_UNSET_BAM_SORT_ORDER| + NO_READ_ORDER_VERIFICATION| ALLOW_SEQ_DICT_INCOMPATIBILITY| + LENIENT_VCF_PROCESSING|ALL) + - default: false + id: useOriginalQualities + type: boolean? + inputBinding: + position: 0 + prefix: '--useOriginalQualities' + doc: >- + If set, use the original base quality scores from the OQ tag when present + instead of the standard scores + - id: validation_strictness + type: 'string[]?' + inputBinding: + position: 0 + prefix: '--validation_strictness' + doc: How strict should we be with validation (STRICT|LENIENT|SILENT) + - default: mutect_variants.vcf + id: vcf + type: string? + inputBinding: + position: 0 + prefix: '--vcf' + doc: VCF output of mutation candidates +outputs: + - id: mutect_output_txt + type: File? + outputBinding: + glob: $(inputs.out) + - id: mutext_output_vcf + type: File? + outputBinding: + glob: $(inputs.vcf) +arguments: + - |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xms" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xms" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xms" + 
(inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xms" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx1G" + } + else { + return "-Xmx1G" + } + } + - '-XX:-UseGCOverheadLimit' + - '-jar' + - /usr/local/bin/muTect.jar + - '--analysis_type' + - MuTect +requirements: + - class: ResourceRequirement + ramMin: 34000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/mutect:1.1.5' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:huy1@mskcc.org' + 'foaf:name': Yu Hu + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': MuTect + 'doap:revision': 1.1.5 diff --git a/octopus/0.7.4/example_input.yaml b/octopus/0.7.4/example_input.yaml new file mode 100644 index 00000000..70b44e10 --- /dev/null +++ b/octopus/0.7.4/example_input.yaml @@ -0,0 +1,21 @@ +error_models: null +input: + - class: File + path: >- + /juno/work/access/production/data/bams/C-0A8NCE/C-0A8NCE-L001-d/current/C-0A8NCE-L001-d_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex.bam + - class: File + path: >- + /juno/work/access/production/data/bams/C-0A8NCE/C-0A8NCE-N001-d/current/C-0A8NCE-N001-d_cl_aln_srt_MD_IR_FX_BR__aln_srt_IR_FX-duplex.bam +normalId: C-0A8NCE-N001-d +output_file_name: oct.vcf +reference: + class: File + metadata: {} + path: >- + /juno/work/access/production/resources/reference/current/Homo_sapiens_assembly19.fasta +skipRegions_file: null +skipRegions_singleEntry: null +somaticOnlyCalls: null +targettedCalling_file: null +targettedCalling_singleEntry: null +tumorOnlySample: null \ No newline at 
end of file diff --git a/octopus/0.7.4/octopus_0-7-4.cwl b/octopus/0.7.4/octopus_0-7-4.cwl new file mode 100644 index 00000000..0c7d74c1 --- /dev/null +++ b/octopus/0.7.4/octopus_0-7-4.cwl @@ -0,0 +1,123 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + sbg: 'https://www.sevenbridges.com/' + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' +id: octopus_0_7_4 +baseCommand: + - octopus +inputs: + - id: input + type: 'File[]' + inputBinding: + position: 0 + prefix: '-I' + secondaryFiles: ^.bai + doc: Tumor and normal bam files with .bai + - id: normalId + type: string? + inputBinding: + position: 0 + prefix: '-N' + doc: add the name of the normal sample + - id: tumorOnlySample + type: boolean? + inputBinding: + position: 0 + prefix: '-C' + doc: mention this parameter if it is tumor only sample. + - id: somaticOnlyCalls + type: boolean? + inputBinding: + position: 0 + prefix: '--somatics-only' + doc: if somatics only call is required. Use this with -f ON parameter + - id: targettedCalling_singleEntry + type: string? + inputBinding: + position: 0 + prefix: '-T' + doc: >- + list of regions to call variants from. + + eg 1. chr1: all of chr1. + + 2. chr2:10,000,000: the single position 10000000 in chr2. + + 3. chr3:5,000,000-: everything from chr3:5,000,000 onwards. + + 4. chr4:100,000,000-200,000,000: everything between chr4:100,000,000 and + chr4:200,000,000. The interval is half open so position chr4:200,000,000 + is not included. + - id: skipRegions_singleEntry + type: string? + inputBinding: + position: 0 + prefix: '-K' + doc: to skip a set of regions + - id: targettedCalling_file + type: File? + inputBinding: + position: 0 + prefix: '-t' + doc: regions in a text or bed file + - id: skipRegions_file + type: File? + inputBinding: + position: 0 + prefix: '-k' + doc: regions in text or bed file format + - id: error_models + type: string?
+ inputBinding: + position: 0 + prefix: '--sequence-error-model' + doc: >- + error model will be in the format - [library preparation]<.sequencer> + eg: PCR.NOVASEQ + - id: reference + type: File + inputBinding: + position: 0 + prefix: '-R' + secondaryFiles: + - .fai + - id: output_file_name + type: string + inputBinding: + position: 0 + prefix: '-o' +outputs: + - id: outputVCF + type: File + outputBinding: + glob: '${ if (inputs.output) return inputs.output; return null; }' +label: octopus +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/octopus:0.7.4' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': octopus + 'doap:revision': 0.7.4 + diff --git a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl index 091ebc34..576e07ae 100644 --- a/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl +++ b/picard_add_or_replace_read_groups_1.96/picard_add_or_replace_read_groups_1.96.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: picard_add_or_replace_read_groups_1_96 baseCommand: - java @@ -45,7 +46,7 @@ inputs: doc: >- Read Group ID Default value: 1. 
This option can be set to 'null' to clear the default value Required - - id: read_group_sequnecing_center + - id: read_group_sequencing_center type: string inputBinding: position: 0 @@ -53,7 +54,7 @@ inputs: separate: false doc: 'Read Group sequencing center name Default value: null. Required' - id: read_group_library - type: int + type: string inputBinding: position: 0 prefix: RGLB= @@ -94,13 +95,6 @@ inputs: prefix: RGDT= separate: false doc: 'Read Group run date Default value: null.' - - id: tmp_dir - type: string? - inputBinding: - position: 0 - prefix: TMP_DIR= - separate: false - doc: This option may be specified 0 or more times - id: validation_stringency type: string? inputBinding: @@ -133,8 +127,11 @@ inputs: Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - - id: bam + - id: picard_add_or_replace_read_groups_bam type: File outputBinding: glob: |- @@ -173,9 +170,30 @@ arguments: return "-Xmx15G" } } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/AddOrReplaceReadGroups.jar + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: O= separate: false @@ -186,11 +204,12 @@ arguments: return inputs.input.basename.replace(/.sam$/, '_srt.bam'); } requirements: + - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 16000 + ramMin: 25000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskcc/picard_1.96:0.1.0' + dockerPull: 'mskaccess/picard_1.96:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml b/picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..9c25bd7d --- /dev/null +++ b/picard_add_or_replace_read_groups_2.21.2/example_inputs.yaml @@ -0,0 +1,20 @@ +bam_compression_level: +create_bam_index: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_srt.bam +read_group_description: +read_group_identifier: test +read_group_library: 1 +read_group_platform_unit: bc01 +read_group_run_date: +read_group_sample_name: seracare +read_group_sequencing_platform: Illumina +read_group_sequnecing_center: msk +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl 
b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl new file mode 100644 index 00000000..715b53bc --- /dev/null +++ b/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl @@ -0,0 +1,218 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_add_or_replace_read_groups_2_21_2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file ( sam). Required. + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: read_group_identifier + type: string + inputBinding: + position: 0 + prefix: RGID= + separate: false + doc: >- + Read Group ID Default value: 1. This option can be set to 'null' to clear + the default value Required + - id: read_group_sequencing_center + type: string + inputBinding: + position: 0 + prefix: RGCN= + separate: false + doc: 'Read Group sequencing center name Default value: null. Required' + - id: read_group_library + type: string + inputBinding: + position: 0 + prefix: RGLB= + separate: false + doc: Read Group Library. Required + - id: read_group_platform_unit + type: string + inputBinding: + position: 0 + prefix: RGPU= + separate: false + doc: Read Group platform unit (eg. run barcode) Required. 
+ - id: read_group_sample_name + type: string + inputBinding: + position: 0 + prefix: RGSM= + separate: false + doc: Read Group sample name. Required + - id: read_group_sequencing_platform + type: string + inputBinding: + position: 0 + prefix: RGPL= + separate: false + doc: 'Read Group platform (e.g. illumina, solid) Required.' + - id: read_group_description + type: string? + inputBinding: + position: 0 + prefix: RGDS= + separate: false + doc: 'Read Group description Default value: null.' + - id: read_group_run_date + type: string? + inputBinding: + position: 0 + prefix: RGDT= + separate: false + doc: 'Read Group run date Default value: null.' + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +outputs: + - id: picard_add_or_replace_read_groups_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } + secondaryFiles: + - ^.bai +label: picard_add_or_replace_read_groups_2.21.2 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: AddOrReplaceReadGroups + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } +requirements: + - class: ResourceRequirement + ramMin: 25000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center 
+'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.21.2 diff --git a/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml b/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml new file mode 100644 index 00000000..aa9cd8b0 --- /dev/null +++ b/picard_add_or_replace_read_groups_4.1.8.1/example_inputs.yaml @@ -0,0 +1,22 @@ +bam_compression_level: +create_bam_index: true +use_jdk_deflater: true +use_jdk_inflater: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_srt.bam +read_group_description: +read_group_identifier: test +read_group_library: 1 +read_group_platform_unit: bc01 +read_group_run_date: +read_group_sample_name: seracare +read_group_sequencing_platform: Illumina +read_group_sequencing_center: msk +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl new file mode 100644 index 00000000..07d930e5 --- /dev/null +++ b/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl @@ -0,0 +1,235 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_add_or_replace_read_groups_4.1.8.1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int?
+ - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: Input file ( sam). Required. + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: '-SO' + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: read_group_identifier + type: string + inputBinding: + position: 0 + prefix: '--RGID' + doc: >- + Read Group ID Default value: 1. This option can be set to 'null' to clear + the default value Required + - id: read_group_sequencing_center + type: string + inputBinding: + position: 0 + prefix: '--RGCN' + doc: 'Read Group sequencing center name Default value: null. Required' + - id: read_group_library + type: string + inputBinding: + position: 0 + prefix: '--RGLB' + doc: Read Group Library. Required + - id: read_group_platform_unit + type: string + inputBinding: + position: 0 + prefix: '--RGPU' + doc: Read Group platform unit (eg. run barcode) Required. + - id: read_group_sample_name + type: string + inputBinding: + position: 0 + prefix: '--RGSM' + doc: Read Group sample name. Required + - id: read_group_sequencing_platform + type: string + inputBinding: + position: 0 + prefix: '--RGPL' + doc: 'Read Group platform (e.g. illumina, solid) Required.' + - id: read_group_description + type: string? + inputBinding: + position: 0 + prefix: '--RGDS' + doc: 'Read Group description Default value: null.' + - id: read_group_run_date + type: string? + inputBinding: + position: 0 + prefix: '--RGDT' + doc: 'Read Group run date Default value: null.' + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. 
Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: '--COMPRESSION_LEVEL' + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: picard_add_or_replace_read_groups_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } + secondaryFiles: + - ^.bai +label: picard_add_or_replace_read_groups_4.1.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-jar' + valueFrom: /gatk/gatk-package-4.1.8.1-local.jar + - position: 0 + valueFrom: AddOrReplaceReadGroups + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name) + return inputs.output_file_name; + return inputs.input.basename.replace(/.sam$/, '_srt.bam'); + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 
'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 4.1.8.1 diff --git a/picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml b/picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..f8a51739 --- /dev/null +++ b/picard_collect_alignment_summary_metrics_2.21.2/example_inputs.yaml @@ -0,0 +1,26 @@ +assume_sorted: true +bam_compression_level: null +create_bam_index: null +input: + class: File + metadata: {} + path: "/path/to/bam" + secondaryFiles: + - class: File + path: "/path/to/bam.bai" +max_insert_size: null +memory_overhead: null +memory_per_job: null +metrics_acciumulation_level: null +number_of_threads: null +output_file_name: null +reference_sequence: + class: File + metadata: {} + path: "/path/to/reference.fasta" + secondaryFiles: + - class: File + path: "/path/to/reference.dict" +stop_after: null +tmp_dir: null +validation_stringency: null diff --git a/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl new file mode 100644 index 00000000..24a6302c --- /dev/null +++ b/picard_collect_alignment_summary_metrics_2.21.2/picard_collect_alignment_summary_metrics_2.21.2.cwl @@ -0,0 +1,179 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: 
picard_collect_alignment_summary_metrics_2_8_1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - id: metrics_acciumulation_level + type: string? + inputBinding: + position: 0 + prefix: LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: max_insert_size + type: int? + inputBinding: + position: 0 + prefix: MAX_INSERT_SIZE= + separate: false + doc: >- + Paired-end reads above this insert size will be considered chimeric along + with inter-chromosomal pairs. Default value: 100000. This option can be + set to 'null' to clear the default value. + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? 
+ inputBinding: + position: 0 + prefix: AS=true + - id: reference_sequence + type: File + inputBinding: + position: 0 + prefix: R= + separate: false + doc: >- + Reference sequence file. Note that while this argument isn't required, + without it only a small subset of the metrics will be calculated. Note + also that if a reference sequence is provided, it must be accompanied by a + sequence dictionary. Default value: null. + secondaryFiles: + - ^.dict + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: STOP_AFTER= + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. +outputs: + - id: picard_collect_alignment_summary_metrics_txt + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_alignment_metrics.txt') + } + } +label: picard_collect_alignment_summary_metrics_2.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx8G" + } + else { + return "-Xmx8G" + } + + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: CollectAlignmentSummaryMetrics + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return 
inputs.input.basename.replace(/.bam/,'_alignment_metrics.txt') + } + } +requirements: + - class: ResourceRequirement + ramMin: 12000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.3' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.8.1 diff --git a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl index 23ceec08..0c98a858 100644 --- a/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl +++ b/picard_collect_alignment_summary_metrics_2.8.1/picard_collect_alignment_summary_metrics_2.8.1.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_collect_alignment_summary_metrics_2.8.1 + sbg: 'https://www.sevenbridges.com/' +id: picard_collect_alignment_summary_metrics_2.21.2 baseCommand: - java inputs: @@ -95,7 +96,7 @@ inputs: Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value. 
outputs: - - id: alignment_metrics + - id: picard_collect_alignment_summary_metrics_txt type: File outputBinding: glob: |- @@ -106,7 +107,7 @@ outputs: return inputs.input.basename.replace(/.bam/,'_alignment_metrics.txt') } } -label: picard_collect_alignment_summary_metrics_2.8.1 +label: picard_collect_alignment_summary_metrics_2.21.2 arguments: - position: 0 valueFrom: |- @@ -137,7 +138,7 @@ arguments: } - position: 0 prefix: '-jar' - valueFrom: /usr/local/bin/picard.jar + valueFrom: /usr/picard/picard.jar - position: 0 valueFrom: CollectAlignmentSummaryMetrics - position: 0 @@ -156,7 +157,7 @@ requirements: ramMin: 12000 coresMin: 1 - class: DockerRequirement - dockerPull: 'mskcc/picard:2.8.1' + dockerPull: 'broadinstitute/picard:2.21.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -175,4 +176,4 @@ requirements: 'doap:release': - class: 'doap:Version' 'doap:name': picard - 'doap:revision': 2.8.1 + 'doap:revision': 2.21.2 diff --git a/picard_collectmultiplemetric_2.21.2/example_inputs.yml b/picard_collectmultiplemetric_2.21.2/example_inputs.yml new file mode 100644 index 00000000..72207111 --- /dev/null +++ b/picard_collectmultiplemetric_2.21.2/example_inputs.yml @@ -0,0 +1,18 @@ +input: + class: File + path: "Sample.bam" +assume_sorted: +bam_compression_level: +create_bam_index: +dbsnp_file: +file_extension: +include_unpaired: +intervals_file: +memory_overhead: +memory_per_job: +metric_accumulation_level: +number_of_threads: +output_file_name: +program_list: +stop_after: +validation_stringency: diff --git a/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl new file mode 100644 index 00000000..0d841d74 --- /dev/null +++ b/picard_collectmultiplemetric_2.21.2/picard_collectmultiplemetrics_2.21.2.cwl @@ -0,0 +1,268 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 
'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_collectmultiplemetrics_2.21.2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + separate: false + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + separate: false + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: STOP_AFTER= + separate: false + doc: >- + Stop after processing N reads, mainly for debugging. 
Default value: 0. + This option can be set to 'null' to clear the default value. + - id: metric_accumulation_level + type: string? + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: file_extension + type: string? + inputBinding: + position: 0 + prefix: FILE_EXTENSION= + separate: false + doc: >- + Append the given file extension to all metric file names (ex. + OUTPUT.insert_size_metrics.EXT). None if null Default value: null. + - id: program_list + type: 'string[]?' + inputBinding: + position: 0 + prefix: PROGRAM= + separate: false + doc: >- + Set of metrics programs to apply during the pass through the SAM file. + Default value: [CollectAlignmentSummaryMetrics, + CollectBaseDistributionByCycle, CollectInsertSizeMetrics, + MeanQualityByCycle, QualityScoreDistribution]. This option can be set to + 'null' to clear the default value. Possible values: + {CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, + QualityScoreDistribution, MeanQualityByCycle, + CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, + CollectSequencingArtifactMetrics, CollectQualityYieldMetrics} This option + may be specified 0 or more times. This option can be set to 'null' to + clear the default list. + - id: intervals_file + type: File? + inputBinding: + position: 0 + prefix: INTERVALS= + separate: false + doc: >- + An optional list of intervals to restrict analysis to. Only pertains to + some of the PROGRAMs. Programs whose stand-alone CLP does not have an + INTERVALS argument will silently ignore this argument. Default value: + null. + - id: dbsnp_file + type: File? 
+ inputBinding: + position: 0 + prefix: DB_SNP= + separate: false + doc: >- + VCF format dbSNP file, used to exclude regions around known polymorphisms + from analysis by some PROGRAMs; PROGRAMs whose CLP doesn't allow for this + argument will quietly ignore it. Default value: null. + - id: include_unpaired + type: boolean? + inputBinding: + position: 0 + prefix: INCLUDE_UNPAIRED=true + separate: false + doc: >- + Include unpaired reads in CollectSequencingArtifactMetrics. If set to true + then all paired reads will be included as well - MINIMUM_INSERT_SIZE and + MAXIMUM_INSERT_SIZE will be ignored in CollectSequencingArtifactMetrics. + Default value: false. This option can be set to 'null' to clear the + default value. Possible values: {true, false} +outputs: + - id: picard_collectmultiplemetrics_alignment_summary_metrics + type: File? + outputBinding: + glob: '*alignment_summary_metrics' + - id: picard_collectmultiplemetrics_bait_bias_detail_metrics + type: File? + outputBinding: + glob: '*bait_bias_detail_metrics' + - id: picard_collectmultiplemetrics_bait_bias_summary_metrics + type: File? + outputBinding: + glob: '*bait_bias_summary_metrics' + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_metrics + type: File? + outputBinding: + glob: '*base_distribution_by_cycle_metrics' + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_pdf + type: File? + outputBinding: + glob: '*base_distribution_by_cycle.pdf' + - id: picard_collectmultiplemetrics_error_summary_metrics + type: File? + outputBinding: + glob: '*error_summary_metrics' + - id: picard_collectmultiplemetrics_gc_bias_detail_metrics + type: File? + outputBinding: + glob: '*gc_bias.detail_metrics' + - id: picard_collectmultiplemetrics_gc_bias_pdf + type: File? + outputBinding: + glob: '*gc_bias.pdf' + - id: picard_collectmultiplemetrics_gc_bias_summary_metrics + type: File? 
+ outputBinding: + glob: '*gc_bias.summary_metrics' + - id: picard_collectmultiplemetrics_insert_size_histogram_pdf + type: File? + outputBinding: + glob: '*insert_size_histogram.pdf' + - id: picard_collectmultiplemetrics_insert_size_metrics + type: File? + outputBinding: + glob: '*insert_size_metrics' + - id: picard_collectmultiplemetrics_pre_adapter_detail_metrics + type: File? + outputBinding: + glob: '*pre_adapter_detail_metrics' + - id: picard_collectmultiplemetrics_pre_adapter_summary_metrics + type: File? + outputBinding: + glob: '*pre_adapter_summary_metrics' + - id: picard_collectmultiplemetrics_quality_by_cycle_metrics + type: File? + outputBinding: + glob: '*quality_by_cycle_metrics' + - id: picard_collectmultiplemetrics_quality_by_cycle_pdf + type: File? + outputBinding: + glob: '*quality_by_cycle.pdf' + - id: picard_collectmultiplemetrics_quality_distribution_metrics + type: File? + outputBinding: + glob: '*quality_distribution_metrics' + - id: picard_collectmultiplemetrics_quality_distribution_pdf + type: File? 
+ outputBinding: + glob: '*quality_distribution.pdf' +label: picard_collectmultiplemetrices_2.21.2 +arguments: + - position: 0 + prefix: '' + separate: false + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + prefix: '' + separate: false + valueFrom: CollectMultipleMetrics + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_multiple_metrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 8 + - class: DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.21.2 diff --git 
a/picard_collectmultiplemetric_2.8.1/example_inputs.yml b/picard_collectmultiplemetric_2.8.1/example_inputs.yml new file mode 100644 index 00000000..72207111 --- /dev/null +++ b/picard_collectmultiplemetric_2.8.1/example_inputs.yml @@ -0,0 +1,18 @@ +input: + class: File + path: "Sample.bam" +assume_sorted: +bam_compression_level: +create_bam_index: +dbsnp_file: +file_extension: +include_unpaired: +intervals_file: +memory_overhead: +memory_per_job: +metric_accumulation_level: +number_of_threads: +output_file_name: +program_list: +stop_after: +validation_stringency: diff --git a/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl new file mode 100644 index 00000000..fa277b31 --- /dev/null +++ b/picard_collectmultiplemetric_2.8.1/picard_collectmultiplemetrics_2-8-1.cwl @@ -0,0 +1,268 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_collectmultiplemetrics_2_8_1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. 
This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + separate: false + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + separate: false + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: stop_after + type: int? + inputBinding: + position: 0 + prefix: STOP_AFTER= + separate: false + doc: >- + Stop after processing N reads, mainly for debugging. Default value: 0. + This option can be set to 'null' to clear the default value. + - id: metric_accumulation_level + type: string? + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: file_extension + type: string? + inputBinding: + position: 0 + prefix: FILE_EXTENSION= + separate: false + doc: >- + Append the given file extension to all metric file names (ex. + OUTPUT.insert_size_metrics.EXT). None if null Default value: null. + - id: program_list + type: 'string[]?' + inputBinding: + position: 0 + prefix: PROGRAM= + separate: false + doc: >- + Set of metrics programs to apply during the pass through the SAM file. 
+ Default value: [CollectAlignmentSummaryMetrics, + CollectBaseDistributionByCycle, CollectInsertSizeMetrics, + MeanQualityByCycle, QualityScoreDistribution]. This option can be set to + 'null' to clear the default value. Possible values: + {CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, + QualityScoreDistribution, MeanQualityByCycle, + CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, + CollectSequencingArtifactMetrics, CollectQualityYieldMetrics} This option + may be specified 0 or more times. This option can be set to 'null' to + clear the default list. + - id: intervals_file + type: File? + inputBinding: + position: 0 + prefix: INTERVALS= + separate: false + doc: >- + An optional list of intervals to restrict analysis to. Only pertains to + some of the PROGRAMs. Programs whose stand-alone CLP does not have an + INTERVALS argument will silently ignore this argument. Default value: + null. + - id: dbsnp_file + type: File? + inputBinding: + position: 0 + prefix: DB_SNP= + separate: false + doc: >- + VCF format dbSNP file, used to exclude regions around known polymorphisms + from analysis by some PROGRAMs; PROGRAMs whose CLP doesn't allow for this + argument will quietly ignore it. Default value: null. + - id: include_unpaired + type: boolean? + inputBinding: + position: 0 + prefix: INCLUDE_UNPAIRED=true + separate: false + doc: >- + Include unpaired reads in CollectSequencingArtifactMetrics. If set to true + then all paired reads will be included as well - MINIMUM_INSERT_SIZE and + MAXIMUM_INSERT_SIZE will be ignored in CollectSequencingArtifactMetrics. + Default value: false. This option can be set to 'null' to clear the + default value. Possible values: {true, false} +outputs: + - id: picard_collectmultiplemetrics_alignment_summary_metrics + type: File? + outputBinding: + glob: '*alignment_summary_metrics' + - id: picard_collectmultiplemetrics_bait_bias_detail_metrics + type: File? 
+ outputBinding: + glob: '*bait_bias_detail_metrics' + - id: picard_collectmultiplemetrics_bait_bias_summary_metrics + type: File? + outputBinding: + glob: '*bait_bias_summary_metrics' + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_metrics + type: File? + outputBinding: + glob: '*base_distribution_by_cycle_metrics' + - id: picard_collectmultiplemetrics_base_distribution_by_cycle_pdf + type: File? + outputBinding: + glob: '*base_distribution_by_cycle.pdf' + - id: picard_collectmultiplemetrics_error_summary_metrics + type: File? + outputBinding: + glob: '*error_summary_metrics' + - id: picard_collectmultiplemetrics_gc_bias_detail_metrics + type: File? + outputBinding: + glob: '*gc_bias.detail_metrics' + - id: picard_collectmultiplemetrics_gc_bias_pdf + type: File? + outputBinding: + glob: '*gc_bias.pdf' + - id: picard_collectmultiplemetrics_gc_bias_summary_metrics + type: File? + outputBinding: + glob: '*gc_bias.summary_metrics' + - id: picard_collectmultiplemetrics_insert_size_histogram_pdf + type: File? + outputBinding: + glob: '*insert_size_histogram.pdf' + - id: picard_collectmultiplemetrics_insert_size_metrics + type: File? + outputBinding: + glob: '*insert_size_metrics' + - id: picard_collectmultiplemetrics_pre_adapter_detail_metrics + type: File? + outputBinding: + glob: '*pre_adapter_detail_metrics' + - id: picard_collectmultiplemetrics_pre_adapter_summary_metrics + type: File? + outputBinding: + glob: '*pre_adapter_summary_metrics' + - id: picard_collectmultiplemetrics_quality_by_cycle_metrics + type: File? + outputBinding: + glob: '*quality_by_cycle_metrics' + - id: picard_collectmultiplemetrics_quality_by_cycle_pdf + type: File? + outputBinding: + glob: '*quality_by_cycle.pdf' + - id: picard_collectmultiplemetrics_quality_distribution_metrics + type: File? + outputBinding: + glob: '*quality_distribution_metrics' + - id: picard_collectmultiplemetrics_quality_distribution_pdf + type: File? 
+ outputBinding: + glob: '*quality_distribution.pdf' +label: picard_collectmultiplemetrices_2.8.1 +arguments: + - position: 0 + prefix: '' + separate: false + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + prefix: '' + separate: false + valueFrom: CollectMultipleMetrics + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_multiple_metrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 10000 + coresMin: 8 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.3' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sumans@mskcc.org' + 'foaf:name': Shalabh Suman + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.8.1 diff --git 
a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl index cfc74168..d1b9c299 100644 --- a/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl +++ b/picard_fix_mate_information_1.96/picard_fix_mate_information_1.96.cwl @@ -1,10 +1,12 @@ class: CommandLineTool cwlVersion: v1.0 $namespaces: + cwltool: 'http://commonwl.org/cwltool#' dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_fix_mate_information_1.96 + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_1_96 baseCommand: - java inputs: @@ -27,8 +29,7 @@ inputs: - ^.bai - id: output_file_name type: string? - doc: >- - Output file name (bam or sam). Not Required + doc: Output file name (bam or sam). Not Required - id: sort_order type: string? inputBinding: @@ -39,13 +40,6 @@ inputs: Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.Default value: null. Possible values: {unsorted, queryname, coordinate} - - id: tmp_dir - type: string? - inputBinding: - position: 0 - prefix: TMP_DIR= - separate: false - doc: This option may be specified 0 or more times - id: validation_stringency type: string? inputBinding: @@ -78,8 +72,11 @@ inputs: Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
outputs: - - id: bam + - id: picard_fix_mate_information_bam type: File outputBinding: glob: |- @@ -120,9 +117,30 @@ arguments: return "-Xmx15G" } } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: '-jar' valueFrom: /usr/local/bin/FixMateInformation.jar + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } - position: 0 prefix: O= separate: false @@ -135,11 +153,12 @@ arguments: } } requirements: + - class: ShellCommandRequirement - class: ResourceRequirement - ramMin: 16000 + ramMin: 25000 coresMin: 2 - class: DockerRequirement - dockerPull: 'mskcc/picard_1.96:0.1.0' + dockerPull: 'mskaccess/picard_1.96:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_fix_mate_information_2.21.2/example_inputs.yaml b/picard_fix_mate_information_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..1d9e4ee2 --- /dev/null +++ b/picard_fix_mate_information_2.21.2/example_inputs.yaml @@ -0,0 +1,12 @@ +bam_compression_level: +create_bam_index: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_fm.bam +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl new file mode 100644 index 00000000..731652ae --- /dev/null +++ b/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl @@ -0,0 +1,166 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + 
foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_2_21_2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: The input file to fix. This option may be specified 0 or more times + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. 
Possible values:{true, false} +outputs: + - id: picard_fix_mate_information_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } + secondaryFiles: + - ^.bai +label: picard_fix_mate_information_2.21.2 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + valueFrom: "-XX:-UseGCOverheadLimit" + shellQuote: false + - position: 0 + valueFrom: "-Djava.io.tmpdir=$(runtime.tmpdir)" + shellQuote: false + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: FixMateInformation + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: "$(runtime.tmpdir)" + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 25000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer 
Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.21.2 diff --git a/picard_fix_mate_information_2.9.0/README.md b/picard_fix_mate_information_2.9.0/README.md new file mode 100644 index 00000000..3485d823 --- /dev/null +++ b/picard_fix_mate_information_2.9.0/README.md @@ -0,0 +1,84 @@ +# CWL and Dockerfile for running Picard - FixMateInformation + +## Version of tools in docker image (./container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.9.0 | https://github.com/broadinstitute/picard/releases/download/2.9.0/picard.jar | +| R | 3.3.3 | r-base for openjdk:8 | + +[![](https://images.microbadger.com/badges/image/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own image badge on microbadger.com") [![](https://images.microbadger.com/badges/version/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own version badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_fix_mate_information_2.9.0.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_2.9.0.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_jobStore.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to picardFixMate_toil_log --outdir . 
--writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr & +``` + +### Usage + +``` +usage: picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + [--output_file_name OUTPUT_FILE_NAME] [--sort_order SORT_ORDER] + [--validation_stringency VALIDATION_STRINGENCY] + [--bam_compression_level BAM_COMPRESSION_LEVEL] [--create_bam_index] + [--temporary_directory TEMPORARY_DIRECTORY] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT The input file to fix. This option may be specified 0 + or more times + --output_file_name OUTPUT_FILE_NAME + Output file name (bam or sam). Not Required + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. 
BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --temporary_directory TEMPORARY_DIRECTORY + Default value: null. This option may be specified 0 or + more times. +``` diff --git a/picard_fix_mate_information_2.9.0/container/Dockerfile b/picard_fix_mate_information_2.9.0/container/Dockerfile new file mode 100644 index 00000000..643fa70d --- /dev/null +++ b/picard_fix_mate_information_2.9.0/container/Dockerfile @@ -0,0 +1,44 @@ +################## BASE IMAGE ###################### + +FROM openjdk:8 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION +ARG JAVA_VERSION=8 +ARG LICENSE="Apache-2.0" +ARG PICARD_VERSION=2.9.0 +ARG R_VERSION="3.3.3" + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Ronak H Shah (shahr2@mskcc.org)" + +LABEL org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses=${LICENSE} \ + org.opencontainers.image.version.picard=${PICARD_VERSION} \ + org.opencontainers.image.version.R=${R_VERSION} \ + org.opencontainers.image.version.java=${JAVA_VERSION} \ + org.opencontainers.image.source.picard="https://github.com/broadinstitute/picard/releases/download/${PICARD_VERSION}/picard.jar" \ + org.opencontainers.image.source.R="r-base" + +LABEL org.opencontainers.image.description="This container uses openjdk ${JAVA_VERSION} as the base image to build \ + picard version ${PICARD_VERSION}, \ + R version ${R_VERSION}" + +# Install ant, git for building +RUN apt-get update && \ + apt-get --no-install-recommends install -y \ + git \ + unzip \ + wget \ + r-base && \ + apt-get clean autoclean && 
\ + apt-get autoremove -y + +WORKDIR /usr/src + +RUN wget "https://github.com/broadinstitute/picard/releases/download/${PICARD_VERSION}/picard.jar" && \ + cp -s /usr/src/picard.jar /usr/local/bin/ diff --git a/picard_fix_mate_information_2.9.0/example_inputs.yaml b/picard_fix_mate_information_2.9.0/example_inputs.yaml new file mode 100644 index 00000000..91d8e497 --- /dev/null +++ b/picard_fix_mate_information_2.9.0/example_inputs.yaml @@ -0,0 +1,5 @@ +create_bam_index: true +input: + class: File + path: "/path/to/sample_id.bam" +output_file_name: sample_id_fm.bam diff --git a/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl new file mode 100644 index 00000000..f0f5eb7a --- /dev/null +++ b/picard_fix_mate_information_2.9.0/picard_fix_mate_information_2.9.0.cwl @@ -0,0 +1,182 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + cwltool: 'http://commonwl.org/cwltool#' + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_2_9_0 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: The input file to fix. This option may be specified 0 or more times + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: validation_stringency + type: string? 
+ inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: picard_fix_mate_information_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } + secondaryFiles: + - ^.bai +label: picard_fix_mate_information_2.9.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx15G" + } + else { + return "-Xmx15G" + } + } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-Djava.io.tmpdir=' + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: FixMateInformation + - position: 0 + prefix: TMP_DIR= + separate: false + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 25000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'mskaccess/picard_2.9.0:0.1.0' + - class: InlineJavascriptRequirement 
+'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.9.0 diff --git a/picard_fix_mate_information_4.1.8.1/example_inputs.yaml b/picard_fix_mate_information_4.1.8.1/example_inputs.yaml new file mode 100644 index 00000000..a6581208 --- /dev/null +++ b/picard_fix_mate_information_4.1.8.1/example_inputs.yaml @@ -0,0 +1,14 @@ +bam_compression_level: +create_bam_index: true +use_jdk_deflater: true +use_jdk_inflater: true +input: + class: File + path: "/path/to/bam" +memory_overhead: +memory_per_job: +number_of_threads: +output_file_name: somename_fm.bam +sort_order: +tmp_dir: +validation_stringency: diff --git a/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl new file mode 100644 index 00000000..5140be34 --- /dev/null +++ b/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl @@ -0,0 +1,180 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_fix_mate_information_4_1_8_1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: '-I' + doc: The input file to fix. 
This option may be specified 0 or more times + - id: output_file_name + type: string? + doc: Output file name (bam or sam). Not Required + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: '-SO' + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: '--VALIDATION_STRINGENCY' + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: '--COMPRESSION_LEVEL' + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - id: use_jdk_deflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_DEFLATER' + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed + output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: '--USE_JDK_INFLATER' + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed + input + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: '--CREATE_INDEX' + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: picard_fix_mate_information_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } + secondaryFiles: + - ^.bai +label: picard_fix_mate_information_4.1.8.1 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx20G" + } + else { + return "-Xmx20G" + } + } + - position: 0 + shellQuote: false + valueFrom: '-XX:-UseGCOverheadLimit' + - position: 0 + prefix: '-jar' + valueFrom: /gatk/gatk-package-4.1.8.1-local.jar + - position: 0 + valueFrom: FixMateInformation + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } + - position: 0 + prefix: '-O' + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_fm.bam') + } + } +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 30000 + coresMin: 12 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer 
Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 4.1.8.1 diff --git a/picard_hsmetrics_2.21.2/example_inputs.yaml b/picard_hsmetrics_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..0ab1e497 --- /dev/null +++ b/picard_hsmetrics_2.21.2/example_inputs.yaml @@ -0,0 +1,24 @@ +bait_intervals: + class: File + metadata: {} + path: 'picard_baits.interval_list' + secondaryFiles: [] +bait_set_name: null +clip_overlapping_reads: null +coverage_cap: null +input: + class: File + path: 'test_bam.bam' +metric_accumulation_level: null +minimum_base_quality: null +minimum_mapping_quality: null +near_distance: null +output_file_name: null +per_base_coverage: null +per_target_coverage: null +sample_size: null +target_intervals: + class: File + metadata: {} + path: 'picard_targets.interval_list' + secondaryFiles: [] diff --git a/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl new file mode 100644 index 00000000..10503349 --- /dev/null +++ b/picard_hsmetrics_2.21.2/picard_hsmetrics_2.21.2.cwl @@ -0,0 +1,206 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_hsmetrics_2_21_2 +baseCommand: + - java +inputs: + - id: bait_intervals + type: File + inputBinding: + position: 0 + prefix: BAIT_INTERVALS= + separate: false + doc: >- + An interval list file that contains the locations of the baits used. + Default value: null. This option must be specified at least 1 times. + - id: bait_set_name + type: string? + inputBinding: + position: 0 + prefix: BAIT_SET_NAME= + separate: false + doc: >- + Bait set name. 
If not provided it is inferred from the filename of the + bait intervals. Default value: null + - id: minimum_mapping_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_MAPPING_QUALITY= + separate: false + doc: >- + Minimum mapping quality for a read to contribute coverage. Default value: + 20. This option can be set to 'null' to clear the default value. + - id: minimum_base_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_BASE_QUALITY= + separate: false + doc: >- + Minimum base quality for a base to contribute coverage. Default value: 20. + This option can be set to 'null' to clear the default value. + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: CLIP_OVERLAPPING_READS=true + separate: false + doc: >- + True if we are to clip overlapping reads, false otherwise. Default value: + true. This option can be set to 'null' to clear the default value. + Possible values: {true, false} + - id: target_intervals + type: File? + inputBinding: + position: 0 + prefix: TARGET_INTERVALS= + separate: false + doc: >- + An interval list file that contains the locations of the targets. Default + value: null. This option must be specified at least 1 times. + - id: input + type: File + inputBinding: + position: 0 + prefix: INPUT= + separate: false + doc: An aligned SAM or BAM file. Required. + secondaryFiles: + - ^.bai + - id: output_file_name + type: string? + inputBinding: + position: 0 + prefix: OUTPUT= + separate: false + doc: The output file to write the metrics to. Required. + - id: metric_accumulation_level + type: + - 'null' + - type: enum + symbols: + - ALL_READS + - SAMPLE + - LIBRARY + - READ_GROUP + name: metric_accumulation_level + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + doc: >- + The level(s) at which to accumulate metrics. Default value: [ALL_READS]. + This option can be set to 'null' to clear the default value. 
Possible + values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be + specified 0 or more times. This option can be set to 'null' to clear the + default list. + - id: per_target_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_TARGET_COVERAGE= + separate: false + doc: >- + An optional file to output per target coverage information to. Default + value: null. + - id: per_base_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_BASE_COVERAGE= + separate: false + doc: >- + An optional file to output per base coverage information to. The per-base + file contains one line per target base and can grow very large. It is not + recommended for use with large target sets. Default value: null. + - id: near_distance + type: int? + inputBinding: + position: 0 + prefix: NEAR_DISTANCE= + separate: false + doc: >- + The maximum distance between a read and the nearest probe/bait/amplicon + for the read to be considered 'near probe' and included in percent + selected. Default value: 250. This option can be set to 'null' to clear + the default value. + - id: coverage_cap + type: int? + inputBinding: + position: 0 + prefix: COVERAGE_CAP= + separate: false + doc: >- + Parameter to set a max coverage limit for Theoretical Sensitivity + calculations. Default is 200. Default value: 200. This option can be set + to 'null' to clear the default value. + - id: sample_size + type: int? + inputBinding: + position: 0 + prefix: SAMPLE_SIZE= + separate: false + doc: >- + Sample Size used for Theoretical Het Sensitivity sampling. Default is + 10000. Default value: 10000. This option can be set to 'null' to clear the + default value. +outputs: + - id: picard_hsmetrics_txt + type: File? 
+ outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +label: picard_hsmetrics_2.21.2 +arguments: + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: CollectHsMetrics + - position: 0 + prefix: OUTPUT= + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': hsmetrics + 'doap:revision': 2.21.2 diff --git a/picard_hsmetrics_2.8.1/example_inputs.yaml b/picard_hsmetrics_2.8.1/example_inputs.yaml new file mode 100644 index 00000000..0ab1e497 --- /dev/null +++ b/picard_hsmetrics_2.8.1/example_inputs.yaml @@ -0,0 +1,24 @@ +bait_intervals: + class: File + metadata: {} + path: 'picard_baits.interval_list' + secondaryFiles: [] +bait_set_name: null +clip_overlapping_reads: null +coverage_cap: null +input: + class: File + path: 'test_bam.bam' +metric_accumulation_level: null +minimum_base_quality: null +minimum_mapping_quality: null +near_distance: null +output_file_name: null +per_base_coverage: null +per_target_coverage: null +sample_size: null +target_intervals: + class: File + metadata: {} + path: 'picard_targets.interval_list' + 
secondaryFiles: [] diff --git a/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl new file mode 100644 index 00000000..e24d9fbf --- /dev/null +++ b/picard_hsmetrics_2.8.1/picard_hsmetrics_2.8.1.cwl @@ -0,0 +1,158 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_hsmetrics_2_8_1 +baseCommand: + - java +inputs: + - id: bait_intervals + type: File + inputBinding: + position: 0 + prefix: BAIT_INTERVALS= + separate: false + - id: bait_set_name + type: string? + inputBinding: + position: 0 + prefix: BAIT_SET_NAME= + separate: false + - id: minimum_mapping_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_MAPPING_QUALITY= + separate: false + - id: minimum_base_quality + type: int? + inputBinding: + position: 0 + prefix: MINIMUM_BASE_QUALITY= + separate: false + - id: clip_overlapping_reads + type: boolean? + inputBinding: + position: 0 + prefix: CLIP_OVERLAPPING_READS=true + separate: false + - id: target_intervals + type: File? + inputBinding: + position: 0 + prefix: TARGET_INTERVALS= + separate: false + - id: input + type: File + inputBinding: + position: 0 + prefix: INPUT= + separate: false + - id: output_file_name + type: string? + inputBinding: + position: 0 + prefix: OUTPUT= + separate: false + - id: metric_accumulation_level + type: + - 'null' + - type: enum + symbols: + - ALL_READS + - SAMPLE + - LIBRARY + - READ_GROUP + name: metric_accumulation_level + inputBinding: + position: 0 + prefix: METRIC_ACCUMULATION_LEVEL= + separate: false + - id: per_target_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_TARGET_COVERAGE= + separate: false + - id: per_base_coverage + type: File? + inputBinding: + position: 0 + prefix: PER_BASE_COVERAGE= + separate: false + - id: near_distance + type: int? 
+ inputBinding: + position: 0 + prefix: NEAR_DISTANCE= + separate: false + - id: coverage_cap + type: int? + inputBinding: + position: 0 + prefix: COVERAGE_CAP= + separate: false + - id: sample_size + type: int? + inputBinding: + position: 0 + prefix: SAMPLE_SIZE= + separate: false +outputs: + - id: picard_hsmetrics_txt + type: File? + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +label: picard_hsmetrics_2.8.1 +arguments: + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: CollectHsMetrics + - position: 0 + prefix: OUTPUT= + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'.hsmetrics') + } + } +requirements: + - class: ResourceRequirement + ramMin: 4000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'mskaccess/picard:0.6.2' + - class: InlineJavascriptRequirement + +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:johnsoni@mskcc.org' + 'foaf:name': Ian Johnson + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': hsmetrics + 'doap:revision': 2.8.1 diff --git a/picard_mark_duplicates_1.96/README.md b/picard_mark_duplicates_1.96/README.md deleted file mode 100644 index bb651139..00000000 --- a/picard_mark_duplicates_1.96/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - MarkDuplicates - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| 
picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip | -| R | 3.3.3 | r-base for opnejdk:8 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml -``` diff --git a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl index f6b19306..186f7a0b 100644 --- a/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl +++ b/picard_mark_duplicates_1.96/picard_mark_duplicates_1.96.cwl @@ -4,7 +4,8 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' -id: picard_mark_duplicates_1.96 + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_1_96 baseCommand: - java inputs: @@ -96,7 +97,7 @@ inputs: position: 0 prefix: AS=true outputs: - - id: bam + - id: picard_mark_duplicates_bam type: File outputBinding: glob: '$(inputs.input.basename.replace(/.bam/, ''_md.bam''))' @@ -115,7 +116,7 @@ requirements: ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/picard_1.96:0.1.0' + dockerPull: 'mskaccess/picard_1.96:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/picard_mark_duplicates_2.21.2/example_inputs.yaml 
b/picard_mark_duplicates_2.21.2/example_inputs.yaml new file mode 100644 index 00000000..4cb5941e --- /dev/null +++ b/picard_mark_duplicates_2.21.2/example_inputs.yaml @@ -0,0 +1,15 @@ +assume_sort_order: coordinate +bam_compression_level: null +create_bam_index: true +duplicate_scoring_strategy: null +duplication_metrics: test_metrics.txt +input: + class: File + path: /path/to/file.bam +memory_overhead: null +memory_per_job: null +number_of_threads: null +optical_duplicate_pixel_distance: null +output_file_name: null +tmp_dir: null +validation_stringency: null diff --git a/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl b/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl new file mode 100644 index 00000000..e5869717 --- /dev/null +++ b/picard_mark_duplicates_2.21.2/picard_mark_duplicates_2.21.2.cwl @@ -0,0 +1,181 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_2_21_2 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - default: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + id: duplication_metrics + type: string + inputBinding: + position: 0 + prefix: M= + separate: false + doc: File to write duplication metrics to Required. + - id: assume_sort_order + type: string? + inputBinding: + position: 0 + prefix: ASO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. 
Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + separate: false + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - id: duplicate_scoring_strategy + type: string? + inputBinding: + position: 0 + prefix: DUPLICATE_SCORING_STRATEGY= + separate: false + doc: >- + The scoring strategy for choosing the non-duplicate among candidates. + Default value:SUM_OF_BASE_QUALITIES. This option can be set to 'null' to + clear the default value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + - id: optical_duplicate_pixel_distance + type: int? + inputBinding: + position: 0 + prefix: OPTICAL_DUPLICATE_PIXEL_DISTANCE= + separate: false + doc: >- + The maximum offset between two duplicate clusters in order to consider + them optical duplicates. 
The default is appropriate for unpatterned + versions of the Illumina platform. For the patterned flowcell models, 2500 + is moreappropriate. For other platforms and models, users should + experiment to find what works best. Default value: 100. This option can + be set to 'null' to clear the default value. +outputs: + - id: picard_mark_duplicates_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + secondaryFiles: + - ^.bai + - id: picard_mark_duplicates_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.duplication_metrics){ + return inputs.duplication_metrics + } else { + return inputs.input.basename.replace(/.bam/,'_md.metrics') + } + } +label: picard_mark_duplicates_2.21.2 +arguments: + - position: 0 + valueFrom: "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n \n if(inputs.memory_per_job % 1000 == 0) {\n \t\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n \n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" \n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n \n return \"-Xmx15G\"\n }\n else {\n \n \treturn \"-Xmx15G\"\n }\n}" + - position: 0 + prefix: '-jar' + valueFrom: /usr/picard/picard.jar + - position: 0 + valueFrom: MarkDuplicates + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: 
DockerRequirement + dockerPull: 'broadinstitute/picard:2.21.2' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.21.2 diff --git a/picard_mark_duplicates_2.8.1/README.md b/picard_mark_duplicates_2.8.1/README.md deleted file mode 100644 index 173d0b7e..00000000 --- a/picard_mark_duplicates_2.8.1/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# CWL and Dockerfile for running Picard - MarkDuplicates - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| java base image | 8 | - | -| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar | -| R | 3.3.3 | r-base for opnejdk:8 | - - -## CWL - -- CWL specification 1.0 -- Use example_inputs.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml -``` \ No newline at end of file diff --git a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl index ccc6165a..402a37fa 100644 --- a/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl +++ b/picard_mark_duplicates_2.8.1/picard_mark_duplicates_2.8.1.cwl @@ -4,6 +4,7 @@ $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' id: picard_mark_duplicates_2_8_1 baseCommand: - java @@ -114,7 +115,7 
@@ inputs: experiment to find what works best. Default value: 100. This option can be set to 'null' to clear the default value. outputs: - - id: bam + - id: picard_mark_duplicates_bam type: File outputBinding: glob: |- @@ -150,10 +151,10 @@ arguments: } requirements: - class: ResourceRequirement - ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}" - coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 2\r }\r}" + ramMin: 17000 + coresMin: 2 - class: DockerRequirement - dockerPull: 'mskcc/picard:2.8.1' + dockerPull: 'mskaccess/picard:0.6.3' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -173,6 +174,3 @@ requirements: - class: 'doap:Version' 'doap:name': picard 'doap:revision': 2.8.1 - - class: 'doap:Version' - 'doap:name': cwl-wrapper - 'doap:revision': 1.0.0 diff --git a/picard_mark_duplicates_2.9.0/README.md b/picard_mark_duplicates_2.9.0/README.md new file mode 100644 index 00000000..ece95d84 --- /dev/null +++ b/picard_mark_duplicates_2.9.0/README.md @@ -0,0 +1,91 @@ +# CWL and Dockerfile for running Picard - MarkDuplicates + +## Version of tools in docker image (./container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| java base image | 8 | - | +| picard | 2.9.0 | https://github.com/broadinstitute/picard/releases/download/2.9.0/picard.jar | +| R | 3.3.3 | r-base for openjdk:8 | + +[![](https://images.microbadger.com/badges/image/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own image badge on microbadger.com") 
[![](https://images.microbadger.com/badges/version/mskaccess/picard_2.9.0.svg)](https://microbadger.com/images/mskaccess/picard_2.9.0 "Get your own version badge on microbadger.com") + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner picard_mark_duplicates_2.9.0.cwl example_inputs.yaml +``` + +## Usage +``` +usage: picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl + [-h] [--memory_per_job MEMORY_PER_JOB] + [--memory_overhead MEMORY_OVERHEAD] + [--number_of_threads NUMBER_OF_THREADS] --input INPUT + [--output_file_name OUTPUT_FILE_NAME] + [--duplication_metrics DUPLICATION_METRICS] [--sort_order SORT_ORDER] + [--tmp_dir TMP_DIR] [--validation_stringency VALIDATION_STRINGENCY] + [--bam_compression_level BAM_COMPRESSION_LEVEL] [--create_bam_index] + [--assume_sorted] + [--duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY] + [--optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --number_of_threads NUMBER_OF_THREADS + --input INPUT Input file (bam or sam). Required. + --output_file_name OUTPUT_FILE_NAME + Output file (bam or sam). + --duplication_metrics DUPLICATION_METRICS + File to write duplication metrics to Required. + --sort_order SORT_ORDER + Optional sort order to output in. If not supplied + OUTPUT is in the same order as INPUT.Default value: + null. Possible values: {unsorted, queryname, + coordinate} + --tmp_dir TMP_DIR This option may be specified 0 or more times + --validation_stringency VALIDATION_STRINGENCY + Validation stringency for all SAM files read by this + program. 
Setting stringency to SILENT can improve + performance when processing a BAM file in which + variable-length data (read, qualities, tags) do not + otherwise need to be decoded. Default value: STRICT. + This option can be set to 'null' to clear the default + value. Possible values: {STRICT,LENIENT, SILENT} + --bam_compression_level BAM_COMPRESSION_LEVEL + Compression level for all compressed files created + (e.g. BAM and GELI). Default value:5. This option can + be set to 'null' to clear the default value. + --create_bam_index Whether to create a BAM index when writing a + coordinate-sorted BAM file. Default value:false. This + option can be set to 'null' to clear the default + value. Possible values:{true, false} + --assume_sorted + --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY + The scoring strategy for choosing the non-duplicate + among candidates. Default value:SUM_OF_BASE_QUALITIES. + This option can be set to 'null' to clear the default + value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE + The maximum offset between two duplicate clusters in + order to consider them optical duplicates. The default + is appropriate for unpatterned versions of the + Illumina platform. For the patterned flowcell models, + 2500 is moreappropriate. For other platforms and + models, users should experiment to find what works + best. Default value: 100. This option can be set to + 'null' to clear the default value. 
+``` diff --git a/picard_mark_duplicates_2.9.0/example_inputs.yaml b/picard_mark_duplicates_2.9.0/example_inputs.yaml new file mode 100644 index 00000000..234d0b62 --- /dev/null +++ b/picard_mark_duplicates_2.9.0/example_inputs.yaml @@ -0,0 +1,7 @@ +create_bam_index: true +duplication_metrics: mark_duplicates_md.metrics +input: + class: File + path: "path/to/sample_id.bam" +optical_duplicate_pixel_distance: 2500 +output: mark_duplicates_md.bam diff --git a/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl new file mode 100644 index 00000000..7c032bce --- /dev/null +++ b/picard_mark_duplicates_2.9.0/picard_mark_duplicates_2.9.0.cwl @@ -0,0 +1,206 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_2_9_0 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: I= + separate: false + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - default: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + id: duplication_metrics + type: string + inputBinding: + position: 0 + prefix: M= + separate: false + valueFrom: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + doc: File to write duplication metrics to Required. + - id: sort_order + type: string? + inputBinding: + position: 0 + prefix: SO= + separate: false + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. 
Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: TMP_DIR= + separate: false + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: VALIDATION_STRINGENCY= + separate: false + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: COMPRESSION_LEVEL= + separate: false + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: CREATE_INDEX=true + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. Possible values:{true, false} + - default: true + id: assume_sorted + type: boolean? + inputBinding: + position: 0 + prefix: AS=true + - id: duplicate_scoring_strategy + type: string? + inputBinding: + position: 0 + prefix: DUPLICATE_SCORING_STRATEGY= + separate: false + doc: >- + The scoring strategy for choosing the non-duplicate among candidates. + Default value:SUM_OF_BASE_QUALITIES. This option can be set to 'null' to + clear the default value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + - id: optical_duplicate_pixel_distance + type: int? 
+ inputBinding: + position: 0 + prefix: OPTICAL_DUPLICATE_PIXEL_DISTANCE= + doc: >- + The maximum offset between two duplicate clusters in order to consider + them optical duplicates. The default is appropriate for unpatterned + versions of the Illumina platform. For the patterned flowcell models, 2500 + is moreappropriate. For other platforms and models, users should + experiment to find what works best. Default value: 100. This option can + be set to 'null' to clear the default value. + - id: read_name_regex + type: string? + inputBinding: + position: 0 + prefix: READ_NAME_REGEX= + separate: false +outputs: + - id: picard_mark_duplicates_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + secondaryFiles: + - ^.bai + - ^.metrics +label: picard_mark_duplicates_2.9.0 +arguments: + - position: 0 + valueFrom: |- + ${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return "-Xmx" + (inputs.memory_per_job/1000).toString() + "G" + } + else { + return "-Xmx" + Math.floor((inputs.memory_per_job/1000)).toString() + "G" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return "-Xmx8G" + } + else { + return "-Xmx8G" + } + } + - position: 0 + prefix: '-jar' + valueFrom: /usr/local/bin/picard.jar + - position: 0 + valueFrom: MarkDuplicates + - position: 0 + prefix: O= + separate: false + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + 
coresMin: 2 + - class: DockerRequirement + dockerPull: 'mskaccess/picard_2.9.0:0.1.0' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 2.9.0 diff --git a/picard_mark_duplicates_4.1.8.1/example_inputs.yaml b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml new file mode 100644 index 00000000..08a4e56d --- /dev/null +++ b/picard_mark_duplicates_4.1.8.1/example_inputs.yaml @@ -0,0 +1,19 @@ +assume_sort_order: coordinate +bam_compression_level: null +create_bam_index: true +use_jdk_deflater: true +use_jdk_inflater: true +sorting_collection_size_ratio: 0.25 +read_name_regex: 'null' +duplicate_scoring_strategy: null +duplication_metrics: test_metrics.txt +input: + class: File + path: /path/to/file.bam +memory_overhead: null +memory_per_job: null +number_of_threads: null +optical_duplicate_pixel_distance: null +output_file_name: null +validation_stringency: null + diff --git a/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl new file mode 100644 index 00000000..c6418246 --- /dev/null +++ b/picard_mark_duplicates_4.1.8.1/picard_mark_duplicates_4.1.8.1.cwl @@ -0,0 +1,251 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: picard_mark_duplicates_4.1.8.1 +baseCommand: + - java +inputs: + - id: memory_per_job + type: int? 
+ doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: input + type: File + inputBinding: + position: 0 + prefix: -I + doc: Input file (bam or sam). Required. + - id: output_file_name + type: string? + doc: Output file (bam or sam). + - default: '$( inputs.input.basename.replace(/.bam/, ''_md.metrics'') )' + id: duplication_metrics + type: string + inputBinding: + position: 0 + prefix: -M + doc: File to write duplication metrics to Required. + - id: assume_sort_order + type: string? + inputBinding: + position: 0 + prefix: -ASO + doc: >- + Optional sort order to output in. If not supplied OUTPUT is in the same + order as INPUT.Default value: null. Possible values: {unsorted, queryname, + coordinate} + - id: tmp_dir + type: string? + inputBinding: + position: 0 + prefix: --TMP_DIR + doc: This option may be specified 0 or more times + - id: validation_stringency + type: string? + inputBinding: + position: 0 + prefix: --VALIDATION_STRINGENCY + doc: >- + Validation stringency for all SAM files read by this program. Setting + stringency to SILENT can improve performance when processing a BAM file in + which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. Default value: STRICT. This option can be set to 'null' to + clear the default value. Possible values: {STRICT,LENIENT, SILENT} + - id: bam_compression_level + type: int? + inputBinding: + position: 0 + prefix: --COMPRESSION_LEVEL + doc: >- + Compression level for all compressed files created (e.g. BAM and GELI). + Default value:5. This option can be set to 'null' to clear the default + value. + - default: true + id: create_bam_index + type: boolean? + inputBinding: + position: 0 + prefix: --CREATE_INDEX + doc: >- + Whether to create a BAM index when writing a coordinate-sorted BAM file. + Default value:false. This option can be set to 'null' to clear the default + value. 
Possible values:{true, false} + - id: read_name_regex + type: string? + inputBinding: + position: 0 + prefix: --READ_NAME_REGEX + doc: >- + MarkDuplicates can use the tile and cluster positions to estimate the rate of + optical duplication in addition to the dominant source of duplication, PCR, + to provide a more accurate estimation of library size. By default (with no + READ_NAME_REGEX specified), MarkDuplicates will attempt to extract coordinates + using a split on ':' (see Note below). Set READ_NAME_REGEX to 'null' to + disable optical duplicate detection. Note that without optical duplicate + counts, library size estimation will be less accurate. If the read name does + not follow a standard Illumina colon-separation convention, but does contain + tile and x,y coordinates, a regular expression can be specified to extract + three variables: tile/region, x coordinate and y coordinate from a read name. + The regular expression must contain three capture groups for the three variables, + in order. It must match the entire read name. e.g. if field names were separated + by semi-colon (';') this example regex could be specified + (?:.*;)?([0-9]+)[^;]*;([0-9]+)[^;]*;([0-9]+)[^;]*$ Note that if no + READ_NAME_REGEX is specified, the read name is split on ':'. For 5 element names, + the 3rd, 4th and 5th elements are assumed to be tile, x and y values. For 7 + element names (CASAVA 1.8), the 5th, 6th, and 7th elements are assumed to be + tile, x and y values. + - id: sorting_collection_size_ratio + type: int? + inputBinding: + position: 0 + prefix: --SORTING_COLLECTION_SIZE_RATIO + doc: >- + This number, plus the maximum RAM available to the JVM, determine the memory + footprint used by some of the sorting collections. If you are running out of memory, try reducing this number. + - id: use_jdk_deflater + type: boolean? 
+ inputBinding: + position: 0 + prefix: --USE_JDK_DEFLATER + doc: >- + Use the JDK Deflater instead of the Intel Deflater for writing compressed output + - id: use_jdk_inflater + type: boolean? + inputBinding: + position: 0 + prefix: --USE_JDK_INFLATER + doc: >- + Use the JDK Inflater instead of the Intel Inflater for reading compressed input + - id: duplicate_scoring_strategy + type: string? + inputBinding: + position: 0 + prefix: --DUPLICATE_SCORING_STRATEGY + doc: >- + The scoring strategy for choosing the non-duplicate among candidates. + Default value:SUM_OF_BASE_QUALITIES. This option can be set to 'null' to + clear the default value.Possible values: {SUM_OF_BASE_QUALITIES, + TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM} + - id: optical_duplicate_pixel_distance + type: int? + inputBinding: + position: 0 + prefix: --OPTICAL_DUPLICATE_PIXEL_DISTANCE + doc: >- + The maximum offset between two duplicate clusters in order to consider + them optical duplicates. The default is appropriate for unpatterned + versions of the Illumina platform. For the patterned flowcell models, 2500 + is moreappropriate. For other platforms and models, users should + experiment to find what works best. Default value: 100. This option can + be set to 'null' to clear the default value. + - id: temporary_directory + type: string? + doc: 'Default value: null. This option may be specified 0 or more times.' 
+outputs: + - id: picard_mark_duplicates_bam + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + secondaryFiles: + - ^.bai + - id: picard_mark_duplicates_metrics + type: File + outputBinding: + glob: |- + ${ + if(inputs.duplication_metrics){ + return inputs.duplication_metrics + } else { + return inputs.input.basename.replace(/.bam/,'_md.metrics') + } + } +label: picard_mark_duplicates_4.1.8.1 +arguments: + - position: 0 + valueFrom: "${ + if(inputs.memory_per_job && inputs.memory_overhead) { + if(inputs.memory_per_job % 1000 == 0) { + return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" + } + else { + return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" + } + } + else if (inputs.memory_per_job && !inputs.memory_overhead){ + if(inputs.memory_per_job % 1000 == 0) { + return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" + } + else { + return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" + } + } + else if(!inputs.memory_per_job && inputs.memory_overhead){ + return \"-Xmx15G\" + } + else { + return \"-Xmx15G\" + } + }" + - position: 0 + prefix: '-jar' + valueFrom: /gatk/gatk-package-4.1.8.1-local.jar + - position: 0 + valueFrom: MarkDuplicates + - position: 0 + prefix: -O + valueFrom: |- + ${ + if(inputs.output_file_name){ + return inputs.output_file_name + } else { + return inputs.input.basename.replace(/.bam/,'_md.bam') + } + } + - position: 0 + prefix: '--TMP_DIR' + valueFrom: |- + ${ + if(inputs.temporary_directory) + return inputs.temporary_directory; + return runtime.tmpdir + } +requirements: + - class: ResourceRequirement + ramMin: 17000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/gatk:4.1.8.1' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 
'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': picard + 'doap:revision': 4.1.8.1 diff --git a/postprocessing_variant_calls/0.1.4/container/README.md b/postprocessing_variant_calls/0.1.4/container/README.md new file mode 100644 index 00000000..719e61ed --- /dev/null +++ b/postprocessing_variant_calls/0.1.4/container/README.md @@ -0,0 +1,3 @@ +Dockerfile available via the postprocessing_variant_calls repository: https://github.com/msk-access/postprocessing_variant_calls/blob/main/Dockerfile + +Registry containing image also housed with postprocessing_variant_calls repository: https://github.com/msk-access/postprocessing_variant_calls/pkgs/container/postprocessing_variant_calls diff --git a/postprocessing_variant_calls/0.1.4/example_input_case-control_filter.yml b/postprocessing_variant_calls/0.1.4/example_input_case-control_filter.yml new file mode 100644 index 00000000..5750edfb --- /dev/null +++ b/postprocessing_variant_calls/0.1.4/example_input_case-control_filter.yml @@ -0,0 +1,8 @@ +inputVCF: {class: File, path: path/tests/case-controll_test.vcf} +tsampleName: "tumorSampleName" +alleledepth: 1 +totalDepth: 20 +tnRatio: 1 +variantFraction: 5e-05 +minQual: 0 +filterGermline: False diff --git a/postprocessing_variant_calls/0.1.4/example_input_single_filter.yml b/postprocessing_variant_calls/0.1.4/example_input_single_filter.yml new file mode 100644 index 00000000..bbe015ee --- /dev/null +++ b/postprocessing_variant_calls/0.1.4/example_input_single_filter.yml @@ -0,0 +1,8 @@ +inputVCF: {class: File, path: path/tests/single_test.vcf} +tsampleName: "tumorSampleName" +alleledepth: 1 +totalDepth: 20 +tnRatio: 1
+variantFraction: 5e-05 +minQual: 0 +filterGermline: False diff --git a/postprocessing_variant_calls/0.1.4/pv_vardict_case-control_filter.cwl b/postprocessing_variant_calls/0.1.4/pv_vardict_case-control_filter.cwl new file mode 100644 index 00000000..961f1002 --- /dev/null +++ b/postprocessing_variant_calls/0.1.4/pv_vardict_case-control_filter.cwl @@ -0,0 +1,115 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - pv +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? + - id: inputVCF + type: File + inputBinding: + position: 0 + prefix: '-i' + shellQuote: false + doc: Input vardict vcf to be filtered + - id: tsampleName + type: string + inputBinding: + position: 1 + prefix: '--tsampleName' + shellQuote: false + doc: Name of the tumor Sample + - id: alleledepth + type: int + inputBinding: + position: 2 + prefix: '-ad' + shellQuote: false + - id: totalDepth + type: int? + inputBinding: + position: 2 + prefix: '-dp' + shellQuote: false + doc: Tumor total depth threshold + - id: tnRatio + type: int? + inputBinding: + position: 2 + prefix: '-tnr' + shellQuote: false + doc: Tumor-Normal variant fraction ratio threshold + - id: variantFraction + type: float? + inputBinding: + position: 2 + prefix: '-vf' + shellQuote: false + doc: Tumor variant fraction threshold + - id: minQual + type: int? + inputBinding: + position: 2 + prefix: '-mq' + doc: Minimum variant call quality + - id: filterGermline + type: boolean? 
+ inputBinding: + position: 2 + prefix: '-fg' + doc: Whether to remove calls without 'somatic' status +outputs: + - id: txt + type: File + outputBinding: + glob: '*_STDfilter.txt' + - id: vcf_complex + type: File + outputBinding: + glob: '*_STDfilter_complex.vcf' + - id: vcf + type: File + outputBinding: + glob: '*_STDfilter.vcf' +arguments: + - vardict + - case-control + - filter +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 40000 + coresMin: 4 + - class: InlineJavascriptRequirement +hints: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/postprocessing_variant_calls:0.1.4' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': postprocessing_variant_calls + 'doap:revision': 0.0.1 diff --git a/postprocessing_variant_calls/0.1.4/pv_vardict_single_filter.cwl b/postprocessing_variant_calls/0.1.4/pv_vardict_single_filter.cwl new file mode 100644 index 00000000..a910a4d2 --- /dev/null +++ b/postprocessing_variant_calls/0.1.4/pv_vardict_single_filter.cwl @@ -0,0 +1,115 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +baseCommand: + - pv +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: number_of_threads + type: int? 
+ - id: inputVCF + type: File + inputBinding: + position: 0 + prefix: '-i' + shellQuote: false + doc: Input vardict vcf to be filtered + - id: tsampleName + type: string + inputBinding: + position: 1 + prefix: '--tsampleName' + shellQuote: false + doc: Name of the tumor Sample + - id: alleledepth + type: int + inputBinding: + position: 2 + prefix: '-ad' + shellQuote: false + - id: totalDepth + type: int? + inputBinding: + position: 2 + prefix: '-dp' + shellQuote: false + doc: Tumor total depth threshold + - id: tnRatio + type: int? + inputBinding: + position: 2 + prefix: '-tnr' + shellQuote: false + doc: Tumor-Normal variant fraction ratio threshold + - id: variantFraction + type: float? + inputBinding: + position: 2 + prefix: '-vf' + shellQuote: false + doc: Tumor variant fraction threshold + - id: minQual + type: int? + inputBinding: + position: 2 + prefix: '-mq' + doc: Minimum variant call quality + - id: filterGermline + type: boolean? + inputBinding: + position: 2 + prefix: '-fg' + doc: Whether to remove calls without 'somatic' status +outputs: + - id: txt + type: File + outputBinding: + glob: '*_STDfilter.txt' + - id: vcf_complex + type: File + outputBinding: + glob: '*_STDfilter_complex.vcf' + - id: vcf + type: File + outputBinding: + glob: '*_STDfilter.vcf' +arguments: + - vardict + - single + - filter +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 40000 + coresMin: 4 + - class: InlineJavascriptRequirement +hints: + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/postprocessing_variant_calls:0.1.4' +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:buehlere@mskcc.org' + 'foaf:name': Eric Buehler + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan 
Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': postprocessing_variant_calls + 'doap:revision': 0.0.1 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..946e86c6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +toil-ionox0[cwl]==0.0.7 +pytz +typing==3.7.4 + +# From fixing pkg_resources.ContextualVersionConflict: +ruamel.yaml==0.15.77 + +# From requirements_dev +pip==21.1 +bumpversion==0.5.3 +wheel==0.32.1 +watchdog==0.9.0 +flake8==3.5.0 +tox==3.5.2 +coverage==4.5.1 +Sphinx==1.8.1 +twine==1.12.1 +pytest==3.8.2 +pytest-runner==4.2 +coloredlogs==10.0.0 diff --git a/requirements_p2.7.txt b/requirements_p2.7.txt new file mode 100644 index 00000000..24958578 --- /dev/null +++ b/requirements_p2.7.txt @@ -0,0 +1,14 @@ +toil-ionox0[cwl]==0.0.7 +pytz==2019.2 +typing==3.7.4.1 +ruamel.yaml==0.15.77 +bumpversion==0.5.3 +watchdog==0.9.0 +flake8==3.7.8 +tox==3.14.0 +coverage==4.5.4 +twine==1.15.0 +pytest==4.6.5 +pytest-runner==5.1 +coloredlogs==10.0 +pathlib2==2.3.2 diff --git a/samtools-merge_1.9/samtools-merge_1.9.cwl b/samtools-merge_1.9/samtools-merge_1.9.cwl index 5746b362..b7b5fbbc 100644 --- a/samtools-merge_1.9/samtools-merge_1.9.cwl +++ b/samtools-merge_1.9/samtools-merge_1.9.cwl @@ -11,7 +11,7 @@ inputs: position: 2 doc: Input array containing files to be merged outputs: - - id: output_file + - id: samtools_merge_bam type: File outputBinding: glob: '*merged.bam' diff --git a/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl b/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl index eb0eaeb9..d3ce72f1 100644 --- a/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl +++ b/samtools_sort_1.3.1/samtools_sort_1.3.1.cwl @@ -1,6 +1,5 @@ class: CommandLineTool cwlVersion: v1.0 -$namespaces: baseCommand: - samtools - sort @@ -43,7 +42,7 @@ inputs: position: 0 prefix: '-O' outputs: - - id: output_file + - id: samtools_sort_bam type: File outputBinding: glob: '$(inputs.input.basename.replace(''bam'', ''sorted.bam''))' diff --git 
a/samtools_view_1.3.1/samtools_view_1.3.1.cwl b/samtools_view_1.3.1/samtools_view_1.3.1.cwl index 87616cff..6c738f64 100644 --- a/samtools_view_1.3.1/samtools_view_1.3.1.cwl +++ b/samtools_view_1.3.1/samtools_view_1.3.1.cwl @@ -197,7 +197,7 @@ inputs: position: 0 prefix: '-O' outputs: - - id: output_bam + - id: samtools_view_bam type: File outputBinding: glob: '$(inputs.input.basename.replace(''sam'', ''bam''))' diff --git a/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl b/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl index e334ca81..9d75bfe9 100644 --- a/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl +++ b/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl @@ -26,7 +26,7 @@ inputs: position: 0 prefix: '--output' outputs: - - id: interval_files + - id: gatk_scatter_intervals_interval_files type: 'File[]' outputBinding: glob: $(inputs.output)/*.interval_list diff --git a/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl b/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl new file mode 100644 index 00000000..07d00699 --- /dev/null +++ b/sequence_qc/0.2.2/sequence_qc_0.2.2.cwl @@ -0,0 +1,150 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: calculate_noise_0_2_2 +baseCommand: + - calculate_noise +inputs: + - id: reference + type: File + inputBinding: + position: 0 + prefix: --ref_fasta + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta, containing all regions in bed_file + - id: bam_file + type: File + inputBinding: + position: 0 + prefix: --bam_file + secondaryFiles: + - ^.bai + doc: >- + Path to BAM file for calculating noise [required] + - id: bed_file + type: File + inputBinding: + position: 0 + prefix: --bed_file + doc: >- + Path to BED file containing regions over which to calculate noise [required] + - id: sample_id + type: string + inputBinding: + position: 0 + prefix: 
--sample_id + doc: >- + Prefix to include in all output file names + - id: threshold + type: float? + inputBinding: + position: 0 + prefix: --threshold + doc: >- + Alt allele frequency past which to ignore positions from the calculation. + - id: truncate + type: int? + inputBinding: + position: 0 + prefix: --truncate + doc: >- + Whether to exclude trailing bases from reads that only partially overlap the bed file (0 or 1) + - id: min_mapq + type: int? + inputBinding: + position: 0 + prefix: --min_mapq + doc: >- + Exclude reads with a lower mapping quality + - id: min_basq + type: int? + inputBinding: + position: 0 + prefix: --min_basq + doc: >- + Exclude bases with a lower base quality + - id: max_depth + type: int? + inputBinding: + position: 0 + prefix: --max_depth + doc: >- + Maximum read depth for calculation +outputs: + - id: sequence_qc_pileup + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'pileup.tsv' + } + - id: sequence_qc_noise_positions + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_positions.tsv' + } + - id: sequence_qc_noise_acgt + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_acgt.tsv' + } + - id: sequence_qc_noise_n + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_n.tsv' + } + - id: sequence_qc_noise_del + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + 'noise_del.tsv' + } + - id: sequence_qc_figures + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise.html' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/sequence_qc:0.2.2' + - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 
'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': sesquence_qc + 'doap:revision': 0.2.2 diff --git a/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl new file mode 100644 index 00000000..16405ef6 --- /dev/null +++ b/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl @@ -0,0 +1,150 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: calculate_noise_0_2_3 +baseCommand: + - calculate_noise +inputs: + - id: reference + type: File + inputBinding: + position: 0 + prefix: --ref_fasta + secondaryFiles: + - ^.fasta.fai + doc: >- + Path to reference fasta, containing all regions in bed_file + - id: bam_file + type: File + inputBinding: + position: 0 + prefix: --bam_file + secondaryFiles: + - ^.bai + doc: >- + Path to BAM file for calculating noise [required] + - id: bed_file + type: File + inputBinding: + position: 0 + prefix: --bed_file + doc: >- + Path to BED file containing regions over which to calculate noise [required] + - id: sample_id + type: string + inputBinding: + position: 0 + prefix: --sample_id + doc: >- + Prefix to include in all output file names + - id: threshold + type: float? + inputBinding: + position: 0 + prefix: --threshold + doc: >- + Alt allele frequency past which to ignore positions from the calculation. + - id: truncate + type: int? + inputBinding: + position: 0 + prefix: --truncate + doc: >- + Whether to exclude trailing bases from reads that only partially overlap the bed file (0 or 1) + - id: min_mapq + type: int? 
+ inputBinding: + position: 0 + prefix: --min_mapq + doc: >- + Exclude reads with a lower mapping quality + - id: min_basq + type: int? + inputBinding: + position: 0 + prefix: --min_basq + doc: >- + Exclude bases with a lower base quality +outputs: + - id: sequence_qc_pileup + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_pileup.tsv' + } + - id: sequence_qc_noise_positions + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_positions.tsv' + } + - id: sequence_qc_noise_by_substitution + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_by_substitution.tsv' + } + - id: sequence_qc_noise_acgt + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_acgt.tsv' + } + - id: sequence_qc_noise_n + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_n.tsv' + } + - id: sequence_qc_noise_del + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise_del.tsv' + } + - id: sequence_qc_figures + type: File + outputBinding: + glob: |- + ${ + return inputs.sample_id + '_noise.html' + } +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/sequence_qc:0.2.3' + - class: InlineJavascriptRequirement + - class: EnvVarRequirement + envDef: + LC_ALL: en_US.utf-8 + LANG: en_US.utf-8 +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:murphyc4@mskcc.org' + 'foaf:name': Charlie Murphy + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': sesquence_qc + 'doap:revision': 0.2.3 diff --git a/sequence_qc/README.md 
b/sequence_qc/README.md new file mode 100644 index 00000000..c7b71103 --- /dev/null +++ b/sequence_qc/README.md @@ -0,0 +1,61 @@ +# CWL and Dockerfile for running sequence_qc + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| sequence_qc | 0.1.19 | | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner sequence_qc_0.1.19.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/sequence_qc_0.1.19/sequence_qc_0.1.19.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir tool_toil_log +> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . 
--writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/sequence_qc_0.1.19/sequence_qc_0.1.19.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr & +``` + +### Usage + +```bash +toil-cwl-runner sequence_qc_0.1.19.cwl -h + +usage: sequence_qc_0.1.19.cwl [-h] --reference REFERENCE --bam_file BAM_FILE + --bed_file BED_FILE --sample_id SAMPLE_ID + [--threshold THRESHOLD] [--truncate TRUNCATE] + [--min_mapq MIN_MAPQ] [--min_basq MIN_BASQ] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reference REFERENCE + Path to reference fasta, containing all regions in + bed_file + --bam_file BAM_FILE Path to BAM file for calculating noise [required] + --bed_file BED_FILE Path to BED file containing regions over which to + calculate noise [required] + --sample_id SAMPLE_ID + Prefix to include in all output file names + --threshold THRESHOLD + Alt allele frequency past which to ignore positions + from the calculation. 
+ --truncate TRUNCATE Whether to exclude trailing bases from reads that only + partially overlap the bed file (0 or 1) + --min_mapq MIN_MAPQ Exclude reads with a lower mapping quality + --min_basq MIN_BASQ Exclude bases with a lower base quality +``` diff --git a/sequence_qc/example_inputs.yaml b/sequence_qc/example_inputs.yaml new file mode 100644 index 00000000..04cf48b4 --- /dev/null +++ b/sequence_qc/example_inputs.yaml @@ -0,0 +1,17 @@ +reference: + class: File + metadata: {} + path: /path/to/fasta +bam_file: + class: File + metadata: {} + path: /path/to/bam +bed_file: + class: File + metadata: {} + path: /path/to/bed +sample_id: test_sample_ +threshold: 0.01 +truncate: 1 +min_mapq: 10 +min_basq: 10 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..1c8fad57 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,24 @@ +[bumpversion] +current_version = 1.2.0 +commit = True +tag = True + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' + +[bumpversion:file:cwl_commandlinetools/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +exclude = docs + +[aliases] +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..53b77ff1 --- /dev/null +++ b/setup.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""The setup script.""" + +from setuptools import setup, find_packages + +with open('README.md') as readme_file: + readme = readme_file.read() + +requirements = [ ] + +setup_requirements = ['pytest-runner', ] + +test_requirements = ['pytest>=3', ] + +setup( + author="msk-access", + author_email='msk.access@gmail.com', + python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software 
License', + 'Natural Language :: English', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.7', + ], + description="Central location for CWL CommandLineTools", + install_requires=requirements, + license="Apache Software License 2.0", + long_description=readme + '\n\n', + include_package_data=True, + keywords='cwl-commandlinetools', + name='cwl-commandlinetools', + packages=find_packages(include=['cwl_commandlinetools', 'cwl_commandlinetools.*']), + setup_requires=setup_requirements, + test_suite='tests', + tests_require=test_requirements, + url='https://github.com/msk-access/cwl-commandlinetools', + version='1.2.0', + zip_safe=False, +) diff --git a/test-yamls/msi-test-one.yaml b/test-yamls/msi-test-one.yaml deleted file mode 100644 index 4115b769..00000000 --- a/test-yamls/msi-test-one.yaml +++ /dev/null @@ -1,4 +0,0 @@ -n: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/normal_sample.sorted.md.bqsr.bam} -t: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/tumor_sample.sorted.md.bqsr.bam} -d: {class: File, path: /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.microsatellites.list} -o: "my_output_prefix" diff --git a/test-yamls/msi-test.yaml b/test-yamls/msi-test.yaml deleted file mode 100644 index 5af76e7b..00000000 --- a/test-yamls/msi-test.yaml +++ /dev/null @@ -1,4 +0,0 @@ -normal_bam: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/normal_sample.sorted.md.bqsr.bam} -tumor_bam: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/tumor_sample.sorted.md.bqsr.bam} -msi_file: {class: File, path: /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.microsatellites.list} -output_prefix: "msi_run_prefix" diff --git a/test-yamls/snp-pileup-to-facets.yaml 
b/test-yamls/snp-pileup-to-facets.yaml deleted file mode 100644 index 6bdb254a..00000000 --- a/test-yamls/snp-pileup-to-facets.yaml +++ /dev/null @@ -1,4 +0,0 @@ -bam_normal: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/normal_sample.sorted.md.bqsr.bam} -bam_tumor: {class: File, path: /juno/work/pi/cmopipeline/data/uncategorized/re-run/tumor_sample.sorted.md.bqsr.bam} -tumor_sample_name: tumor_sample -facets_vcf: {class: File, path: /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/GATK/b37/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf } diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..688b77f7 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +"""Unit test package for cwl_commandlinetools.""" diff --git a/tests/test_cwl_commandlinetools.py b/tests/test_cwl_commandlinetools.py new file mode 100644 index 00000000..9ce7b51e --- /dev/null +++ b/tests/test_cwl_commandlinetools.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Tests for `cwl_commandlinetools` package.""" + +import pytest + + +def test_content(): + """Sample pytest test function with the pytest fixture as an argument.""" + print("All good") diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..29dc8a5e --- /dev/null +++ b/tox.ini @@ -0,0 +1,30 @@ +[tox] +envlist = py27, py37 flake8 + +[travis] +python = + 3.7: py37 + 2.7: py27 + +[testenv:flake8] +basepython = python +deps = flake8 +commands = flake8 cwl_commandlinetools + +[testenv:py37] +setenv = + PYTHONPATH = {toxinidir} +deps = + -r{toxinidir}/requirements.txt + +commands = + py.test --capture=sys --basetemp={envtmpdir} tests + +[testenv:py27] +setenv = + PYTHONPATH = {toxinidir} +deps = + -r{toxinidir}/requirements_p2.7.txt + +commands = + py.test --capture=sys --basetemp={envtmpdir} tests \ No newline at end of file diff --git a/trim_galore_0.6.2/trim_galore_0.6.2.cwl 
b/trim_galore_0.6.2/trim_galore_0.6.2.cwl index 5b91cbbc..95d53149 100644 --- a/trim_galore_0.6.2/trim_galore_0.6.2.cwl +++ b/trim_galore_0.6.2/trim_galore_0.6.2.cwl @@ -24,9 +24,6 @@ inputs: inputBinding: position: 0 prefix: '--cores' - - id: path_to_trim_galore - type: File? - doc: Path to trim_galore executable file - id: adapter type: string? inputBinding: @@ -163,7 +160,7 @@ requirements: ramMin: 8000 coresMin: 4 - class: DockerRequirement - dockerPull: 'mskcc/trim_galore:0.1.0' + dockerPull: 'ghcr.io/msk-access/trim_galore:0.6.2' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/utilities_ubuntu_18.04/README.md b/utilities_ubuntu_18.04/README.md deleted file mode 100644 index a13033b0..00000000 --- a/utilities_ubuntu_18.04/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# CWL and Dockerfile for running utilites from Ubuntu 18.04 - -## Version of tools in docker image (/container/Dockerfile) - -| Tool | Version | Location | -|--- |--- |--- | -| Ubuntu base image | 18.04 | - | - -## CWL - -- CWL specification 1.0 -- Use example_inputs_toolname.yaml to see the inputs to the cwl -- Example Command using [toil](https://toil.readthedocs.io): - -```bash - > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml -``` diff --git a/utilities_ubuntu_18.04/example_inputs_mv.yaml b/utilities_ubuntu_18.04/example_inputs_mv.yaml new file mode 100644 index 00000000..ae4246e9 --- /dev/null +++ b/utilities_ubuntu_18.04/example_inputs_mv.yaml @@ -0,0 +1,6 @@ +force: null +infile: /path/to/source/file +memory_overhead: null +memory_per_job: null +outfile: /path/to/destination/file +verbose: null diff --git a/utilities_ubuntu_18.04/mv.cwl b/utilities_ubuntu_18.04/mv.cwl new file mode 100644 index 00000000..401b5b34 --- /dev/null +++ b/utilities_ubuntu_18.04/mv.cwl @@ -0,0 +1,70 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 
'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: mv +baseCommand: + - mv +inputs: + - id: memory_per_job + type: int? + doc: Memory per job in megabytes + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: infile + type: File + inputBinding: + position: 1 + doc: 'Source with path that needs to be moved' + - id: outfile + type: string + inputBinding: + position: 2 + doc: 'Target destination for the source' + - id: force + type: boolean? + inputBinding: + position: 0 + prefix: '-f' + doc: 'Do not prompt for confirmation before overwriting the destination path.' + - id: verbose + type: boolean? + inputBinding: + position: 0 + prefix: '-v' + doc: 'Cause mv to be verbose, showing files after they are moved.' +outputs: + - id: out + type: File + outputBinding: + glob: $(inputs.outfile) +label: mv +requirements: + - class: ResourceRequirement + ramMin: 2000 + coresMin: 1 + - class: DockerRequirement + dockerPull: 'ubuntu:18.04' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': mv + 'doap:revision': 18.04 \ No newline at end of file diff --git a/vardictjava/container/Dockerfile b/vardictjava/container/Dockerfile new file mode 100644 index 00000000..cd6c9371 --- /dev/null +++ b/vardictjava/container/Dockerfile @@ -0,0 +1,22 @@ +FROM alpine:3.8 + +LABEL maintainer="Nikhil Kumar (kumarn1@mskcc.org)" \ + version.image="1.0.0" \ + version.vardict="1.8.2" \ + version.r="3.5.1" \ + version.perl="5.26.2-r1" \ + version.alpine="3.8" \ + 
source.vardict="https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2" \ + source.r="https://pkgs.alpinelinux.org/package/edge/community/x86/R" \ + source.perl="https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl" + +ENV VARDICT_VERSION 1.8.2 + +RUN apk add --update \ + && apk add ca-certificates openssl bash perl \ + && apk add openjdk8-jre-base \ + && apk add R R-dev \ + && cd /tmp && wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v${VARDICT_VERSION}/VarDict-${VARDICT_VERSION}.zip \ + && unzip VarDict-${VARDICT_VERSION}.zip \ + && mv /tmp/VarDict-${VARDICT_VERSION} /usr/bin/vardict \ + && rm -rf /var/cache/apk/* /tmp/* diff --git a/vardictjava/v1.8.2/example_inputs.yaml b/vardictjava/v1.8.2/example_inputs.yaml new file mode 100644 index 00000000..81f4d3b0 --- /dev/null +++ b/vardictjava/v1.8.2/example_inputs.yaml @@ -0,0 +1,18 @@ +G: + class: File + path: "/path/to/ref/file" +b: + class: File + path: "/path/to/bam/file" +bedfile: + class: File + path: "/path/to/bed/file" +c: "1" +f: "0" +S: "2" +E: "3" +g: "5" +th: "4" +vcf: "output.vcf" +N: "name" +f_1: "0" diff --git a/vardictjava/v1.8.2/teststrandbias.cwl b/vardictjava/v1.8.2/teststrandbias.cwl new file mode 100644 index 00000000..95b26f49 --- /dev/null +++ b/vardictjava/v1.8.2/teststrandbias.cwl @@ -0,0 +1,59 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: teststrandbias +baseCommand: + - Rscript + - /usr/bin/vardict/bin/teststrandbias.R +inputs: + - id: input_vardict + type: File + doc: Standard input from VarDict +outputs: + - id: output_var + type: File? 
+ outputBinding: + glob: output_teststrandbias.var +requirements: + - class: ResourceRequirement + ramMin: 12000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/vardictjava:1.8.2' + - class: InlineJavascriptRequirement +stdin: $(inputs.input_vardict.path) +stdout: output_teststrandbias.var +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vardictjava/v1.8.2/var_to_vcf.cwl b/vardictjava/v1.8.2/var_to_vcf.cwl new file mode 100644 index 00000000..13c5ad94 --- /dev/null +++ b/vardictjava/v1.8.2/var_to_vcf.cwl @@ -0,0 +1,98 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: vardict_var2vcf +baseCommand: + - perl + - /usr/bin/vardict/bin/var2vcf_valid.pl +inputs: + - id: case_sample_name + type: string + doc: The case sample name to be used directly. + - id: control_sample_name + type: string? + doc: The control sample name to be used directly. + - id: filter_variants + type: boolean? 
+ inputBinding: + position: 0 + prefix: '-S' + doc: If set variants that didnt pass filters will not be present in VCF file. + - id: minimum_allele_frequency + type: float? + inputBinding: + position: 0 + prefix: '-f' + doc: 'The threshold for allele frequency, default - 0.05 or 5%%' + - id: input_vcf + type: File + - id: output_vcf + type: string? + doc: output vcf file +outputs: + - id: output + type: File + outputBinding: + glob: |- + ${ + if(inputs.output_vcf) + return inputs.output_vcf; + return inputs.case_sample_name + "_vardict.vcf" + } +arguments: + - position: 0 + prefix: '-N' + valueFrom: |- + ${ + if(inputs.control_sample_name) + return inputs.case_sample_name + "|" + inputs.control_sample_name; + return inputs.case_sample_name + } +requirements: + - class: ResourceRequirement + ramMin: 32000 + coresMin: 4 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/vardictjava:1.8.2' + - class: InlineJavascriptRequirement +stdin: $(inputs.input_vcf.path) +stdout: |- + ${ + if(inputs.output_vcf) + return inputs.output_vcf; + return inputs.case_sample_name + "_vardict.vcf" + } +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava 
+ 'doap:revision': 1.8.2 diff --git a/vardictjava/v1.8.2/vardict_app.cwl b/vardictjava/v1.8.2/vardict_app.cwl new file mode 100644 index 00000000..7db17d6c --- /dev/null +++ b/vardictjava/v1.8.2/vardict_app.cwl @@ -0,0 +1,125 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: vardict +baseCommand: + - /usr/bin/vardict/bin/VarDict +inputs: + - id: bed_file_column_for_region_end + type: string? + inputBinding: + position: 0 + prefix: '-E' + doc: 'The column for region end, e.g. gene end' + - id: reference_fasta + type: File + inputBinding: + position: 0 + prefix: '-G' + doc: The reference fasta. Should be indexed (.fai) + secondaryFiles: + - .fai + - id: sample_name + type: string? + inputBinding: + position: 0 + prefix: '-N' + doc: The sample name of the case to be used directly. Will overwrite -n option + - id: bed_file_column_for_region_start + type: string? + inputBinding: + position: 0 + prefix: '-S' + doc: 'The column for region start, e.g. gene start' + - id: input_bam_case + type: File + doc: The indexed BAM file for case + secondaryFiles: + - ^.bai + - id: input_bam_control + type: File? + doc: The indexed BAM file for control + secondaryFiles: + - ^.bai + - id: bedfile + type: File? + inputBinding: + position: 1 + - id: bed_file_column_for_chromsome + type: string? + inputBinding: + position: 0 + prefix: '-c' + doc: The column for chromosome + - id: allele_frequency_threshold + type: string? + inputBinding: + position: 0 + prefix: '-f' + doc: 'The threshold for allele frequency, default - 0.01 or 1%%' + - id: bed_file_column_for_gene_name + type: string? 
+ inputBinding: + position: 0 + prefix: '-g' + doc: 'The column for gene name, or segment annotation' +outputs: + - id: output + type: File + outputBinding: + glob: vardict_app_output.vcf +arguments: + - position: 1 + prefix: '-b' + valueFrom: |- + ${ + if(inputs.input_bam_control) + return inputs.input_bam_case + "|" + inputs.input_bam_control; + return inputs.input_bam_case + + } +requirements: + - class: EnvVarRequirement + envDef: + JAVA_OPTS: '"-Xms8g" "-Xmx95g"' + - class: ResourceRequirement + ramMin: 64000 + coresMin: 4 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/vardictjava:1.8.2' + - class: InlineJavascriptRequirement +stdout: vardict_app_output.vcf +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:shahr2@mskcc.org' + 'foaf:name': Ronak Shah + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vardictjava/v1.8.2/vardict_single_sample.cwl b/vardictjava/v1.8.2/vardict_single_sample.cwl new file mode 100644 index 00000000..fd0b0d3b --- /dev/null +++ b/vardictjava/v1.8.2/vardict_single_sample.cwl @@ -0,0 +1,154 @@ +class: Workflow +cwlVersion: v1.0 +id: vardict +label: vardict +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 
'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +inputs: + - id: bedfile + type: File? + 'sbg:x': 0 + 'sbg:y': 426.4375 + - id: input_bam_control + type: File? + secondaryFiles: + - ^.bai + 'sbg:x': 0 + 'sbg:y': 213.21875 + - id: input_bam_case + type: File + secondaryFiles: + - ^.bai + 'sbg:x': 0 + 'sbg:y': 319.828125 + - id: reference_fasta + type: File + secondaryFiles: + - .fai + 'sbg:x': 0 + 'sbg:y': 106.609375 + - id: sample_name + type: string + 'sbg:x': 0 + 'sbg:y': 0 + - id: bed_file_column_for_region_start + type: string? + 'sbg:x': 0 + 'sbg:y': 533.109375 + - id: bed_file_column_for_region_end + type: string? + 'sbg:x': 0 + 'sbg:y': 639.84375 + - id: bed_file_column_for_gene_name + type: string? + 'sbg:x': 0 + 'sbg:y': 746.578125 + - id: bed_file_column_for_chromsome + type: string? + 'sbg:x': 0 + 'sbg:y': 853.25 + - id: control_sample_name + type: string? + 'sbg:x': 742.6807250976562 + 'sbg:y': 639.90625 + - id: filter_variants + type: boolean? + 'sbg:x': 742.6807250976562 + 'sbg:y': 533.296875 + - id: minimum_allele_frequency + type: float? 
+ 'sbg:x': 742.6807250976562 + 'sbg:y': 426.625 + - id: output_vcf + type: string + 'sbg:x': 742.6807250976562 + 'sbg:y': 319.953125 +outputs: + - id: output + outputSource: + - var_to_vcf/output + type: File + 'sbg:x': 1332.159423828125 + 'sbg:y': 426.625 +steps: + - id: teststrandbias + in: + - id: input_vardict + source: vardict/output + out: + - id: output_var + run: ./teststrandbias.cwl + 'sbg:x': 742.6807250976562 + 'sbg:y': 213.34375 + - id: var_to_vcf + in: + - id: case_sample_name + source: sample_name + - id: control_sample_name + source: control_sample_name + - id: filter_variants + source: filter_variants + - id: minimum_allele_frequency + source: minimum_allele_frequency + - id: input_vcf + source: teststrandbias/output_var + - id: output_vcf + source: output_vcf + out: + - id: output + run: ./var_to_vcf.cwl + 'sbg:x': 1006.1338500976562 + 'sbg:y': 391.4296875 + - id: vardict + in: + - id: bed_file_column_for_region_end + source: bed_file_column_for_region_end + - id: reference_fasta + source: reference_fasta + - id: sample_name + source: sample_name + - id: bed_file_column_for_region_start + source: bed_file_column_for_region_start + - id: input_bam_case + source: input_bam_case + - id: input_bam_control + source: input_bam_control + - id: bedfile + source: bedfile + - id: bed_file_column_for_chromsome + source: bed_file_column_for_chromsome + - id: bed_file_column_for_gene_name + source: bed_file_column_for_gene_name + out: + - id: output + run: ./vardict_app.cwl + 'sbg:x': 317.125 + 'sbg:y': 363.625 +requirements: [] +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + 'foaf:name': Memorial Sloan Kettering Cancer Center + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - 
class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': Vardictjava + 'doap:revision': 1.8.2 diff --git a/vcf2maf_1.6.17/README.md b/vcf2maf_1.6.17/README.md new file mode 100644 index 00000000..491ef1c3 --- /dev/null +++ b/vcf2maf_1.6.17/README.md @@ -0,0 +1,90 @@ +# CWL and Dockerfile for running vcf2maf v1.6.17 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| alpine:3.8 base image | 3.8 | - | +| vcf2maf | 1.6.17 | https://github.com/mskcc/vcf2maf/archive/v1.6.17.zip | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using [toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.17.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . 
--writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.17.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +usage: toil-cwl-runner vcf2maf_1.6.17.cwl [-h] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --any_allele ANY_ALLELE + When reporting co-located variants, allow mismatched + variant alleles too + --buffer_size BUFFER_SIZE + Number of variants VEP loads at a time; Reduce this + for low memory systems + --cache_version CACHE_VERSION + Version of VEP and its cache to use + --custom_enst CUSTOM_ENST + List of custom ENST IDs that override canonical + selection + --maf_center MAF_CENTER + Variant calling center to report in MAF + --max_filter_ac MAX_FILTER_AC + Use tag common_variant if the filter-vcf reports a + subpopulation AC higher than this + --min_hom_vaf MIN_HOM_VAF + If GT undefined in VCF, minimum allele fraction to + call a variant homozygous + --ncbi_build NCBI_BUILD + Genome build of variants in input + --normal_id NORMAL_ID + Matched_Norm_Sample_Barcode to report in the MAF + --output_maf OUTPUT_MAF + Path to output MAF file + --ref_fasta REF_FASTA + Reference FASTA file + --remap_chain REMAP_CHAIN + Chain file to remap variants to a different assembly + before running VEP + --retain_fmt RETAIN_FMT + Comma-delimited names of FORMAT fields to retain as + extra columns in MAF [] + --retain_info RETAIN_INFO + Comma-delimited names of INFO fields to retain as + extra columns in MAF + --species SPECIES Species of variants in input + --tumor_id TUMOR_ID Tumor_Sample_Barcode to report in the MAF + --vcf_normal_id VCF_NORMAL_ID + Matched normal ID used in VCFs genotype columns + --vcf_tumor_id VCF_TUMOR_ID + Tumor sample ID used in VCFs genotype columns + --vep_data VEP_DATA VEPs base cache/plugin directory + --vep_forks VEP_FORKS + Number of 
forked processes to use when running VEP + --vep_path VEP_PATH Folder containing variant_effect_predictor.pl or vep + binary + + +``` diff --git a/vcf2maf_1.6.17/container/Dockerfile b/vcf2maf_1.6.17/container/Dockerfile new file mode 100644 index 00000000..61af253c --- /dev/null +++ b/vcf2maf_1.6.17/container/Dockerfile @@ -0,0 +1,94 @@ +################## BASE IMAGE ###################### +FROM alpine:3.8 + +################## ARGUMENTS/Environments ########## + +ARG BUILD_DATE +ARG BUILD_VERSION=1.0.0 +ARG VCF2MAF_VERSION=1.6.17 +ARG HTSLIB_VERSION=1.9 +ARG SAMTOOLS_VERSION=1.9 +ARG BCFTOOLS_VERSION=1.9 + +################## METADATA ######################## +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)" +LABEL org.opencontainers.image.created=${BUILD_DATE} +LABEL org.opencontainers.image.version=${BUILD_VERSION} +LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} +LABEL org.opencontainers.image.version.vep="86" +LABEL org.opencontainers.image.version.htslib=${HTSLIB_VERSION} +LABEL org.opencontainers.image.version.bcftools=${BCFTOOLS_VERSION} +LABEL org.opencontainers.image.version.samtools=${SAMTOOLS_VERSION} +LABEL org.opencontainers.image.version.perl="5.26.2-r1" +LABEL org.opencontainers.image.version.alpine="3.8" +LABEL org.opencontainers.image.source.vcf2maf="https://github.com/mskcc/vcf2maf/releases/tag/v${VCF2MAF_VERSION}" +LABEL org.opencontainers.image.source.htslib="https://github.com/samtools/htslib/releases/tag/${HTSLIB_VERSION}" +LABEL org.opencontainers.image.source.bcftools="https://github.com/samtools/bcftools/releases/tag/${BCFTOOLS_VERSION}" +LABEL org.opencontainers.image.source.samtools="https://github.com/samtools/samtools/releases/tag/${SAMTOOLS_VERSION}" + + +LABEL org.opencontainers.image.description="This container uses alpine3.8 as the base image to build vcf2maf version ${VCF2MAF_VERSION}" + +################## INSTALL ########################## 
+ +ENV VCF2MAF_VERSION=${VCF2MAF_VERSION} +ENV VEP_VERSION=86 +ENV VEP_DATA=/var/cache +ENV VEP_PATH=/usr/bin/vep +ENV HTSLIB_VERSION=${HTSLIB_VERSION} +ENV SAMTOOLS_VERSION=${SAMTOOLS_VERSION} +ENV BCFTOOLS_VERSION=${BCFTOOLS_VERSION} + + +RUN apk add --update \ + # install all the build-related tools + && apk add ca-certificates gcc g++ make git curl curl-dev wget gzip perl perl-dev musl-dev libgcrypt-dev zlib-dev bzip2-dev xz-dev ncurses-dev rsync \ + # install system packages and Perl modules + && apk add expat-dev libressl-dev perl-net-ssleay mariadb-dev libxml2-dev perl-dbd-mysql perl-module-metadata perl-gd perl-db_file perl-archive-zip perl-cgi perl-dbi perl-encode perl-time-hires perl-file-copy-recursive perl-json \ + # install cpanminus + && curl -L https://cpanmin.us | perl - App::cpanminus \ + # install perl libraries that VEP will need + && cpanm --notest LWP LWP::Simple LWP::Protocol::https Archive::Extract Archive::Tar Archive::Zip \ + CGI DBI Encode version Time::HiRes File::Copy::Recursive Perl::OSType Module::Metadata \ + Sereal JSON Bio::Root::Version Set::IntervalTree PerlIO::gzip \ + # install htslib (for vep) + && cd /tmp && wget https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 \ + && tar xvjf htslib-${HTSLIB_VERSION}.tar.bz2 \ + && cd /tmp/htslib-${HTSLIB_VERSION} \ + && ./configure \ + && make && make install \ + # download/unzip vep + && cd /tmp && wget https://github.com/Ensembl/ensembl-tools/archive/release/${VEP_VERSION}.zip \ + && unzip ${VEP_VERSION} \ + # install vep + && cd /tmp/ensembl-tools-release-${VEP_VERSION}/scripts/variant_effect_predictor \ + && perl INSTALL.pl --AUTO a 2>&1 | tee install.log \ + && cd /tmp && mv /tmp/ensembl-tools-release-${VEP_VERSION}/scripts/variant_effect_predictor /usr/bin/vep \ + # download and unpack VEP's offline cache + && mkdir -p ${VEP_DATA} \ + && rsync -zvh 
rsync://ftp.ensembl.org/ensembl/pub/release-86/variation/VEP/homo_sapiens_vep_86_GRCh37.tar.gz ${VEP_DATA} \ + && tar -zxf ${VEP_DATA}/homo_sapiens_vep_86_GRCh37.tar.gz -C ${VEP_DATA} \ + && cd /usr/bin/vep \ + && perl convert_cache.pl --species homo_sapiens --version 86_GRCh37 --dir ${VEP_DATA} \ + && rm ${VEP_DATA}/homo_sapiens_vep_86_GRCh37.tar.gz \ + # install bcftools + && cd /tmp && wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VERSION}/bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ + && tar xvjf bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ + && cd /tmp/bcftools-${BCFTOOLS_VERSION} \ + && make HTSDIR=/tmp/htslib-${HTSLIB_VERSION} && make install \ + # install samtools + && cd /tmp && wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && tar xvjf samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && cd /tmp/samtools-${SAMTOOLS_VERSION} \ + && ./configure --with-htslib=/tmp/htslib-${HTSLIB_VERSION} \ + && make && make install \ + # install vcf2maf + && cd /tmp && wget -O vcf2maf-v${VCF2MAF_VERSION} https://github.com/mskcc/vcf2maf/archive/v${VCF2MAF_VERSION}.zip \ + && unzip vcf2maf-v${VCF2MAF_VERSION} \ + && mkdir -p /usr/bin/vcf2maf/ \ + && cp -r vcf2maf-${VCF2MAF_VERSION}/* /usr/bin/vcf2maf/ \ + # clean up + && rm -rf /var/cache/apk/* /tmp/* \ + && chmod +x /usr/bin/runscript.sh \ + && exec /run_test.sh diff --git a/vcf2maf_1.6.17/example_inputs.yaml b/vcf2maf_1.6.17/example_inputs.yaml new file mode 100644 index 00000000..3c32cc16 --- /dev/null +++ b/vcf2maf_1.6.17/example_inputs.yaml @@ -0,0 +1,20 @@ +input_vcf: + class: File + path: /path/to/vcf_file +tumor_id: tumor_sample_name +vcf_tumor_id: tumor_sample_name +normal_id: normal_sample_name +vcf_normal_id: normal_sample_name +ncbi_build: genome_string +filter_vcf: + class: File + path: /path/to/filter/vcf +vep_data: vep_cache_path_str (/var/cache in container) +ref_fasta: + class: File + path: /path/to/ref/fasta +vep_path: vep_path 
(/usr/bin/vep in container) +custom_enst: custom_enst_str +retain_info: retain_info_str +retain_fmt: retain_fmt_str +output_maf: output_maf_str \ No newline at end of file diff --git a/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl new file mode 100644 index 00000000..eca0fb95 --- /dev/null +++ b/vcf2maf_1.6.17/vcf2maf_1.6.17.cwl @@ -0,0 +1,217 @@ + +class: CommandLineTool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + doap: http://usefulinc.com/ns/doap# + foaf: http://xmlns.com/foaf/0.1/ + sbg: https://www.sevenbridges.com/ + +id: vcf2maf_v1.6.17 + +baseCommand: + - perl + - /usr/bin/vcf2maf/vcf2maf.pl + +inputs: + + memory_per_job: + type: ["null",int] + doc: Memory per job in megabytes + + memory_overhead: + type: ["null",int] + doc: Memory overhead per job in megabytes + + cache_version: + type: + - 'null' + - string + default: '86' + doc: Version of VEP and its cache to use + inputBinding: + prefix: --cache-version + species: + type: + - 'null' + - string + default: homo_sapiens + doc: Species of variants in input + inputBinding: + prefix: --species + ncbi_build: + type: + - 'null' + - string + default: GRCh37 + doc: Genome build of variants in input + inputBinding: + prefix: --ncbi-build + ref_fasta: + type: ['null', File] + doc: Reference FASTA file + inputBinding: + prefix: --ref-fasta + maf_center: + type: ['null', string] + default: mskcc.org + doc: Variant calling center to report in MAF + inputBinding: + prefix: --maf-center + output_maf: + type: ['null', string] + doc: Path to output MAF file + inputBinding: + prefix: --output-maf + max_filter_ac: + type: + - 'null' + - int + default: 10 + doc: Use tag common_variant if the filter-vcf reports a subpopulation AC higher + than this + inputBinding: + prefix: --max-filter-ac + min_hom_vaf: + type: + - 'null' + - float + default: 0.7 + doc: If GT undefined in VCF, minimum allele fraction to call a variant homozygous + inputBinding: + prefix: --min-hom-vaf + 
remap_chain: + type: ['null', string] + doc: Chain file to remap variants to a different assembly before running VEP + inputBinding: + prefix: --remap-chain + normal_id: + type: ['null', string] + default: NORMAL + doc: Matched_Norm_Sample_Barcode to report in the MAF + inputBinding: + prefix: --normal-id + buffer_size: + type: + - 'null' + - int + default: 5000 + doc: Number of variants VEP loads at a time; Reduce this for low memory systems + inputBinding: + prefix: --buffer-size + custom_enst: + type: ['null', string] + doc: List of custom ENST IDs that override canonical selection + inputBinding: + prefix: --custom-enst + vcf_normal_id: + type: ['null', string] + default: NORMAL + doc: Matched normal ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-normal-id + vep_path: + type: ['null', string] + doc: Folder containing variant_effect_predictor.pl or vep binary + inputBinding: + prefix: --vep-path + vep_data: + type: ['null', string] + doc: VEPs base cache/plugin directory + inputBinding: + prefix: --vep-data + any_allele: + type: ['null', string] + doc: When reporting co-located variants, allow mismatched variant alleles too + inputBinding: + prefix: --any-allele + input_vcf: + type: + - string + - File + doc: Path to input file in VCF format + inputBinding: + prefix: --input-vcf + vep_forks: + type: + - 'null' + - int + default: 4 + doc: Number of forked processes to use when running VEP + inputBinding: + prefix: --vep-forks + vcf_tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor sample ID used in VCFs genotype columns + inputBinding: + prefix: --vcf-tumor-id + tumor_id: + type: ['null', string] + default: TUMOR + doc: Tumor_Sample_Barcode to report in the MAF + inputBinding: + prefix: --tumor-id + filter_vcf: + type: + - 'null' + - string + - File + doc: The non-TCGA VCF from exac.broadinstitute.org + inputBinding: + prefix: --filter-vcf + secondaryFiles: + - .tbi + retain_info: + type: ['null', string] + doc: Comma-delimited 
names of INFO fields to retain as extra columns in MAF + inputBinding: + prefix: --retain-info + retain_fmt: + type: ['null', string] + doc: Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] + inputBinding: + prefix: --retain-fmt +outputs: + vcf2maf_maf: + type: File + outputBinding: + glob: | + ${ + if (inputs.output_maf) + return inputs.output_maf; + return null; + } + +arguments: +- valueFrom: "$(runtime.tmpdir)" + prefix: '--tmp-dir' + shellQuote: false + +requirements: + InlineJavascriptRequirement: {} + ResourceRequirement: + ramMin: 8000 + coresMin: 2 + DockerRequirement: + dockerPull: mskaccess/vcf2maf:1.6.17 + +dct:contributor: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +dct:creator: + - class: foaf:Organization + foaf:member: + - class: foaf:Person + foaf:mbox: mailto:kumarn1@mskcc.org + foaf:name: Nikhil Kumar + foaf:name: Memorial Sloan Kettering Cancer Center +doap:release: + - class: doap:Version + doap:name: vcf2maf + doap:revision: 1.6.17 diff --git a/vcf2maf_1.6.21/README.md b/vcf2maf_1.6.21/README.md new file mode 100644 index 00000000..2db56662 --- /dev/null +++ b/vcf2maf_1.6.21/README.md @@ -0,0 +1,105 @@ +# CWL and Dockerfile for running vcf2maf v1.6.21 + +## Version of tools in docker image (/container/Dockerfile) + +| Tool | Version | Location | +|--- |--- |--- | +| clearlinux (base image) | - | - | +| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip | +| VEP | 105 | - | - | +|MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh +|BCFTOOLS_VERSION | 1.10.2 | - | - | +|SAMTOOLS_VERSION | 1.10 | - | - | +|VCF2MAF_VERSION | 1.6.21 | - | - | +|HTSLIB_VERSION | 1.10.2 | - | - | + +## CWL + +- CWL specification 1.0 +- Use example_inputs.yaml to see the inputs to the cwl +- Example Command using 
[toil](https://toil.readthedocs.io): + +```bash + > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml +``` + +**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command** + +```bash +#Using CWLTOOL +> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml + +#Using toil-cwl-runner +> mkdir vcf2maf_toil_log +> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr & +``` + +### Usage + +``` +usage: vcf2maf_1.6.21.cwl [-h] [--any_allele ANY_ALLELE] [--buffer_size BUFFER_SIZE] [--cache_version CACHE_VERSION] + [--custom_enst CUSTOM_ENST] [--maf_center MAF_CENTER] [--memory_overhead MEMORY_OVERHEAD] + [--memory_per_job MEMORY_PER_JOB] [--min_hom_vaf MIN_HOM_VAF] [--ncbi_build NCBI_BUILD] [--normal_id NORMAL_ID] + [--output_maf OUTPUT_MAF] [--ref_fasta REF_FASTA] [--remap_chain REMAP_CHAIN] [--retain_fmt RETAIN_FMT] + [--retain_info RETAIN_INFO] [--species SPECIES] [--tumor_id TUMOR_ID] [--vcf_normal_id VCF_NORMAL_ID] + [--vcf_tumor_id VCF_TUMOR_ID] [--vep_data VEP_DATA] [--vep_forks VEP_FORKS] [--vep_path VEP_PATH] + [--vep-custom.vcfFile VEP_CUSTOM.VCFFILE] [--vep-custom.shortname VEP_CUSTOM.SHORTNAME] + [--vep-custom.filetype VEP_CUSTOM.FILETYPE] [--vep-custom.annotationtype VEP_CUSTOM.ANNOTATIONTYPE] + [--vep-custom.forceReportCoordinate] [--vep-custom.vcfField VEP_CUSTOM.VCFFIELD] [--retain-ann RETAIN_ANN] + [job_order] + +positional arguments: + job_order Job input json file + +optional 
arguments: + -h, --help show this help message and exit + --any_allele ANY_ALLELE + When reporting co-located variants, allow mismatched variant alleles too + --buffer_size BUFFER_SIZE + Number of variants VEP loads at a time; Reduce this for low memory systems + --cache_version CACHE_VERSION + Version of VEP and its cache to use + --custom_enst CUSTOM_ENST + List of custom ENST IDs that override canonical selection, in a file + --maf_center MAF_CENTER + Variant calling center to report in MAF + --memory_overhead MEMORY_OVERHEAD + Memory overhead per job in megabytes + --memory_per_job MEMORY_PER_JOB + Memory per job in megabytes + --min_hom_vaf MIN_HOM_VAF + If GT undefined in VCF, minimum allele fraction to call a variant homozygous + --ncbi_build NCBI_BUILD + Genome build of variants in input + --normal_id NORMAL_ID + Matched_Norm_Sample_Barcode to report in the MAF + --output_maf OUTPUT_MAF + Path to output MAF file + --ref_fasta REF_FASTA + Reference FASTA file + --remap_chain REMAP_CHAIN + Chain file to remap variants to a different assembly before running VEP + --retain_fmt RETAIN_FMT + Comma-delimited names of FORMAT fields to retain as extra columns in MAF [] + --retain_info RETAIN_INFO + Comma-delimited names of INFO fields to retain as extra columns in MAF + --species SPECIES Species of variants in input + --tumor_id TUMOR_ID Tumor_Sample_Barcode to report in the MAF + --vcf_normal_id VCF_NORMAL_ID + Matched normal ID used in VCFs genotype columns + --vcf_tumor_id VCF_TUMOR_ID + Tumor sample ID used in VCFs genotype columns + --vep_data VEP_DATA VEPs base cache/plugin directory + --vep_forks VEP_FORKS + Number of forked processes to use when running VEP + --vep_path VEP_PATH Folder containing variant_effect_predictor.pl or vep binary + --vep-custom.vcfFile VEP_CUSTOM.VCFFILE + --vep-custom.shortname VEP_CUSTOM.SHORTNAME + --vep-custom.filetype VEP_CUSTOM.FILETYPE + --vep-custom.annotationtype VEP_CUSTOM.ANNOTATIONTYPE + --vep-custom.forceReportCoordinate 
+ --vep-custom.vcfField VEP_CUSTOM.VCFFIELD + --retain-ann RETAIN_ANN + --retain-ann IB<_>I,IB<_>I use this custom option to retain the entries + +``` diff --git a/vcf2maf_1.6.21/container/Dockerfile b/vcf2maf_1.6.21/container/Dockerfile new file mode 100644 index 00000000..919941e3 --- /dev/null +++ b/vcf2maf_1.6.21/container/Dockerfile @@ -0,0 +1,79 @@ +FROM clearlinux:latest AS builder + +ARG VCF2MAF_VERSION=1.6.21 +ARG HTSLIB_VERSION=1.10.2 +ARG SAMTOOLS_VERSION=1.10 +ARG BCFTOOLS_VERSION=1.10.2 +ARG VEP_VERSION=105.0 +# Install a minimal versioned OS into /install_root, and bundled tools if any +ENV CLEAR_VERSION=33980 +RUN swupd os-install --no-progress --no-boot-update --no-scripts \ + --version ${CLEAR_VERSION} \ + --path /install_root \ + --statedir /swupd-state \ + --bundles os-core-update,which + +# Download and install conda into /usr/bin +ENV MINICONDA_VERSION=py37_4.9.2 +RUN swupd bundle-add --no-progress curl git wget sysadmin-basic diffutils less c-basic && \ + curl -sL https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh -o /tmp/miniconda.sh && \ + sh /tmp/miniconda.sh -bfp /usr + +#Download and install vcf2maf +RUN wget https://github.com/mskcc/vcf2maf/archive/refs/tags/v${VCF2MAF_VERSION}.zip \ + && unzip v${VCF2MAF_VERSION}.zip \ + && rm v${VCF2MAF_VERSION}.zip + +# Use conda to install remaining tools/dependencies into /usr/local +ENV VEP_VERSION=${VEP_VERSION} \ + HTSLIB_VERSION=${HTSLIB_VERSION} \ + BCFTOOLS_VERSION=${BCFTOOLS_VERSION} \ + SAMTOOLS_VERSION=${SAMTOOLS_VERSION} \ + LIFTOVER_VERSION=377 +RUN conda create -qy -p /usr/local \ + -c conda-forge \ + -c bioconda \ + -c defaults \ + ensembl-vep==${VEP_VERSION} \ + htslib==${HTSLIB_VERSION} \ + bcftools==${BCFTOOLS_VERSION} \ + samtools==${SAMTOOLS_VERSION} \ + ucsc-liftover==${LIFTOVER_VERSION} + +#Copy offline cache +COPY homo_sapiens_vep_105_GRCh37.tar.gz /var/cache +COPY Homo_sapiens.GRCh37.dna.toplevel.fa.gz /var/cache +RUN mkdir -p 
/.vep/homo_sapiens/105_GRCh37/ \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/release-105/variation/indexed_vep_cache/homo_sapiens_vep_105_GRCh37.tar.gz $HOME/.vep/ \ + && mv /var/cache/homo_sapiens_vep_105_GRCh37.tar.gz /.vep/ \ + && tar -zxf /.vep/homo_sapiens_vep_105_GRCh37.tar.gz -C /.vep/ \ + && rm /.vep/homo_sapiens_vep_105_GRCh37.tar.gz \ + ## && rsync -avr --progress rsync://ftp.ensembl.org/ensembl/pub/grch37/release-105/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.toplevel.fa.gz $HOME/.vep/homo_sapiens/105_GRCh37/ \ + && mv /var/cache/Homo_sapiens.GRCh37.dna.toplevel.fa.gz /.vep/homo_sapiens/105_GRCh37/ \ + && gzip -d /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz \ + && bgzip -i /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa \ + && samtools faidx /.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz + +RUN vep_convert_cache --species homo_sapiens --version all --dir /.vep + +# Deploy the minimal OS and tools into a clean target layer +FROM scratch +ARG VCF2MAF_VERSION=1.6.21 +ENV VEP_VERSION=${VEP_VERSION} + +LABEL org.opencontainers.image.vendor="MSKCC" +LABEL maintainer="Suleyman Vural " +LABEL maintainer="Cyriac Kandoth " +LABEL org.opencontainers.image.version.vcf2maf=${VCF2MAF_VERSION} +LABEL org.opencontainers.image.version.vep="105" + +COPY --from=builder vcf2maf-${VCF2MAF_VERSION} /opt/vcf2maf-${VCF2MAF_VERSION} +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/*.pl /usr/local/bin/ +COPY --from=builder vcf2maf-${VCF2MAF_VERSION}/data /opt/data +COPY --from=builder /.vep /.vep/ +COPY --from=builder /install_root / +COPY --from=builder /usr/local /usr/local + +RUN chmod 777 /usr/local/bin/vcf2*.pl && chmod 777 /usr/local/bin/maf2*.pl + +WORKDIR /opt diff --git a/vcf2maf_1.6.21/example_inputs.yaml b/vcf2maf_1.6.21/example_inputs.yaml new file mode 100644 index 00000000..69ea3029 --- /dev/null +++ b/vcf2maf_1.6.21/example_inputs.yaml @@ -0,0 +1,13 @@ +input_vcf: + class: File 
+ path: /path/to/input.vcf +tumor_id: tumor_sample_name +vcf_tumor_id: tumor_sample_name +retain_info: "set,TYPE,FAILURE_REASON" +max_filter_ac: 10 +min_hom_vaf: 0.7 +buffer_size: 5000 +custom_enst: + class: File + path: /work/access/production/resources/msk-access/current/regions_of_interest/current/dmp_ACCESS-panelA-v1-isoform-overrides +output_maf: output.maf diff --git a/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl new file mode 100644 index 00000000..eb7ba027 --- /dev/null +++ b/vcf2maf_1.6.21/vcf2maf_1.6.21.cwl @@ -0,0 +1,208 @@ +class: CommandLineTool +cwlVersion: v1.0 +$namespaces: + dct: 'http://purl.org/dc/terms/' + doap: 'http://usefulinc.com/ns/doap#' + foaf: 'http://xmlns.com/foaf/0.1/' + sbg: 'https://www.sevenbridges.com/' +id: vcf2maf_v1_6_21 +baseCommand: + - perl + - /opt/vcf2maf-1.6.21/vcf2maf.pl +inputs: + - id: any_allele + type: string? + inputBinding: + position: 0 + prefix: '--any-allele' + doc: 'When reporting co-located variants, allow mismatched variant alleles too' + - default: 5000 + id: buffer_size + type: int? + inputBinding: + position: 0 + prefix: '--buffer-size' + doc: Number of variants VEP loads at a time; Reduce this for low memory systems + - default: '105' + id: cache_version + type: string? + inputBinding: + position: 0 + prefix: '--cache-version' + doc: Version of VEP and its cache to use + - id: custom_enst + type: File? + inputBinding: + position: 0 + prefix: '--custom-enst' + doc: 'List of custom ENST IDs that override canonical selection, in a file' + - id: input_vcf + type: + - string + - File + inputBinding: + position: 0 + prefix: '--input-vcf' + doc: 'Path to input file in VCF format , gzipped' + - default: mskcc.org + id: maf_center + type: string? + inputBinding: + position: 0 + prefix: '--maf-center' + doc: Variant calling center to report in MAF + - id: memory_overhead + type: int? + doc: Memory overhead per job in megabytes + - id: memory_per_job + type: int? 
+ doc: Memory per job in megabytes + - default: 0.7 + id: min_hom_vaf + type: float? + inputBinding: + position: 0 + prefix: '--min-hom-vaf' + doc: >- + If GT undefined in VCF, minimum allele fraction to call a variant + homozygous + - default: GRCh37 + id: ncbi_build + type: string? + inputBinding: + position: 0 + prefix: '--ncbi-build' + doc: Genome build of variants in input + - default: NORMAL + id: normal_id + type: string? + inputBinding: + position: 0 + prefix: '--normal-id' + doc: Matched_Norm_Sample_Barcode to report in the MAF + - id: output_maf + type: string? + inputBinding: + position: 0 + prefix: '--output-maf' + doc: Path to output MAF file + - default: '/.vep/homo_sapiens/105_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz' + id: ref_fasta + type: string? + inputBinding: + position: 0 + prefix: '--ref-fasta' + doc: Reference FASTA file + - id: remap_chain + type: string? + inputBinding: + position: 0 + prefix: '--remap-chain' + doc: Chain file to remap variants to a different assembly before running VEP + - id: retain_fmt + type: string? + inputBinding: + position: 0 + prefix: '--retain-fmt' + doc: >- + Comma-delimited names of FORMAT fields to retain as extra columns in MAF + [] + - id: retain_info + type: string? + inputBinding: + position: 0 + prefix: '--retain-info' + doc: Comma-delimited names of INFO fields to retain as extra columns in MAF + - default: homo_sapiens + id: species + type: string? + inputBinding: + position: 0 + prefix: '--species' + doc: Species of variants in input + - default: TUMOR + id: tumor_id + type: string? + inputBinding: + position: 0 + prefix: '--tumor-id' + doc: Tumor_Sample_Barcode to report in the MAF + - default: NORMAL + id: vcf_normal_id + type: string? + inputBinding: + position: 0 + prefix: '--vcf-normal-id' + doc: Matched normal ID used in VCFs genotype columns + - default: TUMOR + id: vcf_tumor_id + type: string? 
+ inputBinding: + position: 0 + prefix: '--vcf-tumor-id' + doc: Tumor sample ID used in VCFs genotype columns + - default: '/.vep/ ' + id: vep_data + type: string? + inputBinding: + position: 0 + prefix: '--vep-data' + doc: VEPs base cache/plugin directory + - default: 4 + id: vep_forks + type: int? + inputBinding: + position: 0 + prefix: '--vep-forks' + doc: Number of forked processes to use when running VEP + - default: '/usr/local/bin/' + id: vep_path + type: string? + inputBinding: + position: 0 + prefix: '--vep-path' + doc: Folder containing variant_effect_predictor.pl or vep binary +outputs: + - id: vcf2maf_maf + type: File + outputBinding: + glob: | + ${ + if (inputs.output_maf) + return inputs.output_maf; + return null; + } +arguments: + - position: 0 + prefix: '--tmp-dir' + shellQuote: false + valueFrom: $(runtime.tmpdir) +requirements: + - class: ShellCommandRequirement + - class: ResourceRequirement + ramMin: 8000 + coresMin: 2 + - class: DockerRequirement + dockerPull: 'ghcr.io/msk-access/vcf2maf:1.6.21' + - class: InlineJavascriptRequirement +'dct:contributor': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:vurals@mskcc.org' + 'foaf:name': Suleyman Vural + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:sivaprk@mskcc.org' + 'foaf:name': Karthigayini Sivaprakasam + 'foaf:name': Memorial Sloan Kettering Cancer Center +'dct:creator': + - class: 'foaf:Organization' + 'foaf:member': + - class: 'foaf:Person' + 'foaf:mbox': 'mailto:kumarn1@mskcc.org' + 'foaf:name': Nikhil Kumar + 'foaf:name': Memorial Sloan Kettering Cancer Center +'doap:release': + - class: 'doap:Version' + 'doap:name': vcf2maf + 'doap:revision': 1.6.21 diff --git a/waltz_count_reads_3.1.1/container/Dockerfile b/waltz_count_reads_3.1.1/container/Dockerfile index 70d13a31..cfad2f10 100644 --- a/waltz_count_reads_3.1.1/container/Dockerfile +++ b/waltz_count_reads_3.1.1/container/Dockerfile @@ -5,14 +5,14 @@ FROM java:8 ################## ARGUMENTS 
####################### ARG BUILD_DATE ARG BUILD_VERSION=1.0.0 -ARG WALTZ_VERSION_cmd=3.1.1 +ARG WALTZ_VERSION_cmd=3.2.0 ################## LABELS ####################### LABEL org.opencontainers.image.authors='Shalabh Suman (sumans@mskcc.org)' LABEL org.opencontainers.image.created=$BUILD_DATE #LABEL org.opencontainers.image.source = "https://github.com/juberpatel/Waltz/blob/master/Waltz-2.0.jar" -LABEL org.opencontainers.image.source = "https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar" +LABEL org.opencontainers.image.source = "https://github.com/msk-access/cwl_commandlinetools" LABEL org.opencontainers.image.revision = "10037a1" LABEL org.opencontainers.image.title = "waltz" LABEL org.opencontainers.image.description = "Syntax to build image: docker build --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') -t mskcc/waltz:1.0.0 ." @@ -24,7 +24,7 @@ ENV WALTZ_VERSION=${WALTZ_VERSION_cmd} # Install Waltz #RUN wget https://github.com/juberpatel/Waltz/blob/master/Waltz-${WALTZ_VERSION}.jar -RUN wget https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-${WALTZ_VERSION}.jar +RUN wget https://github.com/mskcc/Waltz/releases/download/v${WALTZ_VERSION}/Waltz-${WALTZ_VERSION}.jar RUN mv Waltz-${WALTZ_VERSION}.jar /usr/local/bin/Waltz.jar #ENV PATH=$PATH:/usr/local/bin/Waltz.jar \ No newline at end of file diff --git a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl index b06321c7..f177b5d5 100644 --- a/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl +++ b/waltz_count_reads_3.1.1/waltz_count_reads_3.1.1.cwl @@ -3,8 +3,9 @@ cwlVersion: v1.0 $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' - edam: http://edamontology.org/ + sbg: 'https://www.sevenbridges.com/' id: waltz_count_reads baseCommand: - java @@ -33,15 +34,15 @@ inputs: inputBinding: position: 4 outputs: - - 
id: covered_regions + - id: waltz_count_reads_covered_regions type: File outputBinding: glob: '*.covered-regions' - - id: fragment_sizes + - id: waltz_count_reads_fragment_sizes type: File outputBinding: glob: '*.fragment-sizes' - - id: read_counts + - id: waltz_count_reads_read_counts type: File outputBinding: glob: '*.read-counts' @@ -75,10 +76,8 @@ requirements: - class: ResourceRequirement ramMin: 8000 coresMin: 1 -# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" -# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/waltz:1.0.0' + dockerPull: 'ghcr.io/msk-access/waltz:3.1.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' diff --git a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl index 65a73de9..1fdcdb1d 100644 --- a/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl +++ b/waltz_pileupmatrices_3.1.1/waltz_pileupmatrices_3.1.1.cwl @@ -3,8 +3,9 @@ cwlVersion: v1.0 $namespaces: dct: 'http://purl.org/dc/terms/' doap: 'http://usefulinc.com/ns/doap#' + edam: 'http://edamontology.org/' foaf: 'http://xmlns.com/foaf/0.1/' - edam: http://edamontology.org/ + sbg: 'https://www.sevenbridges.com/' id: waltz_pileupmetrics baseCommand: - java @@ -37,19 +38,19 @@ inputs: inputBinding: position: 13 outputs: - - id: pileup + - id: waltz_pileupmetrics_pileup type: File outputBinding: glob: '*-pileup.txt' - - id: pileup_without_duplicates + - id: waltz_pileupmetrics_pileup_without_duplicates type: File outputBinding: 
glob: '*-pileup-without-duplicates.txt' - - id: intervals + - id: waltz_pileupmetrics_intervals type: File outputBinding: glob: '*-intervals.txt' - - id: intervals_without_duplicates + - id: waltz_pileupmetrics_intervals_without_duplicates type: File outputBinding: glob: '*-intervals-without-duplicates.txt' @@ -87,10 +88,8 @@ requirements: - class: ResourceRequirement ramMin: 8000 coresMin: 1 -# ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 8000 + inputs.memory_overhead\r }\r else {\r \r \treturn 8000 \r }\r}" -# coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 1\r }\r}" - class: DockerRequirement - dockerPull: 'mskcc/waltz:1.0.0' + dockerPull: 'ghcr.io/msk-access/waltz:3.1.1' - class: InlineJavascriptRequirement 'dct:contributor': - class: 'foaf:Organization' @@ -110,4 +109,3 @@ requirements: - class: 'doap:Version' 'doap:name': waltz 'doap:revision': 3.1.1 -