diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00000000..d4a2c440
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,21 @@
+# http://editorconfig.org
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+charset = utf-8
+end_of_line = lf
+
+[*.bat]
+indent_style = tab
+end_of_line = crlf
+
+[LICENSE]
+insert_final_newline = false
+
+[Makefile]
+indent_style = tab
diff --git a/.gitbook.yaml b/.gitbook.yaml
new file mode 100644
index 00000000..afdeba9f
--- /dev/null
+++ b/.gitbook.yaml
@@ -0,0 +1 @@
+root: ./docs
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 00000000..14740fbe
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,15 @@
+* cwl-commandlinetools version:
+* Python version:
+* Operating System:
+
+### Description
+
+Describe what you were trying to get done.
+Tell us what happened, what went wrong, and what you expected to happen.
+
+### What I Did
+
+```
+Paste the command(s) you ran and the output.
+If there was a crash, please include the traceback here.
+```
diff --git a/.gitignore b/.gitignore
index 76d33366..de23585e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ __pycache__/
# Distribution / packaging
.Python
+env/
build/
develop-eggs/
dist/
@@ -81,8 +82,10 @@ celerybeat-schedule
# SageMath parsed files
*.sage.py
-# Environments
+# dotenv
.env
+
+# virtualenv
.venv
env/
venv/
@@ -108,7 +111,6 @@ venv.bak/
# vscode
.vscode/*
-!.vscode/settings.json
-!.vscode/tasks.json
-!.vscode/launch.json
-!.vscode/extensions.json
\ No newline at end of file
+
+# pycharm
+.idea
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..c0504869
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,31 @@
+# Config file for automatic testing at travis-ci.org
+
+language: python
+python:
+ - 3.7
+ - 2.7
+
+# Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+install:
+ - pip install -U tox-travis
+ - pip install -r requirements.txt
+
+# Command to run tests, e.g. python setup.py test
+script:
+ - find . -name '*.cwl' | xargs -n 1 cwltool --validate
+
+# Assuming you have installed the travis-ci CLI tool, after you
+# create the Github repo and add it to Travis, run the
+# following command to finish PyPI deployment setup:
+# $ travis encrypt --add deploy.password
+deploy:
+ provider: pypi
+ distributions: sdist bdist_wheel
+ user: __token__
+ password:
+ secure: g9Ja5QDLc1WGu50xpmXl6wcP7qRNzfYZk7i3PEJtQNO6JLPtxEmBgDAb4+RedRxLo9MRmws/n/bFkTOSP837d+tJ91cYN6TFbVu2teWiR6hblDX/Twhbceq/MjdYJyAVsH+KpuORjuJGqzk2I4QLzI+B/0mXuWcE4EPaCZ5mpm0aYYOTLW1Ukxl1j/PoV8wWC2glItLQ02zIvLyr276+en+RAdWYwqW8sY7rn4hI6VaM78OMsc2/cvG27X82SX4rBxJ3/VveslAc3O7Kck02ltOPyOLI3w++HEVvhHAaCK3kDxNEYQCMly1lDYWTfAGm2F5TZ5mgt2adb08AN//0GnWQOfciHh3JUrIt7po7B5Zs8kmZNGGTJFog8o+btU4pAeCDIt61lFyMo7VVpvPzR4ToiGP3zBvGEgnZd7WpTI0H0E4oc821vl9SAN+3aWQhDxDHl+z3VDwpZTA18mgQikFNc7asKDSXCAGoStI/YFWjw3X+tvFGMXR+R6dpmeSplFFSOx9L3TbrtymWProH8MOyxSVNDdQG6Vz41bN9IS47GRI+/1A9jXxwGurKY1ZL7HZDApDx42Fn2RdOFQNyLCeCneI+RUXtkHY56tH3GpBmnnJX6bKPrn4+VIbgd1VCahPrG8INqxx+SE4JojdIQHcxDy924PCL8mS4hakE4Z0=
+ on:
+ tags: true
+ repo: msk-access/cwl-commandlinetools
+ python: 2.7
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..e994ec71
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,16 @@
+Apache Software License 2.0
+
+Copyright (c) 2019, msk-access
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..405d5d7e
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,9 @@
+include LICENSE
+include README.md
+# Ship the test suite and every CWL tool definition found anywhere in the source tree.
+recursive-include tests *
+global-include *.cwl
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
+
+recursive-include docs *.jpg *.png *.gif
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..4dba77a2
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,77 @@
+.PHONY: clean clean-test clean-pyc clean-build docs help lint test test-all coverage release dist install
+.DEFAULT_GOAL := help
+# Python helper: open the file named by argv[1] in the default web browser (Python 2 or 3).
+define BROWSER_PYSCRIPT
+import os, webbrowser, sys
+
+try:
+	from urllib import pathname2url
+except ImportError:
+	from urllib.request import pathname2url
+
+webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
+endef
+export BROWSER_PYSCRIPT
+# Python helper: print each documented target read on stdin as an aligned "name description" row.
+define PRINT_HELP_PYSCRIPT
+import re, sys
+
+for line in sys.stdin:
+	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
+	if match:
+		target, help = match.groups()
+		print("%-20s %s" % (target, help))
+endef
+export PRINT_HELP_PYSCRIPT
+
+BROWSER := python -c "$$BROWSER_PYSCRIPT"
+
+help:
+	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
+
+clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
+
+clean-build: ## remove build artifacts
+	rm -fr build/
+	rm -fr dist/
+	rm -fr .eggs/
+	find . -name '*.egg-info' -exec rm -fr {} +
+	find . -name '*.egg' -exec rm -f {} +
+
+clean-pyc: ## remove Python file artifacts
+	find . -name '*.pyc' -exec rm -f {} +
+	find . -name '*.pyo' -exec rm -f {} +
+	find . -name '*~' -exec rm -f {} +
+	find . -name '__pycache__' -exec rm -fr {} +
+
+clean-test: ## remove test and coverage artifacts
+	rm -fr .tox/
+	rm -f .coverage
+	rm -fr htmlcov/
+	rm -fr .pytest_cache
+
+lint: ## check style with flake8
+	flake8 cwl_commandlinetools tests
+
+test: ## run tests quickly with the default Python
+	pytest
+
+test-all: ## run tests on every Python version with tox
+	tox
+
+coverage: ## check code coverage quickly with the default Python
+	coverage run --source cwl_commandlinetools -m pytest
+	coverage report -m
+	coverage html
+	$(BROWSER) htmlcov/index.html
+
+release: dist ## package and upload a release
+	twine upload dist/*
+
+dist: clean ## builds source and wheel package
+	python setup.py sdist
+	python setup.py bdist_wheel
+	ls -l dist
+
+install: clean ## install the package to the active Python's site-packages
+	python setup.py install
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..ee39a452
--- /dev/null
+++ b/README.md
@@ -0,0 +1,31 @@
+---
+description: Central location for storing common workflow language based command line tools for building workflows
+---
+
+# Command-line tools built by CCI
+
+- Free software: Apache Software License 2.0
+- Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/)
+
+## Features
+
+Create command line tools in common workflow language to generate workflows.
+
+## Installation
+
+Clone the repository:
+
+```
+git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git
+```
+
+**Follow the README in respective tool folder for execution of the tool.**
+
+
+## Credits
+
+- CMO ACCESS Informatics Team
+- This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [`audreyr/cookiecutter-pypackage`](https://github.com/audreyr/cookiecutter-pypackage) project template.
+ - Cookiecutter: https://github.com/audreyr/cookiecutter
+ - `audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage
+
diff --git a/Workflows/bwa_sort_merge.cwl b/Workflows/bwa_sort_merge.cwl
deleted file mode 100644
index 99b25be9..00000000
--- a/Workflows/bwa_sort_merge.cwl
+++ /dev/null
@@ -1,86 +0,0 @@
-class: Workflow
-cwlVersion: v1.0
-id: bwa_sort_merge
-label: bwa_sort_merge
-
-inputs:
- - id: reference_sequence
- type: File
- secondaryFiles:
- - .amb
- - .ann
- - .bwt
- - .pac
- - .sa
- - .fai
- - id: read_pair
- type:
- type: array
- items:
- items: File
- type: array
- - id: sample_id
- type: string
- - id: lane_id
- type: 'string[]'
-
-outputs:
- - id: sample_id_output
- outputSource:
- - bwa_sort/sample_id_output
- type:
- - string
- - type: array
- items: string
- - id: output_md_metrics
- outputSource:
- - gatk_markduplicatesgatk/output_md_metrics
- type: File
- - id: output_md_bam
- outputSource:
- - gatk_markduplicatesgatk/output_md_bam
- type: File
-
-steps:
- - id: samtools_merge
- in:
- - id: input_bams
- source:
- - bwa_sort/output_file
- out:
- - id: output_file
- run: ../CommandLineTools/samtools-merge_1.9/samtools-merge_1.9.cwl
- - id: bwa_sort
- in:
- - id: reference_sequence
- source: reference_sequence
- - id: read_pair
- source:
- - read_pair
- - id: sample_id
- source: sample_id
- - id: lane_id
- source: lane_id
- out:
- - id: output_file
- - id: sample_id_output
- - id: lane_id_output
- run: ./bwa_sort.cwl
- label: bwa_sort
- scatter:
- - read_pair
- - lane_id
- scatterMethod: dotproduct
- - id: gatk_markduplicatesgatk
- in:
- - id: input_bam
- source: samtools_merge/output_file
- out:
- - id: output_md_bam
- - id: output_md_metrics
- run: ../CommandLineTools/mark-duplicates_4.1.0.0/mark-duplicates_4.1.0.0.cwl
- label: GATK MarkDuplicates
-
-requirements:
- - class: SubworkflowFeatureRequirement
- - class: ScatterFeatureRequirement
diff --git a/Workflows/make_bam.cwl b/Workflows/make_bam.cwl
deleted file mode 100644
index 5d1dfdef..00000000
--- a/Workflows/make_bam.cwl
+++ /dev/null
@@ -1,107 +0,0 @@
-class: Workflow
-cwlVersion: v1.0
-id: make_bam
-label: make_bam
-
-inputs:
- - id: read_pairs_normal
- type:
- type: array
- items:
- items: File
- type: array
- - id: lane_ids_normal
- type: 'string[]'
- - id: reference_sequence
- type: File
- secondaryFiles:
- - .amb
- - .ann
- - .bwt
- - .pac
- - .sa
- - .fai
- - id: sample_id_normal
- type: string
- - id: sample_id_tumor
- type: string
- - id: read_pairs_tumor
- type:
- type: array
- items:
- items: File
- type: array
- - id: lane_ids_tumor
- type: 'string[]'
-
-outputs:
- - id: sample_id_output_normal
- outputSource:
- - make_bam_Normal/sample_id_output
- type:
- - string
- - type: array
- items: string
- - id: normal_bam
- outputSource:
- - make_bam_Normal/output_md_bam
- type: File
- - id: sample_id_output_tumor
- outputSource:
- - make_bam_Tumor/sample_id_output
- type:
- - string
- - type: array
- items: string
- - id: tumor_bam
- outputSource:
- - make_bam_Tumor/output_md_bam
- type: File
- - id: tumor_metrics
- outputSource:
- - make_bam_Tumor/output_md_metrics
- type: File
- - id: normal_metrics
- outputSource:
- - make_bam_Normal/output_md_metrics
- type: File
-
-steps:
- - id: make_bam_Normal
- in:
- - id: reference_sequence
- source: reference_sequence
- - id: read_pair
- source:
- - read_pairs_normal
- - id: sample_id
- source: sample_id_normal
- - id: lane_id
- source:
- - lane_ids_normal
- out:
- - id: sample_id_output
- - id: output_md_metrics
- - id: output_md_bam
- run: ./bwa_sort_merge.cwl
- label: make_bam_Normal
- - id: make_bam_Tumor
- in:
- - id: reference_sequence
- source: reference_sequence
- - id: read_pair
- source:
- - read_pairs_tumor
- - id: sample_id
- source: sample_id_tumor
- - id: lane_id
- source:
- - lane_ids_tumor
- out:
- - id: sample_id_output
- - id: output_md_metrics
- - id: output_md_bam
- run: ./bwa_sort_merge.cwl
- label: make_bam_Tumor
-requirements:
- - class: SubworkflowFeatureRequirement
diff --git a/Workflows/msisensor-run-both.cwl b/Workflows/msisensor-run-both.cwl
deleted file mode 100644
index a9d2f373..00000000
--- a/Workflows/msisensor-run-both.cwl
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env cwl-runner
-
-$namespaces:
- dct: http://purl.org/dc/terms/
- foaf: http://xmlns.com/foaf/0.1/
- doap: http://usefulinc.com/ns/doap#
-
-cwlVersion: v1.0
-
-class: Workflow
-id: msisensor-run-both
-requirements:
- StepInputExpressionRequirement: {}
- MultipleInputFeatureRequirement: {}
- ScatterFeatureRequirement: {}
- SubworkflowFeatureRequirement: {}
- InlineJavascriptRequirement: {}
-
-inputs:
- normal_bam:
- type: File
- secondaryFiles: [ ".bai" ]
- tumor_bam:
- type: File
- secondaryFiles: [ ".bai" ]
- output_prefix: string
- msi_file: File
-
-outputs:
- msisensor_0.2_output:
- type: File
- outputSource: msisensor_0.2/output
-
- msisensor_0.6_output:
- type: File
- outputSource: msisensor_0.6/output
-
-steps:
- msisensor_0.2:
- run: ../CommandLineTools/msisensor_0.2/msisensor-0.2.cwl
- in:
- output_prefix: output_prefix
- d: msi_file
- n: normal_bam
- t: tumor_bam
- o:
- valueFrom: ${ return inputs.output_prefix + "_0.2.txt"; }
- out: [ output ]
-
- msisensor_0.6:
- run: ../CommandLineTools/msisensor_0.6/msisensor-0.6.cwl
- in:
- output_prefix: output_prefix
- d: msi_file
- n: normal_bam
- t: tumor_bam
- o:
- valueFrom: ${ return inputs.output_prefix + "_0.6.txt"; }
- out: [ output ]
diff --git a/Workflows/mutect_wf.cwl b/Workflows/mutect_wf.cwl
deleted file mode 100644
index c046e18a..00000000
--- a/Workflows/mutect_wf.cwl
+++ /dev/null
@@ -1,65 +0,0 @@
-class: Workflow
-cwlVersion: v1.0
-id: mutect_wf
-label: mutect_wf
-inputs:
- - id: scatter-count
- type: int?
- - id: output
- type: string
- - id: intervals
- type: File?
- - id: reference
- type: File?
- - id: tumor_sample
- type: string?
- - id: normal_sample
- type: string?
- - id: input_normal
- type: File?
- - id: input_tumor
- type: File?
-
-outputs:
- - id: output_1
- outputSource:
- - mutect2/output
- type: 'File[]?'
-
-steps:
- - id: scatterintervals
- in:
- - id: reference
- source: reference
- - id: intervals
- source: intervals
- - id: scatter-count
- source: scatter-count
- - id: output
- source: output
- out:
- - id: interval_files
- run: ../CommandLineTools/scatterintervals_4.1.0.0/scatterintervals_4.1.0.0.cwl
- label: ScatterIntervals
- - id: mutect2
- in:
- - id: reference
- source: reference
- - id: intervals
- source: scatterintervals/interval_files
- - id: input
- source: input_tumor
- - id: tumor_sample
- source: tumor_sample
- - id: input_normal
- source: input_normal
- - id: normal_sample
- source: normal_sample
- out:
- - id: output
- run: ../CommandLineTools/mutect2_4.1.0.0/mutect2_4.1.0.0.cwl
- label: Mutect2
- scatter:
- - intervals
-requirements:
- - class: ScatterFeatureRequirement
diff --git a/Workflows/snp-pileup-to-facets.cwl b/Workflows/snp-pileup-to-facets.cwl
deleted file mode 100644
index 619a8753..00000000
--- a/Workflows/snp-pileup-to-facets.cwl
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env cwl-runner
-
-class: Workflow
-cwlVersion: v1.0
-
-requirements:
- InlineJavascriptRequirement: {}
- StepInputExpressionRequirement: {}
- SubworkflowFeatureRequirement: {}
- MultipleInputFeatureRequirement: {}
- ScatterFeatureRequirement: {}
-
-inputs:
- facets_vcf:
- type: File
- secondaryFiles:
- - .gz
-
- bam_normal:
- type: File
-
- bam_tumor:
- type: File
-
- tumor_sample_name:
- type: string
-
-outputs:
-
- snp_pileup_out:
- type: File
- outputSource: do_snp_pileup/output_file
-
- facets_png:
- type: File[]?
- outputSource: do_facets/png_files
-
- facets_txt_purity:
- type: File?
- outputSource: do_facets/txt_files_purity
-
- facets_txt_hisens:
- type: File?
- outputSource: do_facets/txt_files_hisens
-
- facets_out_files:
- type: File[]?
- outputSource: do_facets/out_files
-
- facets_rdata:
- type: File[]?
- outputSource: do_facets/rdata_files
-
- facets_seg:
- type: File[]?
- outputSource: do_facets/seg_files
-
-steps:
- do_snp_pileup:
- run: ../CommandLineTools/snp-pileup_0.1.1/htstools-0.1.1.cwl
- in:
- vcf_file: facets_vcf
- bam_normal: bam_normal
- bam_tumor: bam_tumor
- output_file:
- valueFrom: ${ return inputs.bam_normal.basename.replace(".bam", "") + "_" + inputs.bam_tumor.basename.replace(".bam", "") + ".dat.gz"; }
- out: [ output_file ]
-
- do_facets:
- run: ../CommandLineTools/facets_1.5.6/facets.doFacets-1.5.6.cwl
- in:
- genome:
- valueFrom: ${ return "hg19"; }
- counts_file: do_snp_pileup/output_file
- TAG:
- valueFrom: ${ return inputs.counts_file.basename.replace(".dat.gz", ""); }
- tumor_id: tumor_sample_name
- directory:
- valueFrom: ${ return "."; }
- out: [ png_files, txt_files_purity, txt_files_hisens, out_files, rdata_files, seg_files ]
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 00000000..d62ed77c
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+"""Top-level package for cwl-commandlinetools."""
+
+__author__ = """msk-access"""
+__email__ = 'msk.access@gmail.com'
+__version__ = '1.1.1'
diff --git a/abra2_2.17/abra2_2.17.cwl b/abra2_2.17/abra2_2.17.cwl
index e9ac545e..0f31e4d4 100644
--- a/abra2_2.17/abra2_2.17.cwl
+++ b/abra2_2.17/abra2_2.17.cwl
@@ -184,7 +184,7 @@ requirements:
ramMin: 60000
coresMin: 16
- class: DockerRequirement
- dockerPull: 'mskcc/abra2:0.1.0'
+ dockerPull: 'ghcr.io/msk-access/abra2:0.6.3'
- class: InlineJavascriptRequirement
'dct:contributor':
- class: 'foaf:Organization'
diff --git a/abra2_2.17/container/Dockerfile b/abra2_2.17/container/Dockerfile
index 9f1bc82f..e7c64df9 100644
--- a/abra2_2.17/container/Dockerfile
+++ b/abra2_2.17/container/Dockerfile
@@ -20,7 +20,7 @@ LABEL org.opencontainers.image.created=${BUILD_DATE} \
org.opencontainers.image.licenses=${LICENSE} \
org.opencontainers.image.version.java=${JAVA_VERSION} \
org.opencontainers.image.version.abra2=${ABRA2_VERSION} \
- org.opencontainers.image.source.abra2="https://github.com/mozack/abra2/releases/"
+ org.opencontainers.image.source="https://github.com/mozack/abra2/releases/"
LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}"
@@ -37,4 +37,4 @@ RUN apt-get update && \
RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \
chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \
- cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar
\ No newline at end of file
+ cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar
diff --git a/abra2_2.19/abra2_2.19.cwl b/abra2_2.19/abra2_2.19.cwl
index 8c9be47a..1af3c194 100644
--- a/abra2_2.19/abra2_2.19.cwl
+++ b/abra2_2.19/abra2_2.19.cwl
@@ -4,7 +4,8 @@ $namespaces:
dct: 'http://purl.org/dc/terms/'
doap: 'http://usefulinc.com/ns/doap#'
foaf: 'http://xmlns.com/foaf/0.1/'
-id: abra2_2.19
+ sbg: 'https://www.sevenbridges.com/'
+id: abra2_2_19
baseCommand:
- java
inputs:
@@ -173,7 +174,7 @@ requirements:
ramMin: "${\r if(inputs.memory_per_job && inputs.memory_overhead) {\r \r return inputs.memory_per_job + inputs.memory_overhead\r }\r else if (inputs.memory_per_job && !inputs.memory_overhead){\r \r \treturn inputs.memory_per_job + 2000\r }\r else if(!inputs.memory_per_job && inputs.memory_overhead){\r \r return 15000 + inputs.memory_overhead\r }\r else {\r \r \treturn 17000 \r }\r}"
coresMin: "${\r if (inputs.number_of_threads) {\r \r \treturn inputs.number_of_threads \r }\r else {\r \r return 4\r }\r}"
- class: DockerRequirement
- dockerPull: 'mskcc/abra2:0.2.0'
+ dockerPull: 'aphoid/abra2:2.19'
- class: InlineJavascriptRequirement
'dct:contributor':
- class: 'foaf:Organization'
@@ -193,6 +194,3 @@ requirements:
- class: 'doap:Version'
'doap:name': abra2
'doap:revision': 2.19
- - class: 'doap:Version'
- 'doap:name': cwl-wrapper
- 'doap:revision': 1.0.0
diff --git a/abra2_2.22/abra2_2.22.cwl b/abra2_2.22/abra2_2.22.cwl
new file mode 100644
index 00000000..e24cb107
--- /dev/null
+++ b/abra2_2.22/abra2_2.22.cwl
@@ -0,0 +1,246 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: abra2_2_22
+baseCommand:
+ - java
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: input_bam
+ type:
+ - File
+ - type: array
+ items: File
+ inputBinding:
+ position: 0
+ prefix: '--in'
+ doc: Required list of input sam or bam file (s) separated by comma
+ secondaryFiles:
+ - ^.bai
+ - id: working_directory
+ type: string?
+ doc: Set the temp directory (overrides java.io.tmpdir)
+ - id: reference_fasta
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--ref'
+ doc: Genome reference location
+ secondaryFiles:
+ - .fai
+ - id: targets
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--targets'
+ - id: kmer_size
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--kmer'
+ doc: >-
+ Optional assembly kmer size(delimit with commas if multiple sizes
+ specified)
+ - id: maximum_average_depth
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--mad'
+ doc: >-
+ Regions with average depth exceeding this value will be downsampled
+ (default: 1000)
+ - id: soft_clip_contig
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--sc'
+ doc: >-
+ Soft clip contig args
+ [max_contigs,min_base_qual,frac_high_qual_bases,min_soft_clip_len]
+ (default:16,13,80,15)
+ - id: maximum_mixmatch_rate
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--mmr'
+ doc: >-
+ Max allowed mismatch rate when mapping reads back to contigs (default:
+ 0.05)
+ - id: scoring_gap_alignments
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--sga'
+ doc: >-
+ Scoring used for contig alignments(match,
+ mismatch_penalty,gap_open_penalty,gap_extend_penalty) (default:8,32,48,1)
+ - id: contig_anchor
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--ca'
+ doc: >-
+ Contig anchor [M_bases_at_contig_edge,max_mismatches_near_edge]
+ (default:10,2)
+ - id: window_size
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--ws'
+ doc: |-
+ Processing window size and overlap
+ (size,overlap) (default: 400,200)
+ - id: consensus_sequence
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--cons'
+ doc: Use positional consensus sequence when aligning high quality soft clipping
+ - id: output_bams
+ type:
+ - string
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: '--out'
+ doc: Required list of output sam or bam file (s) separated by comma
+ - id: ignore_bad_assembly
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--ignore-bad-assembly'
+ doc: Use this option to avoid parsing errors for corrupted assemblies
+ - id: bam_index
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--index'
+ doc: >-
+ Enable BAM index generation when outputting sorted alignments (may require
+ additional memory)
+ - id: input_vcf
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '--in-vcf'
+ doc: >-
+ VCF containing known (or suspected) variant sites. Very large files
+ should be avoided.
+ - id: no_edge_complex_indel
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-edge-ci'
+ doc: Prevent output of complex indels at read start or read end
+ - id: no_sort
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--nosort'
+ doc: Do not attempt to sort final output
+outputs:
+ - id: abra_realigned_bam
+ type:
+ - 'null'
+ - File
+ - type: array
+ items: File
+ outputBinding:
+ glob: |-
+ ${
+ return inputs.output_bams
+ }
+ secondaryFiles:
+ - ^.bai
+label: abra2_2.22
+arguments:
+ - position: 0
+ valueFrom: |-
+ ${
+ if (inputs.memory_per_job && inputs.memory_overhead) {
+
+ if (inputs.memory_per_job % 1000 == 0) {
+
+ return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G"
+ }
+ else {
+
+ return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G"
+ }
+ }
+ else if (inputs.memory_per_job && !inputs.memory_overhead) {
+
+ if (inputs.memory_per_job % 1000 == 0) {
+
+ return "-Xmx" + (inputs.memory_per_job / 1000).toString() + "G"
+ }
+ else {
+
+ return "-Xmx" + Math.floor((inputs.memory_per_job / 1000)).toString() + "G"
+ }
+ }
+ else if (!inputs.memory_per_job && inputs.memory_overhead) {
+
+ return "-Xmx20G"
+ }
+ else {
+
+ return "-Xmx20G"
+ }
+ }
+ - position: 0
+ prefix: '-jar'
+ valueFrom: /usr/local/bin/abra2.jar
+ - position: 0
+ prefix: '--threads'
+ valueFrom: |-
+ ${
+ if(inputs.number_of_threads)
+ return inputs.number_of_threads
+ return runtime.cores
+ }
+ - position: 0
+ prefix: '--tmpdir'
+ valueFrom: |-
+ ${
+ if(inputs.working_directory)
+ return inputs.working_directory;
+ return runtime.tmpdir
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 60000
+ coresMin: 16
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/abra2:2.22'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:kumarn1@mskcc.org'
+ 'foaf:name': Nikhil Kumar
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': abra2
+ 'doap:revision': 2.22
diff --git a/abra2_2.22/container/Dockerfile b/abra2_2.22/container/Dockerfile
new file mode 100644
index 00000000..0ec95e41
--- /dev/null
+++ b/abra2_2.22/container/Dockerfile
@@ -0,0 +1,40 @@
+################## BASE IMAGE ######################
+
+FROM openjdk:8
+
+################## ARGUMENTS########################
+
+ARG BUILD_DATE
+ARG BUILD_VERSION
+ARG LICENSE="Apache-2.0"
+ARG JAVA_VERSION=8
+ARG ABRA2_VERSION=2.22
+
+################## METADATA ########################
+
+LABEL org.opencontainers.image.vendor="MSKCC"
+LABEL org.opencontainers.image.authors="Nikhil Kumar (kumarn1@mskcc.org)"
+
+LABEL org.opencontainers.image.created=${BUILD_DATE} \
+ org.opencontainers.image.version=${BUILD_VERSION} \
+ org.opencontainers.image.licenses=${LICENSE} \
+ org.opencontainers.image.version.java=${JAVA_VERSION} \
+ org.opencontainers.image.version.abra2=${ABRA2_VERSION} \
+ org.opencontainers.image.source="https://github.com/mozack/abra2/releases/"
+
+LABEL org.opencontainers.image.description="This container uses OPENJDK ${JAVA_VERSION} as the base image to build abra2 version ${ABRA2_VERSION}"
+
+################## INSTALL ##########################
+
+WORKDIR /usr/src
+
+RUN apt-get update && \
+ apt-get --no-install-recommends install -y \
+ wget && \
+ apt-get clean autoclean && \
+ apt-get autoremove -y && \
+ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+RUN wget "https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar" && \
+ chmod 755 /usr/src/abra2-${ABRA2_VERSION}.jar && \
+ cp -s /usr/src/abra2-${ABRA2_VERSION}.jar /usr/local/bin/abra2.jar
diff --git a/abra2_2.22/example_inputs.yaml b/abra2_2.22/example_inputs.yaml
new file mode 100644
index 00000000..928aafd8
--- /dev/null
+++ b/abra2_2.22/example_inputs.yaml
@@ -0,0 +1,30 @@
+bam_index: true
+no_edge_complex_indel: true
+consensus_sequence:
+contig_anchor:
+ignore_bad_assembly:
+input_bam:
+ class: File
+ path: "path/to/alignment.bam"
+input_vcf:
+kmer_size:
+maximum_average_depth:
+maximum_mixmatch_rate:
+memory_overhead:
+memory_per_job:
+no_sort:
+number_of_threads:
+output_bams: name_of_realigned_abra.bam
+path_to_abra:
+reference_fasta:
+ class: File
+ path: "/path/to/reference.fasta"
+scoring_gap_alignments:
+soft_clip_contig:
+targets:
+ class: File
+ metadata: {}
+ path: "/path/to/target.bed"
+ secondaryFiles: []
+window_size:
+working_directory:
diff --git a/access_utils/0.1.1/README.md b/access_utils/0.1.1/README.md
new file mode 100644
index 00000000..45bd9f8b
--- /dev/null
+++ b/access_utils/0.1.1/README.md
@@ -0,0 +1,32 @@
+# CWL and Dockerfile for running general_stats_parse
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| access_utils | 0.1.1 | |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner general_stats_parse.cwl example_inputs.yaml
+```
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/general_stats_parse.cwl /path/to/example_inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir tool_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/access_utils/0.1.1/general_stats_parse.cwl /path/to/example_inputs.yaml > tool_toil.stdout 2> tool_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner general_stats_parse.cwl -h
+```
diff --git a/access_utils/0.1.1/example_inputs.yaml b/access_utils/0.1.1/example_inputs.yaml
new file mode 100644
index 00000000..27fccad3
--- /dev/null
+++ b/access_utils/0.1.1/example_inputs.yaml
@@ -0,0 +1,8 @@
+# Directory containing results; key matches the `directory` input of general_stats_parse.cwl.
+directory:
+  class: Directory
+  path: /path/to/sample_info_directory
+# Sample JSON file; key matches the `samples-json` input of general_stats_parse.cwl.
+samples-json:
+  class: File
+  path: /path/to/sample_meta.json
diff --git a/access_utils/0.1.1/general_stats_parse.cwl b/access_utils/0.1.1/general_stats_parse.cwl
new file mode 100644
index 00000000..5116e6f3
--- /dev/null
+++ b/access_utils/0.1.1/general_stats_parse.cwl
@@ -0,0 +1,67 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: general_stats_parse
+baseCommand:
+ - general_stats_parse.py
+arguments:
+ - --dir
+ - .
+inputs:
+ - id: directory
+ type: Directory
+ doc: Directory containing results.
+ - id: samples-json
+ type: File
+ inputBinding:
+ prefix: '--samples-json'
+ doc: Sample JSON file.
+ - id: config
+ type: File?
+ inputBinding:
+ prefix: '--config'
+ doc: MultiQC config file.
+outputs:
+ - id: aggregate_parsed_stats
+ label: aggregate_parsed_stats
+ type: Directory
+ outputBinding:
+ glob: .
+ outputEval: |-
+ ${
+ self[0].basename = "aggregate_qc_stats";
+ return self[0]
+ }
+label: general_stats_parse
+requirements:
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/access_utils:0.1.1'
+ - class: InitialWorkDirRequirement
+ listing:
+ - entry: $(inputs.directory)
+ writable: true
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
diff --git a/athena/1.4.2/annotate_bed/annotate_bed.cwl b/athena/1.4.2/annotate_bed/annotate_bed.cwl
new file mode 100755
index 00000000..78002b99
--- /dev/null
+++ b/athena/1.4.2/annotate_bed/annotate_bed.cwl
@@ -0,0 +1,98 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: annotate_bed
+baseCommand:
+ - python
+ - /app/bin/annotate_bed.py
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ doc: 'worker thread number'
+ - id: panel_bed
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-p'
+ doc: >-
+ Input panel bed file; must have ONLY the following 4 columns chromosome,
+ start position, end position, gene/transcript
+ - id: transcript_file
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-t'
+ doc: >-
+ Transcript annotation file, contains required gene and exon information.
+ Must have ONLY the following 6 columns:
+
+ chromosome, start, end, gene, transcript, exon
+ - id: coverage_file
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-c'
+ doc: Per base coverage file (output from mosdepth or similar)
+ - id: chunk_size
+ type: int?
+ inputBinding:
+ position: 999
+ prefix: '-s'
+ - id: output_name
+ type: string?
+ inputBinding:
+ position: 960
+ prefix: '-n'
+ doc: >-
+ (optional) Prefix for naming output file, if not given will use name from
+ per base coverage file
+outputs:
+ - id: annotated_bed
+ label: annotated_bed
+ type: File
+ outputBinding:
+ glob: '*.bed'
+label: annotate_bed
+requirements:
+ - class: ResourceRequirement
+ ramMin: 17000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/athena:1.4.2'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:charlk@mskcc.org'
+ 'foaf:name': Carmelina Charlambous
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:charlk@mskcc.org'
+ 'foaf:name': Carmelina Charlambous
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
diff --git a/athena/1.4.2/annotate_bed/example_inputs.yaml b/athena/1.4.2/annotate_bed/example_inputs.yaml
new file mode 100644
index 00000000..a770b0a5
--- /dev/null
+++ b/athena/1.4.2/annotate_bed/example_inputs.yaml
@@ -0,0 +1,5 @@
+panel_bed: {class: File, path: path/to/panel_bed.bed}
+transcript_file: {class: File, path: path/to/transcript.bed}
+coverage_file: {class: File, path: path/to/coverage.per-base.bed.gz}
+chunk_size: 10000000
+output_name: 'prefix_name'
diff --git a/athena/1.4.2/annotate_bed/example_inputs_juno.yaml b/athena/1.4.2/annotate_bed/example_inputs_juno.yaml
new file mode 100644
index 00000000..c65dba0d
--- /dev/null
+++ b/athena/1.4.2/annotate_bed/example_inputs_juno.yaml
@@ -0,0 +1,5 @@
+panel_bed: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/panel_bed_file_athena_CH_nodup.bed}
+transcript_file: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/CH_transcript_pad3bp_athena_nodup.bed}
+coverage_file: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/Myeloid200-1-05500HJ_P20.per-base.bed.gz}
+chunk_size: 10000000
+output_name: 'ex1_prefix'
diff --git a/athena/1.4.2/container/README.md b/athena/1.4.2/container/README.md
new file mode 100644
index 00000000..f251d8f4
--- /dev/null
+++ b/athena/1.4.2/container/README.md
@@ -0,0 +1,3 @@
+Dockerfile available via the athena repository: https://github.com/msk-access/athena/blob/master/Dockerfile
+
+Registry containing the image is also housed with the athena repository: https://github.com/msk-access/athena/pkgs/container/athena
diff --git a/athena/1.4.2/coverage_report_single/coverage_report_single.cwl b/athena/1.4.2/coverage_report_single/coverage_report_single.cwl
new file mode 100755
index 00000000..9411a023
--- /dev/null
+++ b/athena/1.4.2/coverage_report_single/coverage_report_single.cwl
@@ -0,0 +1,157 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: coverage_report_single  # tool identifier; kept in sync with this tool's label and file name
+baseCommand:
+ - python
+ - /app/bin/coverage_report_single.py
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ doc: 'worker thread number'
+ - id: exon_stats
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-e'
+ doc: per exon statistics file (from `coverage_stats_single.py`)
+ - id: gene_stats
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-g'
+ doc: per gene statistics file (from `coverage_stats_single.py`)
+ - id: raw_coverage
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-r'
+ doc: >-
+ annotated bed file with coverage data (generated from annotate_bed.sh /
+ bedtools intersect)
+ - id: per_base_coverage
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '-b'
+ doc: >-
+ Per-base coverage bed file from mosdepth. (Optional; if not submitted,
+ plots displaying global coverage per chromosome will not be displayed)
+ - id: snps
+ type:
+ - 'null'
+ - File
+ - type: array
+ items: File
+ inputBinding:
+ position: 999
+ prefix: '-s'
+ doc: 'VCF(s) of known SNPs to check coverage of (optional; i.e. HGMD, ClinVar)'
+ - id: threshold
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-t'
+ doc: >-
+ threshold value defining sub-optimal coverage (optional; default if not
+ given: 20)
+ - id: sample_name
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-n'
+ - id: output
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-o'
+ doc: name for output report (optional; sample name will be used if not given)
+ - id: panel
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '-p'
+ doc: >-
+ panel bed file used for initial annotation, name will be displayed in
+ summary of report (optional)
+ - id: limit
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-l'
+ doc: >-
+ number of genes at which to limit including full gene plots, large numbers
+ of genes may take a long time to generate the plots (optional)
+ - id: summary
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-m'
+ doc: >-
+ boolean flag to add clinical report summary text in summary section,
+ includes list of all genes with transcripts (optional; default False)
+outputs:  # the HTML report; the glob is narrowed by the '-o' (output) name when one is supplied
+  - id: coverage_report_single
+    type: File
+    outputBinding:
+      glob: |-
+        ${
+          if (inputs.output) {
+            return inputs.output + '*_report.html'
+          } else {
+            return '*_report.html'
+          }
+        }
+label: coverage_report_single
+arguments:
+ - position: 0
+ prefix: '--cores'
+ valueFrom: |-
+ ${
+ if(inputs.number_of_threads)
+ return inputs.number_of_threads
+ return runtime.cores
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 25000
+ coresMin: 6
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/athena:1.4.2'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:charlk@mskcc.org'
+ 'foaf:name': Carmelina Charlambous
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:charlk@mskcc.org'
+ 'foaf:name': Carmelina Charlambous
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
diff --git a/athena/1.4.2/coverage_report_single/example_inputs.yaml b/athena/1.4.2/coverage_report_single/example_inputs.yaml
new file mode 100644
index 00000000..202a5647
--- /dev/null
+++ b/athena/1.4.2/coverage_report_single/example_inputs.yaml
@@ -0,0 +1,14 @@
+raw_coverage: {class: File, path: /path/to/annotated_bed/file.bed}
+gene_stats: {class: File, path: /path/to/gene_stats.tsv}
+exon_stats: {class: File, path: /path/to/exon_stats.tsv}
+per_base_coverage: {class: File, path: /path/to/per_base/mosdepth/file.bed}
+snps:
+ - {class: File, path: /path/to/vcfs/known1.vcf}
+ - {class: File, path: /path/to/vcfs/known2.vcf}
+threshold: 20
+sample_name: 'report1'
+output: 'report1'
+panel: {class: File, path: path/to/panel_bed.bed}
+limit: 20
+summary: true
+number_of_threads: 4  # declared input that feeds the tool's --cores argument
diff --git a/athena/1.4.2/coverage_report_single/example_inputs_juno.yaml b/athena/1.4.2/coverage_report_single/example_inputs_juno.yaml
new file mode 100644
index 00000000..0beae6b4
--- /dev/null
+++ b/athena/1.4.2/coverage_report_single/example_inputs_juno.yaml
@@ -0,0 +1,11 @@
+raw_coverage: {class: File, path: /work/access/testing/users/buehlere/athena_test/Myeloid200-1-05500HJ_annotated.bed}
+gene_stats: {class: File, path: /work/access/testing/users/buehlere/athena_test/Myeloid200-1-05500HJ_gene_stats.tsv}
+exon_stats: {class: File, path: /work/access/testing/users/buehlere/athena_test/Myeloid200-1-05500HJ_exon_stats.tsv}
+per_base_coverage: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/Myeloid200-1-05500HJ_P20.per-base.bed.gz}
+panel: {class: File, path: /work/access/testing/users/buehlere/athena_test/athena/test/panel_bed_file_athena_CH_nodup.bed}
+threshold: 20
+sample_name: 'report1'
+output: 'report1'
+limit: 20
+summary: true
+number_of_threads: 4  # declared input that feeds the tool's --cores argument
diff --git a/athena/1.4.2/coverage_stats_single/coverage_stats_single.cwl b/athena/1.4.2/coverage_stats_single/coverage_stats_single.cwl
new file mode 100755
index 00000000..7be30e2a
--- /dev/null
+++ b/athena/1.4.2/coverage_stats_single/coverage_stats_single.cwl
@@ -0,0 +1,119 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: coverage_stats_single  # tool identifier; kept in sync with this tool's file name
+baseCommand:
+ - python
+ - /app/bin/coverage_stats_single.py
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ doc: 'worker thread number'
+ - id: file
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--file'
+ doc: annotated bed file on which to generate report from
+ - id: build
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '--build'
+    doc: >-
+      text file with build number used for alignment, output from mosdepth
+      (optional)
+ - id: outfile
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--outfile'
+ doc: >-
+ output file name prefix, if not given the input file name will be used as
+ the name prefix
+ - id: thresholds
+ type: 'int[]?'
+ inputBinding:
+ position: 999
+ prefix: '--thresholds'
+ doc: >-
+      threshold values to calculate coverage for as comma separated integers
+ (default: 10, 20, 30, 50, 100)
+ - id: output_name
+ type: string?
+ inputBinding:
+ position: 900
+ prefix: '--n'
+ doc: >-
+ (optional) Prefix for naming output file, if not given will use name from
+ per base coverage file
+  - id: flagstat
+    type: File?  # a file on disk, so it is typed File for the runner to stage it into the container
+    inputBinding:
+      position: 900
+      prefix: '--flagstat'
+    doc: 'file for sample, required for generating run statistics (in development)'
+outputs:
+ - id: exon_stats_output
+ label: exon_stats_output
+ type: File
+ outputBinding:
+ glob: '*_exon_stats.tsv'
+ - id: gene_stats_output
+ label: gene_stats_output
+ type: File
+ outputBinding:
+ glob: '*_gene_stats.tsv'
+label: coverage_stats_single  # human-readable name; matches this tool's file name
+arguments:
+ - position: 0
+ prefix: '--cores'
+ valueFrom: |-
+ ${
+ if(inputs.number_of_threads)
+ return inputs.number_of_threads
+ return runtime.cores
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 25000
+ coresMin: 6
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/athena:1.4.2'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:charlk@mskcc.org'
+ 'foaf:name': Carmelina Charlambous
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:charlk@mskcc.org'
+ 'foaf:name': Carmelina Charlambous
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
diff --git a/athena/1.4.2/coverage_stats_single/example_inputs.yaml b/athena/1.4.2/coverage_stats_single/example_inputs.yaml
new file mode 100644
index 00000000..871cf071
--- /dev/null
+++ b/athena/1.4.2/coverage_stats_single/example_inputs.yaml
@@ -0,0 +1,5 @@
+file: {class: File, path: /path/to/annotated_bed/file.bed}
+build: {class: File, path: /path/to/mosdepth/build/file.txt}
+flagstat: {class: File, path: /path/to/flagstat/file.txt}
+thresholds: [10, 20, 30, 50, 100]  # key must match the declared 'thresholds' input id
+number_of_threads: 4  # declared input that feeds the tool's --cores argument
diff --git a/athena/1.4.2/coverage_stats_single/example_inputs_juno.yaml b/athena/1.4.2/coverage_stats_single/example_inputs_juno.yaml
new file mode 100644
index 00000000..9dc935a3
--- /dev/null
+++ b/athena/1.4.2/coverage_stats_single/example_inputs_juno.yaml
@@ -0,0 +1,3 @@
+file: {class: File, path: /work/access/testing/users/buehlere/athena_test/Myeloid200-1-05500HJ_annotated.bed}
+thresholds: [10, 20, 30, 50, 100]
+number_of_threads: 4  # declared input that feeds the tool's --cores argument
diff --git a/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl
new file mode 100644
index 00000000..60c2c313
--- /dev/null
+++ b/bcftools_1.15.1/bcftools_bgzip_1.15.1.cwl
@@ -0,0 +1,82 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: bgzip
+baseCommand:
+ - bgzip
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: stdout
+ type: boolean
+ inputBinding:
+ position: 0
+ prefix: '-c'
+    doc: Standard output for bgzip
+ - id: input
+ type: File
+ inputBinding:
+ position: 10
+ doc: input VCF file
+ - id: output_file_name
+ type: string?
+ doc: Name of the output file
+outputs:  # collects the file that 'stdout' below captures; both expressions must yield the same name
+  - id: zippedVcf
+    type: File?
+    outputBinding:  # default name: first literal ".vcf" in the input basename becomes ".vcf.gz" (dot escaped)
+      glob: |-
+        ${
+          if (inputs.output_file_name) {
+            return inputs.output_file_name
+          } else {
+            return inputs.input.basename.replace(/\.vcf/, '.vcf.gz')
+          }
+        }
+label: bgzip
+requirements:
+  - class: ResourceRequirement
+    ramMin: 8000
+    coresMin: 1
+  - class: DockerRequirement
+    dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1'
+  - class: InitialWorkDirRequirement
+    listing:
+      - entry: $(inputs.input)
+  - class: InlineJavascriptRequirement
+stdout: |-
+  ${
+    if (inputs.output_file_name) {
+      return inputs.output_file_name
+    } else {
+      return inputs.input.basename.replace(/\.vcf/, '.vcf.gz')
+    }
+  }
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': bgzip
+ 'doap:revision': 1.15.1
diff --git a/bcftools_1.15.1/bcftools_concat_1.15.1.cwl b/bcftools_1.15.1/bcftools_concat_1.15.1.cwl
new file mode 100644
index 00000000..f2bfc420
--- /dev/null
+++ b/bcftools_1.15.1/bcftools_concat_1.15.1.cwl
@@ -0,0 +1,82 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: bcftools_concat
+baseCommand:
+ - bcftools
+ - concat
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: allow_overlaps
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-a'
+ doc: >-
+ First coordinate of the next file can precede last record of the current
+ file.
+ - id: output_name
+ type: string
+ inputBinding:
+ position: 0
+ prefix: '-o'
+ doc: Output file name
+ - id: output_type
+ type: string?
+ inputBinding:
+ position: 99
+ prefix: '-O'
+ doc: >-
+ compressed BCF (b), uncompressed BCF (u), compressed VCF (z), uncompressed
+ VCF (v)
+ - id: input
+ type: 'File[]'
+ inputBinding:
+ position: 100
+ secondaryFiles:
+ - .tbi
+outputs:
+ - id: concatenated_vcf
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ return inputs.output_name
+ }
+label: bcftools_concat
+requirements:
+ - class: ResourceRequirement
+ ramMin: 8000
+ coresMin: 1
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': bcftools concat
+ 'doap:revision': 1.15.1
diff --git a/bcftools_1.15.1/bcftools_norm_1.15.1.cwl b/bcftools_1.15.1/bcftools_norm_1.15.1.cwl
new file mode 100644
index 00000000..ecc24425
--- /dev/null
+++ b/bcftools_1.15.1/bcftools_norm_1.15.1.cwl
@@ -0,0 +1,101 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: norm
+baseCommand:
+ - bcftools
+ - norm
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: check_ref
+ type: string?
+ inputBinding:
+ position: 99
+ prefix: '--check-ref'
+ - id: multiallelics
+ type: string?
+ inputBinding:
+ position: 99
+ prefix: '-m'
+ doc: use any
+ - id: output_type
+ type: string?
+ inputBinding:
+ position: 99
+ prefix: '-O'
+ - id: output_name
+ type: string?
+ inputBinding:
+ position: 99
+ prefix: '-o'
+ - id: input
+ type: File
+ inputBinding:
+ position: 100
+ doc: input vcf file
+ secondaryFiles:
+ - .tbi
+ - id: fastaRef
+ type: File
+ inputBinding:
+ position: 99
+ prefix: '-f'
+ secondaryFiles:
+ - .fai
+outputs:  # collects the normalized VCF; glob and 'stdout' below must yield the same name
+  - id: normalized_vcf
+    type: File
+    outputBinding:  # default name: first literal ".vcf" in the input basename becomes "_norm.vcf" (dot escaped)
+      glob: |-
+        ${
+          if(inputs.output_name) {
+            return inputs.output_name
+          } else {
+            return inputs.input.basename.replace(/\.vcf/, '_norm.vcf')
+          }
+        }
+label: bcftools_norm
+requirements:
+  - class: ResourceRequirement
+    ramMin: 8000
+    coresMin: 1
+  - class: DockerRequirement
+    dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1'
+  - class: InlineJavascriptRequirement
+stdout: |-
+  ${
+    if(inputs.output_name) {
+      return inputs.output_name
+    } else {
+      return inputs.input.basename.replace(/\.vcf/, '_norm.vcf')
+    }
+  }
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': bcftools norm
+ 'doap:revision': 1.15.1
diff --git a/bcftools_1.15.1/bcftools_sort._1.15.1.cwl b/bcftools_1.15.1/bcftools_sort._1.15.1.cwl
new file mode 100644
index 00000000..202fbcd2
--- /dev/null
+++ b/bcftools_1.15.1/bcftools_sort._1.15.1.cwl
@@ -0,0 +1,93 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: bcftools_sort
+baseCommand:
+ - bcftools
+ - sort
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--threads'
+ - id: output_name
+ type: string?
+ doc: Output file name
+ - id: output_type
+ type: string?
+ inputBinding:
+ position: 99
+ prefix: '-O'
+ doc: >-
+ compressed BCF (b), uncompressed BCF (u), compressed VCF (z), uncompressed
+ VCF (v)
+ - id: input
+ type: File
+ inputBinding:
+ position: 100
+ doc: input vcf files
+ secondaryFiles:
+ - .tbi
+outputs:  # collects the sorted VCF; glob and the '-o' argument below must yield the same name
+  - id: sorted_file
+    type: File?
+    outputBinding:  # default name: first literal ".vcf" in the input basename becomes ".sorted.vcf" (dot escaped)
+      glob: |-
+        ${
+          if(inputs.output_name) {
+            return inputs.output_name
+          } else {
+            return inputs.input.basename.replace(/\.vcf/, '.sorted.vcf')
+          }
+        }
+label: bcftools_sort
+arguments:
+  - position: 0
+    prefix: '-o'
+    valueFrom: |-
+      ${
+        if(inputs.output_name) {
+          return inputs.output_name
+        } else {
+          return inputs.input.basename.replace(/\.vcf/, '.sorted.vcf')
+        }
+      }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 8000
+ coresMin: 1
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1'
+ - class: InitialWorkDirRequirement
+ listing:
+ - $(inputs.input)
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': bcftools sort
+ 'doap:revision': 1.15.1
diff --git a/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl
new file mode 100644
index 00000000..40e45df6
--- /dev/null
+++ b/bcftools_1.15.1/bcftools_tabix_1.15.1.cwl
@@ -0,0 +1,66 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: tabix
+baseCommand:
+ - tabix
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: preset
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-p'
+ doc: 'input file type can be gff, bed, sam or vcf'
+ - id: input
+ type: File
+ inputBinding:
+ position: 99
+    doc: Input bgzipped file
+outputs:
+ - id: tabixIndex
+ type: File?
+ outputBinding:
+ glob: $(inputs.input.basename)
+ secondaryFiles:
+ - .tbi
+label: tabix
+requirements:
+ - class: ResourceRequirement
+ ramMin: 4000
+ coresMin: 1
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/bcftools:1.15.1'
+ - class: InitialWorkDirRequirement
+ listing:
+ - $(inputs.input)
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': tabix
+ 'doap:revision': 1.15.1
diff --git a/bcftools_1.15.1/example_input_bgzip.yaml b/bcftools_1.15.1/example_input_bgzip.yaml
new file mode 100644
index 00000000..3429f37d
--- /dev/null
+++ b/bcftools_1.15.1/example_input_bgzip.yaml
@@ -0,0 +1,4 @@
+input:
+ class: File
+ path: /path/to/input.vcf
+stdout: True
diff --git a/bcftools_1.15.1/example_input_concat.yaml b/bcftools_1.15.1/example_input_concat.yaml
new file mode 100644
index 00000000..36032292
--- /dev/null
+++ b/bcftools_1.15.1/example_input_concat.yaml
@@ -0,0 +1,10 @@
+input:
+ - class: File
+ path: >-
+ /path/input/vcf.sorted.vcf.gz
+ - class: File
+ path: >-
+ /path/input/vcf.sorted.vcf.gz
+output_name: out_merged.vcf.gz
+output_type: z
+allow_overlaps: True
diff --git a/bcftools_1.15.1/example_input_normvcf.yaml b/bcftools_1.15.1/example_input_normvcf.yaml
new file mode 100644
index 00000000..8832efcb
--- /dev/null
+++ b/bcftools_1.15.1/example_input_normvcf.yaml
@@ -0,0 +1,12 @@
+check_ref: s  # keys use underscores to match the input ids declared by the norm tool
+fastaRef:
+  class: File
+  path: >-
+    /juno/work/access/production/resources/reference/current/Homo_sapiens_assembly19.fasta
+input:
+  class: File
+  path: /path/to/input.vcf.gz
+  secondaryFiles: []
+multiallelics: +
+output_name: out_norm.vcf.gz
+output_type: z
diff --git a/bcftools_1.15.1/example_input_sort.yaml b/bcftools_1.15.1/example_input_sort.yaml
new file mode 100644
index 00000000..bc77aeda
--- /dev/null
+++ b/bcftools_1.15.1/example_input_sort.yaml
@@ -0,0 +1,5 @@
+input:
+ class: File
+ path: /path/to/input.vcf.gz
+output_name: out.sorted.vcf.gz
+output_type: z
diff --git a/bcftools_1.15.1/example_input_tabix.yaml b/bcftools_1.15.1/example_input_tabix.yaml
new file mode 100644
index 00000000..2128d691
--- /dev/null
+++ b/bcftools_1.15.1/example_input_tabix.yaml
@@ -0,0 +1,4 @@
+input:  # File object bound to the tabix tool's 'input' parameter
+  class: File
+  path: /path/to/input.vcf.gz
+preset: vcf
diff --git a/bcftools_1.6/README.md b/bcftools_1.6/README.md
new file mode 100644
index 00000000..770e8751
--- /dev/null
+++ b/bcftools_1.6/README.md
@@ -0,0 +1,69 @@
+# CWL and Dockerfile for running bcftools v1.6
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| alpine:3.8 base image | 3.8 | - |
+| bcftools | 1.6 | quay.io/biocontainers/bcftools:1.6--0 |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner bcftools_concat_1.6.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir bcftools_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.6.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```
+usage: toil-cwl-runner bcftools_concat_1.6.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --allow_overlaps First coordinate of the next file can precede last
+ record of the current file.
+ --compact_PS Do not output PS tag at each site, only at the start
+ of a new phase set block.
+ --ligate Ligate phased VCFs by matching phase at overlapping
+ haplotypes
+ --list LIST Read the list of files from a file.
+ --min_PQ MIN_PQ Break phase set if phasing quality is lower than
+ [30]
+ --naive Concatenate BCF files without recompression
+ (dangerous, use with caution)
+ --no_version do not append version and command line to the header
+ --output OUTPUT Write output to a file [standard output]
+ --output_type OUTPUT_TYPE
+ b - compressed BCF, u - uncompressed BCF, z
+ - compressed VCF, v - uncompressed VCF [v]
+ --regions REGIONS Restrict to comma-separated list of regions
+ --regions_file REGIONS_FILE
+ Restrict to regions listed in a file
+ --remove_duplicates Alias for -d none
+ --rm_dups RM_DUPS Output duplicate records present in multiple
+ files only once -
+ --threads THREADS Number of extra output compression threads [0]
+ --vcf_files_csi VCF_FILES_CSI
+ Array of vcf files to be concatenated into one vcf
+ --vcf_files_tbi VCF_FILES_TBI
+ Array of vcf files to be concatenated into one vcf
+
+```
diff --git a/bcftools_1.6/bcftools_concat_1.6.cwl b/bcftools_1.6/bcftools_concat_1.6.cwl
new file mode 100644
index 00000000..98f4122c
--- /dev/null
+++ b/bcftools_1.6/bcftools_concat_1.6.cwl
@@ -0,0 +1,180 @@
+
+class: CommandLineTool
+cwlVersion: v1.0
+
+$namespaces:
+ dct: http://purl.org/dc/terms/
+ doap: http://usefulinc.com/ns/doap#
+ foaf: http://xmlns.com/foaf/0.1/
+ sbg: https://www.sevenbridges.com/
+
+id: bcftools_concat_v1_6
+
+baseCommand:
+ - /usr/bin/bcftools
+ - concat
+
+doc: |
+ concatenate VCF/BCF files from the same set of samples
+
+inputs:
+
+ memory_per_job:
+ type: ["null",int]
+ doc: Memory per job in megabytes
+
+ memory_overhead:
+ type: ["null",int]
+ doc: Memory overhead per job in megabytes
+
+ threads:
+ type: ["null", string]
+ doc: Number of extra output compression threads [0]
+ inputBinding:
+ prefix: --threads
+
+ compact_PS:
+ type: ["null", boolean]
+ default: false
+ doc: Do not output PS tag at each site, only at the start of a new phase set block.
+ inputBinding:
+ prefix: --compact-PS
+
+ remove_duplicates:
+ type: ["null", boolean]
+ default: false
+ doc: Alias for -d none
+ inputBinding:
+ prefix: --remove-duplicates
+
+ ligate:
+ type: ["null", boolean]
+ default: false
+ doc: Ligate phased VCFs by matching phase at overlapping haplotypes
+ inputBinding:
+ prefix: --ligate
+
+ output_type:
+ type: ["null", string]
+ doc: b - compressed BCF, u - uncompressed BCF, z - compressed VCF, v - uncompressed VCF [v]
+ inputBinding:
+ prefix: --output-type
+
+ no_version:
+ type: ["null", boolean]
+ default: false
+ doc: do not append version and command line to the header
+ inputBinding:
+ prefix: --no-version
+
+ naive:
+ type: ["null", boolean]
+ default: false
+ doc: Concatenate BCF files without recompression (dangerous, use with caution)
+ inputBinding:
+ prefix: --naive
+
+ allow_overlaps:
+ type: ["null", boolean]
+ default: false
+ doc: First coordinate of the next file can precede last record of the current file.
+ inputBinding:
+ prefix: --allow-overlaps
+
+ min_PQ:
+ type: ["null", string]
+ doc: Break phase set if phasing quality is lower than [30]
+ inputBinding:
+ prefix: --min-PQ
+
+ regions_file:
+ type: ["null", string]
+ doc: Restrict to regions listed in a file
+ inputBinding:
+ prefix: --regions-file
+
+ regions:
+ type: ["null", string]
+ doc: Restrict to comma-separated list of regions
+ inputBinding:
+ prefix: --regions
+
+ rm_dups:
+   type: ["null", string]
+   doc: "Output duplicate records present in multiple files only once - one of snps, indels, both, all or none"
+   inputBinding:
+     prefix: --rm-dups
+
+ output:
+ type: string
+ doc: Write output to a file [standard output]
+ default: "bcftools_concat.vcf"
+ inputBinding:
+ prefix: --output
+
+ list:
+ type: ['null', string]
+ doc: Read the list of files from a file.
+ inputBinding:
+ prefix: --file-list
+
+ vcf_files_tbi:
+ type:
+ - 'null'
+ - type: array
+ items: File
+ secondaryFiles:
+ - .tbi
+ doc: Array of vcf files to be concatenated into one vcf
+ inputBinding:
+ position: 1
+
+ vcf_files_csi:
+ type:
+ - 'null'
+ - type: array
+ items: File
+ secondaryFiles:
+ - ^.bcf.csi
+ doc: Array of vcf files to be concatenated into one vcf
+ inputBinding:
+ position: 1
+
+outputs:
+ bcftools_concat_vcf_output_file:
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.output)
+ return inputs.output;
+ return null;
+ }
+
+requirements:
+ InlineJavascriptRequirement: {}
+ ResourceRequirement:
+ ramMin: 8000
+ coresMin: 1
+ DockerRequirement:
+ dockerPull: ghcr.io/msk-access/bcftools:1.6
+
+
+dct:contributor:
+ - class: foaf:Organization
+ foaf:member:
+ - class: foaf:Person
+ foaf:mbox: mailto:kumarn1@mskcc.org
+ foaf:name: Nikhil Kumar
+ foaf:name: Memorial Sloan Kettering Cancer Center
+dct:creator:
+ - class: foaf:Organization
+ foaf:member:
+ - class: foaf:Person
+ foaf:mbox: mailto:kumarn1@mskcc.org
+ foaf:name: Nikhil Kumar
+ foaf:name: Memorial Sloan Kettering Cancer Center
+doap:release:
+ - class: doap:Version
+ doap:name: bcftools
+ doap:revision: 1.6
diff --git a/bcftools_1.6/example_inputs.yaml b/bcftools_1.6/example_inputs.yaml
new file mode 100644
index 00000000..0512753d
--- /dev/null
+++ b/bcftools_1.6/example_inputs.yaml
@@ -0,0 +1,9 @@
+vcf_files_tbi:
+  - class: File
+    path: /path/to/vcf/and/tbi/files
+# vcf_files_tbi is an array input: add one "- class: File" entry per VCF.
+# Each VCF needs its .tbi index alongside it (declared as a secondaryFile).
+allow_overlaps: allow_overlaps_boolean
+rm_dups: rm_dups_str
+output_type: output_type_str
+output: output_file_name
diff --git a/bedtools_genomecov_v2.28.0_cv2/README.md b/bedtools_genomecov_v2.28.0_cv2/README.md
deleted file mode 100644
index 75de8a57..00000000
--- a/bedtools_genomecov_v2.28.0_cv2/README.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# CWL and Dockerfile for running Bedtools GenomeCov
-
-## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools)
-
-| Tool | Version | Location |
-|--- |--- |--- |
-| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 |
-
-[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
-## CWL
-
-- CWL specification 1.0
-- Use example_inputs.yml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
-
-```bash
- > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml
-```
-
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
-
-```bash
-#Using CWLTOOL
-> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml
-
-#Using toil-cwl-runner
-> mkdir run_directory
-> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
-```
-
-## Usage
-
-```bash
-> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help
-
-usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT
- --output_file_name OUTPUT_FILE_NAME
- [--memory_overhead MEMORY_OVERHEAD]
- [--memory_per_job MEMORY_PER_JOB]
- [--number_of_threads NUMBER_OF_THREADS]
- [--option_bedgraph]
- [job_order]
-
-positional arguments:
- job_order Job input json file
-
-optional arguments:
- -h, --help show this help message and exit
- --input INPUT The input file can be in BAM format (Note: BAM must be
- sorted by position)
- --output_file_name OUTPUT_FILE_NAME
- --memory_overhead MEMORY_OVERHEAD
- --memory_per_job MEMORY_PER_JOB
- --number_of_threads NUMBER_OF_THREADS
- --option_bedgraph option flag parameter to choose output file format.
- -bg refers to bedgraph format
\ No newline at end of file
diff --git a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl
index 43f392dd..396931b9 100644
--- a/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl
+++ b/bedtools_genomecov_v2.28.0_cv2/bedtools_genomecov_v2.28.0_cv2.cwl
@@ -39,7 +39,7 @@ inputs:
option flag parameter to choose output file format. -bg refers to bedgraph
format
outputs:
- - id: output_file
+ - id: bedtools_genomecove_bedgraph
type: File
outputBinding:
glob: |-
@@ -55,7 +55,7 @@ requirements:
ramMin: 20000
coresMin: 1
- class: DockerRequirement
- dockerPull: 'biocontainers/bedtools:v2.28.0_cv2'
+ dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2'
- class: InlineJavascriptRequirement
stdout: |-
${
diff --git a/bedtools_merge_v2.28.0_cv2/README.md b/bedtools_merge_v2.28.0_cv2/README.md
deleted file mode 100644
index 960664db..00000000
--- a/bedtools_merge_v2.28.0_cv2/README.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# CWL and Dockerfile for running Bedtools Merge
-
-## Version of tools in [docker image ](https://hub.docker.com/r/biocontainers/bedtools)
-
-| Tool | Version | Location |
-|--- |--- |--- |
-| Bedtools | v2.28.0_cv2 | https://github.com/arq5x/bedtools2/releases/tag/v2.28.0 |
-
-[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
-## CWL
-
-- CWL specification 1.0
-- Use example_inputs.yml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
-
-```bash
- > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml
-```
-
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
-
-```bash
-#Using CWLTOOL
-> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml
-
-#Using toil-cwl-runner
-> mkdir run_directory
-> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
-```
-
-## Usage
-
-```bash
-> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help
-
-usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name
- OUTPUT_FILE_NAME
- [--memory_overhead MEMORY_OVERHEAD]
- [--memory_per_job MEMORY_PER_JOB]
- [--number_of_threads NUMBER_OF_THREADS]
- [--distance_between_features DISTANCE_BETWEEN_FEATURES]
- [job_order]
-
-positional arguments:
- job_order Job input json file
-
-optional arguments:
- -h, --help show this help message and exit
- --input INPUT BEDgraph format file generated from Bedtools Genomecov
- module
- --output_file_name OUTPUT_FILE_NAME
- --memory_overhead MEMORY_OVERHEAD
- --memory_per_job MEMORY_PER_JOB
- --number_of_threads NUMBER_OF_THREADS
- --distance_between_features DISTANCE_BETWEEN_FEATURES
- Maximum distance between features allowed for features
- to be merged.
\ No newline at end of file
diff --git a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl
index 21e7cc7b..e6259ff3 100644
--- a/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl
+++ b/bedtools_merge_v2.28.0_cv2/bedtools_merge_v2.28.0_cv2.cwl
@@ -33,8 +33,8 @@ inputs:
shellQuote: false
doc: Maximum distance between features allowed for features to be merged.
outputs:
- - id: output_file
- type: File?
+ - id: bedtools_merge_bed
+ type: File
outputBinding:
glob: |-
${
@@ -49,7 +49,7 @@ requirements:
ramMin: 20000
coresMin: 1
- class: DockerRequirement
- dockerPull: 'biocontainers/bedtools:v2.28.0_cv2'
+ dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2'
- class: InlineJavascriptRequirement
stdout: |-
${
diff --git a/bedtools_sortBed_v2.28.0_cv2/bedtools_sortBed_v2.28.0_cv2.cwl b/bedtools_sortBed_v2.28.0_cv2/bedtools_sortBed_v2.28.0_cv2.cwl
new file mode 100644
index 00000000..8b6c2ba9
--- /dev/null
+++ b/bedtools_sortBed_v2.28.0_cv2/bedtools_sortBed_v2.28.0_cv2.cwl
@@ -0,0 +1,77 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: sortbed
+baseCommand:
+ - sortBed
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: input
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-i'
+ doc: 'input file can be either bed, gff or vcf'
+ - id: output_file_name
+ type: string?
+ doc: Name of the output file
+outputs:
+  - id: sorted_file  # sorted records, collected from the file sortBed's stdout is redirected to
+    type: File?
+    outputBinding:
+      glob: |-
+        ${
+          // Must stay identical to the `stdout` expression of this tool.
+          var name = inputs.input.basename;
+          if (inputs.output_file_name) { return inputs.output_file_name }
+          if (name.indexOf('.vcf') >= 0) { return name.replace('.vcf', '.sorted.vcf') }
+          return inputs.input.nameroot + '.sorted' + inputs.input.nameext
+        }
+label: sortBed
+requirements:
+  - class: ResourceRequirement
+    ramMin: 2000
+    coresMin: 1
+  - class: DockerRequirement
+    dockerPull: 'ghcr.io/msk-access/bedtools:v2.28.0_cv2'
+  - class: InitialWorkDirRequirement  # stages the input into the working directory
+    listing:
+      - $(inputs.input)
+  - class: InlineJavascriptRequirement
+stdout: |-
+  ${
+    // Default name is '<root>.sorted<ext>' so it never equals the staged input (bed/gff included).
+    var name = inputs.input.basename;
+    if (inputs.output_file_name) { return inputs.output_file_name }
+    if (name.indexOf('.vcf') >= 0) { return name.replace('.vcf', '.sorted.vcf') }
+    return inputs.input.nameroot + '.sorted' + inputs.input.nameext
+  }
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:sivaprk@mskcc.org'
+ 'foaf:name': Karthigayini Sivaprakasam
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': sortbed
+ 'doap:revision': 2.28.0
diff --git a/bedtools_sortBed_v2.28.0_cv2/example_input.yaml b/bedtools_sortBed_v2.28.0_cv2/example_input.yaml
new file mode 100644
index 00000000..e51645b1
--- /dev/null
+++ b/bedtools_sortBed_v2.28.0_cv2/example_input.yaml
@@ -0,0 +1 @@
+input: /path/to/the/file/to/be/sorted
diff --git a/biometrics_extract/0.2.13/biometrics_extract.cwl b/biometrics_extract/0.2.13/biometrics_extract.cwl
new file mode 100644
index 00000000..19ee1121
--- /dev/null
+++ b/biometrics_extract/0.2.13/biometrics_extract.cwl
@@ -0,0 +1,139 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_extract_0_2_13
+baseCommand:
+ - biometrics
+ - extract
+inputs:
+ - id: sample_bam
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--sample-bam'
+ doc: BAM file.
+ secondaryFiles:
+ - ^.bai
+ - id: sample_sex
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--sample-sex'
+ doc: Expected sample sex (i.e. M or F).
+ - id: sample_group
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--sample-group'
+ doc: The sample group (e.g. the sample patient ID).
+ - id: sample_name
+ type: string
+ inputBinding:
+ position: 0
+ prefix: '--sample-name'
+ doc: >-
+ Sample name. If not specified, sample name is automatically figured out
+ from the BAM file.
+ - id: fafile
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--fafile'
+ doc: Path to reference fasta.
+ secondaryFiles:
+ - ^.fasta.fai
+ - id: vcf_file
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--vcf'
+ doc: VCF file containing the SNPs to be queried.
+ - id: bed_file
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '--bed'
+ doc: BED file containing the intervals to be queried.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 1
+ id: min_mapping_quality
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--min-mapping-quality'
+ doc: Minimum mapping quality of reads to be used for pileup.
+ - default: 1
+ id: min_base_quality
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--min-base-quality'
+ doc: Minimum base quality of reads to be used for pileup.
+ - default: 10
+ id: min_coverage
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--min-coverage'
+ doc: Minimum coverage to count a site.
+ - default: 0.1
+ id: min_homozygous_thresh
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--min-homozygous-thresh'
+ doc: Minimum threshold to define homozygous.
+ - id: default_genotype
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--default-genotype'
+ doc: Default genotype if coverage is too low (options are Het or Hom).
+outputs:
+ - id: biometrics_extract_pickle
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.database) {
+ return inputs.database + '/' + inputs.sample_name + '.pickle';
+ } else {
+ return inputs.sample_name + '.pickle';
+ }
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_extract/0.2.13/example_inputs.yaml b/biometrics_extract/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..566b496a
--- /dev/null
+++ b/biometrics_extract/0.2.13/example_inputs.yaml
@@ -0,0 +1,24 @@
+# Example job for biometrics_extract 0.2.13. This version of the tool takes a
+# single sample per run, so sample_bam, sample_name, sample_sex and
+# sample_group are scalars (not lists), and there is no sample_type input.
+sample_sex: "M"
+sample_name: "test"
+sample_group: "test"
+# fafile and sample_bam declare secondaryFiles (^.fasta.fai and ^.bai), so
+# the index files must sit next to the paths given here.
+fafile:
+  class: File
+  path: /path/to/fasta
+sample_bam:
+  class: File
+  path: /path/to/bam
+bed_file: null
+vcf_file:
+  class: File
+  path: /path/to/vcf
+database: null
+min_mapping_quality: null
+min_base_quality: null
+min_coverage: null
+min_homozygous_thresh: null
+default_genotype: null
diff --git a/biometrics_extract/0.2.5/biometrics_extract.cwl b/biometrics_extract/0.2.5/biometrics_extract.cwl
new file mode 100644
index 00000000..0abf2aec
--- /dev/null
+++ b/biometrics_extract/0.2.5/biometrics_extract.cwl
@@ -0,0 +1,172 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_extract
+baseCommand:
+ - biometrics
+ - extract
+inputs:
+ - id: sample_bam
+ type:
+ - type: array
+ items: File
+ inputBinding:
+ position: 0
+ prefix: --sample-bam
+ secondaryFiles:
+ - ^.bai
+ doc: >-
+ BAM file.
+ - id: sample_type
+ type:
+ - "null"
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: --sample-type
+ doc: >-
+ Sample types: Normal or Tumor.
+ - id: sample_sex
+ type:
+ - "null"
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: --sample-sex
+ doc: >-
+ Expected sample sex (i.e. M or F).
+ - id: sample_group
+ type:
+ - "null"
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: --sample-group
+ doc: >-
+ The sample group (e.g. the sample patient ID).
+ - id: sample_name
+ type:
+ - type: array
+ items: string
+ inputBinding:
+ position: 0
+ prefix: --sample-name
+ doc: >-
+ Sample name. If not specified, sample name is automatically figured out from the BAM file.
+ - id: fafile
+ type: File
+ inputBinding:
+ position: 0
+ prefix: --fafile
+ secondaryFiles:
+ - ^.fasta.fai
+ doc: >-
+ Path to reference fasta.
+ - id: vcf_file
+ type: File
+ inputBinding:
+ position: 0
+ prefix: --vcf
+ doc: >-
+ VCF file containing the SNPs to be queried.
+ - id: bed_file
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: --bed
+ doc: >-
+ BED file containing the intervals to be queried.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: min_mapping_quality
+ type: int?
+ default: 1
+ inputBinding:
+ position: 0
+ prefix: --min-mapping-quality
+ doc: >-
+ Minimum mapping quality of reads to be used for pileup.
+ - id: min_base_quality
+ type: int?
+ default: 1
+ inputBinding:
+ position: 0
+ prefix: --min-base-quality
+ doc: >-
+ Minimum base quality of reads to be used for pileup.
+ - id: min_coverage
+ type: int?
+ default: 10
+ inputBinding:
+ position: 0
+ prefix: --min-coverage
+ doc: >-
+ Minimum coverage to count a site.
+ - id: min_homozygous_thresh
+ type: float?
+ default: 0.1
+ inputBinding:
+ position: 0
+ prefix: --min-homozygous-thresh
+ doc: >-
+ Minimum threshold to define homozygous.
+ - id: default_genotype
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --default-genotype
+ doc: >-
+ Default genotype if coverage is too low (options are Het or Hom).
+outputs:
+  - id: biometrics_extract_pickle  # one '<sample_name>.pk' path per entry of sample_name
+    type:
+      type: array
+      items: File
+    outputBinding:
+      glob: |-
+        ${
+          return inputs.sample_name.map(function (val) {
+            if (inputs.database) {
+              return inputs.database + '/' + val + '.pk';
+            } else {
+              return val + '.pk';
+            }
+          });
+        }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_extract/0.2.5/example_inputs.yaml b/biometrics_extract/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..566b496a
--- /dev/null
+++ b/biometrics_extract/0.2.5/example_inputs.yaml
@@ -0,0 +1,24 @@
+sample_type:
+ - "Normal"
+sample_sex:
+ - "M"
+sample_name:
+ - "test"
+sample_group:
+ - "test"
+fafile:
+ class: File
+ path: /path/to/fasta
+sample_bam:
+ - class: File
+ path: /path/to/bam
+bed_file: null
+vcf_file:
+ class: File
+ path: /path/to/vcf
+database: null
+min_mapping_quality: null
+min_base_quality: null
+min_coverage: null
+min_homozygous_thresh: null
+default_genotype: null
diff --git a/biometrics_extract/README.md b/biometrics_extract/README.md
new file mode 100644
index 00000000..5687fef3
--- /dev/null
+++ b/biometrics_extract/README.md
@@ -0,0 +1,69 @@
+# CWL for running biometrics extract tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.13 | |
+
+The python package source code and Docker file are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_extract.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_extract.cwl [-h] [--sample_bam SAMPLE_BAM]
+ [--sample_type SAMPLE_TYPE]
+ [--sample_sex SAMPLE_SEX]
+ [--sample_group SAMPLE_GROUP]
+ [--sample_name SAMPLE_NAME] --fafile
+ FAFILE --vcf_file VCF_FILE --bed_file
+ BED_FILE --database DATABASE
+ [--min_mapping_quality MIN_MAPPING_QUALITY]
+ [--min_base_quality MIN_BASE_QUALITY]
+ [--min_coverage MIN_COVERAGE]
+ [--min_homozygous_thresh MIN_HOMOZYGOUS_THRESH]
+ [--default_genotype DEFAULT_GENOTYPE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --sample_bam SAMPLE_BAM
+ BAM file.
+ --sample_type SAMPLE_TYPE
+ Sample types: Normal or Tumor.
+ --sample_sex SAMPLE_SEX
+ Expected sample sex (i.e. M or F).
+ --sample_group SAMPLE_GROUP
+ The sample group (e.g. the sample patient ID).
+ --sample_name SAMPLE_NAME
+ Sample name. If not specified, sample name is
+ automatically figured out from the BAM file.
+ --fafile FAFILE Path to reference fasta.
+ --vcf_file VCF_FILE VCF file containing the SNPs to be queried.
+ --bed_file BED_FILE BED file containing the intervals to be queried.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --min_mapping_quality MIN_MAPPING_QUALITY
+ Minimum mapping quality of reads to be used for
+ pileup.
+ --min_base_quality MIN_BASE_QUALITY
+ Minimum base quality of reads to be used for pileup.
+ --min_coverage MIN_COVERAGE
+ Minimum coverage to count a site.
+ --min_homozygous_thresh MIN_HOMOZYGOUS_THRESH
+ Minimum threshold to define homozygous.
+ --default_genotype DEFAULT_GENOTYPE
+ Default genotype if coverage is too low (options are
+ Het or Hom).
+```
diff --git a/biometrics_genotype/0.2.13/biometrics_genotype.cwl b/biometrics_genotype/0.2.13/biometrics_genotype.cwl
new file mode 100644
index 00000000..d4d1449a
--- /dev/null
+++ b/biometrics_genotype/0.2.13/biometrics_genotype.cwl
@@ -0,0 +1,149 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_genotype_0_2_13
+baseCommand:
+ - biometrics
+ - genotype
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ position: 0
+ prefix: '--input'
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample
+ information (one per line). For example:
+ sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a
+ '*.pk' file that was produced by the 'extract' tool. (3) Name of the
+ sample to analyze; this assumes there is a file named '{sample_name}.pk'
+ in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 0.05
+ id: discordance_threshold
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--discordance-threshold'
+ doc: >-
+ Discordance values less than this are regarded as matching samples.
+ (default: 0.05)
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--prefix'
+ doc: Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--plot'
+ doc: Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-db-compare'
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database,
+ only compare them with each other.
+ - default: 2
+ id: threads
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--threads'
+ doc: Number of threads to use.
+outputs:
+ - id: biometrics_genotype_comparisons
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_comparison.csv'
+ } else {
+ return 'genotype_comparison.csv'
+ }
+ }
+ - id: biometrics_genotype_cluster_input
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_clusters_input.csv'
+ } else {
+ return 'genotype_clusters_input.csv'
+ }
+ }
+ - id: biometrics_genotype_cluster_input_database
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_clusters_database.csv'
+ } else {
+ return 'genotype_clusters_database.csv'
+ }
+ }
+ - id: biometrics_genotype_plot_input
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'genotype_comparison_input.html'
+ }
+ - id: biometrics_genotype_plot_input_database
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'genotype_comparison_database.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_genotype/0.2.13/example_inputs.yaml b/biometrics_genotype/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..0bc68d94
--- /dev/null
+++ b/biometrics_genotype/0.2.13/example_inputs.yaml
@@ -0,0 +1,12 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+prefix: 'test'
+outdir: null
+plot: true
+no_db_comparison: false
+threads: null
+discordance_threshold: null
diff --git a/biometrics_genotype/0.2.5/biometrics_genotype.cwl b/biometrics_genotype/0.2.5/biometrics_genotype.cwl
new file mode 100644
index 00000000..51285328
--- /dev/null
+++ b/biometrics_genotype/0.2.5/biometrics_genotype.cwl
@@ -0,0 +1,145 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_genotype
+baseCommand:
+ - biometrics
+ - genotype
+inputs:
+ - id: input
+ type:
+ - type: array
+ items: File
+ inputBinding:
+ position: 0
+ prefix: --input
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: discordance_threshold
+ type: float?
+ default: 0.05
+ inputBinding:
+ position: 0
+ prefix: --discordance-threshold
+ doc: >-
+ Discordance values less than this are regarded as matching samples. (default: 0.05)
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --prefix
+ doc: >-
+ Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --plot
+ doc: >-
+ Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --json
+ doc: >-
+ Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --no-db-compare
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database, only compare them with each other.
+ - id: threads
+ type: int?
+ default: 2
+ inputBinding:
+ position: 0
+ prefix: --threads
+ doc: >-
+ Number of threads to use.
+outputs:
+ - id: biometrics_genotype_comparisons
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_comparison.csv'
+ } else {
+ return 'genotype_comparison.csv'
+ }
+ }
+ - id: biometrics_genotype_cluster_input
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_clusters_input.csv'
+ } else {
+ return 'genotype_clusters_input.csv'
+ }
+ }
+ - id: biometrics_genotype_cluster_input_database
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_genotype_clusters_database.csv'
+ } else {
+ return 'genotype_clusters_database.csv'
+ }
+ }
+ - id: biometrics_genotype_plot_input
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'genotype_comparison_input.html'
+ }
+ - id: biometrics_genotype_plot_input_database
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'genotype_comparison_database.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_genotype/0.2.5/example_inputs.yaml b/biometrics_genotype/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..0bc68d94
--- /dev/null
+++ b/biometrics_genotype/0.2.5/example_inputs.yaml
@@ -0,0 +1,12 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+prefix: 'test'
+outdir: null
+plot: true
+no_db_comparison: false
+threads: null
+discordance_threshold: null
diff --git a/biometrics_genotype/README.md b/biometrics_genotype/README.md
new file mode 100644
index 00000000..fd398b99
--- /dev/null
+++ b/biometrics_genotype/README.md
@@ -0,0 +1,53 @@
+# CWL for running biometrics genotype tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.12 | |
+
+The Python package source code and Dockerfile are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_genotype.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_genotype.cwl [-h] --input INPUT [--database DATABASE]
+ [--discordance_threshold DISCORDANCE_THRESHOLD]
+ [--prefix PREFIX] [--plot] [--json]
+ [--no_db_comparison] [--threads THREADS]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Can be one of three types: (1) path to a CSV file
+ containing sample information (one per line). For
+ example: sample_name,sample_bam,sample_type,sample_sex
+ ,sample_group. (2) Path to a '*.pk' file that was
+ produced by the 'extract' tool. (3) Name of the sample
+ to analyze; this assumes there is a file named
+ '{sample_name}.pk' in your database directory. Can be
+ specified more than once.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --discordance_threshold DISCORDANCE_THRESHOLD
+ Discordance values less than this are regarded as
+ matching samples. (default: 0.05)
+ --prefix PREFIX Output file prefix.
+ --plot Also output plots of the data.
+ --json Also output data in JSON format.
+ --no_db_comparison Do not compare the sample(s) you provided to all
+ samples in the database, only compare them with each
+ other.
+ --threads THREADS Number of threads to use.
+```
diff --git a/biometrics_major/0.2.13/biometrics_major.cwl b/biometrics_major/0.2.13/biometrics_major.cwl
new file mode 100644
index 00000000..b5a42a9e
--- /dev/null
+++ b/biometrics_major/0.2.13/biometrics_major.cwl
@@ -0,0 +1,123 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_major_0_2_13
+baseCommand:
+ - biometrics
+ - major
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: '--input'
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample
+ information (one per line). For example:
+ sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a
+ '*.pk' file that was produced by the 'extract' tool. (3) Name of the
+ sample to analyze; this assumes there is a file named '{sample_name}.pk'
+ in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 0.6
+ id: major_threshold
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--major-threshold'
+ doc: Major contamination threshold for bad sample.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--prefix'
+ doc: Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--plot'
+ doc: Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-db-compare'
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database,
+ only compare them with each other.
+outputs:
+ - id: biometrics_major_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_major_contamination.csv'
+ } else {
+ return 'major_contamination.csv'
+ }
+ }
+ - id: biometrics_major_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_major_contamination.json'
+ } else {
+ return 'major_contamination.json'
+ }
+ }
+ - id: biometrics_major_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'major_contamination.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_major/0.2.13/example_inputs.yaml b/biometrics_major/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..da03de55
--- /dev/null
+++ b/biometrics_major/0.2.13/example_inputs.yaml
@@ -0,0 +1,11 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+major_threshold: null
+prefix: null
+plot: true
+json: true
+no_db_comparison: null
diff --git a/biometrics_major/0.2.5/biometrics_major.cwl b/biometrics_major/0.2.5/biometrics_major.cwl
new file mode 100644
index 00000000..217c9d96
--- /dev/null
+++ b/biometrics_major/0.2.5/biometrics_major.cwl
@@ -0,0 +1,120 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_major
+baseCommand:
+ - biometrics
+ - major
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: --input
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: major_threshold
+ type: float?
+ default: 0.6
+ inputBinding:
+ position: 0
+ prefix: --major-threshold
+ doc: >-
+ Major contamination threshold for bad sample.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --prefix
+ doc: >-
+ Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --plot
+ doc: >-
+ Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --json
+ doc: >-
+ Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --no-db-compare
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database, only compare them with each other.
+outputs:
+ - id: biometrics_major_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_major_contamination.csv'
+ } else {
+ return 'major_contamination.csv'
+ }
+ }
+ - id: biometrics_major_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_major_contamination.json'
+ } else {
+ return 'major_contamination.json'
+ }
+ }
+ - id: biometrics_major_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'major_contamination.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_major/0.2.5/example_inputs.yaml b/biometrics_major/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..da03de55
--- /dev/null
+++ b/biometrics_major/0.2.5/example_inputs.yaml
@@ -0,0 +1,11 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+major_threshold: null
+prefix: null
+plot: true
+json: true
+no_db_comparison: null
diff --git a/biometrics_major/README.md b/biometrics_major/README.md
new file mode 100644
index 00000000..10fa476e
--- /dev/null
+++ b/biometrics_major/README.md
@@ -0,0 +1,51 @@
+# CWL for running biometrics major tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.13 | |
+
+The Python package source code and Dockerfile are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_major.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_major.cwl [-h] --input INPUT [--database DATABASE]
+ [--major_threshold MAJOR_THRESHOLD]
+ [--prefix PREFIX] [--plot] [--json]
+ [--no_db_comparison]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Can be one of three types: (1) path to a CSV file
+ containing sample information (one per line). For
+ example: sample_name,sample_bam,sample_type,sample_sex
+ ,sample_group. (2) Path to a '*.pk' file that was
+ produced by the 'extract' tool. (3) Name of the sample
+ to analyze; this assumes there is a file named
+ '{sample_name}.pk' in your database directory. Can be
+ specified more than once.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --major_threshold MAJOR_THRESHOLD
+ Major contamination threshold for bad sample.
+ --prefix PREFIX Output file prefix.
+ --plot Also output plots of the data.
+ --json Also output data in JSON format.
+ --no_db_comparison Do not compare the sample(s) you provided to all
+ samples in the database, only compare them with each
+ other.
+```
diff --git a/biometrics_minor/0.2.13/biometrics_minor.cwl b/biometrics_minor/0.2.13/biometrics_minor.cwl
new file mode 100644
index 00000000..2535eb6b
--- /dev/null
+++ b/biometrics_minor/0.2.13/biometrics_minor.cwl
@@ -0,0 +1,130 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_minor_0_2_13
+baseCommand:
+ - biometrics
+ - minor
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: '--input'
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample
+ information (one per line). For example:
+ sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a
+ '*.pk' file that was produced by the 'extract' tool. (3) Name of the
+ sample to analyze; this assumes there is a file named '{sample_name}.pk'
+ in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 0.002
+ id: minor_threshold
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '--minor-threshold'
+ doc: Minor contamination threshold for bad sample.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--prefix'
+ doc: Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--plot'
+ doc: Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-db-compare'
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database,
+ only compare them with each other.
+outputs:
+ - id: biometrics_minor_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_minor_contamination.csv'
+ } else {
+ return 'minor_contamination.csv'
+ }
+ }
+ - id: biometrics_minor_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_minor_contamination.json'
+ } else {
+ return 'minor_contamination.json'
+ }
+ }
+ - id: biometrics_minor_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'minor_contamination.html'
+ }
+ - id: biometrics_minor_sites_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'minor_contamination_sites.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_minor/0.2.13/example_inputs.yaml b/biometrics_minor/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..bddb4c72
--- /dev/null
+++ b/biometrics_minor/0.2.13/example_inputs.yaml
@@ -0,0 +1,11 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+minor_threshold: null
+prefix: null
+plot: true
+json: true
+no_db_comparison: null
diff --git a/biometrics_minor/0.2.5/biometrics_minor.cwl b/biometrics_minor/0.2.5/biometrics_minor.cwl
new file mode 100644
index 00000000..dc0410cb
--- /dev/null
+++ b/biometrics_minor/0.2.5/biometrics_minor.cwl
@@ -0,0 +1,127 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_minor
+baseCommand:
+ - biometrics
+ - minor
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: --input
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: minor_threshold
+ type: float?
+ default: 0.002
+ inputBinding:
+ position: 0
+ prefix: --minor-threshold
+ doc: >-
+ Minor contamination threshold for bad sample.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --prefix
+ doc: >-
+ Output file prefix.
+ - id: plot
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --plot
+ doc: >-
+ Also output plots of the data.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --json
+ doc: >-
+ Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --no-db-compare
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database, only compare them with each other.
+outputs:
+ - id: biometrics_minor_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_minor_contamination.csv'
+ } else {
+ return 'minor_contamination.csv'
+ }
+ }
+ - id: biometrics_minor_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_minor_contamination.json'
+ } else {
+ return 'minor_contamination.json'
+ }
+ }
+ - id: biometrics_minor_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'minor_contamination.html'
+ }
+ - id: biometrics_minor_sites_plot
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ return 'minor_contamination_sites.html'
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_minor/0.2.5/example_inputs.yaml b/biometrics_minor/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..bddb4c72
--- /dev/null
+++ b/biometrics_minor/0.2.5/example_inputs.yaml
@@ -0,0 +1,11 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+minor_threshold: null
+prefix: null
+plot: true
+json: true
+no_db_comparison: null
diff --git a/biometrics_minor/README.md b/biometrics_minor/README.md
new file mode 100644
index 00000000..af94ea40
--- /dev/null
+++ b/biometrics_minor/README.md
@@ -0,0 +1,51 @@
+# CWL for running biometrics minor tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.13 | |
+
+The Python package source code and Dockerfile are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_minor.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_minor.cwl [-h] --input INPUT [--database DATABASE]
+ [--minor_threshold MINOR_THRESHOLD]
+ [--prefix PREFIX] [--plot] [--json]
+ [--no_db_comparison]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Can be one of three types: (1) path to a CSV file
+ containing sample information (one per line). For
+ example: sample_name,sample_bam,sample_type,sample_sex
+ ,sample_group. (2) Path to a '*.pk' file that was
+ produced by the 'extract' tool. (3) Name of the sample
+ to analyze; this assumes there is a file named
+ '{sample_name}.pk' in your database directory. Can be
+ specified more than once.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --minor_threshold MINOR_THRESHOLD
+ Minor contamination threshold for bad sample.
+ --prefix PREFIX Output file prefix.
+ --plot Also output plots of the data.
+ --json Also output data in JSON format.
+ --no_db_comparison Do not compare the sample(s) you provided to all
+ samples in the database, only compare them with each
+ other.
+```
diff --git a/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl
new file mode 100644
index 00000000..e8755e55
--- /dev/null
+++ b/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl
@@ -0,0 +1,110 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_sexmismatch_0_2_13
+baseCommand:
+ - biometrics
+ - sexmismatch
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: '--input'
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample
+ information (one per line). For example:
+ sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a
+ '*.pk' file that was produced by the 'extract' tool. (3) Name of the
+ sample to analyze; this assumes there is a file named '{sample_name}.pk'
+ in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--database'
+ doc: >-
+ Directory to store the intermediate files after running the extraction
+ step.
+ - default: 50
+ id: coverage_threshold
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--coverage-threshold'
+ doc: Samples with Y chromosome above this value will be considered male.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--prefix'
+ doc: Output file prefix.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--no-db-compare'
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database,
+ only compare them with each other.
+outputs:
+ - id: biometrics_sexmismatch_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_sex_mismatch.csv'
+ } else {
+ return 'sex_mismatch.csv'
+ }
+ }
+ - id: biometrics_sexmismatch_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_sex_mismatch.json'
+ } else {
+ return 'sex_mismatch.json'
+ }
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.13'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.13
diff --git a/biometrics_sexmismatch/0.2.13/example_inputs.yaml b/biometrics_sexmismatch/0.2.13/example_inputs.yaml
new file mode 100644
index 00000000..60832e43
--- /dev/null
+++ b/biometrics_sexmismatch/0.2.13/example_inputs.yaml
@@ -0,0 +1,10 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+coverage_threshold: null
+prefix: null
+json: true
+no_db_comparison: null
diff --git a/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl b/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl
new file mode 100644
index 00000000..bae28a19
--- /dev/null
+++ b/biometrics_sexmismatch/0.2.5/biometrics_sexmismatch.cwl
@@ -0,0 +1,106 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: biometrics_sexmismatch
+baseCommand:
+ - biometrics
+ - sexmismatch
+inputs:
+ - id: input
+ type:
+ type: array
+ items: File
+ inputBinding:
+ prefix: --input
+ inputBinding:
+ position: 0
+ doc: >-
+ Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once.
+ - id: database
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --database
+ doc: >-
+ Directory to store the intermediate files after running the extraction step.
+ - id: coverage_threshold
+ type: int?
+ default: 50
+ inputBinding:
+ position: 0
+ prefix: --coverage-threshold
+ doc: >-
+ Samples with Y chromosome above this value will be considered male.
+ - id: prefix
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: --prefix
+ doc: >-
+ Output file prefix.
+ - id: json
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --json
+ doc: >-
+ Also output data in JSON format.
+ - id: no_db_comparison
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: --no-db-compare
+ doc: >-
+ Do not compare the sample(s) you provided to all samples in the database, only compare them with each other.
+outputs:
+ - id: biometrics_sexmismatch_csv
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_sex_mismatch.csv'
+ } else {
+ return 'sex_mismatch.csv'
+ }
+ }
+ - id: biometrics_sexmismatch_json
+ type: File?
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.prefix) {
+ return inputs.prefix + '_sex_mismatch.json'
+ } else {
+ return 'sex_mismatch.json'
+ }
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 16000
+ coresMin: 2
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/biometrics:0.2.5'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': biometrics
+ 'doap:revision': 0.2.5
diff --git a/biometrics_sexmismatch/0.2.5/example_inputs.yaml b/biometrics_sexmismatch/0.2.5/example_inputs.yaml
new file mode 100644
index 00000000..60832e43
--- /dev/null
+++ b/biometrics_sexmismatch/0.2.5/example_inputs.yaml
@@ -0,0 +1,10 @@
+input:
+ - class: File
+ path: "../biometrics_extract_0.2.5/test.pk"
+ - class: File
+ path: "../biometrics_extract_0.2.5/test2.pk"
+database: null
+coverage_threshold: null
+prefix: null
+json: true
+no_db_comparison: null
diff --git a/biometrics_sexmismatch/README.md b/biometrics_sexmismatch/README.md
new file mode 100644
index 00000000..81b02145
--- /dev/null
+++ b/biometrics_sexmismatch/README.md
@@ -0,0 +1,52 @@
+# CWL for running biometrics sexmismatch tool.
+
+| Tool | Latest version | Location |
+|--- |--- |--- |
+| biometrics | 0.2.13 | |
+
+The Python package source code and Dockerfile are located on GitHub.
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner biometrics_sexmismatch.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: biometrics_sexmismatch.cwl [-h] --input INPUT
+ [--database DATABASE]
+ [--coverage_threshold COVERAGE_THRESHOLD]
+ [--prefix PREFIX] [--json]
+ [--no_db_comparison]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Can be one of three types: (1) path to a CSV file
+ containing sample information (one per line). For
+ example: sample_name,sample_bam,sample_type,sample_sex
+ ,sample_group. (2) Path to a '*.pk' file that was
+ produced by the 'extract' tool. (3) Name of the sample
+ to analyze; this assumes there is a file named
+ '{sample_name}.pk' in your database directory. Can be
+ specified more than once.
+ --database DATABASE Directory to store the intermediate files after
+ running the extraction step.
+ --coverage_threshold COVERAGE_THRESHOLD
+ Samples with Y chromosome above this value will be
+ considered male.
+ --prefix PREFIX Output file prefix.
+ --json Also output data in JSON format.
+ --no_db_comparison Do not compare the sample(s) you provided to all
+ samples in the database, only compare them with each
+ other.
+```
diff --git a/bwa_mem_0.7.17/README.md b/bwa_mem_0.7.17/README.md
new file mode 100644
index 00000000..fd6785d6
--- /dev/null
+++ b/bwa_mem_0.7.17/README.md
@@ -0,0 +1,136 @@
+# CWL and Dockerfile for running BWA MEM
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+| ------ | ------- | ----------------------------------------------- |
+| ubuntu | 16.04 | - |
+| BWA | 0.7.17 | https://github.com/lh3/bwa/releases/tag/v0.7.17 |
+
+[](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskaccess/bwa_mem_0.7.17 "Get your own image badge on microbadger.com")
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner bwa_mem_0.7.17.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/bwa_mem_toil.log --jobStore /path/to/bwa_mem_jobStore --batchSystem lsf --workDir /path/to/bwa_mem_toil_log --outdir . --writeLogs /path/to/bwa_mem_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/bwa_mem_0.7.17.cwl /path/to/inputs.yaml > bwa_mem_toil.stdout 2> bwa_mem_toil.stderr &
+```
+
+### Usage
+
+```
+usage: bwa_mem_0.7.17.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS] --reads
+ READS --reference REFERENCE [-A A] [-B B] [-C]
+ [-E E] [-L L] [-M] [-O O] [-P] [-S] [-T T] [-U U]
+ [-a] [-c C] [-d D] [-k K] [-K K] [--output OUTPUT]
+ [-p] [-r R] [-v V] [-w W] [-y Y] [-D D] [-W W]
+ [-m M] [-e] [-x X] [-j J] [--he HE] [-V] [-Y] [-I I]
+ [-R R] [--sample_id SAMPLE_ID] [--lane_id LANE_ID]
+ [--platform PLATFORM]
+ [--platform_unit PLATFORM_UNIT]
+ [--center_name CENTER_NAME]
+ [--library_id LIBRARY_ID]
+ [job_order]
+
+bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff]
+[-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen]
+[-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel]
+db.prefix reads.fq [mates.fq] Align 70bp-1Mbp query sequences with the BWA-MEM
+algorithm. Briefly, the algorithm works by seeding alignments with maximal
+exact matches (MEMs) and then extending seeds with the affine-gap Smith-
+Waterman algorithm (SW). If mates.fq file is absent and option -p is not set,
+this command regards input reads are single-end. If mates.fq is present, this
+command assumes the i-th read in reads.fq and the i-th read in mates.fq
+constitute a read pair. If -p is used, the command assumes the 2i-th and the
+(2i+1)-th read in reads.fq constitute a read pair (such input file is said to
+be interleaved). In this case, mates.fq is ignored. In the paired-end mode,
+the mem command will infer the read orientation and the insert size
+distribution from a batch of reads. The BWA-MEM algorithm performs local
+alignment. It may produce multiple primary alignments for different part of a
+query sequence. This is a crucial feature for long sequences. However, some
+tools such as Picard’s markDuplicates does not work with split alignments. One
+may consider to use option -M to flag shorter split hits as secondary.
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --reads READS
+ --reference REFERENCE
+ -A A score for a sequence match, which scales options
+ -TdBOELU unless overridden [1]
+ -B B penalty for a mismatch [4]
+ -C append FASTA/FASTQ comment to SAM output
+ -E E gap extension penalty; a gap of size k cost '{-O} +
+ {-E}*k' [1,1]
+ -L L penalty for 5'- and 3'-end clipping [5,5]
+ -M
+ -O O gap open penalties for deletions and insertions [6,6]
+ -P skip pairing; mate rescue performed unless -S also in
+ use
+ -S skip mate rescue
+ -T T minimum score to output [30]
+ -U U penalty for an unpaired read pair [17]
+ -a output all alignments for SE or unpaired PE
+ -c C skip seeds with more than INT occurrences [500]
+ -d D off-diagonal X-dropoff [100]
+ -k K minimum seed length [19]
+ -K K process INT input bases in each batch regardless of
+ nThreads (for reproducibility) []
+ --output OUTPUT
+ -p smart pairing (ignoring in2.fq)
+ -r R look for internal seeds inside a seed longer than {-k}
+ * FLOAT [1.5]
+ -v V verbosity level: 1=error, 2=warning, 3=message,
+ 4+=debugging [3]
+ -w W band width for banded alignment [100]
+ -y Y seed occurrence for the 3rd round seeding [20]
+ -D D drop chains shorter than FLOAT fraction of the longest
+ overlapping chain [0.50]
+ -W W discard a chain if seeded bases shorter than INT [0]
+ -m M perform at most INT rounds of mate rescues for each
+ read [50]
+ -e
+ -x X read type. Setting -x changes multiple parameters
+ unless overridden [null] pacbio: -k17 -W40 -r10 -A1
+ -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d: -k14 -W20
+ -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to
+ ref) intractg: -B9 -O16 -L5 (intra-species contigs to
+ ref)
+  -j J                  treat ALT contigs as part of the primary assembly
+                        (i.e. ignore <idxbase>.alt file)
+  --he HE               if there are <INT hits with score >80% of the max
+                        score, output all in XA [5,200]
+ -V output the reference FASTA header in the XR tag
+ -Y use soft clipping for supplementary alignments
+ -I I
+  -R R                  STR read group header line such as
+                        '@RG\tID:foo\tSM:bar' [null]
+ --sample_id SAMPLE_ID
+ --lane_id LANE_ID
+ --platform PLATFORM
+ --platform_unit PLATFORM_UNIT
+ --center_name CENTER_NAME
+ --library_id LIBRARY_ID
+```
diff --git a/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl
new file mode 100644
index 00000000..5e7e55dd
--- /dev/null
+++ b/bwa_mem_0.7.17/bwa_mem_0.7.17.cwl
@@ -0,0 +1,351 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+baseCommand:
+ - bwa
+ - mem
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: reads
+ type: 'File[]'
+ inputBinding:
+ position: 3
+ - id: reference
+ type: File
+ inputBinding:
+ position: 2
+ secondaryFiles:
+ - .amb
+ - .ann
+ - .bwt
+ - .pac
+ - .sa
+ - .fai
+ - id: A
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-A'
+ doc: >-
+ score for a sequence match, which scales options -TdBOELU unless
+ overridden [1]
+ - id: B
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-B'
+ doc: 'penalty for a mismatch [4]'
+ - id: C
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-C'
+ doc: append FASTA/FASTQ comment to SAM output
+ - id: E
+ type: 'int[]?'
+ inputBinding:
+ position: 0
+ prefix: '-E'
+ itemSeparator: ','
+ doc: 'gap extension penalty; a gap of size k cost ''{-O} + {-E}*k'' [1,1]'
+ - id: L
+ type: 'int[]?'
+ inputBinding:
+ position: 0
+ prefix: '-L'
+ itemSeparator: ','
+ doc: 'penalty for 5''- and 3''-end clipping [5,5]'
+ - id: M
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-M'
+ - id: O
+ type: 'int[]?'
+ inputBinding:
+ position: 0
+ prefix: '-O'
+ itemSeparator: ','
+ doc: 'gap open penalties for deletions and insertions [6,6]'
+ - id: P
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-P'
+ doc: skip pairing; mate rescue performed unless -S also in use
+ - id: S
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-S'
+ doc: skip mate rescue
+ - id: T
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-T'
+ doc: 'minimum score to output [30]'
+ - id: U
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-U'
+ doc: 'penalty for an unpaired read pair [17]'
+ - id: a
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-a'
+ doc: output all alignments for SE or unpaired PE
+ - id: c
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-c'
+ doc: 'skip seeds with more than INT occurrences [500]'
+ - id: d
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-d'
+ doc: 'off-diagonal X-dropoff [100]'
+ - id: k
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-k'
+ doc: 'minimum seed length [19]'
+ - id: K
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-K'
+ doc: >-
+ process INT input bases in each batch regardless of nThreads (for
+ reproducibility) []
+ - id: output
+ type: string?
+ - id: p
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-p'
+ doc: smart pairing (ignoring in2.fq)
+ - id: r
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '-r'
+ doc: 'look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]'
+ - id: v
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-v'
+ doc: 'verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3]'
+ - id: w
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-w'
+ doc: 'band width for banded alignment [100]'
+ - id: 'y'
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-y'
+ doc: 'seed occurrence for the 3rd round seeding [20]'
+ - id: D
+ type: float?
+ inputBinding:
+ position: 0
+ prefix: '-D'
+ doc: >-
+ drop chains shorter than FLOAT fraction of the longest overlapping chain
+ [0.50]
+ - id: W
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-W'
+ doc: 'discard a chain if seeded bases shorter than INT [0]'
+ - id: m
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-m'
+ doc: 'perform at most INT rounds of mate rescues for each read [50]'
+ - id: e
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-e'
+ - id: x
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-x'
+ doc: >-
+ read type. Setting -x changes multiple parameters unless overridden [null]
+ pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) ont2d:
+ -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)
+ intractg: -B9 -O16 -L5 (intra-species contigs to ref)
+ - id: H
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-H'
+ doc: >-
+ Use hard clipping ’H’ in the SAM output. This option may dramatically
+ reduce the redundancy of output when mapping long contig or BAC sequences
+  - id: j
+    type: File?  # NOTE(review): bwa mem documents -j as a value-less flag; confirm File? (not boolean?) is intended
+    inputBinding:
+      position: 0
+      prefix: '-j'
+    doc: >-
+      treat ALT contigs as part of the primary assembly (i.e. ignore
+      <idxbase>.alt file)
+  - id: he
+    type: 'int[]?'
+    inputBinding:
+      position: 0
+      prefix: '-h'
+      itemSeparator: ','
+    doc: >-
+      if there are <INT hits with score >80% of the max score, output all in XA
+      [5,200]
+ - id: V
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-V'
+ doc: output the reference FASTA header in the XR tag
+ - id: 'Y'
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '-Y'
+ doc: use soft clipping for supplementary alignments
+  - id: I
+    type: string?
+    inputBinding:
+      position: 0
+      prefix: '-I'  # insert-size distribution option of bwa mem: mean[,std[,max[,min]]]
+  - id: R
+    type: string?
+    doc: 'STR read group header line such as ''@RG\tID:foo\tSM:bar'' [null]'
+ - id: sample_id
+ type: string?
+ - id: lane_id
+ type: string?
+ - id: platform
+ type: string?
+ - id: platform_unit
+ type: string?
+ - id: center_name
+ type: string?
+ - id: library_id
+ type: string?
+outputs:
+ - id: bwa_mem_output_sam
+ type: File
+ outputBinding:
+ glob: |-
+ ${
+ if (inputs.output)
+ return inputs.output;
+ return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam');
+ }
+doc: >-
+ bwa mem [-aCHMpP] [-t nThreads] [-k minSeedLen] [-w bandWidth] [-d zDropoff]
+ [-r seedSplitRatio] [-c maxOcc] [-A matchScore] [-B mmPenalty] [-O gapOpenPen]
+ [-E gapExtPen] [-L clipPen] [-U unpairPen] [-R RGline] [-v verboseLevel]
+ db.prefix reads.fq [mates.fq]
+
+ Align 70bp-1Mbp query sequences with the BWA-MEM algorithm. Briefly, the
+ algorithm works by seeding alignments with maximal exact matches (MEMs) and
+ then extending seeds with the affine-gap Smith-Waterman algorithm (SW).
+
+
+ If mates.fq file is absent and option -p is not set, this command regards
+ input reads are single-end. If mates.fq is present, this command assumes the
+ i-th read in reads.fq and the i-th read in mates.fq constitute a read pair. If
+ -p is used, the command assumes the 2i-th and the (2i+1)-th read in reads.fq
+ constitute a read pair (such input file is said to be interleaved). In this
+ case, mates.fq is ignored. In the paired-end mode, the mem command will infer
+ the read orientation and the insert size distribution from a batch of reads.
+
+
+ The BWA-MEM algorithm performs local alignment. It may produce multiple
+ primary alignments for different part of a query sequence. This is a crucial
+ feature for long sequences. However, some tools such as Picard’s
+ markDuplicates does not work with split alignments. One may consider to use
+ option -M to flag shorter split hits as secondary.
+label: bwa_mem_0.7.17
+arguments:
+ - position: 0
+ prefix: '-t'
+ valueFrom: $(runtime.cores)
+ - position: 0
+ prefix: '-R'
+ valueFrom: |-
+ ${
+ if (inputs.sample_id) {
+ var rg_id = "@RG\\tID:" + inputs.sample_id + "\\tSM:" + inputs.sample_id;
+ if (inputs.library_id) {
+ rg_id += "\\tLB:" + inputs.library_id;
+ } if (inputs.platform) {
+ rg_id += "\\tPL:" + inputs.platform;
+ } if (inputs.platform_unit) {
+ rg_id += "\\tPU:" + inputs.platform_unit;
+ } if (inputs.center_name) {
+ rg_id += "\\tCN:" + inputs.center_name;
+ }
+ return rg_id
+ } else {
+ return inputs.R
+ }
+ }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 34000
+ coresMin: 16
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/bwa:0.7.17'
+ - class: InlineJavascriptRequirement
+stdout: |-
+ ${
+ if (inputs.output)
+ return inputs.output;
+ return inputs.reads[0].basename.replace(/(fastq.gz)|(fq.gz)/, 'sam');
+ }
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:johnsoni@mskcc.org'
+ 'foaf:name': Ian Johnson
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': bwa
+ 'doap:revision': 0.7.17
diff --git a/bwa_mem_0.7.17/container/Dockerfile b/bwa_mem_0.7.17/container/Dockerfile
new file mode 100644
index 00000000..49cc6831
--- /dev/null
+++ b/bwa_mem_0.7.17/container/Dockerfile
@@ -0,0 +1,23 @@
+FROM ubuntu:16.04
+
+LABEL maintainer="Ian Johnson (johnsoni@mskcc.org)" \
+ version.image="0.1.0" \
+ version.bwa="0.7.17" \
+ version.ubuntu="16.04" \
+ source.bwa="https://github.com/lh3/bwa/releases/tag/v0.7.17"
+
+ENV BWA_VERSION 0.7.17
+
+RUN apt-get -y update \
+    # install build tools and dependencies
+    && apt-get -y install build-essential zlib1g-dev wget unzip \
+    # download and unzip bwa
+    && cd /tmp && wget "https://github.com/lh3/bwa/archive/v${BWA_VERSION}.zip" \
+    && unzip "v${BWA_VERSION}.zip" \
+    # build
+    && cd "/tmp/bwa-${BWA_VERSION}" \
+    && make \
+    # move binaries to /usr/bin
+    && mv "/tmp/bwa-${BWA_VERSION}/bwa" /usr/bin \
+    # clean up build files and the apt package index to keep the layer small
+    && rm -rf /tmp/* /var/lib/apt/lists/*
diff --git a/bwa_mem_0.7.17/example_inputs.yaml b/bwa_mem_0.7.17/example_inputs.yaml
new file mode 100644
index 00000000..74683384
--- /dev/null
+++ b/bwa_mem_0.7.17/example_inputs.yaml
@@ -0,0 +1,9 @@
+reads:
+- class: File
+ path: "path/to/fastq_R1.fastq"
+- class: File
+ path: "path/to/fastq_R2.fastq"
+reference:
+ class: File
+ path: "/path/to/reference.fasta"
+sample_id: test_sample_id
diff --git a/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl b/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl
index 8741d4fd..41701c6c 100644
--- a/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl
+++ b/bwa_mem_0.7.5a/bwa_mem_0.7.5a.cwl
@@ -4,7 +4,6 @@ $namespaces:
dct: 'http://purl.org/dc/terms/'
doap: 'http://usefulinc.com/ns/doap#'
foaf: 'http://xmlns.com/foaf/0.1/'
- sbg: 'https://www.sevenbridges.com/'
baseCommand:
- bwa
- mem
diff --git a/cci_utils/0.2.8/example_inputs.yaml b/cci_utils/0.2.8/example_inputs.yaml
new file mode 100644
index 00000000..5929933f
--- /dev/null
+++ b/cci_utils/0.2.8/example_inputs.yaml
@@ -0,0 +1,3 @@
+directory: {class: Directory, path: /path/to/sample_info_directory }
+samples-json: {class: File, path: /path/to/sample_meta.json }
+config: {class: File, path: /path/to/config.yaml }
diff --git a/cci_utils/0.2.8/general_stats_parse.cwl b/cci_utils/0.2.8/general_stats_parse.cwl
new file mode 100644
index 00000000..339f3013
--- /dev/null
+++ b/cci_utils/0.2.8/general_stats_parse.cwl
@@ -0,0 +1,61 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: general_stats_parse
+baseCommand:
+ - general_stats_parse.py
+arguments:
+ - --dir
+ - .
+inputs:
+ - id: directory
+ type: Directory
+ doc: Directory containing results.
+ - id: samples-json
+ type: File
+ inputBinding:
+ prefix: '--samples-json'
+ doc: Sample JSON file.
+  - id: config
+    type: File
+    inputBinding:
+      prefix: '--config'
+    doc: MultiQC config file.
+outputs:
+ - id: aggregate_parsed_stats
+ label: aggregate_parsed_stats
+ type: Directory
+ outputBinding:
+ glob: .
+ outputEval: |-
+ ${
+ self[0].basename = "aggregate_parsed_stats";
+ return self[0]
+ }
+label: general_stats_parse
+requirements:
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/cci_utils:0.2.8'
+ - class: InitialWorkDirRequirement
+ listing:
+ - entry: $(inputs.directory)
+ writable: true
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:johnsoni@mskcc.org'
+ 'foaf:name': Ian Johnson
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:johnsoni@mskcc.org'
+ 'foaf:name': Ian Johnson
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
diff --git a/cwl_commandlinetools/__init__.py b/cwl_commandlinetools/__init__.py
new file mode 100644
index 00000000..30f6dd27
--- /dev/null
+++ b/cwl_commandlinetools/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+"""Top-level package for cwl-commandlinetools."""
+
+__author__ = """msk-access"""
+__email__ = 'msk.access@gmail.com'
+__version__ = '1.2.0'
diff --git a/cwl_commandlinetools/cwl_commandlinetools.py b/cwl_commandlinetools/cwl_commandlinetools.py
new file mode 100644
index 00000000..7fbbae4f
--- /dev/null
+++ b/cwl_commandlinetools/cwl_commandlinetools.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+
+"""Main module."""
diff --git a/delly_0.9.1/README.md b/delly_0.9.1/README.md
new file mode 100644
index 00000000..d3f8c542
--- /dev/null
+++ b/delly_0.9.1/README.md
@@ -0,0 +1,81 @@
+# CWL and Dockerfile for running Delly
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| ubuntu | 18.04 | - |
+| DELLY | 0.9.1 | https://github.com/dellytools/delly |
+
+
+## CWL
+
+- CWL specification 1.0
+- Use example_input.yml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+    > toil-cwl-runner delly_0.9.1.cwl example_input.yml
+```
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/delly_0.9.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . --writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_0.9.1.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr &
+```
+
+### Usage
+
+```
+usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ [--out_file OUT_FILE] --reference_genome
+ REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS]
+ [--vcffile VCFFILE] [--svtype SVTYPE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --out_file OUT_FILE The name to be used for the output bcf file
+ --reference_genome REFERENCE_GENOME
+ reference genome fasta file
+ --exclude_regions EXCLUDE_REGIONS
+ file with regions to exclude
+ --vcffile VCFFILE input VCF/BCF file for genotyping
+ --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL]
+```
+
+## Disclaimer
+Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license:
+
+Copyright (c) 2012- European Molecular Biology Laboratory (EMBL)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/delly_0.9.1/container/Dockerfile b/delly_0.9.1/container/Dockerfile
new file mode 100644
index 00000000..ce58d263
--- /dev/null
+++ b/delly_0.9.1/container/Dockerfile
@@ -0,0 +1,65 @@
+# taken from: https://github.com/dellytools/delly/blob/main/Dockerfile
+# modify for additional functionality
+################## Base Image ##########
+FROM --platform=linux/amd64 ubuntu:18.04
+################## ARGUMENTS/Environments ##########
+
+ARG BUILD_DATE
+ARG BUILD_VERSION
+ARG LICENSE="Apache-2.0"
+ARG DELLY_VERSION="0.9.1"
+ARG VCS_REF
+################## METADATA ########################
+LABEL org.opencontainers.image.vendor="MSKCC"
+LABEL org.opencontainers.image.authors="Eric Buehler (buehlere@mskcc.org)"
+
+LABEL org.opencontainers.image.created=${BUILD_DATE} \
+ org.opencontainers.image.version=${BUILD_VERSION} \
+ org.opencontainers.image.licenses=${LICENSE} \
+ org.opencontainers.image.version.delly=${DELLY_VERSION} \
+ org.opencontainers.image.vcs-url="https://github.com/dellytools/delly.git" \
+ org.opencontainers.image.vcs-ref=${VCS_REF}
+
+LABEL org.opencontainers.image.description="This container uses ubuntu:18.04 as the base image to build \
+ DELLY version ${DELLY_VERSION}"
+
+
+################## INSTALL ##########################
+RUN apt-get update && apt-get install -y \
+ autoconf \
+ build-essential \
+ cmake \
+ g++ \
+ gfortran \
+ git \
+ libcurl4-gnutls-dev \
+ hdf5-tools \
+ libboost-date-time-dev \
+ libboost-program-options-dev \
+ libboost-system-dev \
+ libboost-filesystem-dev \
+ libboost-iostreams-dev \
+ libbz2-dev \
+ libhdf5-dev \
+ libncurses-dev \
+ liblzma-dev \
+ zlib1g-dev \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# set environment
+ENV BOOST_ROOT /usr
+
+# Download delly
+RUN cd /opt \
+ && git clone --recursive https://github.com/dellytools/delly.git \
+ && cd /opt/delly/ \
+ && git checkout tags/v${DELLY_VERSION} \
+ && make STATIC=1 all \
+ && make install
+
+# Add Delly to PATH
+ENV PATH="/opt/delly/bin:${PATH}"
+
+# by default /bin/sh
+CMD ["/bin/sh"]
diff --git a/delly_0.9.1/delly_0.9.1.cwl b/delly_0.9.1/delly_0.9.1.cwl
new file mode 100644
index 00000000..067768b5
--- /dev/null
+++ b/delly_0.9.1/delly_0.9.1.cwl
@@ -0,0 +1,131 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+baseCommand:
+ - delly
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: out_file
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-o'
+ shellQuote: false
+ doc: The name to be used for the output bcf file
+ - id: reference_genome
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '-g'
+ shellQuote: false
+ doc: reference genome fasta file
+ secondaryFiles:
+ - ^.fasta.fai
+ - id: input_bams
+ type:
+ - File
+ - type: array
+ items: File
+ inputBinding:
+ position: 99
+ shellQuote: false
+ doc: >-
+ an indexed bam tumor file, an indexed bam control file or it can be an
+ array of indexed bam files
+ secondaryFiles:
+ - ^.bai
+ - id: exclude_regions
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '-x'
+ shellQuote: false
+ doc: file with regions to exclude
+ - id: vcffile
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '-v'
+ shellQuote: false
+ doc: input VCF/BCF file for genotyping
+ - id: svtype
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '-t'
+ shellQuote: false
+ doc: 'SV type to compute [DEL, INS, DUP, INV, BND, ALL]'
+ - id: geno_qual
+ type: int?
+ inputBinding:
+ position: 71
+ prefix: '-u'
+ doc: min. mapping quality for genotyping
+ - id: dump
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '-d'
+ doc: gzipped output file for SV-reads (optional)
+ - id: map_qual
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-q'
+ doc: min. paired-end (PE) mapping quality
+ - id: qual_tra
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-r'
+ doc: min. PE quality for translocation
+ - id: mad_cutoff
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '-s'
+ doc: 'insert size cutoff, median+s*MAD (deletions only)'
+outputs:
+ - id: bcf_out
+ type: File
+ outputBinding:
+ glob: $(inputs.out_file)
+arguments:
+ - call
+requirements:
+ - class: ShellCommandRequirement
+ - class: ResourceRequirement
+ ramMin: 40000
+ coresMin: 4
+hints:
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/delly:0.9.1'
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:buehlere@mskcc.org'
+ 'foaf:name': Eric Buehler
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': delly
+ 'doap:revision': 0.9.1
diff --git a/delly_0.9.1/example_input.yml b/delly_0.9.1/example_input.yml
new file mode 100644
index 00000000..d84fb73c
--- /dev/null
+++ b/delly_0.9.1/example_input.yml
@@ -0,0 +1,5 @@
+out_file: "name_of_output.bcf"
+reference_genome: {class: File, path: path_to_file.fasta}
+input_bams:
+ - {class: File, path: /path/to/file.bam}
+ - {class: File, path: /path/to/file.bam}
diff --git a/delly_1.0.3/README.md b/delly_1.0.3/README.md
new file mode 100644
index 00000000..3bd6c5be
--- /dev/null
+++ b/delly_1.0.3/README.md
@@ -0,0 +1,81 @@
+# CWL and Dockerfile for running Delly
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| ubuntu | 18.04 | - |
+| DELLY | 1.0.3 | https://github.com/dellytools/delly |
+
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+    > toil-cwl-runner delly_1.0.3.cwl example_inputs.yaml
+```
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/delly_1.0.3.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . --writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_1.0.3.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr &
+```
+
+### Usage
+
+```
+usage: delly_1.0.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ [--out_file OUT_FILE] --reference_genome
+ REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS]
+ [--vcffile VCFFILE] [--svtype SVTYPE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --out_file OUT_FILE The name to be used for the output bcf file
+ --reference_genome REFERENCE_GENOME
+ reference genome fasta file
+ --exclude_regions EXCLUDE_REGIONS
+ file with regions to exclude
+ --vcffile VCFFILE input VCF/BCF file for genotyping
+ --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL]
+```
+
+## Disclaimer
+Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license:
+
+Copyright (c) 2012- European Molecular Biology Laboratory (EMBL)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/delly_1.0.3/container/Dockerfile b/delly_1.0.3/container/Dockerfile
new file mode 100644
index 00000000..56bd8db0
--- /dev/null
+++ b/delly_1.0.3/container/Dockerfile
@@ -0,0 +1,69 @@
+# taken from: https://github.com/dellytools/delly/blob/main/Dockerfile
+# modify for additional functionality
+################## Base Image ##########
+FROM --platform=linux/amd64 ubuntu:18.04
+################## ARGUMENTS/Environments ##########
+
+# Build-time metadata, supplied with --build-arg; DELLY_VERSION also selects the git tag built below.
+ARG BUILD_DATE
+ARG BUILD_VERSION
+ARG LICENSE="Apache-2.0"
+ARG DELLY_VERSION="1.0.3"
+ARG VCS_REF
+################## METADATA ########################
+LABEL org.opencontainers.image.vendor="MSKCC"
+LABEL org.opencontainers.image.authors="Eric Buehler (buehlere@mskcc.org)"
+
+LABEL org.opencontainers.image.created=${BUILD_DATE} \
+      org.opencontainers.image.version=${BUILD_VERSION} \
+      org.opencontainers.image.licenses=${LICENSE} \
+      org.opencontainers.image.version.delly=${DELLY_VERSION} \
+      org.opencontainers.image.vcs-url="https://github.com/dellytools/delly.git" \
+      org.opencontainers.image.vcs-ref=${VCS_REF}
+
+# Kept on one line so the label value does not pick up continuation-line indentation.
+LABEL org.opencontainers.image.description="This container uses ubuntu:18.04 as the base image to build DELLY version ${DELLY_VERSION}"
+
+
+################## INSTALL ##########################
+# Build toolchain and development libraries; the apt lists are removed in the same layer.
+RUN apt-get update && apt-get install -y \
+    autoconf \
+    build-essential \
+    cmake \
+    g++ \
+    gfortran \
+    git \
+    libcurl4-gnutls-dev \
+    hdf5-tools \
+    libboost-date-time-dev \
+    libboost-program-options-dev \
+    libboost-system-dev \
+    libboost-filesystem-dev \
+    libboost-iostreams-dev \
+    libbz2-dev \
+    libhdf5-dev \
+    libncurses-dev \
+    liblzma-dev \
+    zlib1g-dev \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# set environment
+ENV BOOST_ROOT=/usr
+
+# Install delly
+# "git checkout" does not move submodules, so they are re-synced to the commits recorded by the tag before building.
+RUN cd /opt \
+    && git clone --recursive https://github.com/dellytools/delly.git \
+    && cd /opt/delly/ \
+    && git checkout tags/v${DELLY_VERSION} \
+    && git submodule update --init --recursive \
+    && make STATIC=1 all \
+    && make install
+
+# Add Delly to PATH
+ENV PATH="/opt/delly/bin:${PATH}"
+
+# by default /bin/sh
+CMD ["/bin/sh"]
diff --git a/delly_1.0.3/delly_1.0.3.cwl b/delly_1.0.3/delly_1.0.3.cwl
new file mode 100644
index 00000000..f1525ce7
--- /dev/null
+++ b/delly_1.0.3/delly_1.0.3.cwl
@@ -0,0 +1,138 @@
+# CWL wrapper for "delly call": option inputs are emitted first and the BAM files last (position 99).
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+  dct: 'http://purl.org/dc/terms/'
+  doap: 'http://usefulinc.com/ns/doap#'
+  foaf: 'http://xmlns.com/foaf/0.1/'
+  sbg: 'https://www.sevenbridges.com/'
+baseCommand:
+  - delly
+inputs:
+  # The three resource inputs below have no inputBinding; they only feed the ResourceRequirement.
+  - id: memory_per_job
+    type: int?
+    doc: Memory per job in megabytes
+  - id: memory_overhead
+    type: int?
+    doc: Memory overhead per job in megabytes
+  - id: number_of_threads
+    type: int?
+    doc: Number of CPU cores to request for the job
+  # NOTE(review): out_file is optional, but the bcf_out glob depends on it - confirm callers always set it.
+  - id: out_file
+    type: string?
+    inputBinding:
+      position: 0
+      prefix: '-o'
+      shellQuote: false
+    doc: The name to be used for the output bcf file
+  - id: reference_genome
+    type: File
+    inputBinding:
+      position: 0
+      prefix: '-g'
+      shellQuote: false
+    doc: reference genome fasta file
+    secondaryFiles:
+      - ^.fasta.fai
+  - id: input_bams
+    type:
+      - File
+      - type: array
+        items: File
+    inputBinding:
+      position: 99
+      shellQuote: false
+    doc: >-
+      an indexed bam tumor file, an indexed bam control file or it can be an
+      array of indexed bam files
+    secondaryFiles:
+      - ^.bai
+  - id: exclude_regions
+    type: File?
+    inputBinding:
+      position: 0
+      prefix: '-x'
+      shellQuote: false
+    doc: file with regions to exclude
+  - id: vcffile
+    type: File?
+    inputBinding:
+      position: 0
+      prefix: '-v'
+      shellQuote: false
+    doc: input VCF/BCF file for genotyping
+  - id: svtype
+    type: string?
+    inputBinding:
+      position: 0
+      prefix: '-t'
+      shellQuote: false
+    doc: 'SV type to compute [DEL, INS, DUP, INV, BND, ALL]'
+  - id: geno_qual
+    type: int?
+    inputBinding:
+      position: 71
+      prefix: '-u'
+    doc: min. mapping quality for genotyping
+  # NOTE(review): documented as an output path yet typed File?, so an existing file must be supplied - confirm intent.
+  - id: dump
+    type: File?
+    inputBinding:
+      position: 0
+      prefix: '-d'
+    doc: gzipped output file for SV-reads (optional)
+  - id: map_qual
+    type: int?
+    inputBinding:
+      position: 0
+      prefix: '-q'
+    doc: min. paired-end (PE) mapping quality
+  - id: qual_tra
+    type: int?
+    inputBinding:
+      position: 0
+      prefix: '-r'
+    doc: min. PE quality for translocation
+  - id: mad_cutoff
+    type: int?
+    inputBinding:
+      position: 0
+      prefix: '-s'
+    doc: 'insert size cutoff, median+s*MAD (deletions only)'
+outputs:
+  - id: bcf_out
+    type: File
+    outputBinding:
+      glob: $(inputs.out_file)
+arguments:
+  - call
+requirements:
+  - class: ShellCommandRequirement
+  - class: InlineJavascriptRequirement
+  # Honour the resource inputs when given; otherwise keep the previous fixed request (ramMin 40000, coresMin 4).
+  - class: ResourceRequirement
+    ramMin: '$((inputs.memory_per_job || 40000) + (inputs.memory_overhead || 0))'
+    coresMin: '$(inputs.number_of_threads || 4)'
+hints:
+  - class: DockerRequirement
+    dockerPull: 'ghcr.io/msk-access/delly:1.0.3'
+'dct:contributor':
+  - class: 'foaf:Organization'
+    'foaf:member':
+      - class: 'foaf:Person'
+        'foaf:mbox': 'mailto:buehlere@mskcc.org'
+        'foaf:name': Eric Buehler
+    'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+  - class: 'foaf:Organization'
+    'foaf:member':
+      - class: 'foaf:Person'
+        'foaf:mbox': 'mailto:shahr2@mskcc.org'
+        'foaf:name': Ronak Shah
+    'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+  - class: 'doap:Version'
+    'doap:name': delly
+    'doap:revision': 1.0.3
diff --git a/delly_1.0.3/example_input.yml b/delly_1.0.3/example_input.yml
new file mode 100644
index 00000000..d84fb73c
--- /dev/null
+++ b/delly_1.0.3/example_input.yml
@@ -0,0 +1,10 @@
+# Example job file for delly_1.0.3.cwl; every path below is a placeholder.
+out_file: name_of_output.bcf
+reference_genome:
+  class: File
+  path: path_to_file.fasta
+input_bams:
+  - class: File
+    path: /path/to/file.bam
+  - class: File
+    path: /path/to/file.bam
diff --git a/disambiguate_1.0.0/README.md b/disambiguate_1.0.0/README.md
deleted file mode 100644
index b1cd50f0..00000000
--- a/disambiguate_1.0.0/README.md
+++ /dev/null
@@ -1,52 +0,0 @@
- # CWL and Dockerfile for running Disambiguate
-
-## Version of tools in docker image (/container/Dockerfile)
-
-Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`.
-
-| Tool | Version | Location | Notes |
-|--- |--- |--- | - |
-| biocontainers | latest | https://hub.docker.com/r/biocontainers/biocontainers/ | base image; "latest" not actually latest version, just tag name on docker hub|
-| bamtools | 2.4.0 | https://bioconda.github.io/recipes/bamtools/README.html | - |
-| ngs-disambiguate | 2016.11.10 | https://bioconda.github.io/recipes/ngs-disambiguate/README.html | - |
-
-[](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskcc/disambiguate:1.0.0 "Get your own image badge on microbadger.com")
-
-
-## CWL
-
-- CWL specification 1.0
-- Use `example_inputs.yaml` to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
-
-```bash
- > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml
-```
-
-## Command
-```
-USAGE:
-
- cwltool disambiguate_1.0.0.cwl \
- --prefix \
- --output_dir \
- [--aligner ] \
-
-
-Where:
-
- --prefix
- (required) Sample ID or name used as prefix. Do not include .bam
-
- --output_dir
- (required) Output directory
-
- --aligner
- Aligner option {bwa(default),tophat,hisat2,star}
-
-
- (required) Species A BAM file
-
-
- (required) Species B BAM file
-```
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 00000000..7b085810
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,32 @@
+---
+description: >-
+ Central location for storing common workflow language based command line tools
+ for building workflows
+---
+
+# Command-line tools used by CCI
+
+* Free software: Apache Software License 2.0
+* Documentation: [https://msk-access.gitbook.io/command-line-tools-cwl/](https://msk-access.gitbook.io/command-line-tools-cwl/)
+
+## Features
+
+Create command line tools in common workflow language to generate workflows.
+
+## Installation
+
+Clone the repository:
+
+```text
+git clone --depth 50 https://github.com/msk-access/cwl-commandlinetools.git
+```
+
+**Follow the README in the respective tool folder to run each tool.**
+
+## Credits
+
+This package was created with Cookiecutter and the `audreyr/cookiecutter-pypackage` project template.
+
+* Cookiecutter: [https://github.com/audreyr/cookiecutter](https://github.com/audreyr/cookiecutter)
+* `audreyr/cookiecutter-pypackage`: [https://github.com/audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage)
+
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
new file mode 100644
index 00000000..a6cc28d1
--- /dev/null
+++ b/docs/SUMMARY.md
@@ -0,0 +1,120 @@
+# Table of contents
+
+* [MSK-ACCESS command-line tools](README.md)
+
+* [ABRA2](abra2/README.md)
+ * [v2.17](abra2/abra2_2.17.md)
+ * [v2.22](abra2/abra2_2.22.md)
+
+* access_utils
+ * [0.1.1](../access_utils/0.1.1/README.md)
+
+* [bcftools](bcftools/README.md)
+
+ * [bcftools bgzip v1.15.1](bcftools/bcftools_bgzip_v1.15.1.md)
+ * [bcftools concat v1.15.1](bcftools/bcftools_concat_v1.15.1.md)
+ * [bcftools norm v1.15.1](bcftools/bcftools_norm_v1.15.1.md)
+ * [bcftools sort v1.15.1](bcftools/bcftools_sort_v1.15.1.md)
+ * [bcftools tabix v1.15.1](bcftools/bcftools_tabix_v1.15.1.md)
+
+* [Bedtools](bedtools/README.md)
+ * [genomecov v2.28.0\_cv2](bedtools/bedtools_genomecov_v2.28.0_cv2.md)
+ * [merge v2.28.0\_cv2](bedtools/bedtools_merge_v2.28.0_cv2.md)
+ * [sortbed v2.28.0\_cv2](bedtools/bedtools_sortbed_v2.28.0_cv2.md)
+
+* Biometrics
+ * [extract](../biometrics_extract/README.md)
+ * [minor](../biometrics_minor/README.md)
+ * [major](../biometrics_major/README.md)
+ * [genotype](../biometrics_genotype/README.md)
+ * [sexmismatch](../biometrics_sexmismatch/README.md)
+
+* [Delly](delly/README.md)
+ * [delly call 0.9.1](delly/delly_call_0.9.1.md)
+ * [delly call 1.0.3](delly/delly_call_1.0.3.md)
+
+* [Disambiguate](disambiguate/README.md)
+ * [v1.0.0](disambiguate/disambiguate_1.0.0.md)
+
+* [Fgbio](fgbio/README.md)
+ * [CallDuplexConsensusReads v1.2.0](fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md)
+ * [CollectDuplexSeqMetrics v1.2.0](fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md)
+ * [GroupReadsByUmi v1.2.0](fgbio/fgbio_group_reads_by_umi_1.2.0.md)
+ * [FastqToBam v1.2.0](fgbio/fgbio_fastq_to_bam_1.2.0.md)
+ * [FilterConsensusReads v1.2.0](fgbio/fgbio_filter_consensus_reads_1.2.0.md)
+ * [simplex\_filter v0.1.8](fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md)
+
+* [GATK](gatk/README.md)
+ * [ApplyBQSR v4.1.2.0](gatk/gatk_applybqsr_4.1.2.0.md)
+ * [ApplyBQSR v4.1.8.1](gatk/gatk_apply_bqsr_4.1.8.1.md)
+ * [BaseRecalibrator v4.1.2.0](gatk/gatk_baserecalibrator_4.1.2.0.md)
+ * [BaseRecalibrator v4.1.8.1](gatk/gatk_base_recalibrator_4.1.8.1.md)
+ * [MergeBamAlignment v4.1.8.0](gatk/gatk_merge_bam_alignment_4.1.8.0.md)
+ * [MergeSamFiles v4.1.8.0](gatk/gatk_merge_sam_files_4.1.8.0.md)
+ * [SamToFastq v4.1.8.0](gatk/samtofastq-v4.1.8.0.md)
+
+* GetBaseCountsMultiSample
+ * [1.2.5](../getbasecountsmultisample/1.2.5/README.md)
+
+* [Manta](manta/README.md)
+ * [Manta v1.5.1](manta/manta_1.5.1.md)
+
+* [Marianas](marianas/README.md)
+ * [Collapsing First Pass v1.8.1](marianas/marianas_collapsing_first_pass_1.8.1.md)
+ * [Collapsing Second Pass v1.8.1](marianas/marianas_collapsing_second_pass_1.8.1.md)
+ * [Process Loop UMI v1.8.1](marianas/marianas_process_loop_umi_1.8.1.md)
+ * [Separate BAMs v1.8.1](marianas/marianas_separate_bams_1.8.1.md)
+
+* [MultiQC](multiqc/README.md)
+ * [MultiQC v1.10.1.7](multiqc/multiqc_1.10.1.7.md)
+ * [MultiQC v1.12](multiqc/multiqc_1.12.md)
+
+* [MuTect](mutect/README.md)
+ * [MuTect 1.1.5](mutect/mutect_1.1.5.md)
+
+* [Merge Fastq](merge-fastq/README.md)
+ * [v0.1.7](merge-fastq/merge_fastq_0.1.7.md)
+
+* [Mosdepth](mosdepth/README.md)
+ * [0.3.3](mosdepth/mosdepth_0.3.3.md)
+
+* [Octopus](octopus/README.md)
+ * [v0.7.4](octopus/octopus_0.7.4.md)
+
+* [Picard Tools](picard-tools/README.md)
+ * [AddOrReplaceReadGroups v1.96](picard-tools/picard_add_or_replace_read_groups_1.96.md)
+ * [AddOrReplaceReadGroups v2.21.2](picard-tools/picard_add_or_replace_read_groups_2.21.2.md)
+ * [AddOrReplaceReadGroups v4.1.8.1](picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md)
+ * [CollectAlignmentSummaryMetrics v2.8.1](picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md)
+ * [CollectAlignmentSummaryMetrics v2.21.2](picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md)
+ * [CollectMultipleMetrics v2.8.1](picard-tools/picard_collectmultiplemetric_2.8.1.md)
+ * [CollectMultipleMetrics v2.21.2](picard-tools/picard_collectmultiplemetric_2.21.2.md)
+ * [FixMateInformation v1.96](picard-tools/picard_fix_mate_information_1.96.md)
+ * [FixMateInformation v2.21.2](picard-tools/picard_fix_mate_information_2.21.2.md)
+ * [FixMateInformation v4.1.8.1](picard-tools/picard_fix_mate_information_4.1.8.1.md)
+ * [HSmetrics v2.8.1](picard-tools/picard_hsmetrics_2.8.1.md)
+ * [HSmetrics v2.21.2](picard-tools/picard_hsmetrics_2.21.2.md)
+ * [MarkDuplicates v1.96](picard-tools/picard_mark_duplicates_1.96.md)
+ * [MarkDuplicates v2.8.1](picard-tools/picard_mark_duplicates_2.8.1.md)
+ * [MarkDuplicates v2.21.2](picard-tools/picard_mark_duplicates_2.21.2.md)
+ * [MarkDuplicates v4.1.8.1](picard-tools/picard_mark_duplicates_4.1.8.1.md)
+* [Postprocessing variant calls](postprocessing_variant_calls/README.md)
+ * [vardict_filter_case-control 0.1.3](postprocessing_variant_calls/vardict_filter_case-control_0.1.3.md)
+ * [vardict_filter_single-sample 0.1.3](postprocessing_variant_calls/vardict_filter_single-sample_0.1.3.md)
+* [Trim Galore](trim-galore/README.md)
+ * [v0.6.2](trim-galore/trim_galore_0.6.2.md)
+
+* [Ubuntu utilities](ubuntu-utilites/README.md)
+ * [v18.04](ubuntu-utilites/utilities_ubuntu_18.04.md)
+
+* [VarDictJava](vardictjava/README.md)
+ * [v1.8.2](vardictjava/vardictjava_1.8.2.md)
+
+* [VCF2MAF](vcf2maf/README.md)
+ * [1.6.21](vcf2maf/vcf2maf_1.6.21.md)
+
+* [Waltz](waltz/README.md)
+ * [CountReads v3.1.1](waltz/waltz_count_reads_3.1.1.md)
+ * [PileupMetrics v3.1.1](waltz/waltz_pileupmatrices_3.1.1.md)
+
+
diff --git a/docs/abra2/README.md b/docs/abra2/README.md
new file mode 100644
index 00000000..5cd595c7
--- /dev/null
+++ b/docs/abra2/README.md
@@ -0,0 +1,2 @@
+# ABRA2
+
diff --git a/abra2_2.17/README.md b/docs/abra2/abra2_2.17.md
similarity index 73%
rename from abra2_2.17/README.md
rename to docs/abra2/abra2_2.17.md
index 89818e32..e2f9457b 100644
--- a/abra2_2.17/README.md
+++ b/docs/abra2/abra2_2.17.md
@@ -1,25 +1,25 @@
-# CWL and Dockerfile for running ABRA2
+# v2.17
-## Version of tools in docker image (/container/Dockerfile)
+## Version of tools in docker image \(/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| openjdk | 8 | - |
-| ABRA2 | 2.17 | https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| openjdk | 8 | - |
+| ABRA2 | 2.17 | [https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar](https://github.com/mozack/abra2/releases/download/v2.19/abra2-2.17.jar) |
-[](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own image badge on microbadger.com") [](https://microbadger.com/images/mskcc/abra2:0.1.0 "Get your own license badge on microbadger.com")
+[](https://microbadger.com/images/mskcc/abra2:0.1.0) [](https://microbadger.com/images/mskcc/abra2:0.1.0) [](https://microbadger.com/images/mskcc/abra2:0.1.0)
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner abra2_2.17.cwl example_inputs.yaml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command:**
```bash
#Using CWLTOOL
@@ -30,9 +30,9 @@
> toil-cwl-runner --singularity --logFile /path/to/abra2_toil_log/cwltoil.log --jobStore /path/to/abra2_jobStore --batchSystem lsf --workDir /path/to/abra2_toil_log --outdir . --writeLogs /path/to/abra2_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/abra2_2.17.cwl /path/to/inputs.yaml > abra2_toil.stdout 2> abra2_toil.stderr &
```
-### Usage
+### Usage
-```
+```text
usage: abra2_2.17.cwl [-h]
positional arguments:
@@ -85,4 +85,5 @@ optional arguments:
VCF containing known (or suspected) variant sites.
Very large files should be avoided.
--no_sort Do not attempt to sort final output
- ```
\ No newline at end of file
+```
+
diff --git a/docs/abra2/abra2_2.22.md b/docs/abra2/abra2_2.22.md
new file mode 100644
index 00000000..c23aaada
--- /dev/null
+++ b/docs/abra2/abra2_2.22.md
@@ -0,0 +1,21 @@
+# v2.22
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| openjdk | 8 | - |
+| ABRA2 | 2.22 | [https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar](https://github.com/mozack/abra2/releases/download/v2.22/abra2-2.22.jar) |
+
+[](https://microbadger.com/images/mskcc/abra2:0.2.0) [](https://microbadger.com/images/mskcc/abra2:0.2.0) [](https://microbadger.com/images/mskcc/abra2:0.2.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner abra2_2.22.cwl example_inputs.yaml
+```
+
diff --git a/docs/athena/README.md b/docs/athena/README.md
new file mode 100644
index 00000000..5f4802c9
--- /dev/null
+++ b/docs/athena/README.md
@@ -0,0 +1,9 @@
+# athena
+
+Athena is a tool to generate coverage statistics for NGS data, and combine these into an interactive HTML report. This gives both summary level and in depth information as to the coverage of the data, including various tables and plots to visualise the data. Athena can also optionally include plots visualising per-chromosome level coverage.
+
+The general workflow for generating the statistics and report is as follows:
+
++ Annotate each region of the bed file with the gene, exon and per base coverage data using annotate_bed.cwl
++ Generate per exon and per gene statistics using coverage_stats_single.cwl
++ Generate HTML coverage report with coverage_report_single.cwl
diff --git a/docs/athena/annotate_bed.md b/docs/athena/annotate_bed.md
new file mode 100644
index 00000000..f1848719
--- /dev/null
+++ b/docs/athena/annotate_bed.md
@@ -0,0 +1,75 @@
+# annotate_bed_1.4.2
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| python:8 base image | 8 | - |
+| Athena | 1.4.2 | https://github.com/msk-access/athena/archive/refs/tags/1.4.2.zip |
+
+## Explanation
+
+The annotate_bed.cwl annotates the given bed file with the transcript and coverage information required for the next step, coverage_stats_single.cwl. Specifically, this is done using BEDtools intersect, first with a file containing transcript to gene and exon information, and then with the per base coverage data from the mosdepth output (`*.per-base.bed.gz`). Currently, 100% overlap is required between coordinates in the panel bed file and the transcript annotation file, therefore you must ensure any added flank regions etc. are the same.
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner annotate_bed.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/annotate_bed.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir annotate_bed_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/annotate_bed_toil_log/cwltoil.log --jobStore /path/to/annotate_bed_jobStore --batchSystem lsf --workDir /path/to/annotate_bed_toil_log --outdir . --writeLogs /path/to/annotate_bed_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/annotate_bed.cwl /path/to/inputs.yaml > annotate_bed_toil.stdout 2> annotate_bed_toil.stderr &
+```
+
+## Usage
+
+```
+toil-cwl-runner annotate_bed.cwl --help
+
+usage: annotate_bed.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS] --panel_bed
+ PANEL_BED --transcript_file TRANSCRIPT_FILE
+ --coverage_file COVERAGE_FILE
+ [--chunk_size CHUNK_SIZE] [--output_name OUTPUT_NAME]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ worker thread number
+ --panel_bed PANEL_BED
+ Input panel bed file; must have ONLY the following 4
+ columns chromosome, start position, end position,
+ gene/transcript
+ --transcript_file TRANSCRIPT_FILE
+ Transcript annotation file, contains required gene and
+ exon information. Must have ONLY the following 6
+ columns: chromosome, start, end, gene, transcript,
+ exon
+ --coverage_file COVERAGE_FILE
+ Per base coverage file (output from mosdepth or
+ similar)
+ --chunk_size CHUNK_SIZE
+ --output_name OUTPUT_NAME
+ (optional) Prefix for naming output file, if not given
+ will use name from per base coverage file
+```
diff --git a/docs/athena/coverage_report_single.md b/docs/athena/coverage_report_single.md
new file mode 100644
index 00000000..c33c461d
--- /dev/null
+++ b/docs/athena/coverage_report_single.md
@@ -0,0 +1,95 @@
+# coverage_report_single_1.4.2
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| python:8 base image | 8 | - |
+| Athena | 1.4.2 | https://github.com/msk-access/athena/archive/refs/tags/1.4.2.zip |
+
+## Explanation
+The coverage_report_single.cwl generates a full HTML report at the per-sample level. The user can define the coverage threshold below which regions are reported as sub-optimal.
+The output html report contains the following:
++ Summary including per gene coverage chart
++ Table of exons with sub-optimal coverage
++ Interactive plots of exons with sub-optimal coverage
++ A summary table of average coverage across all genes
++ Full gene coverage plots
++ Table of per exon coverage across all genes
++ Coverage of known variants (if specified)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner coverage_report_single.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/coverage_report_single.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir coverage_report_single_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/coverage_report_single_toil_log/cwltoil.log --jobStore /path/to/coverage_report_single_jobStore --batchSystem lsf --workDir /path/to/coverage_report_single_toil_log --outdir . --writeLogs /path/to/coverage_report_single_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/coverage_report_single.cwl /path/to/inputs.yaml > coverage_report_single_toil.stdout 2> coverage_report_single_toil.stderr &
+```
+
+## Usage
+
+```
+usage: coverage_report_single.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --exon_stats EXON_STATS --gene_stats
+ GENE_STATS --raw_coverage RAW_COVERAGE
+ [--per_base_coverage PER_BASE_COVERAGE]
+ [--threshold THRESHOLD]
+ [--sample_name SAMPLE_NAME]
+ [--output OUTPUT] [--panel PANEL]
+ [--limit LIMIT] [--summary]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ worker thread number
+ --exon_stats EXON_STATS
+ per exon statistics file (from
+ `coverage_stats_single.py`)
+ --gene_stats GENE_STATS
+ per gene statistics file (from
+ `coverage_stats_single.py`)
+ --raw_coverage RAW_COVERAGE
+ annotated bed file with coverage data (generated from
+ annotate_bed.sh / bedtools intersect)
+ --per_base_coverage PER_BASE_COVERAGE
+ Per-base coverage bed file from mosdepth. (Optional;
+ if not submitted, plots displaying global coverage per
+ chromosome will not be displayed)
+ --threshold THRESHOLD
+ threshold value defining sub-optimal coverage
+ (optional; default if not given: 20)
+ --sample_name SAMPLE_NAME
+ --output OUTPUT name for output report (optional; sample name will be
+ used if not given)
+ --panel PANEL panel bed file used for initial annotation, name will
+ be displayed in summary of report (optional)
+ --limit LIMIT number of genes at which to limit including full gene
+ plots, large numbers of genes may take a long time to
+ generate the plots (optional)
+ --summary boolean flag to add clinical report summary text in
+ summary section, includes list of all genes with
+ transcripts (optional; default False)
+```
diff --git a/docs/athena/coverage_stats_single.md b/docs/athena/coverage_stats_single.md
new file mode 100644
index 00000000..0f5090c1
--- /dev/null
+++ b/docs/athena/coverage_stats_single.md
@@ -0,0 +1,71 @@
+# coverage_stats_single_1.4.2
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| python:8 base image | 8 | - |
+| Athena | 1.4.2 | https://github.com/msk-access/athena/archive/refs/tags/1.4.2.zip |
+
+## Explanation
+The coverage_stats_single.cwl uses the annotated coverage bed file generated from the previous step, annotate_bed.cwl. The outputs of coverage_stats_single.cwl are tsv files of per exon and per gene coverage statistics. These give the minimum, mean and maximum coverage for each region, along with coverage at defined thresholds. The output tsv files are used as input files in the next step, coverage_report_single.cwl.
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner coverage_stats_single.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/coverage_stats_single.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir coverage_stats_single_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/coverage_stats_single_toil_log/cwltoil.log --jobStore /path/to/coverage_stats_single_jobStore --batchSystem lsf --workDir /path/to/coverage_stats_single_toil_log --outdir . --writeLogs /path/to/coverage_stats_single_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/coverage_stats_single.cwl /path/to/inputs.yaml > coverage_stats_single_toil.stdout 2> coverage_stats_single_toil.stderr &
+```
+
+## Usage
+
+```
+usage: coverage_stats_single.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --file FILE [--build BUILD]
+ [--outfile OUTFILE] [--thresholds THRESHOLDS]
+ [--output_name OUTPUT_NAME]
+ [--flagstat FLAGSTAT]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ worker thread number
+ --file FILE annotated bed file on which to generate report from
+ --build BUILD text file with build number used for alignment, output
+ from mosdepth (optional) chromosome, start, end, gene,
+ transcript, exon
+ --outfile OUTFILE output file name prefix, if not given the input file
+ name will be used as the name prefix
+ --thresholds THRESHOLDS
+ threshold values to calculate coverage for as comma
+ seperated integers (default: 10, 20, 30, 50, 100)
+ --output_name OUTPUT_NAME
+ (optional) Prefix for naming output file, if not given
+ will use name from per base coverage file
+ --flagstat FLAGSTAT file for sample, required for generating run
+ statistics (in development)
+```
diff --git a/docs/bcftools/README.md b/docs/bcftools/README.md
new file mode 100644
index 00000000..a08fa464
--- /dev/null
+++ b/docs/bcftools/README.md
@@ -0,0 +1 @@
+# BCFTOOLS
diff --git a/docs/bcftools/bcftools_bgzip_v1.15.1.md b/docs/bcftools/bcftools_bgzip_v1.15.1.md
new file mode 100644
index 00000000..afae2236
--- /dev/null
+++ b/docs/bcftools/bcftools_bgzip_v1.15.1.md
@@ -0,0 +1,42 @@
+# CWL and Dockerfile for running bgzip using bcftools v1.15.1
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+| -------- | ------- | -------------------------------------------------------- |
+| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io/):
+
+```
+toil-cwl-runner bcftools_bgzip_1.15.1.cwl example_input_bgzip.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```shell
+#Using CWLTOOL
+cwltool --singularity --non-strict /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+mkdir bcftools_toil_log
+toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_bgzip_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```shell
+usage: bcftools_bgzip_1.15.1.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT input VCF file
+```
+
diff --git a/docs/bcftools/bcftools_concat_v1.15.1.md b/docs/bcftools/bcftools_concat_v1.15.1.md
new file mode 100644
index 00000000..47033dfd
--- /dev/null
+++ b/docs/bcftools/bcftools_concat_v1.15.1.md
@@ -0,0 +1,53 @@
+# CWL and Dockerfile for running bcftools v1.15.1
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+toil-cwl-runner bcftools_concat_1.15.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```bash
+#Using CWLTOOL
+cwltool --singularity --non-strict /path/to/bcftools_concat_1.15.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+mkdir bcftools_toil_log
+toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_concat_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```
+usage: toil-cwl-runner bcftools_concat_1.15.1.cwl [-h]
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --allow_overlaps First coordinate of the next file can precede last
+ record of the current file.
+ --output_name OUTPUT_NAME
+ Output file name
+ --output_type OUTPUT_TYPE
+ compressed BCF (b), uncompressed BCF (u), compressed
+ VCF (z), uncompressed VCF (v)
+ --input INPUT
+
+```
diff --git a/docs/bcftools/bcftools_norm_v1.15.1.md b/docs/bcftools/bcftools_norm_v1.15.1.md
new file mode 100644
index 00000000..0268425a
--- /dev/null
+++ b/docs/bcftools/bcftools_norm_v1.15.1.md
@@ -0,0 +1,52 @@
+# CWL and Dockerfile for running bcftools v1.15.1
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+| -------- | ------- | -------------------------------------------------------- |
+| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+toil-cwl-runner bcftools_norm_1.15.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```bash
+#Using CWLTOOL
+cwltool --singularity --non-strict /path/to/bcftools_norm_1.15.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+mkdir bcftools_toil_log
+toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_norm_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```
+usage: toil-cwl-runner bcftools_norm_1.15.1.cwl [-h]
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --check_ref CHECK_REF
+ --multiallelics MULTIALLELICS
+ use any
+ --output_type OUTPUT_TYPE
+ --output_name OUTPUT_NAME
+ --input INPUT input vcf file
+ --fastaRef FASTAREF
+
+```
diff --git a/docs/bcftools/bcftools_sort_v1.15.1.md b/docs/bcftools/bcftools_sort_v1.15.1.md
new file mode 100644
index 00000000..856d292d
--- /dev/null
+++ b/docs/bcftools/bcftools_sort_v1.15.1.md
@@ -0,0 +1,50 @@
+# CWL and Dockerfile for running bcftools v1.15.1
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+| -------- | ------- | -------------------------------------------------------- |
+| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+toil-cwl-runner bcftools_sort_1.15.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```bash
+#Using CWLTOOL
+cwltool --singularity --non-strict /path/to/bcftools_sort_1.15.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+mkdir bcftools_toil_log
+toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_sort_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```
+usage: toil-cwl-runner bcftools_sort_1.15.1.cwl [-h]
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --output_name OUTPUT_NAME
+ Output file name
+ --output_type OUTPUT_TYPE
+ compressed BCF (b), uncompressed BCF (u), compressed
+ VCF (z), uncompressed VCF (v)
+ --input INPUT input vcf files
+```
diff --git a/docs/bcftools/bcftools_tabix_v1.15.1.md b/docs/bcftools/bcftools_tabix_v1.15.1.md
new file mode 100644
index 00000000..9271454d
--- /dev/null
+++ b/docs/bcftools/bcftools_tabix_v1.15.1.md
@@ -0,0 +1,42 @@
+## CWL and Docker for Running tabix using bcftools v1.15.1
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+| -------- | ------- | -------------------------------------------------------- |
+| bcftools | 1.15.1 | https://github.com/samtools/bcftools/releases/tag/1.15.1 |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io/):
+
+```
+toil-cwl-runner bcftools_tabix_1.15.1.cwl example_input_tabix.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```shell
+#Using CWLTOOL
+cwltool --singularity --non-strict /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+mkdir bcftools_toil_log
+toil-cwl-runner --singularity --logFile /path/to/bcftools_toil_log/cwltoil.log --jobStore /path/to/bcftools_jobStore --batchSystem lsf --workDir /path/to/bcftools_toil_log --outdir . --writeLogs /path/to/bcftools_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/bcftools_tabix_1.15.1.cwl /path/to/inputs.yaml > bcftools_toil.stdout 2> bcftools_toil.stderr &
+```
+
+### Usage
+
+```shell
+usage: bcftools_tabix_1.15.1.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT input VCF file
+```
+
diff --git a/docs/bedtools/README.md b/docs/bedtools/README.md
new file mode 100644
index 00000000..13a0e51c
--- /dev/null
+++ b/docs/bedtools/README.md
@@ -0,0 +1,2 @@
+# Bedtools
+
diff --git a/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md
new file mode 100644
index 00000000..da41c5b2
--- /dev/null
+++ b/docs/bedtools/bedtools_genomecov_v2.28.0_cv2.md
@@ -0,0 +1,43 @@
+# genomecov v2.28.0\_cv2
+
+## Version of tools in [docker image](https://hub.docker.com/r/biocontainers/bedtools)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) |
+
+[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_genomecov_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+```bash
+> toil-cwl-runner bedtools_genomecov_v2.28.0_cv2.cwl --help
+
+usage: bedtools_genomecov_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name OUTPUT_FILE_NAME [--memory_overhead MEMORY_OVERHEAD] [--memory_per_job MEMORY_PER_JOB] [--number_of_threads NUMBER_OF_THREADS] [--option_bedgraph] [job_order]
+
+positional arguments: job_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --input INPUT The input file can be in BAM format (Note: BAM must be sorted by position) --output_file_name OUTPUT_FILE_NAME --memory_overhead MEMORY_OVERHEAD --memory_per_job MEMORY_PER_JOB --number_of_threads NUMBER_OF_THREADS --option_bedgraph option flag parameter to choose output file format. -bg refers to bedgraph format
+```
+
diff --git a/docs/bedtools/bedtools_merge_v2.28.0_cv2.md b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md
new file mode 100644
index 00000000..507a8994
--- /dev/null
+++ b/docs/bedtools/bedtools_merge_v2.28.0_cv2.md
@@ -0,0 +1,43 @@
+# merge v2.28.0\_cv2
+
+## Version of tools in [docker image](https://hub.docker.com/r/biocontainers/bedtools)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) |
+
+[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict bedtools_merge_v2.28.0_cv2.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_merge_v2.28.0_cv2.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+```bash
+> toil-cwl-runner bedtools_merge_v2.28.0_cv2.cwl --help
+
+usage: bedtools_merge_v2.28.0_cv2.cwl [-h] --input INPUT --output_file_name OUTPUT_FILE_NAME [--memory_overhead MEMORY_OVERHEAD] [--memory_per_job MEMORY_PER_JOB] [--number_of_threads NUMBER_OF_THREADS] [--distance_between_features DISTANCE_BETWEEN_FEATURES] [job_order]
+
+positional arguments: job_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --input INPUT BEDgraph format file generated from Bedtools Genomecov module --output_file_name OUTPUT_FILE_NAME --memory_overhead MEMORY_OVERHEAD --memory_per_job MEMORY_PER_JOB --number_of_threads NUMBER_OF_THREADS --distance_between_features DISTANCE_BETWEEN_FEATURES Maximum distance between features allowed for features to be merged.
+```
+
diff --git a/docs/bedtools/bedtools_sortbed_v2.28.0_cv2.md b/docs/bedtools/bedtools_sortbed_v2.28.0_cv2.md
new file mode 100644
index 00000000..7b76d73b
--- /dev/null
+++ b/docs/bedtools/bedtools_sortbed_v2.28.0_cv2.md
@@ -0,0 +1,43 @@
+# SortVCF v2.28.0\_cv2
+
+## Version of tools in [docker image](https://hub.docker.com/r/biocontainers/bedtools)
+
+| Tool | Version | Location |
+| :------- | :----------- | :----------------------------------------------------------- |
+| Bedtools | v2.28.0\_cv2 | [https://github.com/arq5x/bedtools2/releases/tag/v2.28.0](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0) |
+
+[](https://github.com/arq5x/bedtools2/releases/tag/v2.28.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+toil-cwl-runner bedtools_sortbed_vcf.cwl example_input.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+cwltool --singularity --non-strict bedtools_sortbed_vcf.cwl inputs.yaml
+
+#Using toil-cwl-runner
+mkdir run_directory
+toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 bedtools_sortbed_vcf.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+```shell
+Usage: bedtools_sortbed_vcf.cwl [-h] --input INPUT [job_order]
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT input VCF file
+```
+
diff --git a/docs/cci_utils/README.md b/docs/cci_utils/README.md
new file mode 100644
index 00000000..bafc11f6
--- /dev/null
+++ b/docs/cci_utils/README.md
@@ -0,0 +1,2 @@
+# CCI_UTILS
+
diff --git a/docs/cci_utils/general_stats_parse_0.2.7.md b/docs/cci_utils/general_stats_parse_0.2.7.md
new file mode 100644
index 00000000..435424b3
--- /dev/null
+++ b/docs/cci_utils/general_stats_parse_0.2.7.md
@@ -0,0 +1,46 @@
+# CWL and Dockerfile for running general_stats_parse in cci_utils
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| cci_utils | 0.2.8 | |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.json to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner general_stats_parse.cwl example_inputs.json
+```
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/general_stats_parse.cwl /path/to/example_inputs.json
+
+#Using toil-cwl-runner
+> mkdir tool_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/general_stats_parse.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner general_stats_parse.cwl -h
+usage: general_stats_parse.cwl [-h] --directory DIRECTORY --samples-json
+ SAMPLES_JSON [--config CONFIG]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --directory DIRECTORY
+ Directory containing results.
+ --samples-json SAMPLES_JSON
+ Sample JSON file.
+ --config CONFIG MultQC config file.
+```
diff --git a/docs/delly/README.md b/docs/delly/README.md
new file mode 100644
index 00000000..3cf4e541
--- /dev/null
+++ b/docs/delly/README.md
@@ -0,0 +1,2 @@
+# Delly
+
diff --git a/docs/delly/delly_call_0.9.1.md b/docs/delly/delly_call_0.9.1.md
new file mode 100644
index 00000000..08dcf390
--- /dev/null
+++ b/docs/delly/delly_call_0.9.1.md
@@ -0,0 +1,81 @@
+# CWL and Dockerfile for running Delly
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| ubuntu | 18.04 | - |
+| DELLY | 0.9.1 | https://github.com/dellytools/delly |
+
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner delly_0.9.1.cwl example_inputs.yaml
+```
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/delly_0.9.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . --writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_0.9.1.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr &
+```
+
+### Usage
+
+```
+usage: delly_0.9.1.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ [--out_file OUT_FILE] --reference_genome
+ REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS]
+ [--vcffile VCFFILE] [--svtype SVTYPE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --out_file OUT_FILE The name to be used for the output bcf file
+ --reference_genome REFERENCE_GENOME
+ reference genome fasta file
+ --exclude_regions EXCLUDE_REGIONS
+ file with regions to exclude
+ --vcffile VCFFILE input VCF/BCF file for genotyping
+ --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL]
+```
+
+## Disclaimer
+Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license:
+
+Copyright (c) 2012- European Molecular Biology Laboratory (EMBL)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/docs/delly/delly_call_1.0.3.md b/docs/delly/delly_call_1.0.3.md
new file mode 100644
index 00000000..3bd6c5be
--- /dev/null
+++ b/docs/delly/delly_call_1.0.3.md
@@ -0,0 +1,81 @@
+# CWL and Dockerfile for running Delly
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| ubuntu | 18.04 | - |
+| DELLY | 1.0.3 | https://github.com/dellytools/delly |
+
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+    > toil-cwl-runner delly_1.0.3.cwl example_inputs.yaml
+```
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/delly_1.0.3.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/delly_toil.log --jobStore /path/to/delly_jobStore --batchSystem lsf --workDir /path/to/delly_toil_log --outdir . --writeLogs /path/to/delly_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/delly_1.0.3.cwl /path/to/inputs.yaml > delly_toil.stdout 2> delly_toil.stderr &
+```
+
+### Usage
+
+```
+usage: delly_1.0.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ [--out_file OUT_FILE] --reference_genome
+ REFERENCE_GENOME [--exclude_regions EXCLUDE_REGIONS]
+ [--vcffile VCFFILE] [--svtype SVTYPE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --out_file OUT_FILE The name to be used for the output bcf file
+ --reference_genome REFERENCE_GENOME
+ reference genome fasta file
+ --exclude_regions EXCLUDE_REGIONS
+ file with regions to exclude
+ --vcffile VCFFILE input VCF/BCF file for genotyping
+ --svtype SVTYPE SV type to compute [DEL, INS, DUP, INV, BND, ALL]
+```
+
+## Disclaimer
+Parts of this code were borrowed from the delly repository, https://github.com/dellytools/delly, which uses the following redistribution license:
+
+Copyright (c) 2012- European Molecular Biology Laboratory (EMBL)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/docs/disambiguate/README.md b/docs/disambiguate/README.md
new file mode 100644
index 00000000..4850c2fd
--- /dev/null
+++ b/docs/disambiguate/README.md
@@ -0,0 +1,2 @@
+# Disambiguate
+
diff --git a/docs/disambiguate/disambiguate_1.0.0.md b/docs/disambiguate/disambiguate_1.0.0.md
new file mode 100644
index 00000000..8e065892
--- /dev/null
+++ b/docs/disambiguate/disambiguate_1.0.0.md
@@ -0,0 +1,53 @@
+# v1.0.0
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+Dockerfile uses `biocontainers/biocontainers:latest` as a base image and installs tools from `bioconda`.
+
+| Tool | Version | Location | Notes |
+| :--- | :--- | :--- | :--- |
+| biocontainers | latest | [https://hub.docker.com/r/biocontainers/biocontainers/](https://hub.docker.com/r/biocontainers/biocontainers/) | base image; "latest" not actually latest version, just tag name on docker hub |
+| bamtools | 2.4.0 | [https://bioconda.github.io/recipes/bamtools/README.html](https://bioconda.github.io/recipes/bamtools/README.html) | - |
+| ngs-disambiguate | 2016.11.10 | [https://bioconda.github.io/recipes/ngs-disambiguate/README.html](https://bioconda.github.io/recipes/ngs-disambiguate/README.html) | - |
+
+[](https://microbadger.com/images/mskcc/disambiguate:1.0.0) [](https://microbadger.com/images/mskcc/disambiguate:1.0.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use `example_inputs.yaml` to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner disambiguate_1.0.0.cwl example_inputs.yaml
+```
+
+## Command
+
+```text
+USAGE:
+
+    cwltool disambiguate_1.0.0.cwl \
+        --prefix <string> \
+        --output_dir <string> \
+        [--aligner <string>] \
+        <A> <B>
+
+Where:
+
+    --prefix <string>
+        (required) Sample ID or name used as prefix. Do not include .bam
+
+    --output_dir <string>
+        (required) Output directory
+
+    --aligner <string>
+        Aligner option {bwa(default),tophat,hisat2,star}
+
+    <A>
+        (required) Species A BAM file
+
+    <B>
+        (required) Species B BAM file
+```
+
diff --git a/docs/fgbio/README.md b/docs/fgbio/README.md
new file mode 100644
index 00000000..f1d1ca8d
--- /dev/null
+++ b/docs/fgbio/README.md
@@ -0,0 +1,2 @@
+# Fgbio
+
diff --git a/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md
new file mode 100644
index 00000000..c8d3d73e
--- /dev/null
+++ b/docs/fgbio/fgbio_call_duplex_consensus_reads_1.2.0.md
@@ -0,0 +1,79 @@
+# CallDuplexConsensusReads v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_call_duplex_consensus_reads_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_call_duplex_consensus_reads_1.2.0.cwl [-h]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--read_name_prefix READ_NAME_PREFIX]
+ [--read_group_id READ_GROUP_ID]
+ [--error_rate_pre_umi ERROR_RATE_PRE_UMI]
+ [--error_rate_post_umi ERROR_RATE_POST_UMI]
+ [--min_input_base_quality MIN_INPUT_BASE_QUALITY]
+ [--trim]
+ [--sort_order SORT_ORDER]
+ [--min_reads MIN_READS]
+ [--max_reads_per_strand MAX_READS_PER_STRAND]
+ [--threads THREADS]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input SAM or BAM file.
+ --output_file_name OUTPUT_FILE_NAME
+ Output SAM or BAM file to write consensus reads.
+ --read_name_prefix READ_NAME_PREFIX
+ The prefix all consensus read names
+ --read_group_id READ_GROUP_ID
+ The new read group ID for all the consensus reads.
+ --error_rate_pre_umi ERROR_RATE_PRE_UMI
+ The Phred-scaled error rate for an error prior to the
+ UMIs being integrated.
+ --error_rate_post_umi ERROR_RATE_POST_UMI
+ The Phred-scaled error rate for an error post the UMIs
+ have been integrated.
+ --min_input_base_quality MIN_INPUT_BASE_QUALITY
+ Ignore bases in raw reads that have Q below this
+ value.
+ --trim If true, quality trim input reads in addition to
+ masking low Q bases
+ --sort_order SORT_ORDER
+ The sort order of the output, if :none: then the same
+ as the input.
+ --min_reads MIN_READS
+ The minimum number of input reads to a consensus read.
+ --max_reads_per_strand MAX_READS_PER_STRAND
+ The maximum number of reads to use when building a
+ single-strand consensus. If more than this many reads
+ are present in a tag family, the family is randomly
+ downsampled to exactly max-reads reads.
+```
+
diff --git a/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md
new file mode 100644
index 00000000..78812a55
--- /dev/null
+++ b/docs/fgbio/fgbio_collect_duplex_seq_metrics_1.2.0.md
@@ -0,0 +1,62 @@
+# CollectDuplexSeqMetrics v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_collect_duplex_seq_metrics_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_collect_duplex_seq_metrics_1.2.0.cwl
+ [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS] --input INPUT --output_prefix
+ OUTPUT_PREFIX [--intervals INTERVALS] [--description DESCRIPTION]
+ [--duplex_umi_counts DUPLEX_UMI_COUNTS] [--min_ab_reads MIN_AB_READS]
+ [--min_ba_reads MIN_BA_READS] [--umi_tag UMI_TAG] [--mi_tag MI_TAG]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input BAM file generated by GroupReadByUmi.
+ --output_prefix OUTPUT_PREFIX
+ Prefix of output files to write.
+ --intervals INTERVALS
+ Optional set of intervals over which to restrict
+ analysis. [Optional].
+ --description DESCRIPTION
+ Description of data set used to label plots. Defaults
+ to sample/library. [Optional].
+ --duplex_umi_counts DUPLEX_UMI_COUNTS
+ If true, produce the .duplex_umi_counts.txt file with
+ counts of duplex UMI observations. [Optional].
+ --min_ab_reads MIN_AB_READS
+ Minimum AB reads to call a tag family a 'duplex'.
+ [Optional].
+ --min_ba_reads MIN_BA_READS
+ Minimum BA reads to call a tag family a 'duplex'.
+ [Optional].
+ --umi_tag UMI_TAG The tag containing the raw UMI. [Optional].
+ --mi_tag MI_TAG The output tag for UMI grouping. [Optional].
+```
+
diff --git a/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md
new file mode 100644
index 00000000..3d4ede7f
--- /dev/null
+++ b/docs/fgbio/fgbio_fastq_to_bam_1.2.0.md
@@ -0,0 +1,82 @@
+# FastqToBam v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_fastq_to_bam_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_fastq_to_bam_1.2.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--read-structures READ_STRUCTURES]
+ [--sort] [--umi-tag UMI_TAG]
+ [--read-group-id READ_GROUP_ID]
+ [--sample SAMPLE] [--library LIBRARY]
+ [--platform PLATFORM]
+ [--platform-unit PLATFORM_UNIT]
+ [--platform-model PLATFORM_MODEL]
+ [--sequencing-center SEQUENCING_CENTER]
+ [--predicted-insert-size PREDICTED_INSERT_SIZE]
+ [--description DESCRIPTION]
+ [--comment COMMENT] [--run-date RUN_DATE]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Fastq files corresponding to each sequencing read
+ (e.g. R1, I1, etc.).
+ --output_file_name OUTPUT_FILE_NAME
+ The output SAM or BAM file to be written.
+ --read-structures READ_STRUCTURES
+ Read structures, one for each of the FASTQs.
+ https://github.com/fulcrumgenomics/fgbio/wiki/Read-
+ Structures
+ --sort If true, queryname sort the BAM file, otherwise
+ preserve input order.
+ --umi-tag UMI_TAG Tag in which to store molecular barcodes/UMIs
+ --read-group-id READ_GROUP_ID
+ Read group ID to use in the file header.
+ --sample SAMPLE The name of the sequenced sample.
+ --library LIBRARY The name/ID of the sequenced library.
+ --platform PLATFORM Sequencing Platform
+ --platform-unit PLATFORM_UNIT
+                        Platform unit (e.g. '<flowcell-barcode>.<lane>.<sample-barcode>')
+ --platform-model PLATFORM_MODEL
+ Platform model to insert into the group header (ex.
+ miseq, hiseq2500, hiseqX)
+ --sequencing-center SEQUENCING_CENTER
+ The sequencing center from which the data originated
+ --predicted-insert-size PREDICTED_INSERT_SIZE
+ Predicted median insert size, to insert into the read
+ group header
+ --description DESCRIPTION
+ Description of the read group.
+ --comment COMMENT Comment(s) to include in the output file’s header
+ --run-date RUN_DATE Date the run was produced, to insert into the read
+ group header
+```
+
diff --git a/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md
new file mode 100644
index 00000000..470166a1
--- /dev/null
+++ b/docs/fgbio/fgbio_filter_consensus_reads_1.2.0.md
@@ -0,0 +1,80 @@
+# FilterConsensusReads v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_filter_consensus_reads_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_filter_consensus_reads_1.2.0.cwl [-h]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ --reference_fasta
+ REFERENCE_FASTA
+ [--reverse_per_base_tags]
+ [--min_reads MIN_READS]
+ [--max_read_error_rate MAX_READ_ERROR_RATE]
+ [--max_base_error_rate MAX_BASE_ERROR_RATE]
+ [--min_base_quality MIN_BASE_QUALITY]
+ [--max_no_call_fraction MAX_NO_CALL_FRACTION]
+ [--min_mean_base_quality MIN_MEAN_BASE_QUALITY]
+ [--require_single_strand_agreement]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input SAM or BAM file.
+ --output_file_name OUTPUT_FILE_NAME
+ Output SAM or BAM file to write consensus reads.
+ --reference_fasta REFERENCE_FASTA
+ Reference fasta file.
+ --reverse_per_base_tags
+ Reverse [complement] per base tags on reverse strand
+ reads.
+ --min_reads MIN_READS
+ The minimum number of reads supporting a consensus
+ base/read. (Max 3 values)
+ --max_read_error_rate MAX_READ_ERROR_RATE
+ The maximum raw-read error rate across the entire
+ consensus read. (Max 3 values)
+ --max_base_error_rate MAX_BASE_ERROR_RATE
+ The maximum error rate for a single consensus base.
+ (Max 3 values)
+ --min_base_quality MIN_BASE_QUALITY
+ Mask (make N) consensus bases with quality less than
+ this threshold.
+ --max_no_call_fraction MAX_NO_CALL_FRACTION
+ Maximum fraction of no-calls in the read after
+ filtering
+ --min_mean_base_quality MIN_MEAN_BASE_QUALITY
+ The minimum mean base quality across the consensus
+ read
+ --require_single_strand_agreement
+ Mask (make N) consensus bases where the AB and BA
+ consensus reads disagree (for duplex-sequencing only).
+```
+
diff --git a/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md
new file mode 100644
index 00000000..87ad2aaa
--- /dev/null
+++ b/docs/fgbio/fgbio_group_reads_by_umi_1.2.0.md
@@ -0,0 +1,68 @@
+# GroupReadsByUmi v1.2.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio | 1.2.0 | quay.io/biocontainers/fgbio:1.2.0--0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_group_reads_by_umi_1.2.0.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_group_reads_by_umi_1.2.0.cwl [-h]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--family_size_histogram FAMILY_SIZE_HISTOGRAM]
+ [--raw_tag RAW_TAG]
+ [--assign_tag ASSIGN_TAG]
+ [--min_map_q MIN_MAP_Q]
+ [--include_non_pf_reads]
+ --strategy STRATEGY
+ [--edits EDITS]
+ [--min_umi_length MIN_UMI_LENGTH]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input BAM file.
+ --output_file_name OUTPUT_FILE_NAME
+ The output SAM or BAM file to be written.
+ --family_size_histogram FAMILY_SIZE_HISTOGRAM
+ Optional output of tag family size counts.
+ --raw_tag RAW_TAG The tag containing the raw UMI.
+ --assign_tag ASSIGN_TAG
+ The output tag for UMI grouping.
+ --min_map_q MIN_MAP_Q
+ Minimum mapping quality.
+ --include_non_pf_reads
+ --strategy STRATEGY The UMI assignment strategy.
+ (identity,edit,adjacency,paired)
+ --edits EDITS The allowable number of edits between UMIs.
+ --min_umi_length MIN_UMI_LENGTH
+ The minimum UMI length. If not specified then all UMIs
+ must have the same length, otherwise discard reads
+ with UMIs shorter than this length and allow for
+ differing UMI lengths.
+```
+
diff --git a/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md
new file mode 100644
index 00000000..b12b00a3
--- /dev/null
+++ b/docs/fgbio/fgbio_postprocessing_simplex_filter_0.1.8.md
@@ -0,0 +1,44 @@
+# simplex\_filter v0.1.8
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| fgbio\_postprocessing | 0.1.8 | [https://github.com/msk-access/fgbio\_postprocessing](https://github.com/msk-access/fgbio_postprocessing) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner fgbio_postprocessing_simplex_filter_0.1.8.cwl example_inputs.yaml
+```
+
+## Usage
+
+```bash
+usage: fgbio_postprocessing_simplex_filter_0.1.8/fgbio_postprocessing_simplex_filter_0.1.8.cwl
+ [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS] --input_bam INPUT_BAM
+ [--output_file_name OUTPUT_FILE_NAME]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input_bam INPUT_BAM
+ Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+```
+
diff --git a/docs/gatk/README.md b/docs/gatk/README.md
new file mode 100644
index 00000000..ebad94ae
--- /dev/null
+++ b/docs/gatk/README.md
@@ -0,0 +1,2 @@
+# GATK
+
diff --git a/docs/gatk/gatk_apply_bqsr_4.1.8.1.md b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md
new file mode 100644
index 00000000..c919c1f4
--- /dev/null
+++ b/docs/gatk/gatk_apply_bqsr_4.1.8.1.md
@@ -0,0 +1,43 @@
+# ApplyBQSR v4.1.8.1
+
+## Version of tools in [docker image](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_apply_bqsr_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+```bash
+
+> toil-cwl-runner gatk_apply_bqsr_4.1.8.1.cwl --help
+
+usage: gatk_apply_bqsr_4.1.8.1.cwl [-h] --reference REFERENCE [--create_output_bam_index] --bqsr_recal_file BQSR_RECAL_FILE --input INPUT [--output_file_name OUTPUT_FILE_NAME] [--add_output_sam_program_record] [--add_output_vcf_command_line] [--arguments_file ARGUMENTS_FILE] [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] [--create_output_bam_md5] [--create_output_variant_index] [--create_output_variant_md5] [--disable_bam_index_caching] [--disable_read_filter DISABLE_READ_FILTER] [--disable_sequence_dictionary_validation] [--emit_original_quals] [--exclude_intervals EXCLUDE_INTERVALS] [--gatk_config_file GATK_CONFIG_FILE] [--gcs_max_retries GCS_MAX_RETRIES] [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] [--global_qscore_prior GLOBAL_QSCORE_PRIOR] [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] [--interval_merging_rule INTERVAL_MERGING_RULE] [--interval_padding INTERVAL_PADDING] [--interval_set_rule INTERVAL_SET_RULE] [--intervals INTERVALS] [--lenient] [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] [--quantize_quals QUANTIZE_QUALS] [--quiet] [--read_filter READ_FILTER] [--read_index READ_INDEX] [--read_validation_stringency READ_VALIDATION_STRINGENCY] [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] [--sequence_dictionary SEQUENCE_DICTIONARY] [--sites_only_vcf_output] [--use_jdk_deflater] [--use_jdk_inflater] [--use_original_qualities] [--memory_overhead MEMORY_OVERHEAD] [--memory_per_job MEMORY_PER_JOB] [--number_of_threads NUMBER_OF_THREADS] [job_order]
+
+positional arguments: job_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create_output_bam_index --bqsr_recal_file BQSR_RECAL_FILE Input recalibration table for BQSR. Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output_file_name OUTPUT_FILE_NAME Output file name. Not Required --add_output_sam_program_record --add_output_vcf_command_line --arguments_file ARGUMENTS_FILE --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER --create_output_bam_md5 --create_output_variant_index --create_output_variant_md5 --disable_bam_index_caching --disable_read_filter DISABLE_READ_FILTER --disable_sequence_dictionary_validation --emit_original_quals --exclude_intervals EXCLUDE_INTERVALS --gatk_config_file GATK_CONFIG_FILE --gcs_max_retries GCS_MAX_RETRIES --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS --global_qscore_prior GLOBAL_QSCORE_PRIOR --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING --interval_merging_rule INTERVAL_MERGING_RULE --interval_padding INTERVAL_PADDING --interval_set_rule INTERVAL_SET_RULE --intervals INTERVALS --lenient --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN --quantize_quals QUANTIZE_QUALS --quiet --read_filter READ_FILTER --read_index READ_INDEX --read_validation_stringency READ_VALIDATION_STRINGENCY --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES --sequence_dictionary SEQUENCE_DICTIONARY --sites_only_vcf_output --use_jdk_deflater --use_jdk_inflater --use_original_qualities --memory_overhead MEMORY_OVERHEAD --memory_per_job MEMORY_PER_JOB --number_of_threads NUMBER_OF_THREADS
+```
diff --git a/docs/gatk/gatk_applybqsr_4.1.2.0.md b/docs/gatk/gatk_applybqsr_4.1.2.0.md
new file mode 100644
index 00000000..709855a1
--- /dev/null
+++ b/docs/gatk/gatk_applybqsr_4.1.2.0.md
@@ -0,0 +1,43 @@
+# ApplyBQSR v4.1.2.0
+
+## Version of tools in [docker image](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_ApplyBQSR_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+```bash
+
+> toil-cwl-runner gatk_ApplyBQSR_4.1.2.0.cwl --help
+
+usage: gatk_ApplyBQSR_4.1.2.0.cwl [-h] --reference REFERENCE [--create_output_bam_index] --bqsr_recal_file BQSR_RECAL_FILE --input INPUT [--output_file_name OUTPUT_FILE_NAME] [--add_output_sam_program_record] [--add_output_vcf_command_line] [--arguments_file ARGUMENTS_FILE] [--cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER] [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER] [--create_output_bam_md5] [--create_output_variant_index] [--create_output_variant_md5] [--disable_bam_index_caching] [--disable_read_filter DISABLE_READ_FILTER] [--disable_sequence_dictionary_validation] [--emit_original_quals] [--exclude_intervals EXCLUDE_INTERVALS] [--gatk_config_file GATK_CONFIG_FILE] [--gcs_max_retries GCS_MAX_RETRIES] [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS] [--global_qscore_prior GLOBAL_QSCORE_PRIOR] [--interval_exclusion_padding INTERVAL_EXCLUSION_PADDING] [--interval_merging_rule INTERVAL_MERGING_RULE] [--interval_padding INTERVAL_PADDING] [--interval_set_rule INTERVAL_SET_RULE] [--intervals INTERVALS] [--lenient] [--preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN] [--quantize_quals QUANTIZE_QUALS] [--quiet] [--read_filter READ_FILTER] [--read_index READ_INDEX] [--read_validation_stringency READ_VALIDATION_STRINGENCY] [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES] [--sequence_dictionary SEQUENCE_DICTIONARY] [--sites_only_vcf_output] [--use_jdk_deflater] [--use_jdk_inflater] [--use_original_qualities] [--memory_overhead MEMORY_OVERHEAD] [--memory_per_job MEMORY_PER_JOB] [--number_of_threads NUMBER_OF_THREADS] [job_order]
+
+positional arguments: job_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --reference REFERENCE Reference sequence --create_output_bam_index --bqsr_recal_file BQSR_RECAL_FILE Input recalibration table for BQSR. Only run ApplyBQSR with the covariates table created from the input BAM --input INPUT A BAM file containing input read data --output_file_name OUTPUT_FILE_NAME Output file name. Not Required --add_output_sam_program_record --add_output_vcf_command_line --arguments_file ARGUMENTS_FILE --cloud_index_prefetch_buffer CLOUD_INDEX_PREFETCH_BUFFER --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER --create_output_bam_md5 --create_output_variant_index --create_output_variant_md5 --disable_bam_index_caching --disable_read_filter DISABLE_READ_FILTER --disable_sequence_dictionary_validation --emit_original_quals --exclude_intervals EXCLUDE_INTERVALS --gatk_config_file GATK_CONFIG_FILE --gcs_max_retries GCS_MAX_RETRIES --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS --global_qscore_prior GLOBAL_QSCORE_PRIOR --interval_exclusion_padding INTERVAL_EXCLUSION_PADDING --interval_merging_rule INTERVAL_MERGING_RULE --interval_padding INTERVAL_PADDING --interval_set_rule INTERVAL_SET_RULE --intervals INTERVALS --lenient --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN --quantize_quals QUANTIZE_QUALS --quiet --read_filter READ_FILTER --read_index READ_INDEX --read_validation_stringency READ_VALIDATION_STRINGENCY --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES --sequence_dictionary SEQUENCE_DICTIONARY --sites_only_vcf_output --use_jdk_deflater --use_jdk_inflater --use_original_qualities --memory_overhead MEMORY_OVERHEAD --memory_per_job MEMORY_PER_JOB --number_of_threads NUMBER_OF_THREADS
+```
diff --git a/docs/gatk/gatk_base_recalibrator_4.1.8.1.md b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md
new file mode 100644
index 00000000..9b90a39f
--- /dev/null
+++ b/docs/gatk/gatk_base_recalibrator_4.1.8.1.md
@@ -0,0 +1,43 @@
+# BaseRecalibrator v4.1.8.1
+
+## Version of tools in [docker image](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_base_recalibrator_4.1.8.1.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_base_recalibrator_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+\`\`\`bash
+
+> toil-cwl-runner gatk\_base\_recalibrator\_4.1.8.1.cwl --help
+
+usage: gatk\_base\_recalibrator\_4.1.8.1.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] 
\[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\]
+
+positional arguments: job\_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES 
--sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2
+
diff --git a/docs/gatk/gatk_baserecalibrator_4.1.2.0.md b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md
new file mode 100644
index 00000000..41f341b4
--- /dev/null
+++ b/docs/gatk/gatk_baserecalibrator_4.1.2.0.md
@@ -0,0 +1,43 @@
+# BaseRecalibrator v4.1.2.0
+
+## Version of tools in [docker image](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.2.0 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.2.0)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_baserecalibrator_4.1.2.0.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_baserecalibrator_4.1.2.0.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+\`\`\`bash
+
+> toil-cwl-runner gatk\_baserecalibrator\_4.1.2.0.cwl --help
+
+usage: gatk\_baserecalibrator\_4.1.2.0.cwl \[-h\] --input INPUT --known\_sites\_1 KNOWN\_SITES\_1 --reference REFERENCE \[--output\_file\_name OUTPUT\_FILE\_NAME\] \[--add\_output\_sam\_program\_record\] \[--add\_output\_vcf\_command\_line\] \[--arguments\_file ARGUMENTS\_FILE\] \[--binary\_tag\_name BINARY\_TAG\_NAME\] \[--bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY\] \[--cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER\] \[--cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER\] \[--create\_output\_bam\_index\] \[--create\_output\_bam\_md5\] \[--create\_output\_variant\_index\] \[--create\_output\_variant\_md5\] \[--default\_base\_qualities DEFAULT\_BASE\_QUALITIES\] \[--deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY\] \[--disable\_bam\_index\_caching\] \[--disable\_read\_filter DISABLE\_READ\_FILTER\] \[--disable\_sequence\_dictionary\_validation\] \[--exclude\_intervals EXCLUDE\_INTERVALS\] \[--gatk\_config\_file GATK\_CONFIG\_FILE\] \[--gcs\_max\_retries GCS\_MAX\_RETRIES\] \[--gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS\] \[--indels\_context\_size INDELS\_CONTEXT\_SIZE\] \[--insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY\] \[--interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING\] \[--interval\_merging\_rule INTERVAL\_MERGING\_RULE\] \[--interval\_padding INTERVAL\_PADDING\] \[--interval\_set\_rule INTERVAL\_SET\_RULE\] \[--intervals INTERVALS\] \[--lenient\] \[--low\_quality\_tail LOW\_QUALITY\_TAIL\] \[--maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE\] \[--mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE\] \[--mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY\] \[--preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN\] \[--quantizing\_levels QUANTIZING\_LEVELS\] \[--QUIET\] \[--read\_filter READ\_FILTER\] \[--read\_index READ\_INDEX\] \[--seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES\] \[--sequence\_dictionary SEQUENCE\_DICTIONARY\] 
\[--sites\_only\_vcf\_output\] \[--use\_original\_qualities\] \[--number\_of\_threads NUMBER\_OF\_THREADS\] \[--memory\_per\_job MEMORY\_PER\_JOB\] \[--memory\_overhead MEMORY\_OVERHEAD\] \[--known\_sites\_2 KNOWN\_SITES\_2\] \[job\_order\]
+
+positional arguments: job\_order Job input json file
+
+optional arguments: -h, --help show this help message and exit --input INPUT BAM/SAM file containing reads --known\_sites\_1 KNOWN\_SITES\_1 One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis --reference REFERENCE Reference sequence file --output\_file\_name OUTPUT\_FILE\_NAME Output file name. Not Required --add\_output\_sam\_program\_record --add\_output\_vcf\_command\_line --arguments\_file ARGUMENTS\_FILE --binary\_tag\_name BINARY\_TAG\_NAME --bqsr\_baq\_gap\_open\_penalty BQSR\_BAQ\_GAP\_OPEN\_PENALTY --cloud-index-prefetch-buffer CLOUD\_INDEX\_PREFETCH\_BUFFER --cloud\_prefetch\_buffer CLOUD\_PREFETCH\_BUFFER --create\_output\_bam\_index --create\_output\_bam\_md5 --create\_output\_variant\_index --create\_output\_variant\_md5 --default\_base\_qualities DEFAULT\_BASE\_QUALITIES --deletions\_default\_quality DELETIONS\_DEFAULT\_QUALITY --disable\_bam\_index\_caching --disable\_read\_filter DISABLE\_READ\_FILTER --disable\_sequence\_dictionary\_validation --exclude\_intervals EXCLUDE\_INTERVALS --gatk\_config\_file GATK\_CONFIG\_FILE --gcs\_max\_retries GCS\_MAX\_RETRIES --gcs\_project\_for\_requester\_pays GCS\_PROJECT\_FOR\_REQUESTER\_PAYS --indels\_context\_size INDELS\_CONTEXT\_SIZE --insertions\_default\_quality INSERTIONS\_DEFAULT\_QUALITY --interval\_exclusion\_padding INTERVAL\_EXCLUSION\_PADDING --interval\_merging\_rule INTERVAL\_MERGING\_RULE --interval\_padding INTERVAL\_PADDING --interval\_set\_rule INTERVAL\_SET\_RULE --intervals INTERVALS --lenient --low\_quality\_tail LOW\_QUALITY\_TAIL --maximum\_cycle\_value MAXIMUM\_CYCLE\_VALUE --mismatches\_context\_size MISMATCHES\_CONTEXT\_SIZE --mismatches\_default\_quality MISMATCHES\_DEFAULT\_QUALITY --preserve\_qscores\_less\_than PRESERVE\_QSCORES\_LESS\_THAN --quantizing\_levels QUANTIZING\_LEVELS --QUIET --read\_filter READ\_FILTER --read\_index READ\_INDEX --seconds\_between\_progress\_updates SECONDS\_BETWEEN\_PROGRESS\_UPDATES 
--sequence\_dictionary SEQUENCE\_DICTIONARY --sites\_only\_vcf\_output --use\_original\_qualities --number\_of\_threads NUMBER\_OF\_THREADS --memory\_per\_job MEMORY\_PER\_JOB --memory\_overhead MEMORY\_OVERHEAD --known\_sites\_2 KNOWN\_SITES\_2
+
diff --git a/docs/gatk/gatk_downsamplesam_4.1.8.1.md b/docs/gatk/gatk_downsamplesam_4.1.8.1.md
new file mode 100644
index 00000000..b0f093d9
--- /dev/null
+++ b/docs/gatk/gatk_downsamplesam_4.1.8.1.md
@@ -0,0 +1,113 @@
+# DownsampleSam v4.1.8.1
+
+## Version of tools in [docker image](https://hub.docker.com/r/broadinstitute/gatk)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| GATK | 4.1.8.1 | [https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1) |
+
+[](https://github.com/broadinstitute/gatk/releases/tag/4.1.8.1)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl example_inputs.yml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict gatk_downsamplesam_4.1.8.1.cwl inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir run_directory
+> toil-cwl-runner --singularity --logFile path/to/run_directory/cwltoil.log --jobStore path/to/jobStore --batchSystem lsf --workDir /path/to/run_directory --outdir /path/to/run_directory --writeLogs /path/to/run_directory --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 gatk_downsamplesam_4.1.8.1.cwl inputs.yaml > file.stdout 2> file.stderr &
+```
+
+## Usage
+
+```bash
+
+> toil-cwl-runner gatk_downsamplesam_4.1.8.1.cwl --help
+
+usage: gatk_downsamplesam_4.1.8.1.cwl [-h] --input INPUT --reference REFERENCE
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--output_file_name_metrics OUTPUT_FILE_NAME_METRICS]
+ [--probability PROBABILITY]
+ [--random_seed RANDOM_SEED]
+ [--strategy STRATEGY]
+ [--arguments_file ARGUMENTS_FILE]
+ [--cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER]
+ [--cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER]
+ [--create_output_bam_index]
+ [--create_output_bam_md5]
+ [--disable_bam_index_caching]
+ [--disable_read_filter DISABLE_READ_FILTER]
+ [--disable_sequence_dictionary_validation]
+ [--exclude_intervals EXCLUDE_INTERVALS]
+ [--gatk_config_file GATK_CONFIG_FILE]
+ [--gcs_max_retries GCS_MAX_RETRIES]
+ [--gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS]
+ [--QUIET] [--read_filter READ_FILTER]
+ [--read_index READ_INDEX]
+ [--seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES]
+ [--lenient]
+ [--number_of_threads NUMBER_OF_THREADS]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--temporary_directory TEMPORARY_DIRECTORY]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT BAM/SAM file containing reads
+ --reference REFERENCE
+ Reference sequence file
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name. Not Required
+ --output_file_name_metrics OUTPUT_FILE_NAME_METRICS
+ Output file name for metrics file. Not Required
+ --probability PROBABILITY
+ The probability of keeping any individual read,
+ between 0 and 1.
+ --random_seed RANDOM_SEED
+ Random seed used for deterministic results. Setting to
+ null will cause multiple invocations to produce
+ different results.
+ --strategy STRATEGY The --STRATEGY argument is an enumerated type
+ (Strategy), which can have one of the following
+ values: HighAccuracy ConstantMemory Chained default
+ Strategy ConstantMemory
+ --arguments_file ARGUMENTS_FILE
+ --cloud-index-prefetch-buffer CLOUD_INDEX_PREFETCH_BUFFER
+ --cloud_prefetch_buffer CLOUD_PREFETCH_BUFFER
+ --create_output_bam_index
+ --create_output_bam_md5
+ --disable_bam_index_caching
+ --disable_read_filter DISABLE_READ_FILTER
+ Read filters to be disabled before analysis
+ --disable_sequence_dictionary_validation
+ --exclude_intervals EXCLUDE_INTERVALS
+ --gatk_config_file GATK_CONFIG_FILE
+ --gcs_max_retries GCS_MAX_RETRIES
+ --gcs_project_for_requester_pays GCS_PROJECT_FOR_REQUESTER_PAYS
+ --QUIET
+ --read_filter READ_FILTER
+ --read_index READ_INDEX
+ --seconds_between_progress_updates SECONDS_BETWEEN_PROGRESS_UPDATES
+ --lenient
+ --number_of_threads NUMBER_OF_THREADS
+ --memory_per_job MEMORY_PER_JOB
+ --memory_overhead MEMORY_OVERHEAD
+ --temporary_directory TEMPORARY_DIRECTORY
+ Default value: null.
+
+
diff --git a/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md
new file mode 100644
index 00000000..8a593757
--- /dev/null
+++ b/docs/gatk/gatk_merge_bam_alignment_4.1.8.0.md
@@ -0,0 +1,245 @@
+# MergeBamAlignment v4.1.8.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_merge_bam_alignment_4.1.8.0.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: gatk_merge_bam_alignment_4.1.8.0.cwl [-h]
+ [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --unmapped_bam UNMAPPED_BAM
+ --reference REFERENCE
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--add_mate_cigar]
+ [--add_pg_tag_to_reads]
+ [--aligned_bam ALIGNED_BAM]
+ [--aligned_reads_only]
+ [--aligner_proper_pair_flags]
+ [--attributes_to_remove ATTRIBUTES_TO_REMOVE]
+ [--attributes_to_retain ATTRIBUTES_TO_RETAIN]
+ [--attributes_to_reverse ATTRIBUTES_TO_REVERSE]
+ [--attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT]
+ [--clip_adapters]
+ [--clip_overlapping_reads]
+ [--expected_orientations EXPECTED_ORIENTATIONS]
+ [--hard_clip_overlapping_reads]
+ [--include_secondary_alignments]
+ [--is_bisulfite_sequence]
+ [--jump_size JUMP_SIZE]
+ [--matching_dictionary_tags MATCHING_DICTIONARY_TAGS]
+ [--max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS]
+ [--min_unclipped_bases MIN_UNCLIPPED_BASES]
+ [--paired_run]
+ [--primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY]
+ [--read1_aligned_bam READ1_ALIGNED_BAM]
+ [--read1_trim READ1_TRIM]
+ [--read2_aligned_bam READ2_ALIGNED_BAM]
+ [--read2_trim READ2_TRIM]
+ [--sort_order SORT_ORDER]
+ [--unmap_contaminant_reads]
+ [--unmapped_read_strategy UNMAPPED_READ_STRATEGY]
+ [--validation_stringency VALIDATION_STRINGENCY]
+ [--create_index]
+ [--create_md5_file]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --unmapped_bam UNMAPPED_BAM
+ Original SAM or BAM file of unmapped reads, which must
+ be in queryname order. Reads MUST be unmapped.
+ Required.
+ --reference REFERENCE
+ Reference sequence file. Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Merged SAM or BAM file to write to. Required.
+ --add_mate_cigar Adds the mate CIGAR tag (MC) if true, does not if
+ false. Default value: true. Possible values: {true,
+ false}
+ --add_pg_tag_to_reads
+ Add PG tag to each read in a SAM or BAM Default value:
+ true. Possible values: {true, false}
+ --aligned_bam ALIGNED_BAM
+ SAM or BAM file(s) with alignment data. This argument
+ may be specified 0 or more times. Default value: null.
+ Cannot be used in conjunction with argument(s)
+ READ1_ALIGNED_BAM (R1_ALIGNED) READ2_ALIGNED_BAM
+ (R2_ALIGNED)
+ --aligned_reads_only Whether to output only aligned reads. Default value:
+ false. Possible values: {true, false}
+ --aligner_proper_pair_flags
+ Use the aligners idea of what a proper pair is rather
+ than computing in this program. Default value: false.
+ Possible values: {true, false}
+ --attributes_to_remove ATTRIBUTES_TO_REMOVE
+ Attributes from the alignment record that should be
+ removed when merging. This overrides
+ ATTRIBUTES_TO_RETAIN if they share common tags. This
+ argument may be specified 0 or more times. Default
+ value: null.
+ --attributes_to_retain ATTRIBUTES_TO_RETAIN
+ Reserved alignment attributes (tags starting with X,
+ Y, or Z) that should be brought over from the
+ alignment data when merging. This argument may be
+ specified 0 or more times. Default value: null.
+ --attributes_to_reverse ATTRIBUTES_TO_REVERSE
+ Attributes on negative strand reads that need to be
+ reversed. This argument may be specified 0 or more
+ times. Default value: [OQ, U2].
+ --attributes_to_reverse_complement ATTRIBUTES_TO_REVERSE_COMPLEMENT
+ Attributes on negative strand reads that need to be
+ reverse complemented. This argument may be specified 0
+ or more times. Default value: [E2, SQ].
+ --clip_adapters Whether to clip adapters where identified. Default
+ value: true. Possible values: {true, false}
+ --clip_overlapping_reads
+ For paired reads, clip the 3' end of each read if
+ necessary so that it does not extend past the 5' end
+ of its mate. Clipping will be either soft or hard
+ clipping, depending on CLIP_OVERLAPPING_READS_OPERATOR
+ setting. Hard clipped bases and their qualities will
+ be stored in the XB and XQ tags respectively. Default
+ value: true. Possible values: {true, false}
+ --expected_orientations EXPECTED_ORIENTATIONS
+ The expected orientation of proper read pairs.
+ Replaces JUMP_SIZE This argument may be specified 0 or
+ more times. Default value: null. Possible values: {FR,
+ RF, TANDEM} Cannot be used in conjunction with
+ argument(s) JUMP_SIZE (JUMP)
+ --hard_clip_overlapping_reads
+ If true, hard clipping will be applied to overlapping
+ reads. By default, soft clipping is used. Default
+ value: false. Possible values: {true, false}
+ --include_secondary_alignments
+ If false, do not write secondary alignments to output.
+ Default value: true. Possible values: {true, false}
+ --is_bisulfite_sequence
+ Whether the lane is bisulfite sequence (used when
+ calculating the NM tag). Default value: false.
+ Possible values: {true, false}
+ --jump_size JUMP_SIZE
+ The expected jump size (required if this is a jumping
+ library). Deprecated. Use EXPECTED_ORIENTATIONS
+ instead Default value: null. Cannot be used in
+ conjunction with argument(s) EXPECTED_ORIENTATIONS
+ (ORIENTATIONS)
+ --matching_dictionary_tags MATCHING_DICTIONARY_TAGS
+ List of Sequence Records tags that must be equal (if
+ present) in the reference dictionary and in the
+ aligned file. Mismatching tags will cause an error if
+ in this list, and a warning otherwise. This argument
+ may be specified 0 or more times. Default value: [M5,
+ LN].
+ --max_insertions_or_deletions MAX_INSERTIONS_OR_DELETIONS
+ The maximum number of insertions or deletions
+ permitted for an alignment to be included. Alignments
+ with more than this many insertions or deletions will
+ be ignored. Set to -1 to allow any number of
+ insertions or deletions. Default value: 1.
+ --min_unclipped_bases MIN_UNCLIPPED_BASES
+ If UNMAP_CONTAMINANT_READS is set, require this many
+ unclipped bases or else the read will be marked as
+ contaminant. Default value: 32.
+ --paired_run DEPRECATED. This argument is ignored and will be
+ removed. Default value: true. Possible values: {true,
+ false}
+ --primary_alignment_strategy PRIMARY_ALIGNMENT_STRATEGY
+ Strategy for selecting primary alignment when the
+ aligner has provided more than one alignment for a
+ pair or fragment, and none are marked as primary, more
+ than one is marked as primary, or the primary
+ alignment is filtered out for some reason. For all
+ strategies, ties are resolved arbitrarily. Default
+ value: BestMapq. BestMapq (Expects that multiple
+ alignments will be correlated with HI tag, and prefers
+ the pair of alignments with the largest MAPQ, in the
+ absence of a primary selected by the aligner.)
+ EarliestFragment (Prefers the alignment which maps the
+ earliest base in the read. Note that EarliestFragment
+ may not be used for paired reads.) BestEndMapq
+ (Appropriate for cases in which the aligner is not
+ pair-aware, and does not output the HI tag. It simply
+ picks the alignment for each end with the highest
+ MAPQ, and makes those alignments primary, regardless
+ of whether the two alignments make sense together.)
+ MostDistant (Appropriate for a non-pair-aware aligner.
+ Picks the alignment pair with the largest insert size.
+ If all alignments would be chimeric, it picks the
+ alignments for each end with the best MAPQ.)
+ --read1_aligned_bam READ1_ALIGNED_BAM
+ SAM or BAM file(s) with alignment data from the first
+ read of a pair. This argument may be specified 0 or
+ more times. Default value: null. Cannot be used in
+ conjunction with argument(s) ALIGNED_BAM (ALIGNED)
+ --read1_trim READ1_TRIM
+ The number of bases trimmed from the beginning of read
+ 1 prior to alignment Default value: 0.
+ --read2_aligned_bam READ2_ALIGNED_BAM
+ SAM or BAM file(s) with alignment data from the second
+ read of a pair. This argument may be specified 0 or
+ more times. Default value: null. Cannot be used in
+ conjunction with argument(s) ALIGNED_BAM (ALIGNED)
+ --read2_trim READ2_TRIM
+ The number of bases trimmed from the beginning of read
+ 2 prior to alignment Default value: 0.
+ --sort_order SORT_ORDER
+ The order in which the merged reads should be output.
+ Default value: coordinate. Possible values: {unsorted,
+ queryname, coordinate, duplicate, unknown}
+ --unmap_contaminant_reads
+ Detect reads originating from foreign organisms (e.g.
+ bacterial DNA in a non-bacterial sample),and unmap +
+ label those reads accordingly. Default value: false.
+ Possible values: {true, false}
+ --unmapped_read_strategy UNMAPPED_READ_STRATEGY
+ How to deal with alignment information in reads that
+ are being unmapped (e.g. due to cross-species
+ contamination.) Currently ignored unless
+ UNMAP_CONTAMINANT_READS = true. Note that the
+ DO_NOT_CHANGE strategy will actually reset the cigar
+ and set the mapping quality on unmapped reads since
+ otherwisethe result will be an invalid record. To
+ force no change use the DO_NOT_CHANGE_INVALID
+ strategy. Default value: DO_NOT_CHANGE. Possible
+ values: {COPY_TO_TAG, DO_NOT_CHANGE,
+ DO_NOT_CHANGE_INVALID, MOVE_TO_TAG}
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --create_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value: false.
+ Possible values: {true, false}
+ --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ
+ files created. Default value: false. Possible values:
+ {true, false}
+```
+
diff --git a/docs/gatk/gatk_merge_sam_files_4.1.8.0.md b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md
new file mode 100644
index 00000000..0602f9be
--- /dev/null
+++ b/docs/gatk/gatk_merge_sam_files_4.1.8.0.md
@@ -0,0 +1,98 @@
+# MergeSamFiles v4.1.8.0
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.0 | broadinstitute/gatk:4.1.8.0 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gatk_merge_sam_files_4.1.8.0.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: gatk_merge_sam_files_4.1.8.0.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS]
+ --input INPUT
+ [--output_file_name OUTPUT_FILE_NAME]
+ [--assume_sorted] [--comment COMMENT]
+ [--create_index] [--create_md5_file]
+ [--intervals INTERVALS]
+ [--merge_sequence_dictionaries]
+ [--reference_sequence REFERENCE_SEQUENCE]
+ [--sort_order SORT_ORDER]
+ [--use_threading]
+ [--validation_stringency VALIDATION_STRINGENCY]
+ [--verbosity VERBOSITY]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT SAM or BAM input file This argument must be specified
+ at least once. Required.
+ --output_file_name OUTPUT_FILE_NAME
+ SAM or BAM file to write merged result to Required.
+ --assume_sorted If true, assume that the input files are in the same
+ sort order as the requested output sort order, even if
+ their headers say otherwise. Default value: false.
+ Possible values: {true, false}
+ --comment COMMENT Comment(s) to include in the merged output files
+ header. This argument may be specified 0 or more
+ times. Default value: null.
+ --create_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value: false.
+ Possible values: {true, false}
+ --create_md5_file Whether to create an MD5 digest for any BAM or FASTQ
+ files created. Default value: false. Possible values:
+ {true, false}
+ --intervals INTERVALS
+ An interval list file that contains the locations of
+ the positions to merge. Assume bam are sorted and
+ indexed. The resulting file will contain alignments
+ that may overlap with genomic regions outside the
+ requested region. Unmapped reads are discarded.
+ Default value: null.
+ --merge_sequence_dictionaries
+ Merge the sequence dictionaries Default value: false.
+ Possible values: {true, false}
+ --reference_sequence REFERENCE_SEQUENCE
+ Reference sequence file. Default value: null.
+ --sort_order SORT_ORDER
+ Sort order of output file Default value: coordinate.
+ Possible values: {unsorted, queryname, coordinate,
+ duplicate, unknown}
+ --use_threading Option to create a background thread to encode,
+ compress and write to disk the output file. The
+ threaded version uses about 20% more CPU and decreases
+ runtime by ~20% when writing out a compressed BAM
+ file. Default value: false. Possible values: {true,
+ false}
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ Possible values: {STRICT, LENIENT, SILENT}
+ --verbosity VERBOSITY
+ Control verbosity of logging. Default value: INFO.
+ Possible values: {ERROR, WARNING, INFO, DEBUG}
+```
+
diff --git a/docs/gatk/samtofastq-v4.1.8.0.md b/docs/gatk/samtofastq-v4.1.8.0.md
new file mode 100644
index 00000000..b2b44b12
--- /dev/null
+++ b/docs/gatk/samtofastq-v4.1.8.0.md
@@ -0,0 +1,2 @@
+# SamToFastq v4.1.8.0
+
diff --git a/docs/manta/README.md b/docs/manta/README.md
new file mode 100644
index 00000000..1849796d
--- /dev/null
+++ b/docs/manta/README.md
@@ -0,0 +1,2 @@
+# Manta
+
diff --git a/docs/manta/manta_1.5.1.md b/docs/manta/manta_1.5.1.md
new file mode 100644
index 00000000..9b285335
--- /dev/null
+++ b/docs/manta/manta_1.5.1.md
@@ -0,0 +1,70 @@
+# Manta v1.5.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| ubuntu base image | 16.04 | - |
+| manta | 1.5.1 | [https://github.com/Illumina/manta/releases/download/](https://github.com/Illumina/manta/releases/download/) |
+| samtools | 1.9 | [https://github.com/samtools/samtools/releases/download/](https://github.com/samtools/samtools/releases/download/) |
+| htslib | 1.9 | [https://github.com/samtools/htslib/releases/download/](https://github.com/samtools/htslib/releases/download/) |
+
+[](https://microbadger.com/images/mskaccess/manta:0.0.2)[](https://microbadger.com/images/mskaccess/manta:0.0.2) [](https://microbadger.com/images/mskaccess/manta:0.0.2) [](https://microbadger.com/images/mskaccess/manta:0.0.2)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner manta_1.5.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/manta_1.5.1/manta_1.5.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir toil_log
+> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/toil_log --outdir . --writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/manta_1.5.1/manta_1.5.1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner manta_1.5.1.cwl --help
+usage: manta_1.5.1.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --call_regions CALL_REGIONS
+ bgzip-compressed, tabix-indexed BED file specifiying
+ regions to which variant analysis will be restricted
+ --non_wgs toggles on settings for WES
+ --normal_bam NORMAL_BAM
+ Normal sample BAM or CRAM file. May be specified more
+ than once, multiple inputs will be treated as each BAM
+ file representing a different sample. [optional] (no
+ default)
+ --output_contigs if true, outputs assembled contig sequences in final
+ VCF files, in the INFO field CONTIG
+ --reference_fasta REFERENCE_FASTA
+ samtools-indexed reference fasta file [required]
+ --tumor_bam TUMOR_BAM
+ Tumor sample BAM or CRAM file. Only up to one tumor
+ bam file accepted.
+ --generateEvidenceBam
+ Generate a bam of supporting reads for all SVs
+```
+
diff --git a/docs/marianas/README.md b/docs/marianas/README.md
new file mode 100644
index 00000000..67939d60
--- /dev/null
+++ b/docs/marianas/README.md
@@ -0,0 +1,2 @@
+# Marianas
+
diff --git a/docs/marianas/marianas_collapsing_first_pass_1.8.1.md b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md
new file mode 100644
index 00000000..f0b24afe
--- /dev/null
+++ b/docs/marianas/marianas_collapsing_first_pass_1.8.1.md
@@ -0,0 +1,19 @@
+# Collapsing First Pass v1.8.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml
+```
+
diff --git a/docs/marianas/marianas_collapsing_second_pass_1.8.1.md b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md
new file mode 100644
index 00000000..7117bec5
--- /dev/null
+++ b/docs/marianas/marianas_collapsing_second_pass_1.8.1.md
@@ -0,0 +1,19 @@
+# Collapsing Second Pass v1.8.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner marianas_first_pass.cwl test_inputs_second_pass.yaml
+```
+
diff --git a/docs/marianas/marianas_process_loop_umi_1.8.1.md b/docs/marianas/marianas_process_loop_umi_1.8.1.md
new file mode 100644
index 00000000..7c1efc78
--- /dev/null
+++ b/docs/marianas/marianas_process_loop_umi_1.8.1.md
@@ -0,0 +1,19 @@
+# Process Loop UMI v1.8.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner marianas_first_pass.cwl example_inputs.yaml
+```
+
diff --git a/docs/marianas/marianas_separate_bams_1.8.1.md b/docs/marianas/marianas_separate_bams_1.8.1.md
new file mode 100644
index 00000000..232c89da
--- /dev/null
+++ b/docs/marianas/marianas_separate_bams_1.8.1.md
@@ -0,0 +1,33 @@
+# Separate BAMs v1.8.1
+
+## Version of tools in docker image \(../marianas\_process\_loop\_umi\_1.8.1/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| Marianas | 1.8.1 | [https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar](https://github.com/juberpatel/Marianas/releases/download/v1.8.1/Marianas-1.8.1.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner marianas_separate_bams_1.8.1.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: marianas_separate_bams_1.8.1/marianas_separate_bams_1.8.1.cwl
+ [-h] --input_bam INPUT_BAM [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input_bam INPUT_BAM
+```
+
diff --git a/docs/merge-fastq/README.md b/docs/merge-fastq/README.md
new file mode 100644
index 00000000..19544a3d
--- /dev/null
+++ b/docs/merge-fastq/README.md
@@ -0,0 +1,2 @@
+# Merge Fastq
+
diff --git a/docs/merge-fastq/merge_fastq_0.1.7.md b/docs/merge-fastq/merge_fastq_0.1.7.md
new file mode 100644
index 00000000..ec1c9213
--- /dev/null
+++ b/docs/merge-fastq/merge_fastq_0.1.7.md
@@ -0,0 +1,67 @@
+# v0.1.7
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| miniconda | 3 | [https://hub.docker.com/r/continuumio/miniconda3](https://hub.docker.com/r/continuumio/miniconda3) |
+| merge\_fastq | 0.1.7 | [https://pypi.org/project/merge-fastq/](https://pypi.org/project/merge-fastq/) |
+
+[](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1) [](https://microbadger.com/images/mskaccess/merge_fastq:0.6.1)
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner merge_fastq_0.1.7.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir tool_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/merge_fastq_0.1.7/merge_fastq_0.1.7.cwl /path/to/inputs.yaml > tool_toil.stdout 2> tool_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner merge_fastq_0.1.7.cwl --help
+usage: merge_fastq_0.1.7.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --fastq1 FASTQ1 Full path to gziped READ1 fastq files, can be
+ specified multiple times for example: --fastq1
+ test_part1_R1.fastq.gz --fastq1 test_part2_R1.fastq.gz
+ [required]
+ --fastq2 FASTQ2 Full path to gziped READ2 fastq files, can be
+ specified multiple times for example: --fastq2
+ test_part1_R2.fastq.gz --fastq2 test_part2_R2.fastq.gz
+ [required]
+ --output_path OUTPUT_PATH
+ Full path to write the output files (default: Current
+ working directory)
+ --out_fastq1_name OUT_FASTQ1_NAME
+ Name of the merged output READ1 fastq file(default:
+ merged_fastq_R1.fastq.gz)
+ --out_fastq2_name OUT_FASTQ2_NAME
+ Name of the merged output READ2 fastq file(default:
+ merged_fastq_R2.fastq.gz)
+```
+
diff --git a/docs/mosdepth/README.md b/docs/mosdepth/README.md
new file mode 100644
index 00000000..d576ad29
--- /dev/null
+++ b/docs/mosdepth/README.md
@@ -0,0 +1,2 @@
+# Mosdepth
+
diff --git a/docs/mosdepth/mosdepth_0.3.3.md b/docs/mosdepth/mosdepth_0.3.3.md
new file mode 100644
index 00000000..44080bde
--- /dev/null
+++ b/docs/mosdepth/mosdepth_0.3.3.md
@@ -0,0 +1,68 @@
+Mosdepth: fast BAM/CRAM depth calculation for **WGS**, **exome**, or **targeted sequencing**.
+
+`mosdepth` can output:
++ per-base depth about 2x as fast as `samtools depth`--about 25 minutes of CPU time for a 30X genome.
++ mean per-window depth given a window size--as would be used for CNV calling.
++ the mean per-region given a BED file of regions.
++ the mean or median per-region cumulative coverage histogram given a window size
++ a distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide.
++ quantized output that merges adjacent bases as long as they fall in the same coverage bins e.g. (10-20)
++ threshold output to indicate how many bases in each region are covered at the given thresholds.
++ A summary of mean depths per chromosome and within specified regions per chromosome.
+
+# CWL for running Mosdepth - Coverage tool
+## Version of tools in docker image
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| mosdepth | 0.3.3 | [https://hub.docker.com/r/brentp/mosdepth/tags](https://hub.docker.com/r/brentp/mosdepth/tags) [https://github.com/brentp/mosdepth/releases/tag/v0.3.3](https://github.com/brentp/mosdepth/releases/tag/v0.3.3) |
+
+[](https://github.com/brentp/mosdepth/releases/tag/v0.3.3)
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner mosdepth_0.3.3.cwl example_inputs.yaml
+```
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> nohup toil-cwl-runner --singularity --outdir /path/to/output/folder /path/to/mosdepth_0.3.3.cwl /path/to/inputs.yaml &
+```
+
+### Usage
+
+```bash
+usage: mosdepth_0.3.3.cwl [-h] [--memory_per_job MEMORY_PER_JOB]
+ [--memory_overhead MEMORY_OVERHEAD]
+ [--number_of_threads NUMBER_OF_THREADS] [--bed BED]
+ [--chrom CHROM] [--prefix PREFIX] [--flag FLAG]
+ [--mapq MAPQ]
+ [job_order]
+
+fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing.
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --bed BED optional BED file or (integer) window-sizes.
+ --chrom CHROM chromosome to restrict depth calculation.
+ --prefix PREFIX Prefix for the output files
+ --flag FLAG exclude reads with any of the bits in FLAG set
+ --mapq MAPQ mapping quality threshold. reads with a mapping
+ quality less than this are ignored
+```
diff --git a/docs/multiqc/README.md b/docs/multiqc/README.md
new file mode 100644
index 00000000..a3dcea74
--- /dev/null
+++ b/docs/multiqc/README.md
@@ -0,0 +1,2 @@
+# MultiQC
+
diff --git a/docs/multiqc/multiqc_1.10.1.7.md b/docs/multiqc/multiqc_1.10.1.7.md
new file mode 100644
index 00000000..02fffc9c
--- /dev/null
+++ b/docs/multiqc/multiqc_1.10.1.7.md
@@ -0,0 +1,56 @@
+# CWL and Dockerfile for running MultiQC
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| multiqc | 1.10.1.7 | |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.json to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner multiqc.cwl example_inputs.json
+```
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/multiqc_1.10.1.7/multiqc.cwl /path/to/example_inputs.json
+
+#Using toil-cwl-runner
+> mkdir tool_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.10.1/multiqc_1.10.1.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner multiqc.cwl --help
+usage: multiqc_1.10.1.cwl [-h]
+ [--qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY]
+ [--qc_files_dir QC_FILES_DIR]
+ [--qc_list_of_dirs QC_LIST_OF_DIRS]
+ [--report_name REPORT_NAME] [--config CONFIG]
+ [job_order]
+
+Run multiqc on log files from supported bioinformatic tools.
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --qc_files_array_of_array QC_FILES_ARRAY_OF_ARRAY
+ qc files which shall be part of the multiqc summary;
+ optional, only one of qc_files_array or
+ qc_files_array_of_array must be provided
+ --qc_files_dir QC_FILES_DIR
+ qc files in a Directory
+ --qc_list_of_dirs QC_LIST_OF_DIRS
+ qc files in multiple directories
+ --report_name REPORT_NAME
+ name used for the html report
+ --config CONFIG
+```
diff --git a/docs/multiqc/multiqc_1.12.md b/docs/multiqc/multiqc_1.12.md
new file mode 100644
index 00000000..411afef8
--- /dev/null
+++ b/docs/multiqc/multiqc_1.12.md
@@ -0,0 +1,48 @@
+# CWL and Dockerfile for running MultiQC
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| multiqc | 1.12 | |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.json to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner multiqc.cwl example_inputs.json
+```
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/multiqc_1.12/multiqc_1.12.cwl /path/to/example_inputs.json
+
+#Using toil-cwl-runner
+> mkdir tool_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/tool_toil_log/cwltoil.log --jobStore /path/to/tool_jobStore --batchSystem lsf --workDir /path/to/tool_toil_log --outdir . --writeLogs /path/to/tool_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/multiqc_1.12/multiqc_1.12.cwl /path/to/example_inputs.json > tool_toil.stdout 2> tool_toil.stderr &
+```
+
+### Usage
+
+```bash
+usage: multiqc_1.12.cwl [-h] [--qc_files_dir QC_FILES_DIR]
+ [--report_name REPORT_NAME] [--config CONFIG]
+ [job_order]
+
+Run multiqc on log files from supported bioinformatic tools.
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --qc_files_dir QC_FILES_DIR
+ qc files in a Directory
+ --report_name REPORT_NAME
+ name used for the html report and the corresponding
+ zip file
+ --config CONFIG
+```
\ No newline at end of file
diff --git a/docs/mutect/README.md b/docs/mutect/README.md
new file mode 100644
index 00000000..639d595b
--- /dev/null
+++ b/docs/mutect/README.md
@@ -0,0 +1,2 @@
+# MuTect
+
diff --git a/docs/mutect/mutect_1.1.5.md b/docs/mutect/mutect_1.1.5.md
new file mode 100644
index 00000000..f5a70ddb
--- /dev/null
+++ b/docs/mutect/mutect_1.1.5.md
@@ -0,0 +1,273 @@
+# MuTect 1.1.5
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| openjdk:7 base image | 7 | - |
+| muTect | 1.1.5 | [https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip](https://github.com/broadinstitute/mutect/releases/download/1.1.5/muTect-1.1.5-bin.zip) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner mutect_1.1.5.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir mutect_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/mutect_toil_log/cwltoil.log --jobStore /path/to/mutect_jobStore --batchSystem lsf --workDir /path/to/mutect_toil_log --outdir . --writeLogs /path/to/mutect_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/mutect_1.1.5.cwl /path/to/inputs.yaml > mutect_toil.stdout 2> mutect_toil.stderr &
+```
+
+### Usage
+
+```text
+usage: toil-cwl-runner mutect_1.1.5.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --BQSR BQSR The input covariates table file which enables on-the-
+ fly base quality score recalibration
+ --absolute_copy_number_data ABSOLUTE_COPY_NUMBER_DATA
+ Absolute Copy Number Data, as defined by Absolute, to
+ use in power calculations
+ --arg_file ARG_FILE Reads arguments from the specified file
+ --bam_tumor_sample_name BAM_TUMOR_SAMPLE_NAME
+ if the tumor bam contains multiple samples, only use
+ read groups with SM equal to this value
+ --baq BAQ Type of BAQ calculation to apply in the engine
+ (OFF|CALCULATE_AS_NECESSARY| RECALCULATE)
+ --baqGapOpenPenalty BAQGAPOPENPENALTY
+ BAQ gap open penalty (Phred Scaled). Default value is
+ 40. 30 is perhaps better for whole genome call sets
+ --clipping_bias_pvalue_threshold CLIPPING_BIAS_PVALUE_THRESHOLD
+ pvalue threshold for fishers exact test of clipping
+ bias in mutant reads vs ref reads
+ --cosmic COSMIC VCF file of COSMIC sites
+ --coverage_20_q20_file COVERAGE_20_Q20_FILE
+ write out 20x of Q20 coverage in WIGGLE format to this
+ file
+ --coverage_file COVERAGE_FILE
+ write out coverage in WIGGLE format to this file
+ --dbsnp DBSNP VCF file of DBSNP information
+ --dbsnp_normal_lod DBSNP_NORMAL_LOD
+ LOD threshold for calling normal non-variant at dbsnp
+ sites
+ --defaultBaseQualities DEFAULTBASEQUALITIES
+ If reads are missing some or all base quality scores,
+ this value will be used for all base quality scores
+ --disableRandomization
+ Completely eliminates randomization from
+ nondeterministic methods. To be used mostly in the
+ testing framework where dynamic parallelism can result
+ in differing numbers of calls to the generator.
+ --disable_indel_quals
+ If true, disables printing of base insertion and base
+ deletion tags (with -BQSR)
+ --downsample_to_coverage DOWNSAMPLE_TO_COVERAGE
+ Target coverage threshold for downsampling to coverage
+ --downsampling_type DOWNSAMPLING_TYPE
+ Type of reads downsampling to employ at a given locus.
+ Reads will be selected randomly to be removed from the
+ pile based on the method described here
+ (NONE|ALL_READS| BY_SAMPLE) given locus; note that
+ downsampled reads are randomly selected from all
+ possible reads at a locus
+ --emit_original_quals
+ If true, enables printing of the OQ tag with the
+ original base qualities (with -BQSR)
+ --enable_extended_output
+ --excludeIntervals EXCLUDEINTERVALS
+ One or more genomic intervals to exclude from
+ processing. Can be explicitly specified on the command
+ line or in a file (including a rod file)
+ --filter_mismatching_base_and_quals
+ if a read has mismatching number of bases and base
+ qualities, filter out the read instead of blowing up.
+ --force_alleles force output for all alleles at each site
+ --force_output force output for each site
+ --fraction_contamination FRACTION_CONTAMINATION
+ estimate of fraction (0-1) of physical contamination
+ with other unrelated samples
+ --fraction_mapq0_threshold FRACTION_MAPQ0_THRESHOLD
+ threshold for determining if there is relatedness
+ between the alt and ref allele read piles
+ --gap_events_threshold GAP_EVENTS_THRESHOLD
+ how many gapped events (ins/del) are allowed in
+ proximity to this candidate
+ --gatk_key GATK_KEY GATK Key file. Required if running with -et NO_ET.
+ Please see the GATK "What is Phone Home and how does it
+ affect me?" documentation for details.
+ --heavily_clipped_read_fraction HEAVILY_CLIPPED_READ_FRACTION
+ if this fraction or more of the bases in a read are
+ soft/hard clipped, do not use this read for mutation
+ calling
+ --initial_tumor_lod INITIAL_TUMOR_LOD
+ Initial LOD threshold for calling tumor variant
+ --input_file_normal INPUT_FILE_NORMAL
+ SAM or BAM file(s)
+ --input_file_tumor INPUT_FILE_TUMOR
+ SAM or BAM file(s)
+ --interval_merging INTERVAL_MERGING
+ Indicates the interval merging rule we should use for
+ abutting intervals (ALL| OVERLAPPING_ONLY)
+ --interval_padding INTERVAL_PADDING
+ Indicates how many basepairs of padding to include
+ around each of the intervals specified with the -L/--intervals argument
+ --interval_set_rule INTERVAL_SET_RULE
+ Indicates the set merging approach the interval parser
+ should use to combine the various -L or -XL inputs
+ (UNION| INTERSECTION)
+ --java_7 JAVA_7
+ --keep_program_records
+ Should we override the Walkers default and keep
+ program records from the SAM header
+ --log_to_file LOG_TO_FILE
+ Set the logging location
+ --logging_level LOGGING_LEVEL
+ Set the minimum level of logging, i.e. setting INFO
+ gets you INFO up to FATAL, setting ERROR gets you
+ ERROR and FATAL level logging.
+ --maxRuntime MAXRUNTIME
+ If provided, that GATK will stop execution cleanly as
+ soon after maxRuntime has been exceeded, truncating
+ the run but not exiting with a failure. By default the
+ value is interpreted in minutes, but this can be
+ changed by maxRuntimeUnits
+ --maxRuntimeUnits MAXRUNTIMEUNITS
+ The TimeUnit for maxRuntime (NANOSECONDS|
+ MICROSECONDS|MILLISECONDS|SECONDS|MINUTES| HOURS|DAYS)
+ --max_alt_allele_in_normal_fraction MAX_ALT_ALLELE_IN_NORMAL_FRACTION
+ threshold for maximum alternate allele fraction in
+ normal
+ --max_alt_alleles_in_normal_count MAX_ALT_ALLELES_IN_NORMAL_COUNT
+ threshold for maximum alternate allele counts in
+ normal
+ --max_alt_alleles_in_normal_qscore_sum MAX_ALT_ALLELES_IN_NORMAL_QSCORE_SUM
+ threshold for maximum alternate allele quality score
+ sum in normal
+ --min_qscore MIN_QSCORE
+ threshold for minimum base quality score
+ --minimum_mutation_cell_fraction MINIMUM_MUTATION_CELL_FRACTION
+ minimum fraction of cells which are presumed to have a
+ mutation, used to handle non-clonality and
+ contamination
+ --minimum_normal_allele_fraction MINIMUM_NORMAL_ALLELE_FRACTION
+ minimum allele fraction to be considered in normal,
+ useful for normal sample contaminated with tumor
+ --monitorThreadEfficiency
+ Enable GATK threading efficiency monitoring
+ --mutect MUTECT
+ --nonDeterministicRandomSeed
+ Makes the GATK behave non deterministically, that is,
+ the random numbers generated will be different in
+ every run
+ --noop used for debugging, basically exit as soon as we get
+ the reads
+ --normal_depth_file NORMAL_DEPTH_FILE
+ write out normal read depth in WIGGLE format to this
+ file
+ --normal_lod NORMAL_LOD
+ LOD threshold for calling normal non-germline
+ --normal_sample_name NORMAL_SAMPLE_NAME
+ name to use for normal in output files
+ --num_bam_file_handles NUM_BAM_FILE_HANDLES
+ The total number of BAM file handles to keep open
+ simultaneously
+ --num_cpu_threads_per_data_thread NUM_CPU_THREADS_PER_DATA_THREAD
+ How many CPU threads should be allocated per data
+ thread to running this analysis?
+ --num_threads NUM_THREADS
+ How many data threads should be allocated to running
+ this analysis.
+ --only_passing_calls only emit passing calls
+ --pedigree PEDIGREE Pedigree files for samples
+ --pedigreeString PEDIGREESTRING
+ Pedigree string for samples
+ --pedigreeValidationType PEDIGREEVALIDATIONTYPE
+ How strict should we be in validating the pedigree
+ information? (STRICT|SILENT)
+ --performanceLog PERFORMANCELOG
+ If provided, a GATK runtime performance log will be
+ written to this file
+ --phone_home PHONE_HOME
+ What kind of GATK run report should we generate?
+ STANDARD is the default, can be NO_ET so nothing is
+ posted to the run repository. Please see the GATK "What is Phone
+ Home and how does it affect me?" documentation for details.
+ (NO_ET|STANDARD|STDOUT)
+ --pir_mad_threshold PIR_MAD_THRESHOLD
+ threshold for clustered read position artifact MAD
+ --pir_median_threshold PIR_MEDIAN_THRESHOLD
+ threshold for clustered read position artifact median
+ --power_constant_af POWER_CONSTANT_AF
+ Allelic fraction constant to use in power calculations
+ --power_constant_qscore POWER_CONSTANT_QSCORE
+ Phred scale quality score constant to use in power
+ calculations
+ --power_file POWER_FILE
+ write out power in WIGGLE format to this file
+ --preserve_qscores_less_than PRESERVE_QSCORES_LESS_THAN
+ Bases with quality scores less than this threshold
+ wont be recalibrated (with -BQSR)
+ --read_buffer_size READ_BUFFER_SIZE
+ Number of reads per SAM file to buffer in memory
+ --read_filter READ_FILTER
+ Specify filtration criteria to apply to each read
+ individually
+ --read_group_black_list READ_GROUP_BLACK_LIST
+ Filters out read groups matching <TAG>:<STRING> - or a
+ .txt file containing the filter strings one per line.
+ --reference_sequence REFERENCE_SEQUENCE
+ --remove_program_records
+ Should we override the Walkers default and remove
+ program records from the SAM header
+ --required_maximum_alt_allele_mapping_quality_score
+ required minimum value for
+
+ tumor alt allele maximum mapping quality score
+ --somatic_classification_normal_power_threshold
+ Power threshold for normal to
+
+ determine germline vs variant
+ --tag TAG Arbitrary tag string to identify this GATK run as part
+ of a group of runs, for later analysis
+ --tumor_depth_file TUMOR_DEPTH_FILE
+ write out tumor read depth in WIGGLE format to this
+ file
+ --tumor_f_pretest TUMOR_F_PRETEST
+ for computational efficiency, reject sites with
+ allelic fraction below this threshold
+ --tumor_lod TUMOR_LOD
+ LOD threshold for calling tumor variant
+ --tumor_sample_name TUMOR_SAMPLE_NAME
+ name to use for tumor in output files
+ --unsafe UNSAFE If set, enables unsafe operations - nothing will be
+ checked at runtime. For expert users only who know
+ what they are doing. We do not support usage of this
+ argument. (ALLOW_UNINDEXED_BAM|
+ ALLOW_UNSET_BAM_SORT_ORDER|
+ NO_READ_ORDER_VERIFICATION|
+ ALLOW_SEQ_DICT_INCOMPATIBILITY|
+ LENIENT_VCF_PROCESSING|ALL)
+ --useOriginalQualities
+ If set, use the original base quality scores from the
+ OQ tag when present instead of the standard scores
+ --validation_strictness VALIDATION_STRICTNESS
+ How strict should we be with validation
+ (STRICT|LENIENT|SILENT)
+ --vcf VCF VCF output of mutation candidates
+```
+
diff --git a/docs/octopus/README.md b/docs/octopus/README.md
new file mode 100644
index 00000000..c19f208c
--- /dev/null
+++ b/docs/octopus/README.md
@@ -0,0 +1,2 @@
+# Octopus
+
diff --git a/docs/octopus/octopus_0.7.4.md b/docs/octopus/octopus_0.7.4.md
new file mode 100644
index 00000000..2967f783
--- /dev/null
+++ b/docs/octopus/octopus_0.7.4.md
@@ -0,0 +1,74 @@
+# CWL and Docker for Running Octopus
+
+## Version of tools in [docker image](https://hub.docker.com/r/dancooke/octopus/tags)
+
+| Tool | Version | Location |
+| ------- | ------- | ---------------------------------------------------------- |
+| Octopus | v0.7.4 | https://github.com/luntergroup/octopus/releases/tag/v0.7.4 |
+
+### CWL
+
+- CWL specification 1.0
+- Use example_input.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io/):
+  `toil-cwl-runner octopus_0-7-4.cwl example_input.yaml`
+
+If at MSK, using the JUNO cluster, having installed toil version 3.19 and manually modified [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&`, you can use the following commands:
+
+### Using CWLTOOL
+
+```
+cwltool --singularity --non-strict /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml
+```
+
+### Using toil-cwl-runner
+
+```shell
+mkdir octopus_toil_log
+toil-cwl-runner --singularity --logFile /path/to/octopus_toil_log/cwltoil.log --jobStore /path/to/octopus_jobStore --batchSystem lsf --workDir /path/to/octopus_toil_log --outdir . --writeLogs /path/to/octopus_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/octopus_0-7-4.cwl /path/to/inputs.yaml > octopus_toil.stdout 2> octopus_toil.stderr &
+```
+
+### Usage
+
+```shell
+usage: octopus_0-7-4.cwl [-h] --input INPUT [--normalId NORMALID]
+ [--tumorOnlySample] [--somaticOnlyCalls]
+ [--targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY]
+ [--skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY]
+ [--targettedCalling_file TARGETTEDCALLING_FILE]
+ [--skipRegions_file SKIPREGIONS_FILE]
+ [--error_models ERROR_MODELS] --reference REFERENCE
+ --output_file_name OUTPUT_FILE_NAME
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --input INPUT Tumor and normal bam files with .bai
+ --normalId NORMALID add the name of the normal sample
+ --tumorOnlySample mention this parameter if it is tumor only sample.
+ --somaticOnlyCalls if somatics only call is required. Use this with -f ON
+ parameter
+ --targettedCalling_singleEntry TARGETTEDCALLING_SINGLEENTRY
+ list of regions to call variants from. eg 1. chr1: all
+ of chr1. 2. chr2:10,000,000: the single position
+ 10000000 in chr2. chr3:5,000,000-: everything from 3.
+ chr3:5,000,000 onwards. 4.
+ chr4:100,000,000-200,000,000: everything between
+ chr4:100,000,000 and chr4:200,000,000. The interval is
+ half open so position chr4:200,000,000 is not
+ included.
+ --skipRegions_singleEntry SKIPREGIONS_SINGLEENTRY
+ to skip a set of regions
+ --targettedCalling_file TARGETTEDCALLING_FILE
+ regions in a text or bed file
+ --skipRegions_file SKIPREGIONS_FILE
+ regions in text or bed file format
+ --error_models ERROR_MODELS
+ error model will be in the format - [library
+ preparation]<.sequencer> eg: PCR.NOVASEQ
+ --reference REFERENCE
+ --output_file_name OUTPUT_FILE_NAME
+```
diff --git a/docs/picard-tools/README.md b/docs/picard-tools/README.md
new file mode 100644
index 00000000..094001ac
--- /dev/null
+++ b/docs/picard-tools/README.md
@@ -0,0 +1,2 @@
+# Picard Tools
+
diff --git a/picard_add_or_replace_read_groups_1.96/README.md b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md
similarity index 78%
rename from picard_add_or_replace_read_groups_1.96/README.md
rename to docs/picard-tools/picard_add_or_replace_read_groups_1.96.md
index b07355a4..e5a70249 100644
--- a/picard_add_or_replace_read_groups_1.96/README.md
+++ b/docs/picard-tools/picard_add_or_replace_read_groups_1.96.md
@@ -1,26 +1,26 @@
-# CWL and Dockerfile for running Picard - AddOrReplaceReadGroups
+# AddOrReplaceReadGroups v1.96
-## Version of tools in docker image (/container/Dockerfile)
+## Version of tools in docker image \(/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip |
-| R | 3.3.3 | r-base for opnejdk:8 |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) |
+| R | 3.3.3 | r-base for openjdk:8 |
-[](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com")
+[](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0)
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner picard_add_or_replace_read_groups_1.96.cwl example_inputs.yaml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
diff --git a/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md
new file mode 100644
index 00000000..70eb1246
--- /dev/null
+++ b/docs/picard-tools/picard_add_or_replace_read_groups_2.21.2.md
@@ -0,0 +1,90 @@
+# AddOrReplaceReadGroups v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir picardAddOrReplaceReadGroup_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir . --writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_2.21.2/picard_add_or_replace_read_groups_2.21.2.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner picard_add_or_replace_read_groups_2.21.2.cwl --help
+usage: picard_add_or_replace_read_groups_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name (bam or sam). Not Required
+ --sort_order SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --read_group_identifier READ_GROUP_IDENTIFIER
+ Read Group ID Default value: 1. This option can be set
+ to 'null' to clear the default value Required
+ --read_group_sequnecing_center READ_GROUP_SEQUNECING_CENTER
+ Read Group sequencing center name Default value: null.
+ Required
+ --read_group_library READ_GROUP_LIBRARY
+ Read Group Library. Required
+ --read_group_platform_unit READ_GROUP_PLATFORM_UNIT
+ Read Group platform unit (eg. run barcode) Required.
+ --read_group_sample_name READ_GROUP_SAMPLE_NAME
+ Read Group sample name. Required
+ --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM
+ Read Group platform (e.g. illumina, solid) Required.
+ --read_group_description READ_GROUP_DESCRIPTION
+ Read Group description Default value: null.
+ --read_group_run_date READ_GROUP_RUN_DATE
+ Read Group run date Default value: null.
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+```
+
diff --git a/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md
new file mode 100644
index 00000000..707df7f2
--- /dev/null
+++ b/docs/picard-tools/picard_add_or_replace_read_groups_4.1.8.1.md
@@ -0,0 +1,95 @@
+# AddOrReplaceReadGroups v4.1.8.1
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir picardAddOrReplaceReadGroup_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/picardAddOrReplaceReadGroup_toil_log/cwltoil.log --jobStore /path/to/picardAddOrReplaceReadGroup_jobStore --batchSystem lsf --workDir /path/to/picardAddOrReplaceReadGroup_toil_log --outdir . --writeLogs /path/to/picardAddOrReplaceReadGroup_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_add_or_replace_read_groups_4.1.8.1/picard_add_or_replace_read_groups_4.1.8.1.cwl /path/to/inputs.yaml > picardAddOrReplaceReadGroup_toil.stdout 2> picardAddOrReplaceReadGroup_toil.stderr &
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner picard_add_or_replace_read_groups_4.1.8.1.cwl --help
+usage: picard_add_or_replace_read_groups_4.1.8.1.cwl
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name (bam or sam). Not Required
+ --sort_order SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --read_group_identifier READ_GROUP_IDENTIFIER
+ Read Group ID Default value: 1. This option can be set
+ to 'null' to clear the default value Required
+ --read_group_sequencing_center READ_GROUP_SEQUENCING_CENTER
+ Read Group sequencing center name Default value: null.
+ Required
+ --read_group_library READ_GROUP_LIBRARY
+ Read Group Library. Required
+ --read_group_platform_unit READ_GROUP_PLATFORM_UNIT
+ Read Group platform unit (eg. run barcode) Required.
+ --read_group_sample_name READ_GROUP_SAMPLE_NAME
+ Read Group sample name. Required
+ --read_group_sequencing_platform READ_GROUP_SEQUENCING_PLATFORM
+ Read Group platform (e.g. illumina, solid) Required.
+ --read_group_description READ_GROUP_DESCRIPTION
+ Read Group description Default value: null.
+ --read_group_run_date READ_GROUP_RUN_DATE
+ Read Group run date Default value: null.
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for
+ writing compressed output
+ --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for
+ reading compressed input
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+```
+
diff --git a/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md
new file mode 100644
index 00000000..b88b626f
--- /dev/null
+++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.21.2.md
@@ -0,0 +1,78 @@
+# CollectAlignmentSummaryMetrics v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_collect_alignment_summary_metrics_2.21.2.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_collect_alignment_summary_metrics_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL
+ The level(s) at which to accumulate metrics. Default
+ value: [ALL_READS]. This option can be set to 'null'
+ to clear the default value. Possible values:
+ {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option
+ may be specified 0 or more times. This option can be
+ set to 'null' to clear the default list.
+ --max_insert_size MAX_INSERT_SIZE
+ Paired-end reads above this insert size will be
+ considered chimeric along with inter-chromosomal
+ pairs. Default value: 100000. This option can be set
+ to 'null' to clear the default value.
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --assume_sorted
+ --reference_sequence REFERENCE_SEQUENCE
+ Reference sequence file. Note that while this argument
+ isn't required, without it only a small subset of the
+ metrics will be calculated. Note also that if a
+ reference sequence is provided, it must be accompanied
+ by a sequence dictionary. Default value: null.
+ --stop_after STOP_AFTER
+ Stop after processing N reads, mainly for debugging.
+ Default value: 0. This option can be set to 'null' to
+ clear the default value.
+```
+
diff --git a/picard_collect_alignment_summary_metrics_2.8.1/README.md b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md
similarity index 85%
rename from picard_collect_alignment_summary_metrics_2.8.1/README.md
rename to docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md
index 16d0b11b..1a318d60 100644
--- a/picard_collect_alignment_summary_metrics_2.8.1/README.md
+++ b/docs/picard-tools/picard_collect_alignment_summary_metrics_2.8.1.md
@@ -1,19 +1,18 @@
-# CWL and Dockerfile for running Picard - CollectAlignmentSummaryMetrics
+# CollectAlignmentSummaryMetrics v2.8.1
-## Version of tools in docker image (../picard_mark_duplicates_2.8.1/container/Dockerfile)
-
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| picard | 2.8.1 | https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar |
-| R | 3.3.3 | r-base for opnejdk:8 |
+## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\)
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) |
+| R | 3.3.3 | r-base for openjdk:8 |
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner picard_collect_alignment_summary_metrics_2.8.1.cwl example_inputs.yaml
@@ -78,3 +77,4 @@ optional arguments:
Default value: 0. This option can be set to 'null' to
clear the default value.
```
+
diff --git a/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md
new file mode 100644
index 00000000..14b0eaee
--- /dev/null
+++ b/docs/picard-tools/picard_collectmultiplemetric_2.21.2.md
@@ -0,0 +1,78 @@
+# CollectMultipleMetrics v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_collectmultiplemetrics_2.21.2.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_collectmultiplemetrics_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL
+ The level(s) at which to accumulate metrics. Default
+ value: [ALL_READS]. This option can be set to 'null'
+ to clear the default value. Possible values:
+ {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option
+ may be specified 0 or more times. This option can be
+ set to 'null' to clear the default list.
+ --max_insert_size MAX_INSERT_SIZE
+ Paired-end reads above this insert size will be
+ considered chimeric along with inter-chromosomal
+ pairs. Default value: 100000. This option can be set
+ to 'null' to clear the default value.
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --assume_sorted
+ --reference_sequence REFERENCE_SEQUENCE
+ Reference sequence file. Note that while this argument
+ isn't required, without it only a small subset of the
+ metrics will be calculated. Note also that if a
+ reference sequence is provided, it must be accompanied
+ by a sequence dictionary. Default value: null.
+ --stop_after STOP_AFTER
+ Stop after processing N reads, mainly for debugging.
+ Default value: 0. This option can be set to 'null' to
+ clear the default value.
+```
+
diff --git a/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md
new file mode 100644
index 00000000..259b84d5
--- /dev/null
+++ b/docs/picard-tools/picard_collectmultiplemetric_2.8.1.md
@@ -0,0 +1,80 @@
+# CollectMultipleMetrics v2.8.1
+
+## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) |
+| R | 3.3.3 | r-base for openjdk:8 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_collectmultiplemetrics_2-8-1.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_collectmultiplemetrics_2-8-1.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --metrics_acciumulation_level METRICS_ACCIUMULATION_LEVEL
+ The level(s) at which to accumulate metrics. Default
+ value: [ALL_READS]. This option can be set to 'null'
+ to clear the default value. Possible values:
+ {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option
+ may be specified 0 or more times. This option can be
+ set to 'null' to clear the default list.
+ --max_insert_size MAX_INSERT_SIZE
+ Paired-end reads above this insert size will be
+ considered chimeric along with inter-chromosomal
+ pairs. Default value: 100000. This option can be set
+ to 'null' to clear the default value.
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --assume_sorted
+ --reference_sequence REFERENCE_SEQUENCE
+ Reference sequence file. Note that while this argument
+ isn't required, without it only a small subset of the
+ metrics will be calculated. Note also that if a
+ reference sequence is provided, it must be accompanied
+ by a sequence dictionary. Default value: null.
+ --stop_after STOP_AFTER
+ Stop after processing N reads, mainly for debugging.
+ Default value: 0. This option can be set to 'null' to
+ clear the default value.
+```
+
diff --git a/picard_fix_mate_information_1.96/README.md b/docs/picard-tools/picard_fix_mate_information_1.96.md
similarity index 71%
rename from picard_fix_mate_information_1.96/README.md
rename to docs/picard-tools/picard_fix_mate_information_1.96.md
index 567a78e3..22a9cd50 100644
--- a/picard_fix_mate_information_1.96/README.md
+++ b/docs/picard-tools/picard_fix_mate_information_1.96.md
@@ -1,26 +1,26 @@
-# CWL and Dockerfile for running Picard - FixMateInformation
+# FixMateInformation v1.96
-## Version of tools in docker image (../picard_add_or_replace_read_groups_1.96/container/Dockerfile)
+## Version of tools in docker image \(../picard\_add\_or\_replace\_read\_groups\_1.96/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| picard | 1.96 | https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip |
-| R | 3.3.3 | r-base for opnejdk:8 |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) |
+| R | 3.3.3 | r-base for openjdk:8 |
-[](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own image badge on microbadger.com") [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own version badge on microbadger.com") [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0 "Get your own license badge on microbadger.com")
+[](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0) [](https://microbadger.com/images/mskcc/picard_1.96:0.1.0)
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner picard_fix_mate_information_1.96.cwl example_inputs.yaml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
@@ -33,7 +33,7 @@
### Usage
-```
+```text
usage: picard_fix_mate_information_1.96.cwl [-h]
positional arguments:
@@ -72,4 +72,5 @@ optional arguments:
coordinate-sorted BAM file. Default value:false. This
option can be set to 'null' to clear the default
value. Possible values:{true, false}
-```
\ No newline at end of file
+```
+
diff --git a/docs/picard-tools/picard_fix_mate_information_2.21.2.md b/docs/picard-tools/picard_fix_mate_information_2.21.2.md
new file mode 100644
index 00000000..659bf4b1
--- /dev/null
+++ b/docs/picard-tools/picard_fix_mate_information_2.21.2.md
@@ -0,0 +1,72 @@
+# FixMateInformation v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_fix_mate_information_2.21.2.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir picardFixMate_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_2.21.2/picard_fix_mate_information_2.21.2.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr &
+```
+
+### Usage
+
+```text
+usage: picard_fix_mate_information_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input file to fix. This option may be specified 0
+ or more times
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name (bam or sam). Not Required
+ --sort_order SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+```
+
diff --git a/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md
new file mode 100644
index 00000000..7039d4d4
--- /dev/null
+++ b/docs/picard-tools/picard_fix_mate_information_4.1.8.1.md
@@ -0,0 +1,77 @@
+# FixMateInformation v4.1.8.1
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_fix_mate_information_4.1.8.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir picardFixMate_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/picardFixMate_toil_log/cwltoil.log --jobStore /path/to/picardFixMate_jobStore --batchSystem lsf --workDir /path/to/picardFixMate_toil_log --outdir . --writeLogs /path/to/picardFixMate_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/picard_fix_mate_information_4.1.8.1/picard_fix_mate_information_4.1.8.1.cwl /path/to/inputs.yaml > picardFixMate_toil.stdout 2> picardFixMate_toil.stderr &
+```
+
+### Usage
+
+```text
+usage: picard_fix_mate_information_4.1.8.1.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT The input file to fix. This option may be specified 0
+ or more times
+ --output_file_name OUTPUT_FILE_NAME
+ Output file name (bam or sam). Not Required
+ --sort_order SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for
+ writing compressed output
+ --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for
+ reading compressed input. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+```
+
diff --git a/docs/picard-tools/picard_hsmetrics_2.21.2.md b/docs/picard-tools/picard_hsmetrics_2.21.2.md
new file mode 100644
index 00000000..8c43151b
--- /dev/null
+++ b/docs/picard-tools/picard_hsmetrics_2.21.2.md
@@ -0,0 +1,86 @@
+# HSmetrics v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+> toil-cwl-runner picard_hsmetrics_2.21.2.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_hsmetrics_2.21.2.cwl [-h]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --bait_intervals BAIT_INTERVALS
+ An interval list file that contains the locations of
+ the baits used. Default value: null. This option must
+ be specified at least 1 times.
+ --bait_set_name BAIT_SET_NAME
+ Bait set name. If not provided it is inferred from the
+ filename of the bait intervals. Default value: null
+ --minimum_mapping_quality MINIMUM_MAPPING_QUALITY
+ Minimum mapping quality for a read to contribute
+ coverage. Default value: 20. This option can be set to
+ 'null' to clear the default value.
+ --minimum_base_quality MINIMUM_BASE_QUALITY
+ Minimum base quality for a base to contribute
+ coverage. Default value: 20. This option can be set to
+ 'null' to clear the default value.
+ --clip_overlapping_reads
+ True if we are to clip overlapping reads, false
+ otherwise. Default value: true. This option can be set
+ to 'null' to clear the default value. Possible values:
+ {true, false}
+ --target_intervals TARGET_INTERVALS
+ An interval list file that contains the locations of
+ the targets. Default value: null. This option must be
+ specified at least 1 times.
+ --input INPUT An aligned SAM or BAM file. Required.
+ --output_file_name OUTPUT_FILE_NAME
+ The output file to write the metrics to. Required.
+ --metric_accumulation_level METRIC_ACCUMULATION_LEVEL
+ The level(s) at which to accumulate metrics. Default
+ value: [ALL_READS]. This option can be set to 'null'
+ to clear the default value. Possible values:
+ {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option
+ may be specified 0 or more times. This option can be
+ set to 'null' to clear the default list.
+ --per_target_coverage PER_TARGET_COVERAGE
+ An optional file to output per target coverage
+ information to. Default value: null.
+ --per_base_coverage PER_BASE_COVERAGE
+ An optional file to output per base coverage
+ information to. The per-base file contains one line
+ per target base and can grow very large. It is not
+ recommended for use with large target sets. Default
+ value: null.
+ --near_distance NEAR_DISTANCE
+ The maximum distance between a read and the nearest
+ probe/bait/amplicon for the read to be considered
+ 'near probe' and included in percent selected. Default
+ value: 250. This option can be set to 'null' to clear
+ the default value.
+ --coverage_cap COVERAGE_CAP
+ Parameter to set a max coverage limit for Theoretical
+ Sensitivity calculations. Default is 200. Default
+ value: 200. This option can be set to 'null' to clear
+ the default value.
+ --sample_size SAMPLE_SIZE
+ Sample Size used for Theoretical Het Sensitivity
+ sampling. Default is 10000. Default value: 10000. This
+ option can be set to 'null' to clear the default
+ value.
+```
+
diff --git a/docs/picard-tools/picard_hsmetrics_2.8.1.md b/docs/picard-tools/picard_hsmetrics_2.8.1.md
new file mode 100644
index 00000000..627d3286
--- /dev/null
+++ b/docs/picard-tools/picard_hsmetrics_2.8.1.md
@@ -0,0 +1,26 @@
+# HSmetrics v2.8.1
+
+## Version of tools in docker image \(../picard\_mark\_duplicates\_2.8.1/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) |
+| R | 3.3.3 | r-base for openjdk:8 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+> toil-cwl-runner picard_hsmetrics_2.8.1.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> usage: picard_hsmetrics_2.8.1.cwl [-h]
+```
+
diff --git a/docs/picard-tools/picard_mark_duplicates_1.96.md b/docs/picard-tools/picard_mark_duplicates_1.96.md
new file mode 100644
index 00000000..dd13d5ed
--- /dev/null
+++ b/docs/picard-tools/picard_mark_duplicates_1.96.md
@@ -0,0 +1,20 @@
+# MarkDuplicates v1.96
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 1.96 | [https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip](https://sourceforge.net/projects/picard/files/picard-tools/1.96/picard-tools-1.96.zip) |
+| R | 3.3.3 | r-base for openjdk:8 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_mark_duplicates_1.96.cwl example_inputs.yaml
+```
+
diff --git a/docs/picard-tools/picard_mark_duplicates_2.21.2.md b/docs/picard-tools/picard_mark_duplicates_2.21.2.md
new file mode 100644
index 00000000..f978a4ba
--- /dev/null
+++ b/docs/picard-tools/picard_mark_duplicates_2.21.2.md
@@ -0,0 +1,77 @@
+# MarkDuplicates v2.21.2
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| picard | 2.21.2 | [https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.21.2/picard.jar) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_mark_duplicates_2.21.2.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: picard_mark_duplicates_2.21.2.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --duplication_metrics DUPLICATION_METRICS
+ File to write duplication metrics to Required.
+ --assume_sort_order ASSUME_SORT_ORDER
+ If not null, assume that the input file has this
+ order even if the header says otherwise. Default
+ value: null. Possible values: {unsorted, queryname,
+ coordinate}
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY
+ The scoring strategy for choosing the non-duplicate
+ among candidates. Default value:SUM_OF_BASE_QUALITIES.
+ This option can be set to 'null' to clear the default
+ value.Possible values: {SUM_OF_BASE_QUALITIES,
+ TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM}
+ --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE
+ The maximum offset between two duplicate clusters in
+ order to consider them optical duplicates. The default
+ is appropriate for unpatterned versions of the
+ Illumina platform. For the patterned flowcell models,
+ 2500 is more appropriate. For other platforms and
+ models, users should experiment to find what works
+ best. Default value: 100. This option can be set to
+ 'null' to clear the default value.
+```
+
diff --git a/docs/picard-tools/picard_mark_duplicates_2.8.1.md b/docs/picard-tools/picard_mark_duplicates_2.8.1.md
new file mode 100644
index 00000000..cfb0fc92
--- /dev/null
+++ b/docs/picard-tools/picard_mark_duplicates_2.8.1.md
@@ -0,0 +1,20 @@
+# MarkDuplicates v2.8.1
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| picard | 2.8.1 | [https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar](https://github.com/broadinstitute/picard/releases/download/2.8.1/picard.jar) |
+| R | 3.3.3 | r-base for openjdk:8 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_mark_duplicates_2.8.1.cwl example_inputs.yaml
+```
+
diff --git a/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md
new file mode 100644
index 00000000..fe6c11f1
--- /dev/null
+++ b/docs/picard-tools/picard_mark_duplicates_4.1.8.1.md
@@ -0,0 +1,115 @@
+# MarkDuplicates v4.1.8.1
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| gatk | 4.1.8.1 | broadinstitute/gatk:4.1.8.1 |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner picard_mark_duplicates_4.1.8.1.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+usage: picard_mark_duplicates_4.1.8.1.cwl [-h]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --memory_per_job MEMORY_PER_JOB
+ Memory per job in megabytes
+ --memory_overhead MEMORY_OVERHEAD
+ Memory overhead per job in megabytes
+ --number_of_threads NUMBER_OF_THREADS
+ --input INPUT Input file (bam or sam). Required.
+ --output_file_name OUTPUT_FILE_NAME
+ Output file (bam or sam).
+ --duplication_metrics DUPLICATION_METRICS
+ File to write duplication metrics to. Required.
+ --assume_sort_order ASSUME_SORT_ORDER
+ Optional sort order to output in. If not supplied
+ OUTPUT is in the same order as INPUT.Default value:
+ null. Possible values: {unsorted, queryname,
+ coordinate}
+ --tmp_dir TMP_DIR This option may be specified 0 or more times
+ --validation_stringency VALIDATION_STRINGENCY
+ Validation stringency for all SAM files read by this
+ program. Setting stringency to SILENT can improve
+ performance when processing a BAM file in which
+ variable-length data (read, qualities, tags) do not
+ otherwise need to be decoded. Default value: STRICT.
+ This option can be set to 'null' to clear the default
+ value. Possible values: {STRICT,LENIENT, SILENT}
+ --bam_compression_level BAM_COMPRESSION_LEVEL
+ Compression level for all compressed files created
+ (e.g. BAM and GELI). Default value:5. This option can
+ be set to 'null' to clear the default value.
+ --create_bam_index Whether to create a BAM index when writing a
+ coordinate-sorted BAM file. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --read_name_regex READ_NAME_REGEX
+ MarkDuplicates can use the tile and cluster positions
+ to estimate the rate of optical duplication in
+ addition to the dominant source of duplication, PCR,
+ to provide a more accurate estimation of library size.
+ By default (with no READ_NAME_REGEX specified),
+ MarkDuplicates will attempt to extract coordinates
+ using a split on ':' (see Note below). Set
+ READ_NAME_REGEX to 'null' to disable optical duplicate
+ detection. Note that without optical duplicate counts,
+ library size estimation will be less accurate. If the
+ read name does not follow a standard Illumina colon-
+ separation convention, but does contain tile and x,y
+ coordinates, a regular expression can be specified to
+ extract three variables: tile/region, x coordinate and
+ y coordinate from a read name. The regular expression
+ must contain three capture groups for the three
+ variables, in order. It must match the entire read
+ name. e.g. if field names were separated by semi-colon
+ (';') this example regex could be specified
+ (?:.*;)?([0-9]+)[^;]*;([0-9]+)[^;]*;([0-9]+)[^;]*$
+ Note that if no READ_NAME_REGEX is specified, the read
+ name is split on ':'. For 5 element names, the 3rd,
+ 4th and 5th elements are assumed to be tile, x and y
+ values. For 7 element names (CASAVA 1.8), the 5th,
+ 6th, and 7th elements are assumed to be tile, x and y
+ values.
+ --sorting_collection_size_ratio SORTING_COLLECTION_SIZE_RATIO
+ This number, plus the maximum RAM available to the
+ JVM, determine the memory footprint used by some of
+ the sorting collections. If you are running out of
+ memory, try reducing this number.
+ --use_jdk_deflater Use the JDK Deflater instead of the Intel Deflater for
+ writing compressed output
+ --use_jdk_inflater Use the JDK Inflater instead of the Intel Inflater for
+ reading compressed input. Default value:false. This
+ option can be set to 'null' to clear the default
+ value. Possible values:{true, false}
+ --duplicate_scoring_strategy DUPLICATE_SCORING_STRATEGY
+ The scoring strategy for choosing the non-duplicate
+ among candidates. Default value:SUM_OF_BASE_QUALITIES.
+ This option can be set to 'null' to clear the default
+ value.Possible values: {SUM_OF_BASE_QUALITIES,
+ TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM}
+ --optical_duplicate_pixel_distance OPTICAL_DUPLICATE_PIXEL_DISTANCE
+ The maximum offset between two duplicate clusters in
+ order to consider them optical duplicates. The default
+ is appropriate for unpatterned versions of the
+ Illumina platform. For the patterned flowcell models,
+ 2500 is more appropriate. For other platforms and
+ models, users should experiment to find what works
+ best. Default value: 100. This option can be set to
+ 'null' to clear the default value.
+```
+
diff --git a/docs/postprocessing_variant_calls/README.md b/docs/postprocessing_variant_calls/README.md
new file mode 100644
index 00000000..a4cf29de
--- /dev/null
+++ b/docs/postprocessing_variant_calls/README.md
@@ -0,0 +1 @@
+# postprocessing variant calls (pv)
diff --git a/docs/postprocessing_variant_calls/vardict_filter_case-control_0.1.4.md b/docs/postprocessing_variant_calls/vardict_filter_case-control_0.1.4.md
new file mode 100644
index 00000000..167be1fb
--- /dev/null
+++ b/docs/postprocessing_variant_calls/vardict_filter_case-control_0.1.4.md
@@ -0,0 +1,47 @@
+## CWL pv_vardict_case-control_filter.cwl
+
+- CWL specification 1.0
+- Use example_inputs_case-control_filter.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner pv_vardict_case-control_filter.cwl example_inputs_case-control_filter.yaml
+```
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/pv_vardict_case-control_filter.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/pv_vardict_case-control_filter_toil.log --jobStore /path/to/pv_vardict_case-control_filter_jobStore --batchSystem lsf --workDir /path/to/pv_vardict_case-control_filter_toil_log --outdir . --writeLogs /path/to/pv_vardict_case-control_filter_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/pv_vardict_case-control_filter.cwl /path/to/inputs.yaml > pv_vardict_case-control_filter_toil.stdout 2> pv_vardict_case-control_filter_toil.stderr &
+```
+
+### Usage:
+
+```
+Usage: pv vardict case-control filter [OPTIONS]
+
+ This tool helps to filter vardict version 1.4.6 VCFs for case control
+ calling
+
+Options:
+ -i, --inputVcf FILE Input vcf generated by vardict which needs
+ to be processed [required]
+ --tsampleName TEXT Name of the tumor Sample [required]
+ -dp, --totalDepth INTEGER RANGE
+ Tumor total depth threshold [default: 20;
+ x>=20]
+ -ad, --alleledepth INTEGER RANGE
+ [x>=1]
+ -tnr, --tnRatio INTEGER Tumor-Normal variant fraction ratio
+ threshold [default: 1]
+ -vf, --variantFraction FLOAT Tumor variant fraction threshold [default:
+ 5e-05]
+ -mq, --minQual INTEGER Minimum variant call quality [default: 0]
+ -fg, --filterGermline Whether to remove calls without 'somatic'
+ status
+ -o, --outDir TEXT Full Path to the output dir
+ --help Show this message and exit.
+```
+
diff --git a/docs/postprocessing_variant_calls/vardict_filter_single-sample_0.1.4.md b/docs/postprocessing_variant_calls/vardict_filter_single-sample_0.1.4.md
new file mode 100644
index 00000000..163beaf3
--- /dev/null
+++ b/docs/postprocessing_variant_calls/vardict_filter_single-sample_0.1.4.md
@@ -0,0 +1,44 @@
+## CWL pv_vardict_single_filter.cwl
+- CWL specification 1.0
+- Use example_inputs_single_filter.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner pv_vardict_single_filter.cwl example_inputs_single_filter.yaml
+```
+**If at MSK, using the JUNO cluster having installed toil-msk version 3.21.1 you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/pv_vardict_single_filter.cwl /path/to/inputs.yaml
+#Using toil-cwl-runner
+> toil-cwl-runner --singularity --logFile /path/to/pv_vardict_single_filter_toil.log --jobStore /path/to/pv_vardict_single_filter_jobStore --batchSystem lsf --workDir /path/to/pv_vardict_single_filter_toil_log --outdir . --writeLogs /path/to/pv_vardict_single_filter_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --disableChaining --maxLogFileSize 20000000000 /path/to/pv_vardict_single_filter.cwl /path/to/inputs.yaml > pv_vardict_single_filter_toil.stdout 2> pv_vardict_single_filter_toil.stderr &
+```
+
+### Usage
+
+```
+Usage: pv vardict single filter [OPTIONS]
+
+ This tool helps to filter vardict version 1.4.6 VCFs for single sample
+ calling
+
+Options:
+ -i, --inputVcf FILE Input vcf generated by vardict which needs
+ to be processed [required]
+ --tsampleName TEXT Name of the tumor Sample [required]
+ -dp, --totalDepth INTEGER RANGE
+ Tumor total depth threshold [default: 20;
+ x>=20]
+ -ad, --alleledepth INTEGER RANGE
+ [x>=1]
+ -tnr, --tnRatio INTEGER Tumor-Normal variant fraction ratio
+ threshold [default: 1]
+ -vf, --variantFraction FLOAT Tumor variant fraction threshold [default:
+ 5e-05]
+ -mq, --minQual INTEGER Minimum variant call quality [default: 0]
+ -fg, --filterGermline Whether to remove calls without 'somatic'
+ status
+ -o, --outDir TEXT Full Path to the output dir
+ --help Show this message and exit.
+```
diff --git a/docs/trim-galore/README.md b/docs/trim-galore/README.md
new file mode 100644
index 00000000..2f5f0270
--- /dev/null
+++ b/docs/trim-galore/README.md
@@ -0,0 +1,2 @@
+# Trim Galore
+
diff --git a/trim_galore_0.6.2/README.md b/docs/trim-galore/trim_galore_0.6.2.md
similarity index 83%
rename from trim_galore_0.6.2/README.md
rename to docs/trim-galore/trim_galore_0.6.2.md
index 3727924e..4ef02a4d 100644
--- a/trim_galore_0.6.2/README.md
+++ b/docs/trim-galore/trim_galore_0.6.2.md
@@ -1,25 +1,25 @@
-# CWL and Dockerfile for running Trim Galore
+# v0.6.2
-## Version of tools in docker image (/container/Dockerfile)
+## Version of tools in docker image \(/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| Ubuntu base image | 18.04 | - |
-| cutadapt | 2.3 | https://pypi.org/project/cutadapt/ |
-| FASTQC | 0.11.8 | https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip |
-| Trim Galore | 0.6.2 | https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| Ubuntu base image | 18.04 | - |
+| cutadapt | 2.3 | [https://pypi.org/project/cutadapt/](https://pypi.org/project/cutadapt/) |
+| FASTQC | 0.11.8 | [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc\_v0.11.8.zip](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.8.zip) |
+| Trim Galore | 0.6.2 | [https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz](https://github.com/FelixKrueger/TrimGalore/archive/0.6.2.tar.gz) |
## CWL
-- CWL specification 1.0
-- Use example_inputs.yaml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner trim_galore_0.6.2.cwl example_inputs.yaml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
@@ -32,7 +32,7 @@
### Usage
-```
+```text
usage: trim_galore_0.6.2.cwl [-h]
positional arguments:
@@ -92,4 +92,5 @@ optional arguments:
--error_rate ERROR_RATE
Maximum allowed error rate (no. of errors divided by
the length of the matching region) (default: 0.1)
-```
\ No newline at end of file
+```
+
diff --git a/docs/ubuntu-utilites/README.md b/docs/ubuntu-utilites/README.md
new file mode 100644
index 00000000..5d69693e
--- /dev/null
+++ b/docs/ubuntu-utilites/README.md
@@ -0,0 +1,2 @@
+# Ubuntu utilities
+
diff --git a/docs/ubuntu-utilites/utilities_ubuntu_18.04.md b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md
new file mode 100644
index 00000000..ba695d67
--- /dev/null
+++ b/docs/ubuntu-utilites/utilities_ubuntu_18.04.md
@@ -0,0 +1,26 @@
+# v18.04
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| Ubuntu base image | 18.04 | - |
+
+## Available tools
+
+| Tool | Description |
+| :--- | :--- |
+| sort.cwl | sort lines of text files |
+| gzip.cwl | compress or expand files |
+| mv.cwl | move \(rename\) files |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs\_toolname.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner gzip.cwl example_inputs_gzip.yaml
+```
+
diff --git a/docs/vardictjava/README.md b/docs/vardictjava/README.md
new file mode 100644
index 00000000..5cf8e57f
--- /dev/null
+++ b/docs/vardictjava/README.md
@@ -0,0 +1,3 @@
+# VardictJava
+
+
diff --git a/docs/vardictjava/vardictjava_1.8.2.md b/docs/vardictjava/vardictjava_1.8.2.md
new file mode 100644
index 00000000..21b4ac81
--- /dev/null
+++ b/docs/vardictjava/vardictjava_1.8.2.md
@@ -0,0 +1,73 @@
+# Vardict v1.8.2 - Single sample mode
+To run VarDictJava in single sample mode, vardict_workflow_single_sample.cwl should be run. vardict_workflow_single_sample.cwl will run 3 workflows to implement the example command in the original documentation, as explained here:
+https://github.com/AstraZeneca-NGS/VarDictJava#single-sample-mode
+
+
+
+## Version of tools in docker image \(/container/Dockerfile\)
+
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| ubuntu base image (alpine) | 3.8 | - |
+| vardict | 1.8.2 | [https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2](https://github.com/AstraZeneca-NGS/VarDictJava/releases/tag/v1.8.2) |
+| perl | 5.26.2-r1 | [https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl](https://pkgs.alpinelinux.org/package/edge/main/aarch64/perl) |
+| r | 3.5.1 | [https://pkgs.alpinelinux.org/package/edge/community/x86/R](https://pkgs.alpinelinux.org/package/edge/community/x86/R) |
+
+## CWL
+
+* CWL specification 1.0
+* Use example\_inputs.yaml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner vardict_single_sample.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/vardict_1.8.2/vardict_single_sample.cwl /path/to/inputs.yaml
+
+#Using Toil-cwl-runner
+toil-cwl-runner --singularity vardict_single_sample.cwl example_inputs.yaml
+```
+
+### Usage
+
+```bash
+> toil-cwl-runner vardict_single_sample.cwl --help
+usage: vardict_single_sample.cwl [-h] [--bedfile BEDFILE]
+ [--input_bam_control INPUT_BAM_CONTROL]
+ --input_bam_case INPUT_BAM_CASE
+ --reference_fasta REFERENCE_FASTA
+ [--sample_name SAMPLE_NAME]
+ [--bed_file_column_for_region_start BED_FILE_COLUMN_FOR_REGION_START]
+ [--bed_file_column_for_region_end BED_FILE_COLUMN_FOR_REGION_END]
+ [--bed_file_column_for_gene_name BED_FILE_COLUMN_FOR_GENE_NAME]
+ [--bed_file_column_for_chromsome BED_FILE_COLUMN_FOR_CHROMSOME]
+ --control_sample_name CONTROL_SAMPLE_NAME
+ [--filter_variants]
+ [--minimum_allele_frequency MINIMUM_ALLELE_FREQUENCY]
+ --output_vcf OUTPUT_VCF
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --bedfile BEDFILE
+ --input_bam_control INPUT_BAM_CONTROL
+ --input_bam_case INPUT_BAM_CASE
+ --reference_fasta REFERENCE_FASTA
+ --sample_name SAMPLE_NAME
+ --bed_file_column_for_region_start BED_FILE_COLUMN_FOR_REGION_START
+ --bed_file_column_for_region_end BED_FILE_COLUMN_FOR_REGION_END
+ --bed_file_column_for_gene_name BED_FILE_COLUMN_FOR_GENE_NAME
+ --bed_file_column_for_chromsome BED_FILE_COLUMN_FOR_CHROMSOME
+ --control_sample_name CONTROL_SAMPLE_NAME
+ --filter_variants
+ --minimum_allele_frequency MINIMUM_ALLELE_FREQUENCY
+ --output_vcf OUTPUT_VCF
+```
diff --git a/docs/vcf2maf/README.md b/docs/vcf2maf/README.md
new file mode 100644
index 00000000..e349a6cd
--- /dev/null
+++ b/docs/vcf2maf/README.md
@@ -0,0 +1,3 @@
+# VCF2MAF
+
+
diff --git a/docs/vcf2maf/vcf2maf_1.6.21.md b/docs/vcf2maf/vcf2maf_1.6.21.md
new file mode 100644
index 00000000..6d8cd35d
--- /dev/null
+++ b/docs/vcf2maf/vcf2maf_1.6.21.md
@@ -0,0 +1,76 @@
+# CWL and Dockerfile for running vcf2maf v1.6.21
+
+## Version of tools in docker image (/container/Dockerfile)
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| clearlinux (base image) | - | - |
+| vcf2maf | 1.6.21 | https://github.com/mskcc/vcf2maf/archive/v1.6.21.zip |
+| VEP | 105 | - |
+| MINICONDA_VERSION | py37_4.9.2 | https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh |
+| BCFTOOLS_VERSION | 1.10.2 | - |
+| SAMTOOLS_VERSION | 1.10 | - |
+| VCF2MAF_VERSION | 1.6.21 | - |
+| HTSLIB_VERSION | 1.10.2 | - |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner vcf2maf_1.6.21.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool --singularity --non-strict /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir vcf2maf_toil_log
+> toil-cwl-runner --singularity --logFile /path/to/vcf2maf_toil_log/cwltoil.log --jobStore /path/to/vcf2maf_jobStore --batchSystem lsf --workDir /path/to/vcf2maf_toil_log --outdir . --writeLogs /path/to/vcf2maf_toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/vcf2maf_1.6.21.cwl /path/to/inputs.yaml > vcf2maf_toil.stdout 2> vcf2maf_toil.stderr &
+```
+
+### Usage
+
+```
+Usage:
+ perl vcf2maf.pl --help
+ perl vcf2maf.pl --input-vcf input.vcf --output-maf output.maf --tumor-id TUMOR_ID --normal-id NORMAL_ID
+
+--input-vcf Path to input file in VCF format
+--output-maf Path to output MAF file
+--tmp-dir Folder to retain intermediate VCFs after runtime [Default: Folder containing input VCF]
+--tumor-id Tumor_Sample_Barcode to report in the MAF [TUMOR]
+--normal-id Matched_Norm_Sample_Barcode to report in the MAF [NORMAL]
+--vcf-tumor-id Tumor sample ID used in VCF's genotype columns [--tumor-id]
+--vcf-normal-id Matched normal ID used in VCF's genotype columns [--normal-id]
+--custom-enst List of custom ENST IDs that override canonical selection
+--vep-path Folder containing the vep script [~/miniconda3/bin]
+--vep-data VEP's base cache/plugin directory [~/.vep]
+--vep-forks Number of forked processes to use when running VEP [4]
+--vep-custom String to pass into VEP's --custom option []
+--vep-config Config file to pass into VEP's --config option []
+--vep-overwrite Allow VEP to overwrite output VCF if it exists
+--buffer-size Number of variants VEP loads at a time; Reduce this for low memory systems [5000]
+--any-allele When reporting co-located variants, allow mismatched variant alleles too
+--inhibit-vep Skip running VEP, but extract VEP annotation in VCF if found
+--online Use useastdb.ensembl.org instead of local cache (supports only GRCh38 VCFs listing <100 events)
+--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz]
+--max-subpop-af Add FILTER tag common_variant if gnomAD reports any subpopulation AFs greater than this [0.0004]
+--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens]
+--ncbi-build NCBI reference assembly of variants MAF (e.g. GRCm38 for mouse) [GRCh37]
+--cache-version Version of offline cache to use with VEP (e.g. 75, 91, 102) [Default: Installed version]
+--maf-center Variant calling center to report in MAF [.]
+--retain-info Comma-delimited names of INFO fields to retain as extra columns in MAF []
+--retain-fmt Comma-delimited names of FORMAT fields to retain as extra columns in MAF []
+--retain-ann Comma-delimited names of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF []
+--min-hom-vaf If GT undefined in VCF, minimum allele fraction to call a variant homozygous [0.7]
+--remap-chain Chain file to remap variants to a different assembly before running VEP
+--verbose Print more things to log progress
+--help Print a brief help message and quit
+--man Print the detailed manual
+```
diff --git a/docs/waltz/README.md b/docs/waltz/README.md
new file mode 100644
index 00000000..0d43eaff
--- /dev/null
+++ b/docs/waltz/README.md
@@ -0,0 +1,2 @@
+# Waltz
+
diff --git a/waltz_count_reads_3.1.1/README.md b/docs/waltz/waltz_count_reads_3.1.1.md
similarity index 70%
rename from waltz_count_reads_3.1.1/README.md
rename to docs/waltz/waltz_count_reads_3.1.1.md
index 8ad83443..e3131e9f 100644
--- a/waltz_count_reads_3.1.1/README.md
+++ b/docs/waltz/waltz_count_reads_3.1.1.md
@@ -1,24 +1,25 @@
-# CWL and Dockerfile for running Waltz - Count Reads
+# CountReads v3.1.1
-## Version of tools in docker image (/container/Dockerfile)
+## Version of tools in docker image \(/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) |
[](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1)
+
## CWL
-- CWL specification 1.0
-- Use example_inputs.yml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner waltz_count_reads_3.1.1.cwl example_inputs.yml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
@@ -55,3 +56,4 @@ optional arguments:
--number_of_threads NUMBER_OF_THREADS
--bed_file BED_FILE
```
+
diff --git a/waltz_pileupmatrices_3.1.1/README.md b/docs/waltz/waltz_pileupmatrices_3.1.1.md
similarity index 70%
rename from waltz_pileupmatrices_3.1.1/README.md
rename to docs/waltz/waltz_pileupmatrices_3.1.1.md
index b5aed666..cc432392 100644
--- a/waltz_pileupmatrices_3.1.1/README.md
+++ b/docs/waltz/waltz_pileupmatrices_3.1.1.md
@@ -1,24 +1,25 @@
-# CWL and Dockerfile for running Waltz - PileupMetrics
+# PileupMetrics v3.1.1
-## Version of tools in docker image (../waltz_count_reads_3.1.1/container/Dockerfile)
+## Version of tools in docker image \(../waltz\_count\_reads\_3.1.1/container/Dockerfile\)
-| Tool | Version | Location |
-|--- |--- |--- |
-| java base image | 8 | - |
-| waltz | 3.1.1 | https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar |
+| Tool | Version | Location |
+| :--- | :--- | :--- |
+| java base image | 8 | - |
+| waltz | 3.1.1 | [https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar](https://github.com/juberpatel/Waltz/releases/download/v3.1.1/Waltz-3.1.1.jar) |
[](https://github.com/juberpatel/Waltz/releases/tag/v3.1.1)
+
## CWL
-- CWL specification 1.0
-- Use example_inputs.yml to see the inputs to the cwl
-- Example Command using [toil](https://toil.readthedocs.io):
+* CWL specification 1.0
+* Use example\_inputs.yml to see the inputs to the cwl
+* Example Command using [toil](https://toil.readthedocs.io):
```bash
> toil-cwl-runner waltz_pileupmatrices_3.1.1.cwl example_inputs.yml
```
-**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+**If at MSK, using the JUNO cluster you can use the following command**
```bash
#Using CWLTOOL
@@ -56,3 +57,4 @@ optional arguments:
--number_of_threads NUMBER_OF_THREADS
--bed_file BED_FILE
```
+
diff --git a/expression_tools/README.md b/expression_tools/README.md
new file mode 100644
index 00000000..f0c79bf1
--- /dev/null
+++ b/expression_tools/README.md
@@ -0,0 +1,17 @@
+# CWL Expression tools
+
+## Available tools
+
+| Tool | Description |
+| -------- | ------------------------ |
+| put_in_dir.cwl | put the list of files into the same directory |
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs_toolname.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner put_in_dir.cwl example_inputs_put_in_dir.yaml
+```
diff --git a/expression_tools/put_in_dir.cwl b/expression_tools/put_in_dir.cwl
new file mode 100644
index 00000000..382b64e5
--- /dev/null
+++ b/expression_tools/put_in_dir.cwl
@@ -0,0 +1,103 @@
+#!/usr/bin/env cwl-runner
+# originally from https://github.com/mskcc/pluto-cwl
+# Purpose: collect the supplied `files` into a single output Directory.
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+class: ExpressionTool
+id: put-in-dir
+
+inputs:
+ output_directory_name:
+ type: string
+ doc: >-
+ Put all `files` in a directory called `output_directory_name`.
+ output_subdirectory_name:
+ type: string?
+ doc: >-
+ If specified, nest all `files` within a directory called `output_subdirectory_name`, which itself is within `output_directory_name`.
+ files:
+ type:
+ type: array
+ items:
+ - File
+ - type: array
+ items:
+ - File
+ - Directory
+ - 'null'
+
+outputs:
+ directory:
+ type: Directory
+
+# Returns a Directory named `output_directory_name` whose listing holds the
+# entries of `files`; nested lists are flattened one level, and top-level
+# null entries and empty nested lists contribute nothing to the listing.
+expression: |
+ ${
+ var output_files = [];
+ var input_files = inputs.files.filter(function(single_file) {  // drop entries that stringify to "none" (any case)
+ return String(single_file).toUpperCase() != 'NONE';
+ });
+
+ for (var i = 0; i < input_files.length; i++) {
+ // Nested list: append each member (Array.isArray keeps an empty list from being pushed as an entry)
+ if (Array.isArray(input_files[i])) {
+ for (var ii = 0; ii < input_files[i].length; ii++) {
+ output_files.push(input_files[i][ii]);
+ }
+ // Single entry: append it unless it is null/undefined
+ } else if (input_files[i]) {
+ output_files.push(input_files[i]);
+ }
+ }
+
+ if (inputs.output_subdirectory_name) {  // wrap the files in one extra directory level
+ return {
+ 'directory': {
+ 'class': 'Directory',
+ 'basename': inputs.output_directory_name,
+ 'listing': [
+ {
+ 'class': 'Directory',
+ 'basename': inputs.output_subdirectory_name,
+ 'listing': output_files
+ }
+ ]
+ }
+ };
+ } else {  // no subdirectory requested: files sit directly in the output directory
+ return {
+ 'directory': {
+ 'class': 'Directory',
+ 'basename': inputs.output_directory_name,
+ 'listing': output_files
+ }
+ };
+ }
+
+ }
+
+requirements:
+ - class: ResourceRequirement
+ ramMin: 2000
+ coresMin: 1
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
diff --git a/fastp_0.20.1/README.md b/fastp_0.20.1/README.md
new file mode 100644
index 00000000..10e9866a
--- /dev/null
+++ b/fastp_0.20.1/README.md
@@ -0,0 +1,84 @@
+# CWL and Dockerfile for running Fastp
+
+## Version of tools in docker image
+
+| Tool | Version | Location |
+|--- |--- |--- |
+| fastp | 0.20.1 | quay.io/biocontainers/fastp:0.20.1--h8b12597_0 |
+
+
+## CWL
+
+- CWL specification 1.0
+- Use example_inputs.yaml to see the inputs to the cwl
+- Example Command using [toil](https://toil.readthedocs.io):
+
+```bash
+ > toil-cwl-runner ./fastp_0.20.1.cwl example_inputs.yaml
+```
+
+**If at MSK, using the JUNO cluster having installed toil version 3.19 and manually modifying [lsf.py](https://github.com/DataBiosphere/toil/blob/releases/3.19.0/src/toil/batchSystems/lsf.py#L170) by removing `type==X86_64 &&` you can use the following command**
+
+```bash
+#Using CWLTOOL
+> cwltool ./fastp_0.20.1.cwl example_inputs.yaml
+
+#Using toil-cwl-runner
+> mkdir toil_log
+> toil-cwl-runner --singularity --logFile /path/to/toil_log/cwltoil.log --jobStore /path/to/jobStore --batchSystem lsf --workDir /path/to/toil_log --outdir . --writeLogs /path/to/toil_log --logLevel DEBUG --stats --retryCount 2 --disableCaching --maxLogFileSize 20000000000 /path/to/fastp_0.20.1/fastp_0.20.1.cwl /path/to/inputs.yaml > toil.stdout 2> toil.stderr &
+```
+
+### Usage
+```
+usage: fastp_0.20.1.cwl [-h] --read1_input READ1_INPUT --read1_output_path
+ READ1_OUTPUT_PATH [--read2_input READ2_INPUT]
+ [--read2_output_path READ2_OUTPUT_PATH]
+ [--unpaired1_path UNPAIRED1_PATH]
+ [--unpaired2_path UNPAIRED2_PATH]
+ [--failed_reads_path FAILED_READS_PATH]
+ [--read1_adapter_sequence READ1_ADAPTER_SEQUENCE]
+ [--read2_adapter_sequence READ2_ADAPTER_SEQUENCE]
+ [--minimum_read_length MINIMUM_READ_LENGTH]
+ --json_output_path JSON_OUTPUT_PATH --html_output_path
+ HTML_OUTPUT_PATH
+ [job_order]
+
+Setup and execute Fastp
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --read1_input READ1_INPUT
+ read1 input file name
+ --read1_output_path READ1_OUTPUT_PATH
+ read1 output file name
+ --read2_input READ2_INPUT
+ read2 input file name, for PE data
+ --read2_output_path READ2_OUTPUT_PATH
+ read2 output file name
+ --unpaired1_path UNPAIRED1_PATH
+ for PE input, if read1 passed QC but read2 not, it
+ will be written to unpaired1.
+ --unpaired2_path UNPAIRED2_PATH
+ for PE input, if read2 passed QC but read1 not, it
+ will be written to unpaired2.
+ --failed_reads_path FAILED_READS_PATH
+ specify the file to store reads that cannot pass the
+ filters.
+ --read1_adapter_sequence READ1_ADAPTER_SEQUENCE
+ the adapter for read1. For SE data, if not specified,
+ the adapter will be auto-detected. For PE data, this
+ is used if R1/R2 are found not overlapped.
+ --read2_adapter_sequence READ2_ADAPTER_SEQUENCE
+ the adapter for read2. For PE data, this is used if
+ R1/R2 are found not overlapped.
+ --minimum_read_length MINIMUM_READ_LENGTH
+ reads shorter than length_required will be discarded,
+ default is 15.
+ --json_output_path JSON_OUTPUT_PATH
+ the json format report file name
+ --html_output_path HTML_OUTPUT_PATH
+ the html format report file name
+```
diff --git a/fastp_0.20.1/example_inputs.yaml b/fastp_0.20.1/example_inputs.yaml
new file mode 100644
index 00000000..5d3af3bf
--- /dev/null
+++ b/fastp_0.20.1/example_inputs.yaml
@@ -0,0 +1,13 @@
+read1_input:
+  path: './test_data/R1.fq'
+  class: File
+read2_input:  # optional mate file; supplied here, so this is a paired-end example
+  path: './test_data/R2.fq'
+  class: File
+read1_output_path: './R1.output'
+read2_output_path: './R2.output'
+read1_adapter_sequence: 'GATCGGAAGAGC'
+read2_adapter_sequence: 'AGATCGGAAGAGC'
+minimum_read_length: 25  # overrides the tool's documented default of 15
+json_output_path: 'sample_name.json'
+html_output_path: 'sample_name.html'
diff --git a/fastp_0.20.1/fastp_0.20.1.cwl b/fastp_0.20.1/fastp_0.20.1.cwl
new file mode 100644
index 00000000..f6356a6a
--- /dev/null
+++ b/fastp_0.20.1/fastp_0.20.1.cwl
@@ -0,0 +1,225 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: fastp_0_20_1
+baseCommand:
+ - fastp
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ doc: 'worker thread number, default is 2 (int [=2])'
+ - id: read1_input
+ type: File
+ inputBinding:
+ position: 0
+ prefix: '--in1'
+ doc: |
+ read1 input file name
+ - id: read1_output_path
+ type: string
+ inputBinding:
+ position: 0
+ prefix: '--out1'
+ doc: |
+ read1 output file name
+ - id: read2_input
+ type: File?
+ inputBinding:
+ position: 0
+ prefix: '--in2'
+ doc: |
+ read2 input file name, for PE data
+ - id: read2_output_path
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--out2'
+ doc: |
+ read2 output file name
+ - id: unpaired1_path
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--unpaired1'
+ doc: >
+ for PE input, if read1 passed QC but read2 not, it will be written to
+ unpaired1.
+ - id: unpaired2_path
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--unpaired2'
+ doc: >
+ for PE input, if read2 passed QC but read1 not, it will be written to
+ unpaired2.
+ - id: failed_reads_path
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--failed_out'
+ doc: |
+ specify the file to store reads that cannot pass the filters.
+ - id: read1_adapter_sequence
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--adapter_sequence'
+ doc: >
+ the adapter for read1. For SE data, if not specified, the adapter will be
+ auto-detected. For PE data, this is used if R1/R2 are found not
+ overlapped.
+ - id: read2_adapter_sequence
+ type: string?
+ inputBinding:
+ position: 0
+ prefix: '--adapter_sequence_r2'
+ doc: >
+ the adapter for read2. For PE data, this is used if R1/R2 are found not
+ overlapped.
+ - id: minimum_read_length
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--length_required'
+ doc: |
+ reads shorter than length_required will be discarded, default is 15.
+ - id: maximum_read_length
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--length_limit'
+ doc: >
+ reads longer than length_limit will be discarded, default 0 means no
+ limitation.
+ - id: max_len_read1
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--max_len1'
+ doc: >-
+ if read1 is longer than max_len1, then trim read1 at its tail to make it
+ as long as max_len1. Default 0 means no limitation
+ - id: max_len_read2
+ type: int?
+ inputBinding:
+ position: 0
+ prefix: '--max_len2'
+ doc: >-
+ if read2 is longer than max_len2, then trim read2 at its tail to make it
+ as long as max_len2. Default 0 means no limitation. If it's not specified,
+ it will follow read1's settings
+ - default: fastp.json
+ id: json_output_path
+ type: string
+ inputBinding:
+ position: 0
+ prefix: '--json'
+ doc: |
+ the json format report file name
+ - default: fastp.html
+ id: html_output_path
+ type: string
+ inputBinding:
+ position: 0
+ prefix: '--html'
+ doc: |
+ the html format report file name
+ - id: disable_quality_filtering
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--disable_quality_filtering'
+ doc: >-
+ quality filtering is enabled by default. If this option is specified,
+ quality filtering is disabled
+ - id: disable_trim_poly_g
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: '--disable_trim_poly_g'
+ doc: >-
+ disable polyG tail trimming, by default trimming is automatically enabled
+ for Illumina NextSeq/NovaSeq data
+  - id: verbose
+    type: boolean?  # --verbose is a bare switch, so it is modelled as a boolean flag rather than a File
+    inputBinding:
+      position: 0
+      prefix: '--verbose'
+    doc: output verbose log information (i.e. when every 1M reads are processed)
+outputs:
+ - id: fastp_json_output
+ type: File
+ outputBinding:
+ glob: $(inputs.json_output_path)
+ - id: fastp_html_output
+ type: File
+ outputBinding:
+ glob: $(inputs.html_output_path)
+ - id: fastp_read1_output
+ type: File
+ outputBinding:
+ glob: $(inputs.read1_output_path)
+ - id: fastp_read2_output
+ type: File?
+ outputBinding:
+ glob: $(inputs.read2_output_path)
+ - id: fastp_unpaired1_output
+ type: File?
+ outputBinding:
+ glob: $(inputs.unpaired1_path)
+ - id: fastp_unpaired2_output
+ type: File?
+ outputBinding:
+ glob: $(inputs.unpaired2_path)
+doc: Setup and execute Fastp
+label: fastp_0.20.1
+arguments:
+  - prefix: '--thread'
+    position: 0
+    valueFrom: |-
+      ${
+        // An explicit number_of_threads wins; otherwise use the cores granted at runtime.
+        var requested = inputs.number_of_threads;
+        return requested ? requested : runtime.cores;
+      }
+requirements:
+ - class: ResourceRequirement
+ ramMin: 17000
+ coresMin: 4
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/fastp:0.20.1--h8b12597_0'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:murphyc4@mskcc.org'
+ 'foaf:name': Charlie Murphy
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:fraihaa@mskcc.org'
+ 'foaf:name': Adrian Fraiha
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': fastp
+ 'doap:revision': 0.20.1
diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml
new file mode 100644
index 00000000..59eaa165
--- /dev/null
+++ b/fgbio_call_duplex_consensus_reads_1.2.0/example_inputs.yaml
@@ -0,0 +1,17 @@
+error_rate_post_umi: null
+error_rate_pre_umi: null
+input: {class: File, path: /path/to/bam_file}  # the tool declares 'input' as File, so a File object is required
+max_reads_per_strand: null
+memory_overhead: null
+memory_per_job: null
+min_input_base_quality: null
+min_reads:
+  - 1
+  - 1
+  - 0
+number_of_threads: null
+output_file_name: null
+read_group_id: null
+read_name_prefix: null
+sort_order: null
+trim: null
diff --git a/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl
new file mode 100644
index 00000000..96211f57
--- /dev/null
+++ b/fgbio_call_duplex_consensus_reads_1.2.0/fgbio_call_duplex_consensus_reads_1.2.0.cwl
@@ -0,0 +1,226 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: fgbio_call_duplex_consensus_reads_1_2_0
+baseCommand:
+ - fgbio
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: input
+ type: File
+ inputBinding:
+ position: 2
+ prefix: '--input'
+ shellQuote: false
+ doc: The input SAM or BAM file.
+ - id: output_file_name
+ type: string?
+ doc: Output SAM or BAM file to write consensus reads.
+ - id: read_name_prefix
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--read-name-prefix'
+ doc: The prefix all consensus read names
+ - id: read_group_id
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--read-group-id'
+ doc: The new read group ID for all the consensus reads.
+ - id: error_rate_pre_umi
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--error-rate-pre-umi'
+ doc: >-
+ The Phred-scaled error rate for an error prior to the UMIs being
+ integrated.
+ - id: error_rate_post_umi
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--error-rate-post-umi'
+ doc: >-
+ The Phred-scaled error rate for an error post the UMIs have been
+ integrated.
+ - id: min_input_base_quality
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--min-input-base-quality'
+ doc: Ignore bases in raw reads that have Q below this value.
+ - id: trim
+ type: boolean?
+ inputBinding:
+ position: 2
+ prefix: '--trim'
+ doc: 'If true, quality trim input reads in addition to masking low Q bases'
+ - id: sort_order
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--sort-order'
+ doc: 'The sort order of the output, if :none: then the same as the input.'
+ - id: min_reads
+ type: 'int[]'
+ inputBinding:
+ position: 2
+ prefix: '--min-reads'
+ itemSeparator: ' '
+ shellQuote: false
+ doc: The minimum number of input reads to a consensus read.
+ - id: max_reads_per_strand
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--max-reads-per-strand'
+ doc: >-
+ The maximum number of reads to use when building a single-strand
+ consensus. If more than this many reads are present in a tag family, the
+ family is randomly downsampled to exactly max-reads reads.
+ - id: temporary_directory
+ type: string?
+ doc: 'Default value: null.'
+  - id: async_io
+    type: string?
+    inputBinding:
+      position: 0
+      separate: false  # prefix and value are emitted as one token, e.g. --async-io=true
+      prefix: '--async-io='
+    doc: >-
+      Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].
+outputs:
+  - id: fgbio_call_duplex_consensus_reads_bam
+    type: File
+    outputBinding:
+      # Must resolve to the same file name as the '--output' argument below.
+      glob: |-
+        ${
+          if(inputs.output_file_name)
+            return inputs.output_file_name;
+          return inputs.input.basename.replace(/\.bam$/,'_cons.bam');
+        }
+doc: >-
+  Calls duplex consensus sequences from reads generated from the same
+  double-stranded source molecule. Prior to running this tool, read must have
+  been grouped with GroupReadsByUmi using the paired strategy. Doing so will
+  apply (by default) MI tags to all reads of the form */A and */B where the /A
+  and /B suffixes with the same identifier denote reads that are derived from
+  opposite strands of the same source duplex molecule.
+
+
+  Reads from the same unique molecule are first partitioned by source strand and
+  assembled into single strand consensus molecules as described by
+  CallMolecularConsensusReads. Subsequently, for molecules that have at least
+  one observation of each strand, duplex consensus reads are assembled by
+  combining the evidence from the two single strand consensus reads.
+
+
+  Because of the nature of duplex sequencing, this tool does not support
+  fragment reads - if found in the input they are ignored. Similarly, read pairs
+  for which consensus reads cannot be generated for one or other read (R1 or R2)
+  are omitted from the output.
+
+
+  Consensus reads have a number of additional optional tags set in the resulting
+  BAM file. The tag names follow a pattern where the first letter (a, b or c)
+  denotes that the tag applies to the first single strand consensus (a), second
+  single-strand consensus (b) or the final duplex consensus (c). The second
+  letter is intended to capture the meaning of the tag (e.g. d=depth, m=min
+  depth, e=errors/error-rate) and is upper case for values that are one per read
+  and lower case for values that are one per base.
+label: fgbio_call_duplex_consensus_reads_1.2.0
+arguments:
+  # JVM maximum heap size, derived from memory_per_job (megabytes).
+  - position: 0
+    valueFrom: |-
+      ${
+        // Only memory_per_job, given in megabytes, determines the heap size.
+        // memory_overhead is accepted as an input but has no effect on this flag.
+        var requestedMb = inputs.memory_per_job;
+        if (!requestedMb) {
+          // No explicit request: fall back to a fixed 10 GB heap.
+          return "-Xmx10G";
+        }
+        if (requestedMb < 1000) {
+          // Rounding down to whole gigabytes would give -Xmx0G, which is
+          // not a usable heap size, so small requests are passed in megabytes.
+          return "-Xmx" + requestedMb.toString() + "M";
+        }
+        // 1000 MB are treated as 1 G, exactly as before this rewrite;
+        // any remainder below a full 1000 MB is rounded down.
+        return "-Xmx" + Math.floor(requestedMb / 1000).toString() + "G";
+      }
+  # JVM flag: switch off the GC overhead limit check.
+  - position: 0
+    valueFrom: '-XX:-UseGCOverheadLimit'
+  # fgbio sub-command to run.
+  - position: 1
+    valueFrom: CallDuplexConsensusReads
+  # Scratch directory: user-supplied temporary_directory, else the runtime tmpdir.
+  - position: 0
+    prefix: '--tmp-dir='
+    separate: false
+    valueFrom: |-
+      ${
+        if(inputs.temporary_directory)
+          return inputs.temporary_directory;
+        return runtime.tmpdir
+      }
+  # Output BAM name; must stay in sync with the glob of the output above.
+  - position: 2
+    prefix: '--output'
+    shellQuote: false
+    valueFrom: |-
+      ${
+        if(inputs.output_file_name)
+          return inputs.output_file_name;
+        return inputs.input.basename.replace(/\.bam$/,'_cons.bam');
+      }
+  # Thread count: explicit number_of_threads, else the cores granted at runtime.
+  - position: 2
+    prefix: '--threads'
+    valueFrom: |-
+      ${
+        if(inputs.number_of_threads)
+          return inputs.number_of_threads
+        return runtime.cores
+      }
+requirements:
+ - class: ShellCommandRequirement
+ - class: ResourceRequirement
+ ramMin: 20000
+ coresMin: 16
+ - class: DockerRequirement
+ dockerPull: 'ghcr.io/msk-access/fgbio:1.2.0'
+ - class: InlineJavascriptRequirement
+'dct:contributor':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'dct:creator':
+ - class: 'foaf:Organization'
+ 'foaf:member':
+ - class: 'foaf:Person'
+ 'foaf:mbox': 'mailto:shahr2@mskcc.org'
+ 'foaf:name': Ronak Shah
+ 'foaf:name': Memorial Sloan Kettering Cancer Center
+'doap:release':
+ - class: 'doap:Version'
+ 'doap:name': fgbio CallDuplexConsensusReads
+ 'doap:revision': 1.2.0
diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml
new file mode 100644
index 00000000..38dd911b
--- /dev/null
+++ b/fgbio_collect_duplex_seq_metrics_1.2.0/example_inputs.yaml
@@ -0,0 +1,16 @@
+input:
+  class: File
+  metadata: {}
+  path: /path/to/bam
+output_prefix: prefix
+intervals:
+  class: File  # the '?' optional marker belongs in the tool's type declaration, not in a job value
+  metadata: {}
+  path: /path/to/intervals
+description: null
+duplex_umi_counts: null
+min_ab_reads: null
+min_ba_reads: null
+number_of_threads: null
+umi_tag: null
+mi_tag: null
diff --git a/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl
new file mode 100644
index 00000000..28697020
--- /dev/null
+++ b/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl
@@ -0,0 +1,294 @@
+class: CommandLineTool
+cwlVersion: v1.0
+$namespaces:
+ dct: 'http://purl.org/dc/terms/'
+ doap: 'http://usefulinc.com/ns/doap#'
+ foaf: 'http://xmlns.com/foaf/0.1/'
+ sbg: 'https://www.sevenbridges.com/'
+id: fgbio_collect_duplex_seq_metrics_1_2_0
+baseCommand:
+ - fgbio
+inputs:
+ - id: memory_per_job
+ type: int?
+ doc: Memory per job in megabytes
+ - id: memory_overhead
+ type: int?
+ doc: Memory overhead per job in megabytes
+ - id: number_of_threads
+ type: int?
+ - id: input
+ type: File
+ inputBinding:
+ position: 2
+ prefix: '--input'
+ doc: Input BAM file generated by GroupReadByUmi.
+ - id: output_prefix
+ type: string?
+ doc: Prefix of output files to write.
+ - id: intervals
+ type: File?
+ inputBinding:
+ position: 2
+ prefix: '--intervals'
+ doc: 'Optional set of intervals over which to restrict analysis. [Optional].'
+ - id: description
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--description'
+ doc: >-
+ Description of data set used to label plots. Defaults to sample/library.
+ [Optional].
+ - id: duplex_umi_counts
+ type: boolean?
+ inputBinding:
+ position: 2
+ prefix: '--duplex-umi-counts'
+ doc: >-
+ If true, produce the .duplex_umi_counts.txt file with counts of duplex UMI
+ observations. [Optional].
+ - id: min_ab_reads
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--min-ab-reads'
+ doc: 'Minimum AB reads to call a tag family a ''duplex''. [Optional].'
+ - id: min_ba_reads
+ type: int?
+ inputBinding:
+ position: 2
+ prefix: '--min-ba-reads'
+ doc: 'Minimum BA reads to call a tag family a ''duplex''. [Optional].'
+ - id: umi_tag
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--umi-tag'
+ doc: 'The tag containing the raw UMI. [Optional].'
+ - id: mi_tag
+ type: string?
+ inputBinding:
+ position: 2
+ prefix: '--mi-tag'
+ doc: 'The output tag for UMI grouping. [Optional].'
+ - id: temporary_directory
+ type: string?
+ doc: 'Default value: null.'
+  - id: async_io
+    type: string?
+    inputBinding:
+      position: 0
+      separate: false  # prefix and value are emitted as one token, e.g. --async-io=true
+      prefix: '--async-io='
+    doc: >-
+      Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].
+outputs:
+  - id: fgbio_collect_duplex_seq_metrics_family_size
+    type: File
+    outputBinding:
+      glob: |-
+        ${
+          // Explicit output_prefix plus the suffix; otherwise the input basename
+          // with its first .bam occurrence swapped for the suffix.
+          var suffix = '.family_sizes.txt';
+          if (inputs.output_prefix)
+            return inputs.output_prefix + suffix;
+          return inputs.input.basename.replace('.bam', suffix);
+        }
+  - id: fgbio_collect_duplex_seq_metrics_duplex_family_size
+    type: File
+    outputBinding:
+      glob: |-
+        ${
+          // Explicit output_prefix plus the suffix; otherwise the input basename
+          // with its first .bam occurrence swapped for the suffix.
+          var suffix = '.duplex_family_sizes.txt';
+          if (inputs.output_prefix)
+            return inputs.output_prefix + suffix;
+          return inputs.input.basename.replace('.bam', suffix);
+        }
+  - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics
+    type: File
+    outputBinding:
+      glob: |-
+        ${
+          // Explicit output_prefix plus the suffix; otherwise the input basename
+          // with its first .bam occurrence swapped for the suffix.
+          var suffix = '.duplex_yield_metrics.txt';
+          if (inputs.output_prefix)
+            return inputs.output_prefix + suffix;
+          return inputs.input.basename.replace('.bam', suffix);
+        }
+  - id: fgbio_collect_duplex_seq_metrics_umi_counts
+    type: File
+    outputBinding:
+      glob: |-
+        ${
+          // Explicit output_prefix plus the suffix; otherwise the input basename
+          // with its first .bam occurrence swapped for the suffix.
+          var suffix = '.umi_counts.txt';
+          if (inputs.output_prefix)
+            return inputs.output_prefix + suffix;
+          return inputs.input.basename.replace('.bam', suffix);
+        }
+  - id: fgbio_collect_duplex_seq_metrics_duplex_qc
+    type: File?
+    outputBinding:
+      glob: |-
+        ${
+          // Explicit output_prefix plus the suffix; otherwise the input basename
+          // with its first .bam occurrence swapped for the suffix.
+          var suffix = '.duplex_qc.pdf';
+          if (inputs.output_prefix)
+            return inputs.output_prefix + suffix;
+          return inputs.input.basename.replace('.bam', suffix);
+        }
+  - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts
+    type: File?
+    outputBinding:
+      glob: |-
+        ${
+          // Same naming rule as the sibling outputs, with the duplex UMI counts suffix.
+          var suffix = '.duplex_umi_counts.txt';
+          if (inputs.output_prefix)
+            return inputs.output_prefix + suffix;
+          return inputs.input.basename.replace('.bam', suffix);
+        }
+doc: >-
+ Collects a suite of metrics to QC duplex sequencing data.
+
+ Inputs ------
+
+ The input to this tool must be a BAM file that is either:
+
+ 1. The exact BAM output by the 'GroupReadsByUmi' tool (in the sort-order it
+ was produced in) 2. A BAM file that has MI tags present on all reads (usually
+ set by 'GroupReadsByUmi' and has been sorted with
+ 'SortBam' into 'TemplateCoordinate' order.
+
+ Calculation of metrics may be restricted to a set of regions using the
+ '--intervals' parameter. This can significantly affect results as off-target
+ reads in duplex sequencing experiments often have very different properties
+ than on-target reads due to the lack of enrichment.
+
+ Several metrics are calculated related to the fraction of tag families that
+ have duplex coverage. The definition of "duplex" is controlled by the
+ '--min-ab-reads' and '--min-ba-reads' parameters. The default is to treat any
+ tag family with at least one observation of each strand as a duplex, but this
+ could be made more stringent, e.g. by setting '--min-ab-reads=3
+ --min-ba-reads=3'. If different thresholds are used then '--min-ab-reads' must
+ be the higher value.
+
+ Outputs -------
+
+ The following output files are produced:
+
+ 1.