diff --git a/.gitignore b/.gitignore
index f5ea8b8..dbce267 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,162 @@
-*.pyc
-*.pdf
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/ + .DS_Store diff --git a/PIPE-CLIP_manual.pdf b/PIPE-CLIP_manual.pdf deleted file mode 100644 index 3a32a72..0000000 Binary files a/PIPE-CLIP_manual.pdf and /dev/null differ diff --git a/Python.gitignore b/Python.gitignore new file mode 100644 index 0000000..68bc17f --- /dev/null +++ b/Python.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/
diff --git a/README.md b/README.md
index 64b0060..19e6813 100644
--- a/README.md
+++ b/README.md
@@ -1,38 +1,48 @@
-PIPE-CLIP
-=========
-This is version 1.1.0
+# PIPE-CLIP
+
+[![Pypi Releases](https://img.shields.io/pypi/v/pipeclip.svg)](https://pypi.python.org/pypi/pipeclip)
+[![Downloads](https://pepy.tech/badge/pipeclip)](https://pepy.tech/project/pipeclip)
+
 Pipeline for CLIP-seq Analysis.
-- Galaxy site: the galaxy service is discontinued in 2019.
-- Publication: http://genomebiology.com/2014/15/1/R18
-- Google Code site: https://code.google.com/p/pipe-clip/
-
-
-Requirement:
-- Python 2.7;
-- R 3.0 and above;
-- Perl 5 and above;
-- Python packages: pysam, pybedtools and ghmm;
-- R packages: MASS,VGAM and their dependencies.
-- Other packages: HOMER and annotation files
-
-
-Installation tips:
-- Make sure HOMER are in your PATH. You can test this by type "annotatePeaks.pl" from anywhere and you should get help information of this command.
-
-How to use:
-
-- After unzip the package, you cd into the program folder and run PIPE-CLIP by typing:
-- python pipeclip.py -i input.bam -o output_prefix -c CLIP_type -l minimum_matchlength -m maximum_mismatchcount -r Remove_PCR_duplicate -M FDR_for_mutations -C FDR_for_clusters -s species
-
-- -i input BAM
-- -o output prefix
-- -c CLIP type,[0,1,2,3] (0)HITS-CLIP; (1)PAR-4SU; (2)PAR-6SG; (3)iCLIP
-- -l minimum match length
-- -m maximum mismatch count
-- -r method to remove PCR duplicate,[0,1,2] (0)No removal; (1)Remove by read start; (2)Remove by sequence
-- -M FDR to get significant mutations
-- -C FDR to get enriched clusters
-- -s species
-Here, species might be hg19.
-
-Contact: Zhiqun.Xie@UTSouthwestern.edu
+This is a forked version (2.0.x) of [PIPE-CLIP](https://github.com/QBRC/PIPE-CLIP).
+
+## Requirements:
+
+- Python >=3.6;
+- Python packages: `pysam`, `pybedtools` and `rpy2`. (Python packages will be installed automatically)
+
+- R >=3.0;
+- R packages: `MASS`, `VGAM` and their dependencies. (R packages will be installed automatically)
+
+- Perl >=5.0;
+- Other packages: `HOMER` (annotatePeaks.pl) and annotation files.
+  - Make sure HOMER is in your PATH. You can test this by typing `annotatePeaks.pl` from anywhere; you should see the help information for this command.
+
+## How to install:
+
+```bash
+pip install pipeclip
+```
+
+## How to use:
+
+```bash
+pipeclip -i input.bam -o output_prefix -c CLIP_type -l minimum_matchlength -m maximum_mismatchcount -r Remove_PCR_duplicate -M FDR_for_mutations -C FDR_for_clusters -s species
+```
+
+- `-i` input BAM
+- `-t` control BAM
+- `-o` output prefix
+- `-c` CLIP type [0,1,2,3]: (0) HITS-CLIP; (1) PAR-4SU; (2) PAR-6SG; (3) iCLIP
+- `-r` method to remove PCR duplicates [0,1,2]: (0) no removal; (1) remove by read start; (2) remove by sequence
+- `-l` minimum match length
+- `-m` maximum mismatch count
+- `-M` FDR cutoff for significant mutations
+- `-C` FDR cutoff for enriched clusters
+- `-s` species (e.g. hg19, mm10 or mm9); leave blank to skip the annotation step
+
+## Footnote
+
+> Cite:
+>
+> - Chen, B., Yun, J., Kim, M.S. et al. PIPE-CLIP: a comprehensive online tool for CLIP-seq data analysis. Genome Biol 15, R18 (2014).
https://doi.org/10.1186/gb-2014-15-1-r18 diff --git a/lib/Alignment.py b/lib/Alignment.py deleted file mode 100644 index 4b1336f..0000000 --- a/lib/Alignment.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/python -# programmer: beibei.chen@utsouthwestern.edu -# Usage: definition of alignment files including BED and BAM - -import gzip -import Enrich - -class BED(): - def __init__(self,chr,start,stop,name,score,strand): - self.chr = chr - self.start = int(start) - self.stop = int(stop) - self.name = name - self.score = int(score) - self.strand = strand - - def __str__(self): - st = "\t".join([str(self.chr),str(self.start),str(self.stop),self.name,str(self.score),self.strand]) - return st - - def merge(self,read): - self.stop = read.stop - self.score += 1 - - def overlap(self,read): - if self.chr == read.chr and self.strand == read.strand: - if self.start <= read.stop and self.stop >=read.start: - return True - else: - return False - - def updateScore(self,s): - self.score = int(s) - - def increaseScore(self): - self.score += 1 - -class ClusterBed(BED): - def __init__(self,chr,start,stop,name,score,strand): - self.pvalue = 0 - self.qvalue = 0 - self.sig = False - BED.__init__(self,chr,start,stop,name,score,strand) - -class MutationBed(BED): - def __init__(self,chr,start,stop,name,score,strand,type): - self.type = type#insertion,deletion,type of substitution - self.kvalue = 0 - BED.__init__(self,chr,start,stop,name,score,strand) - self.pvalue = 0 - self.qvalue = 0 - self.sig = False - - def updateK(self,k): - self.kvalue = k - - def __str__(self): - st = BED.__str__(self) - st += "\t"+self.type+"\t"+str(self.kvalue) - return st - -class CrosslinkingBed(BED): - def __init__(self,chr,start,stop,name,score,strand,clusterP,clusterQ,mStart,mName,mP): - self.fisherP = 0 - self.clusterP = float(clusterP) - self.mutationP = [mP] - self.qvalue = float(clusterQ) - self.mutationStarts = [str(mStart)] - self.mutationNames = [mName] - BED.__init__(self,chr,start,stop,name,score,strand) - - def addMutation(self,mu):#mu is a instance - self.mutationNames.append(mu.name) - self.mutationStarts.append(str(mu.start)) - self.mutationP.append(mu.pvalue) - - def fishertest(self): - fp = Enrich.fisherTest(self.clusterP,self.mutationP) - self.fisherP = fp - - -class wiglist: - def __init__(self): - self.pos = [] - self.value = [] - - def valueByPos(self,p): - try: - return self.value[self.pos.index(p)] - except: - return False - - def update(self,p,v): - self.pos.append(p) - self.value.append(v) - diff --git a/lib/CLIP.py b/lib/CLIP.py deleted file mode 100644 index b49327f..0000000 --- a/lib/CLIP.py +++ /dev/null @@ -1,568 +0,0 @@ -'''Define CLIP class''' -import sys -import gzip -import copy -import logging -import math -import pysam -import random -import Utils -import Alignment -import Mutation2 -import OptValidator -import subprocess -import datetime -import time -import gc -import os - -gc.enable() -OptValidator.opt_validate() - - -class CLIP: - def __init__(self,fileaddr,prefix): - self.filepath = fileaddr - self.originalBAM = None - self.originalMapped = 0 - self.refInfo = [] - self.posfilteredBAM = None - self.negfilteredBAM = None - self.filteredAlignment = 0 - self.type = 0 - self.outprefix = prefix - self.currentGroupKey = "None" - self.currentGroup = [] #Used in rmdup - self.previousQul = [0,0,0]#for rmdup,[matchlen,mapq,mismatch] - self.clusters = [] - self.clusters_plus = [] - self.clusters_minus = [] - self.clusterCount = 0 - self.currentCluster_plus = Alignment.BED("",0,0,"",0,"+") - 
self.currentCluster_minus = Alignment.BED("",0,0,"",0,"-") - #self.sigClusters = {} - self.mutations = {} #Dictionary of bed instance - self.mutationCount = 0 - self.mutationList = [] - self.sigMutations = {}#same as sigClusters - self.kmpair = {} - self.sigMutationCount = 0 - self.sigClusterCount = 0 - self.coverage = 0 #"reads coverage of this sample" - self.bamheader = None - self.crosslinking = {} - self.crosslinkingMutations = [] - - def __str__(self): - pass - - def testInput(self): - '''Test the input file format, modify self.filepath and return bool - Status: True:file is ready to use; False: wrong file, program stop - ''' - #test if file has header - try: - self.header = pysam.view("-H",self.filepath) - except: - try: - self.header = pysam.view("-SH",self.filepath) - except: - logging.error("Input file does not have header, please check your file. Program quit") - return (False,"None") - #Header test passed, test if it is BAM - try: - infile = gzip.open(self.filepath) - infile.readline(10) - except:#cannot read line, should be sam - logging.info("Input is SAM, converting to BAM...") - bamout = ".".join(self.filepath.split(".")[0:-1])+"."+"bam" - infile = pysam.Samfile(self.filepath,"r",header=self.header) - #print >> sys.stderr,pysam.view("-SH",infile) - outfile = pysam.Samfile(bamout,"wb",template=infile) - for i in infile.fetch(): - outfile.write(i) - self.filepath = bamout - #Now the infile is BAM,check if it is sorted - if Utils.is_sorted(self.header): - pysam.index(self.filepath) - return True - else:#sort the BAM - logging.info("Input is not sorted, sorting file...") - bamsort = ".".join(self.filepath.split(".")[0:-1])+"."+"sort" - pysam.sort(self.filepath,bamsort) - pysam.index(bamsort+".bam") - self.filepath = bamsort+".bam" # change input file path - self.header = pysam.view("-H",bamsort+".bam") - logging.info("Input file sorted") - #if Utils.is_sorted(self.header): - # print >> sys.stderr, "The file is sorted" - return True - - def readfile(self): - try: - self.originalBAM = pysam.Samfile(self.filepath,"rb") - self.refInfo = zip(self.originalBAM.references,self.originalBAM.lengths) - return True - except IOError,message: - logging.error("Cannot open input file") - return False - -# def printFilteredReads(self): -# for i in self.filteredAlignment: -# print i - - def printClusters(self): - outfile = open(self.outprefix+".clusters.bed","w") - for i in [self.clusters_plus,self.clusters_minus]: - for j in i: - print >>outfile,j - outfile.close() - - def printMutations(self): - outfile = open(self.outprefix+".mutations.bed","w") - for i in self.mutations.values(): - print >>outfile,i - outfile.close() - - - def printMutations_chr(self): - fhs = {}#file handle dic, key is chr, value is file handle - for chr,chrlen in self.refInfo: - outfile = open(self.outprefix+"."+chr+".mutations.bed","w") - fhs[chr] = outfile - for i in self.mutations.itervalues(): - print >>fhs[i.chr],i - #clean up - for fh in fhs.itervalues(): - fh.close() - del fhs - - - def printMutationSingleChr(self,chr): - outfile = open(self.outprefix+"."+chr+".mutations.bed","w") - for i in self.mutations.itervalues(): - print >>outfile,i - #clean up - outfile.close() - - def printReliableMutations(self): - outfile = open(self.outprefix+".reliableMutations.pipeclip.bed","w") - header = "#chr\tstart\tstop\tmutation_name\tM_value\tstrand\ttype\tK_value\tp_value\tfdr" - print >> outfile,header - self.printEnrichedItem(self.sigMutations,outfile) - - - def printEnrichedClusters(self): - outfile = 
open(self.outprefix+".enrichedClusters.pipeclip.bed","w") - header = "#chr\tstart\tstop\tcluster_name\tread_count\tstrand\tp_value\tfdr" - print >> outfile,header - self.printReliableList(self.clusters,outfile) - return [self.outprefix+".enrichedClusters.pipeclip.bed"] - - def printCrosslinkingMutations(self): - outfile = open(self.outprefix+".crosslinkingMutations.pipeclip.bed","w") - header = "#chr\tstart\tstop\tmutation_name\tM_value\tstrand\ttype\tK_value\tp_value\tfdr" - print >> outfile,header - self.printReliableList(self.crosslinkingMutations,outfile) - - - def printReliableList(self,mylist,fh): - for i in mylist: - if i.sig: - st = i.__str__() - st += "\t"+str(i.pvalue)+"\t"+str(i.qvalue) - print >>fh,st - - def printList(self,mylist,fh): - for i in mylist: - print >>fh,i - - def printEnrichedItem(self,dic,fh): - for k in dic.keys(): - for i in dic[k]: - st = i.__str__() - st += "\t"+str(i.pvalue)+"\t"+str(i.qvalue) - print >> fh,st - - def printCrosslinkingSites(self): - output = self.outprefix - header = "#chr\tstart\tstop\tcluster_name\treads_count\tstrand\tcluster_fdr\tcrosslinking_fisherP\tmutation_pos\tmutation_name" - if self.type == 0:#HITS-CLIP, three output - output_del = open(output+"_deletion_crosslinking.pipeclip.txt","w") - output_sub = open(output+"_substitution_crosslinking.pipeclip.txt","w") - output_ins = open(output+"_insertion_crosslinking.pipeclip.txt","w") - print >> output_del,header - print >> output_sub,header - print >> output_ins,header - else: - output_name = open(output+"_crosslinking.pipeclip.txt","w") - print >> output_name,header - for k in self.crosslinking.keys(): - st = self.crosslinking[k].__str__() - st += "\t"+"\t".join([str(self.crosslinking[k].qvalue),str(self.crosslinking[k].fisherP)]) - st += "\t"+",".join(self.crosslinking[k].mutationStarts) - st += "\t"+",".join(self.crosslinking[k].mutationNames) - if self.type == 0: - output_key = k.split("_")[-1] - if output_key == "Deletion": - print >> output_del,st - elif output_key == "Insertion": - print >> output_ins,st - elif output_key == "Substitution": - print >> output_sub,st - else: - print >> output_name,st - - if self.type == 0: - output_del.close() - output_sub.close() - output_ins.close() - return [output+"_insertion_crosslinking.pipeclip.txt",output+"_deletion_crosslinking.pipeclip.txt",output+"_substitution_crosslinking.pipeclip.txt"] - else: - output_name.close() - return [output+"_crosslinking.pipeclip.txt"] - - - def updatePreviousQul(self,n,q,m): - self.previousQul[0] = n - self.previousQul[1] = q - self.previousQul[2] = m - - - def updateCurrentGroup(self,read,mlen,mis): - '''Compare read to current duplication group parameters, determine to add to, to drop or to replace, make sure duplication group only has reads with best quality''' - if mlen >= self.previousQul[0] and read.mapq >= self.previousQul[1] and mis <= self.previousQul[2]:# consider to append or replace only when read has no worse quality - if mlen > self.previousQul[0] or read.mapq > self.previousQul[1] or mis < self.previousQul[2]:# read has better quality,replace - self.currentGroup = [read] - self.updatePreviousQul(mlen,read.mapq,mis) - else: - self.currentGroup.append(read) - - def iniDupGroupInfo(self,read,group_key,mlength,mismatch): - self.currentGroupKey = group_key - self.currentGroup = [read] - self.updatePreviousQul(mlength,read.mapq,mismatch) - - def rmdup(self): - '''Return random one read of highest quality from list''' - #print "get one from group" - if len(self.currentGroup)==1: - #print 
self.currentGroup[0] - return self.currentGroup[0] - else: - random.seed(1) - index = random.randint(0,len(self.currentGroup)-1) - #print self.currentGroup[index] - return self.currentGroup[index] - - def updateCluster(self,read): - '''Cluster new read to known clusters and update cluster reads count''' - strandDic = {"True":"-","False":"+"} - clusterName = "cluster"+"_"+str(len(self.clusters_plus)+len(self.clusters_minus)+1) - newRead = Alignment.ClusterBed(self.originalBAM.getrname(read.tid),read.pos,read.pos+len(read.seq),clusterName,1,strandDic[str(read.is_reverse)]) - if read.is_reverse: - if self.currentCluster_minus.chr == "": #Initiate cluster - self.currentCluster_minus = newRead - self.clusters_minus.append(self.currentCluster_minus) - self.clusterCount += 1 - else: - if self.currentCluster_minus.overlap(newRead): - self.currentCluster_minus.merge(newRead) - self.clusters_minus[-1]=self.currentCluster_minus - else:#New read is a new cluster - #self.clusters.append(self.currentCluster) - self.currentCluster_minus = newRead - self.clusters_minus.append(self.currentCluster_minus) - self.clusterCount+=1 - else: - if self.currentCluster_plus.chr == "": #Initiate cluster - self.currentCluster_plus = newRead - self.clusters_plus.append(self.currentCluster_plus) - self.clusterCount+=1 - else: - if self.currentCluster_plus.overlap(newRead): - self.currentCluster_plus.merge(newRead) - self.clusters_plus[-1]=self.currentCluster_plus - else:#New read is a new cluster - #self.clusters.append(self.currentCluster) - self.currentCluster_plus = newRead - self.clusters_plus.append(self.currentCluster_plus) - self.clusterCount += 1 - - - def updateMutation(self,read,mis): - mutations = [] - if self.type == 3:#iclip,find truncation - mutations = Mutation2.getTruncations(self.originalBAM,read) - else: - mutations = Mutation2.getMutations(self.originalBAM,read) - if self.type ==1: - mutation_filter = Utils.filterMutations(mutations,"T->C",True) - mutations = mutation_filter - elif self.type ==2: - mutation_filter = Utils.filterMutations(mutations,"G->A",True) - mutations = mutation_filter - #logging.debug("read %s " % read) - if len(mutations)>0: - for m in mutations: - #print m - self.mutationCount += 1 - m_key = "_".join([m.chr,str(m.start),m.strand,m.type]) - if self.mutations.has_key(m_key): - self.mutations[m_key].increaseScore() - else: - self.mutations[m_key]=m - - def updateCLIPinfo(self,read,matchlen,miscount): - '''Update sample coverage info, clustering, mutation info''' - #logging.debug("read %s, cigar %s,mismatch %d" % (read.qname,read.cigar,miscount)) - #update sample coverage info - self.coverage += matchlen - #update cluster info - self.updateCluster(read) - #update mutation info - if miscount > 0: - self.updateMutation(read,miscount) - - def addSigToDic(self,dic,mu): - '''Add new mutation into the dictionary.Mutations should be sorted''' - if dic.has_key(mu.chr): - dic[mu.chr].append(mu) - else: - dic[mu.chr] = [mu] - - def getCrosslinking(self): - '''Merge enriched clusters and reliable mutations together - Call Enrich.fisherTest() to calculate joint p vlaue - ''' - for cluster in self.clusters: - #logging.debug("P value of cluster is %f" % cluster.pvalue) - if cluster.sig and self.sigMutations.has_key(cluster.chr): - for mutation in self.sigMutations[cluster.chr]: - #logging.debug("Cluster loc %d,%d ; Mutation loc %d,%d, mutation type %s" % (cluster.start,cluster.stop,mutation.start,mutation.stop,mutation.type)) - if cluster.overlap(mutation): - #logging.debug("Overlapped") - if 
self.type == 0:#HITS-CLIP - mutation_key = mutation.type.split("->")[0] - if mutation_key in ["A","C","G","T"]: - mutation_key = "Substitution" - cross_key = cluster.name+"_"+mutation_key - else: - cross_key = cluster.name - if self.crosslinking.has_key(cross_key): - #logging.debug("Existing mutation pvalue:",self.crosslinking[cluster.name].mutationP) - self.crosslinking[cross_key].addMutation(mutation) - self.crosslinkingMutations.append(mutation) - else: - self.crosslinking[cross_key] = Alignment.CrosslinkingBed(cluster.chr,cluster.start,cluster.stop,cluster.name,cluster.score,cluster.strand,cluster.pvalue,cluster.qvalue,mutation.start,mutation.name,mutation.pvalue) - self.crosslinkingMutations.append(mutation) - #start to calculate fisher test p value - for k in self.crosslinking.iterkeys(): - self.crosslinking[k].fishertest() - - - def filter(self,matchLen,mismatch,cliptype,duprm): - '''Filter the input BAM file according to parameters. Make clusters and mutations at the same time''' - logging.info("Start to filter alignment using parameters:") - logging.info("match length:%d" % (matchLen)) - logging.info("mismatch count: %d" % (mismatch)) - logging.info("CLIP type:%s" % (str(cliptype))) - logging.info("Rmdup code:%s" % (str(duprm))) - logging.info("There are %d reads in origianl input file" % (self.originalBAM.mapped)) - #print >> sys.stderr,zip(self.originalBAM.references,self.originalBAM.lengths) - #sys_info = os.system("free -m") - #logging.debug(sys_info) - #outBAM = pysam.Samfile(outprefix+".filtered.bam","wb",template=self.originalBAM) - self.originalMapped = self.originalBAM.mapped - outBAM_pos = pysam.Samfile(self.outprefix+".pos.filtered.bam","wb",template=self.originalBAM) - outBAM_neg = pysam.Samfile(self.outprefix+".neg.filtered.bam","wb",template=self.originalBAM) - self.type = cliptype - if cliptype == 3:#make sure there is no rmdup for iCLIP data - duprm = 0 - count = 0 - start_time = datetime.datetime.now() - for alignment in self.originalBAM: - #print "Now processing",alignment.qname - if not alignment.cigar : #reads is unmapped - continue - count += 1 - if count % 500000 ==0: - stop_time = datetime.datetime.now() - logging.debug("Processed %d reads in %s" % (count,str(stop_time-start_time))) - start_time = stop_time - flag,mlen,mis = Utils.readQuaFilter(alignment,matchLen,mismatch) - if flag: - #print "Qualified read" - #print alignment - #print "current Gourp key",self.currentGroupKey - if duprm > 0: - #get group key - if duprm == 1: - groupkey = Utils.rmdupKey_Start(alignment) - elif duprm == 2: - groupkey = Utils.rmdupKey_Seq(alignment) - #check current group - if groupkey == self.currentGroupKey:#overlap with current group, update group - self.updateCurrentGroup(alignment,mlen,mis) - else:#get read from current group and discard it, use current read to start a new current group - if self.currentGroupKey!="None":#current group exists - keep = self.rmdup() - #logging.debug("Pop out read to keep %s" % keep) - self.currentGroup = [] - self.filteredAlignment += 1 - flag,mlen,mis = Utils.readQuaFilter(keep,matchLen,mismatch) - self.updateCLIPinfo(keep,mlen,mis) - #outBAM.write(keep) - if keep.is_reverse: - outBAM_neg.write(keep) - else: - outBAM_pos.write(keep) - self.iniDupGroupInfo(alignment,groupkey,mlen,mis)#make new group using current alignment - else:#there is no rmdup - #logging.debug("Good read, update clip info %s" % read.qname) - self.filteredAlignment+=1 - self.updateCLIPinfo(alignment,mlen,mis) - #outBAM.write(alignment) - if alignment.is_reverse: - 
outBAM_neg.write(alignment) - else: - outBAM_pos.write(alignment) - #clean up the final dupGroup, if rmdup==0, there is no final dupGroup - if len(self.currentGroup)>0: - keep = self.rmdup() - self.currentGroup = [] - self.filteredAlignment+=1 - flag,mlen,mis = Utils.readQuaFilter(keep,matchLen,mismatch) - self.updateCLIPinfo(keep,mlen,mis) - #outBAM.write(alignment) - if keep.is_reverse: - outBAM_neg.write(keep) - else: - outBAM_pos.write(keep) - #Logging CLIP sample information - #outBAM.close() - outBAM_pos.close() - outBAM_neg.close() - #pysam.index(outprefix+".filtered.bam") - pysam.index(self.outprefix+".pos.filtered.bam") - pysam.index(self.outprefix+".neg.filtered.bam") - self.posfilteredBAM = self.outprefix+".pos.filtered.bam" - self.negfilteredBAM = self.outprefix+".neg.filtered.bam" - self.clusters = self.clusters_plus + self.clusters_minus - self.printClusters() - self.printMutations_chr() - #Clean up variables - self.originalBAM = None - self.clusters_plus = None - self.clusters_minus = None - self.mutations = None - gc.collect() - logging.debug("After filtering, %d reads left" % (self.filteredAlignment)) - logging.debug("There are %d clusters in total" % (self.clusterCount)) - logging.debug("There are %d mutations in total" % (self.mutationCount)) - - - def filter2(self,matchLen,mismatch,cliptype,duprm): - '''Filter the input BAM file according to parameters. Make clusters and mutations at the same time''' - logging.info("Start to filter alignment using parameters:") - logging.info("match length:%d" % (matchLen)) - logging.info("mismatch count: %d" % (mismatch)) - logging.info("CLIP type:%s" % (str(cliptype))) - logging.info("Rmdup code:%s" % (str(duprm))) - logging.info("There are %d reads in origianl input file" % (self.originalBAM.mapped)) - - self.originalMapped = self.originalBAM.mapped - outBAM_pos = pysam.Samfile(self.outprefix+".pos.filtered.bam","wb",template=self.originalBAM) - outBAM_neg = pysam.Samfile(self.outprefix+".neg.filtered.bam","wb",template=self.originalBAM) - self.type = cliptype - if cliptype == 3:#make sure there is no rmdup for iCLIP data - duprm = 0 - count = 0 - start_time = datetime.datetime.now() - currentChr = "" - for alignment in self.originalBAM: - if not alignment.cigar : #reads is unmapped - continue - c_chr = self.originalBAM.getrname(alignment.tid) - if currentChr == "": - currentChr = c_chr - elif c_chr!=currentChr: - #logging.debug("Mutation sites for Current ref %s is %d" % (currentChr,len(self.mutations.keys()))) - self.printMutationSingleChr(currentChr) - self.mutations = None - gc.collect() - self.mutations = {} - currentChr = c_chr - count += 1 - if count % 5000000 ==0: - stop_time = datetime.datetime.now() - logging.debug("Processed %d reads in %s" % (count,str(stop_time-start_time))) - start_time = stop_time - flag,mlen,mis = Utils.readQuaFilter(alignment,matchLen,mismatch) - if flag: - if duprm > 0: - #get group key - if duprm == 1: - groupkey = Utils.rmdupKey_Start(alignment) - elif duprm == 2: - groupkey = Utils.rmdupKey_Seq(alignment) - #check current group - if groupkey == self.currentGroupKey:#overlap with current group, update group - self.updateCurrentGroup(alignment,mlen,mis) - else:#get read from current group and discard it, use current read to start a new current group - if self.currentGroupKey!="None":#current group exists - keep = self.rmdup() - #logging.debug("Pop out read to keep %s" % keep) - self.currentGroup = [] - self.filteredAlignment += 1 - flag,mlen,mis = Utils.readQuaFilter(keep,matchLen,mismatch) - 
self.updateCLIPinfo(keep,mlen,mis) - #outBAM.write(keep) - if keep.is_reverse: - outBAM_neg.write(keep) - else: - outBAM_pos.write(keep) - self.iniDupGroupInfo(alignment,groupkey,mlen,mis)#make new group using current alignment - else:#there is no rmdup - #logging.debug("Good read, update clip info %s" % read.qname) - self.filteredAlignment+=1 - self.updateCLIPinfo(alignment,mlen,mis) - #outBAM.write(alignment) - if alignment.is_reverse: - outBAM_neg.write(alignment) - else: - outBAM_pos.write(alignment) - #clean up the final dupGroup, if rmdup==0, there is no final dupGroup - if len(self.currentGroup)>0: - keep = self.rmdup() - self.currentGroup = [] - self.filteredAlignment+=1 - flag,mlen,mis = Utils.readQuaFilter(keep,matchLen,mismatch) - self.updateCLIPinfo(keep,mlen,mis) - #outBAM.write(alignment) - if keep.is_reverse: - outBAM_neg.write(keep) - else: - outBAM_pos.write(keep) - #clean up current mutation hash - self.printMutationSingleChr(currentChr) - self.mutations = None - #Logging CLIP sample information - #outBAM.close() - outBAM_pos.close() - outBAM_neg.close() - pysam.index(self.outprefix+".pos.filtered.bam") - pysam.index(self.outprefix+".neg.filtered.bam") - self.posfilteredBAM = self.outprefix+".pos.filtered.bam" - self.negfilteredBAM = self.outprefix+".neg.filtered.bam" - self.clusters = self.clusters_plus + self.clusters_minus - self.printClusters() - #Clean up variables - self.originalBAM = None - self.clusters_plus = None - self.clusters_minus = None - self.mutations = None - gc.collect() - logging.debug("After filtering, %d reads left" % (self.filteredAlignment)) - logging.debug("There are %d clusters in total" % (self.clusterCount)) - logging.debug("There are %d mutations in total" % (self.mutationCount)) diff --git a/lib/Enrich.py b/lib/Enrich.py deleted file mode 100755 index 89c322c..0000000 --- a/lib/Enrich.py +++ /dev/null @@ -1,428 +0,0 @@ -#!/usr/bin/python -# Programmer : beibei.chen@utsouthwestern.edu -# Usage: Get reliable mutations using binomial distribution -# Input: Filtered BAM, reads coverage (generated by SAMFilter.py), mutation file -# Output: BED -# Last modified: 18 Sep.2013 - - -import sys -import re -import random -import string -import logging -import pysam -from pysam import * -import argparse as ap -from pybedtools import BedTool -import copy -import rpy2.robjects as robject -from rpy2.robjects.packages import importr -from rpy2.robjects import FloatVector -import math -from collections import Counter -import subprocess -import OptValidator -import datetime -import time -import Utils -import os -import gc -import Alignment -import os - -OptValidator.opt_validate() -gc.enable() - -stats = importr('stats') - -def freqRank(readCount,rev=False): - key = sorted(readCount.keys(),reverse=rev) - r_rank = {} - rank = 0 - for i in key: - rank += readCount[i] - r_rank[i] =rank - return r_rank - -def BH(pvalue,pRank,N): - a = N/float(pRank) - q = a * pvalue - qva = max(pvalue, q) - return qva - -#@profile -def KMvalue(posmapfile,negmapfile,mufile): - ''' - Calculate K(coverage) value for each mutation location - Mutations are already unique. 
- ''' - km = []#store mutations with updated k value - km_pair = {}#Dic of count tuples of (k,m),key:"K_M" - count = 0 - logging.debug("make wig %s" % str(datetime.datetime.now())) - poswig = Utils.makeWig(posmapfile) - negwig = Utils.makeWig(negmapfile) - start_time = datetime.datetime.now() - logging.debug("finish making wig %s" % str(start_time)) - for item in mufile: - count += 1 - if count % 5000 == 0: - stop_time = datetime.datetime.now() - logging.debug("Counting K-M for %d mutation sites, using %s" % (count,str(stop_time-start_time))) - start_time = stop_time - st = [] - strand = item.strand - M = item.score - K = 0 - #logging.debug("Time begin to pileup is %s" % (str(datetime.datetime.now()))) - if strand == "+": - try: - K = poswig[item.chr][str(item.start)] - except: - continue - - elif strand == "-": - try: - K = negwig[item.chr][str(item.start)] - except: - continue - if K>=M: - item.updateK(K) - #print item - #logging.debug("K value for item %s is %d" % (item, K)) - pair_name = str(K)+"_"+str(M) - if km_pair.has_key(pair_name): - km_pair[pair_name] += 1 - else: - km_pair[pair_name] = 1 - #km.append(item) - gc.collect() - return km_pair - -def KMvalue_test(clip,mutations,chr,chrlen): - ''' - Calculate K(coverage) value for each mutation location - Mutations are already unique. - ''' - km = []#store mutations with updated k value - count = 0 - #logging.debug("make wig %s" % str(datetime.datetime.now())) - posBAM = pysam.Samfile(clip.posfilteredBAM,"rb") - negBAM = pysam.Samfile(clip.negfilteredBAM,"rb") - start_time = datetime.datetime.now() - poswig = Utils.makeWigByChr(posBAM,chr) - negwig = Utils.makeWigByChr(negBAM,chr) - stop_time = datetime.datetime.now() - #logging.debug("Finished making wig for %s using %s" % (chr,str(stop_time-start_time))) - start_time = stop_time - for item in mutations: - count += 1 - if count % 100000 == 0: - stop_time = datetime.datetime.now() - logging.debug("Counting K-M for %d mutation sites, using %s" % (count,str(stop_time-start_time))) - start_time = stop_time - st = [] - M = item.score - K = 0 - strand = item.strand - if strand == "+": - try: - K = poswig[item.start] - except: - #log.warning("Did not find mutation in poswig") - continue - elif strand == "-": - try: - K = negwig[item.start] - except: - continue - if K>=M: - item.updateK(K) - pair_name = str(K)+"_"+str(M) - if clip.kmpair.has_key(pair_name): - clip.kmpair[pair_name] += 1 - else: - clip.kmpair[pair_name] = 1 - posBAM.close() - negBAM.close() - -def uniq(b): #b is a list - uniqElements = [] - for i in b: - if uniqElements.count(i)==0: - uniqElements.append(i) - uniqElements.sort() - return uniqElements - -def mutationEnrich(clip,threshold=0.01): - coverage = clip.coverage *1.0 - totalMuCount = clip.mutationCount - mutations = [] - total_test = 0 - for chr,chrlen in clip.refInfo: - #logging.debug(chr) - try: - mufile = open(clip.outprefix+"."+chr+".mutations.bed") - except: - logging.info("Cannot open mutation file %s , move on." 
% (clip.outprefix+"."+chr+".mutations.bed")) - continue - for record in mufile: - total_test += 1 - info = record.rstrip().split("\t") - new_mu = Alignment.MutationBed(info[0],int(info[1]),int(info[2]),info[3],int(info[4]),info[5],info[6]) - mutations.append(new_mu) - try: - pass - #os.remove(clip.outprefix+"."+chr+".mutations.bed") - except: - pass - logging.debug(len(mutations)) - KMvalue_test(clip,mutations,chr,chrlen)#check after doing KM, if clip.mutations changed - try: - os.remove(clip.posfilteredBAM) - os.remove(clip.negfilteredBAM) - os.remove(clip.posfilteredBAM+".bai") - os.remove(clip.negfilteredBAM+".bai") - except: - pass - del clip.posfilteredBAM - del clip.negfilteredBAM - gc.collect()#logging.info("Finished K-M counting, starting fitting.") - - R = robject.r - reliableList = [] - P = totalMuCount/coverage - km_p = {}#store km and corresponding p value - pvalues = [] - for k in clip.kmpair:#KM_test: - parameters = k.split("_") - p = R.pbinom(int(parameters[1])-1,int(parameters[0]),P,False)[0] - pvalues.append(p) - km_p[k]=p - pCount = dict(Counter(pvalues)) - pRank = freqRank(pCount,True) - pqDic={} - for i in pRank.keys(): - try: - p_rank = pRank[i] - q_value = BH(i,p_rank,total_test) - pqDic[i]=q_value - except: - print >> sys.stderr,"Cannot find p value in dictionary" - continue - count = 0 - for mu in mutations: - name = str(mu.kvalue)+"_"+str(mu.score) - try: - mu.pvalue = km_p[name] - except: - #logging.debug("Problem with %s" % mu) - continue - mu.qvalue = pqDic[mu.pvalue] - if mu.qvalue <= threshold: - count += 1 - new_mutationName = "Mutation_"+str(count) - mu.name = new_mutationName - mu.sig = True - clip.sigMutationCount+=1 - clip.addSigToDic(clip.sigMutations,mu) - - mutations = None - gc.collect() - -def mutationEnrichWCtrl(clip,ctrlclip,threshold=0.01): - coverage = ctrlclip.coverage *1.0 - totalMuCount = ctrlclip.mutationCount - mutations = [] - total_test = 0 - for chr,chrlen in clip.refInfo: - try: - mufile = open(clip.outprefix+"."+chr+".mutations.bed") - except: - logging.info("Cannot open mutation file %s , move on." 
% (clip.outprefix+"."+chr+".mutations.bed")) - continue - for record in mufile: - total_test += 1 - info = record.rstrip().split("\t") - new_mu = Alignment.MutationBed(info[0],int(info[1]),int(info[2]),info[3],int(info[4]),info[5],info[6]) - mutations.append(new_mu) - try: - os.remove(clip.outprefix+"."+chr+".mutations.bed") - os.remove(controlclip.outprefix+"."+chr+".mutations.bed") - except: - pass - KM_test = KMvalue_test(clip,mutations,chr,chrlen)#check after doing KM, if clip.mutations changed - try: - os.remove(clip.posfilteredBAM) - os.remove(clip.negfilteredBAM) - os.remove(clip.posfilteredBAM+".bai") - os.remove(clip.negfilteredBAM+".bai") - except: - pass - del clip.posfilteredBAM - del clip.negfilteredBAM - gc.collect()#logging.info("Finished K-M counting, starting fitting.") - - R = robject.r - reliableList = [] - P = totalMuCount/coverage - km_p = {}#store km and corresponding p value - pvalues = [] - for k in clip.kmpair:#KM_test: - parameters = k.split("_") - p = R.pbinom(int(parameters[1])-1,int(parameters[0]),P,False)[0] - pvalues.append(p) - km_p[k]=p - pCount = dict(Counter(pvalues)) - pRank = freqRank(pCount,True) - pqDic={} - for i in pRank.keys(): - try: - p_rank = pRank[i] - q_value = BH(i,p_rank,total_test) - pqDic[i]=q_value - except: - print >> sys.stderr,"Cannot find p value in dictionary" - continue - count = 0 - for mu in mutations: - name = str(mu.kvalue)+"_"+str(mu.score) - try: - mu.pvalue = km_p[name] - except: - #logging.debug("Problem with %s" % mu) - continue - mu.qvalue = pqDic[mu.pvalue] - if mu.qvalue <= threshold: - count += 1 - new_mutationName = "Mutation_"+str(count) - mu.name = new_mutationName - mu.sig = True - clip.sigMutationCount+=1 - clip.addSigToDic(clip.sigMutations,mu) - - mutations = None - gc.collect() - -def clusterEnrich(clip,threshold=0.01): - cluster_filename = clip.outprefix+".clusters.bed"#clip.filepath.split("/")[-1].split(".")[0] - #Call R code and get result - epsilon = [0.01,0.15,0.1] - step = [0.1,0.08,0.05] - for index in range(len(epsilon)): - e = epsilon[index] - s = step[index] - r_args = ['Rscript','lib/ZTNB_tryCatch.R',cluster_filename,str(threshold),str(e),str(s)] - gc.collect() - p = subprocess.Popen(r_args) - stdout_value = p.communicate()[0] - try: - r_output_log = open(cluster_filename+".pipeclip.ztnblog","r") - #logging.debug("Log file opened") - flag = r_output_log.read(1) - if flag == "Y":#converged - break - elif flag=="N": - continue - except: - logging.info("No log file was produced by R code, continue regression using other parameters anyway.") - continue - - #check ztnb file - try: - enrich_parameter = open(cluster_filename+".pipeclip.ztnb","r") - except IOError,message: - logging.error("Cannot open ztnb result file") - return False - nbDic = {} - for item in enrich_parameter: - buf = item.rstrip().split("\t") - if buf[0]!="#": - nb_key = "_".join(buf[0:2]) #reads_length as key - #logging.debug("NB_key %s" % nb_key) - if not nbDic.has_key(nb_key): - nbDic[nb_key]=(buf[2],buf[3])#pvalue and qvalue - #logging.info("There are %d read-length pairs" % (len(nbDic.keys()))) - if len(nbDic.keys())==0: - logging.error("There are no read-length pairs found by ZTNB. 
Exit.") - return False - else: - for i in range(len(clip.clusters)): - r_key = str(clip.clusters[i].score)+"_"+str(clip.clusters[i].stop-clip.clusters[i].start) - #logging.debug("Keys from clip.clusters,%s" % r_key) - if nbDic.has_key(r_key): - clip.clusters[i].pvalue = nbDic[r_key][0] - clip.clusters[i].qvalue = nbDic[r_key][1] - clip.clusters[i].sig = True - clip.sigClusterCount += 1 - nbDic = None - try: - os.remove(cluster_filename) - except: - pass - if clip.sigClusterCount == 0: - return False - else: - return True - -def clusterEnrich_outsource(clip, threshold=0.01): - cluster_filename = clip.outprefix+".clusters.bed"#clip.filepath.split("/")[-1].split(".")[0] - #Call R code and get result - #epsilon = [0.01,0.15,0.1] - #step = [0.1,0.08,0.05] - sh_args = ['sh','lib/runR1.sh',cluster_filename,str(threshold)] - p = subprocess.Popen(sh_args) - stdout_value = p.communicate()[0] - - #check ztnb file - try: - enrich_parameter = open(cluster_filename+".pipeclip.ztnb","r") - except IOError,message: - logging.error("Cannot open ztnb result file") - return False - nbDic = {} - for item in enrich_parameter: - buf = item.rstrip().split("\t") - if buf[0]!="#": - nb_key = "_".join(buf[0:2]) #reads_length as key - #logging.debug("NB_key %s" % nb_key) - if not nbDic.has_key(nb_key): - nbDic[nb_key]=(buf[2],buf[3])#pvalue and qvalue - #logging.info("There are %d read-length pairs" % (len(nbDic.keys()))) - if len(nbDic.keys())==0: - logging.error("There are no read-length pairs found by ZTNB. Exit.") - return False - else: - for i in range(len(clip.clusters)): - r_key = str(clip.clusters[i].score)+"_"+str(clip.clusters[i].stop-clip.clusters[i].start) - #logging.debug("Keys from clip.clusters,%s" % r_key) - if nbDic.has_key(r_key): - clip.clusters[i].pvalue = nbDic[r_key][0] - clip.clusters[i].qvalue = nbDic[r_key][1] - clip.clusters[i].sig = True - clip.sigClusterCount += 1 - nbDic = None - try: - os.remove(cluster_filename) - except: - pass - if clip.sigClusterCount == 0: - return False - else: - return True - - -def fisherTest(clusterp,mutationp): - R = robject.r - min_mp = min(mutationp) - product = clusterp * min_mp - if product == 0: - fps = 0 - else: - xsq = -2*math.log(clusterp * min_mp) - fp = R.pchisq(xsq,**{'df':4,'lower.tail':False,'log.p':True})[0] - fps = math.exp(fp) - return fps - - diff --git a/lib/Mutation2.py b/lib/Mutation2.py deleted file mode 100644 index 04bc4a6..0000000 --- a/lib/Mutation2.py +++ /dev/null @@ -1,295 +0,0 @@ -#!/usr/bin/python -# programmer : bbc -# usage: -# output in BED format, the score is the offset of the mutation from 5' end - -import sys -import re -import random -import string -import copy -import pysam -from pysam import * -import argparse as ap -from Alignment import MutationBed -import logging -import OptValidator - -OptValidator.opt_validate() - - -def countMatchNumber(b): - myList = b - m = 0 - for i in myList: - if i[0]==0: - m += i[1] - return (m) - -def countInsertionNumber(b): - myList = b - m = 0 - for i in myList: - if i[0]==1: - m += i[1] - return (m) - -def countDeletionNumber(b): - myList = b - m = 0 - for i in myList: - if i[0]==2: - m += i[1] - return (m) - -def countMismatch(b): - myList = b - for i in myList: - if i[0]=="NM": - return (i[1]) - return 0 - -def survey(entry): - #logging.debug(entry.cigar) - mismatchNumber = countMismatch(entry.tags) - insertionNumber = countInsertionNumber(entry.cigar) - deletionNumber = countDeletionNumber(entry.cigar) - substitutionNumber = max(0,mismatchNumber-deletionNumber) - #print 
insertionNumber,deletionNumber - return ([insertionNumber,deletionNumber,substitutionNumber]) - - -def SBeforeFirstM(ci): - for index in range(len(ci)): - if ci[index][0] == 0: - if index == 0: - return 0 - else: - s = 0 - for i in range(0,index): - if ci[i][0] == 4: - s += ci[i][1] - return s -def parseCIGAR(ci): #calculate match segment length list - matchSeg = [] - preIndex = 0 - for index in range(len(ci)): - if ci[index][0] == 0: #match tag - preIndex = index - matchSeg.append(ci[index][1]) - if index > 0: #M is not the first tag - s = 0 - for i in range(preIndex-1,index): #check all the tags before M - if ci[i][0]==4: - s += ci[i][1] - matchSeg[-1] += s - #seach for the insertion behind (before the next match) - for insertionIndex in range(index+1,len(ci)): - inse = 0 - if ci[insertionIndex][0]==0: - break - elif ci[insertionIndex][0]==1:#insertion exists - inse += ci[insertionIndex][1] - matchSeg[-1]+=inse - return matchSeg - -def parseMD(b): - buf = [] - myList = b - for j in myList: - if j[0]=="MD": - md = copy.deepcopy(j[1]) - num = md.replace('A','^').replace('G','^').replace('T','^').replace('C','^').replace('^^','^').split('^') - for c in range(num.count("")): - num.remove("") - buf = [num[0]] - - counter = 1 - afterAlpha = 0 - for i in j[1]:#walk thought MD string to record mutation and deletion - if i.isalpha() or i == '^': - buf.append(i) - afterAlpha = 1 - else: - if afterAlpha and counter <= len(num)-1: - buf.append(num[counter]) - afterAlpha = 0 - counter += 1 - break - return buf - - -def insertionLocation(entry,num):#sam entry and insertion total number - insertionLoc = {} #key:ref_offset; value:list of seq_offset - ref_offset = 0 - seq_offset = [0] - preInsertion = 0 #used to check if it is the last insertion tag - for i in entry.cigar: - if i[0]==0 or i[0]==4:#match or soft clip - ref_offset += i[1] - seq_offset[-1] += i[1] - elif i[0]== 2: #deletion counts on ref but not on read seq - ref_offset+=i[1] - elif i[0]==1:#record this insertion tag and prepare for the next one - preInsertion += i[1] - for ins in range(1,i[1]): - newloc = seq_offset[-1]+1 - seq_offset.append(newloc) - st = [] - for item in seq_offset: - st.append(item) - insertionLoc[ref_offset]=st - if preInsertion == num: #this is the last insertion tag - return insertionLoc - else:#insertion only count on read seq - seq_index = seq_offset[0]+i[1] - -def countInsertionBefore(seqLoc,insertLocList): - if len(insertLocList)==0: - return 0 - else: - ins = 0 - for i in insertLocList: - if i>seqLoc: - return ins - else: - ins += 1 - return ins - -def mutationLocation(entry,insertLoc):#return mutation location in - mutations = [] - match = entry - myList = match.tags - S_count = SBeforeFirstM(match.cigar)#hard clip will cut the seq, doesn't count - mis = countMismatch(myList) - mdlist = parseMD(myList) - mdtag = ''.join(mdlist) - counter = 0 - if not mdtag.isdigit(): #insertions may exist - st_seq = 0 - st_genome = 0 - offset = 0 - pre = ':' - set_insertion = 0 #recored insertion numbers already counted - for ch in mdlist:#i[1] is the MD tag - if ch.isdigit(): - st_seq += int(ch) - st_genome += int(ch) - pre = ch - elif ch == "^": - st_genome += 1 - pre = ch - elif ch.isalpha(): - if (not pre == '^') and (pre.isdigit()): - origin = ch - index = st_seq+S_count+offset - insertionBefore = countInsertionBefore(index,insertLoc) - loc = st_genome+match.pos+offset#-insertionBefore #0-based - index += insertionBefore-set_insertion # add on 9 Oct,modify 9,9 2014 - set_insertion = insertionBefore - - #print 
index,len(match.seq) - mu = match.seq[index] - if mu.upper() != "N": - offset = index-S_count+1 - st_seq = 0 - st_genome = 0 - if match.is_reverse: - chr = '-' - t = RC([origin,mu]) - origin = t[0] - mu = t[1] - else: - chr = '+' - mutation = [str(loc),str(loc+1),match.qname,1,chr,origin+"->"+mu]#change offset into mutation count, which is 1 - yield mutation - else:#this is a deletion - if pre in ["A","G","T","C"]: - st_genome += 1 - loc = st_genome+match.pos+offset-1 #0-based - if match.is_reverse: - strand = '-' - ch = RC([ch])[0] - else: - strand = '+' - index1 = loc - match.pos - insertionBefore = countInsertionBefore(index1,insertLoc) - index1 += insertionBefore-set_insertion #added 9 Oct - set_insertion = insertionBefore - mutation = [str(loc),str(loc+1),match.qname,1,strand,"Deletion->"+ch] #change str(index1) to 1 - yield mutation - pre = ch - - return - -def RC(strList): - rc = [] - for item in strList: - st = "" - for chIndex in range(len(item)): - rcIndex = len(item)-1 - if item[rcIndex].upper()== "A": - st += 'T' - elif item[rcIndex].upper()=="C": - st += 'G' - elif item[rcIndex].upper()=="T": - st += 'A' - elif item[rcIndex].upper()=="G": - st += 'C' - else: - st += 'N' - rc.append(st) - return(rc) - -def getMutations(infile,read): - #tmp = pysam.Samfile("demo.bam",'wb',template=infile) - #logging.debug("call getMutation function %s" % read) - mutationList = [] - b=read.tags - - sur = survey(read) - insertion = sur[0] - deletion = sur[1] - substi = sur[2] - #logging.debug("insertio:,%d, deletion:%d,sub:%d" % (insertion,deletion,substi)) - insertionSeqLoc = [] - if insertion > 0: - insertionDic = insertionLocation(read,insertion) - for k in insertionDic.keys(): - for loc_index in range(len(insertionDic[k])): - insertionSeqLoc.append(insertionDic[k][loc_index]) - mu = read.seq[insertionDic[k][loc_index]] - loc = k+loc_index+read.pos - if read.tid >=0: - chr = infile.getrname(read.tid) - if read.is_reverse: - strand = '-' - mu = RC([mu])[0] - else: - strand = "+" - mutationList.append(MutationBed(chr,loc,loc+1,read.qname,1,strand,"Insertion->"+mu))#change insertionDic[k][loc_index] to 1 - insertionSeqLoc.sort() - if deletion + substi > 0: - for mu in mutationLocation(read,insertionSeqLoc): - if read.tid>=0: - chr = infile.getrname(read.tid) - newMu = MutationBed(chr,mu[0],mu[1],mu[2],mu[3],mu[4],mu[5]) - mutationList.append(newMu) - #logging.debug(mutationList) - return mutationList - - -def getTruncations(infile,read): - '''One read only have one truncation, but in order to be consistent with muations, return a list''' - if read.is_reverse: - strand = "-" - start = read.pos + read.alen - else: - strand = "+" - start = read.pos - stop = start + 1 - if read.tid>=0: - chr = infile.getrname(read.tid) - newTr = MutationBed(chr,start,stop,read.qname,1,strand,"truncation") - return [newTr] - diff --git a/lib/OptValidator.py b/lib/OptValidator.py deleted file mode 100644 index 7ea9234..0000000 --- a/lib/OptValidator.py +++ /dev/null @@ -1,7 +0,0 @@ -import sys -import logging - -def opt_validate (): - #opt validation to be added - #set logging - logging.basicConfig(format='%(levelname)s:%(message)s',level=logging.DEBUG) diff --git a/lib/Utils.py b/lib/Utils.py deleted file mode 100644 index fc3887d..0000000 --- a/lib/Utils.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/python -# programmer: beibei.chen@utsouthwestern.edu -# Usage: definition of utilities to handel pysam classes - -import sys -import Alignment -import subprocess -import logging - -def is_sorted(header): - '''Get a BAM 
header, and check if this file is sorted or not - Return: True: sorted, False: unsorted - Header variable is a list - ''' - for row in header: - buf = row.rstrip().split("\t") - if buf[0]=="@HD": - if buf[2] in ["SO:unsorted","SO:unknown"] : - return False - elif buf[2] in ["SO:coordinate","SO:queryname"]: - return True - #If no HD header contains SO info, return False - return False - -def readQuaFilter(read,mlen,mis): - matchlen = 0 - mismatch = 0 - #logging.debug("read %s,cigar %s,tags %s" % (read.qname,read.cigar,read.tags)) - try: - for i in read.cigar: - #logging.debug(i) - if i[0]==0: - matchlen += i[1] - elif i[0]==1: - mismatch += i[1] - for j in read.tags: - if j[0]=="NM": - mismatch += j[1] - break - #logging.debug("matchlen %d,mismatch %d" % (matchlen,mismatch)) - except: - #logging.debug("Problematic read %s" % (read)) - pass - if matchlen>=mlen and mismatch<=mis: - return (True,matchlen,mismatch) - else: - return (False,0,0) - -def rmdupKey_Start(read): - #print >>sys.stderr,"Remove duplicate by alignment start" - k = str(read.tid)+":"+str(read.pos)+":"+str(read.is_reverse) - #print >>sys.stderr,k - return k - - -def rmdupKey_Seq(read): - k = str(read.tid)+":"+str(read.pos)+":"+str(read.is_reverse)+":"+read.seq - return k - -def filterMutations(mlist,feature,keep=True): - newList = [] - for i in mlist: - if i.type==feature: - if keep: - newList.append(i) - else: - if not keep: - newList.append(i) - return newList - -def annotation(fileaddr,species): - '''Call Homer annotation script''' - outfile = open(fileaddr+".anno","w") - args = ['annotatePeaks.pl',fileaddr,species] - p = subprocess.Popen(args,stdout=outfile) - stdout_value = p.communicate()[0] - outfile.close() - -def makeWig(bamfile): - wig = {} - for r in bamfile.references: - wig[r] = {} - for pileupColumn in bamfile.pileup(r): - wig[r][str(pileupColumn.pos)]=pileupColumn.n - return wig - - -def makeWigByChr(bamfile,chr): - wig = {} - for pileupColumn in bamfile.pileup(chr): - wig[pileupColumn.pos]=pileupColumn.n - return wig - - -def makeWigListByChr(bamfile,chr): - wig = Alignment.wiglist() - for pileupColumn in bamfile.pileup(chr): - wig.update(pileupColumn.pos,pileupColumn.n) - return wig - - -def makeWigListByChr_array(bamfile,chr,chrlen): - wig = [] - for i in range(chrlen): - wig.append(0) - for pileupColumn in bamfile.pileup(chr): - wig[pileupColumn.pos]=pileupColumn.n - return wig - -def bisort(alist,b): - '''List is a list of bed instances, b is also an instance''' - if isinstance(alist[0],Alignment.BED) and isinstance(b,Alignment.BED): - pass diff --git a/lib/ZTNB_tryCatch.R b/lib/ZTNB_tryCatch.R deleted file mode 100755 index 710b81a..0000000 --- a/lib/ZTNB_tryCatch.R +++ /dev/null @@ -1,173 +0,0 @@ -#Require at least R3.0 and package VGAM and dependencies -#Programmer: jonghyun.yun@utsouthwestern.edu and beibei.chen@utsouthwestern.edu -#Usage: Get all the reads count and length combinations of enriched clusters -#Input: BED file with score column as reads number -#Output: table with reads count, length, p value and fdr -#Last modified: 20 Sep 2014 - -#command line parameters: -#1:input file name. 
After running the code, two output files: input.ztnb and input.ztnblog will be created -#2:FDR cutoff -#3:Regression epsilon -#4:Regression step - -suppressMessages(require(MASS)); -suppressMessages(require(VGAM)); - -#require(MASS) -#require(VGAM) -args = commandArgs(TRUE); -data = read.table(args[1], sep = "\t"); -len = data[,3] - data[,2]; -read = data[,5]; -cut = as.numeric(as.character(args[2])); -vglm_epsilon = as.numeric(as.character(args[3])); -vglm_step = as.numeric(as.character(args[4])); -# tabularize the data -wts = table(read,len); -temp.coln = as.integer(names(wts[1,])); -temp.rown = as.integer(names(wts[,1])); -drow = length(temp.rown); -dcol = length(temp.coln); - -rown = matrix(rep(temp.rown,dcol),drow,dcol); -coln = t(matrix(rep(temp.coln,drow),dcol,drow)); - -wdx = which(wts>0); -tread = rown[wdx]; -tlen = coln[wdx]; -twts = wts[wdx]; -#print("local polynomial regression") -# local polynomial regression: read ~ len -# smoothing parameter: -alp = 0.95 -# polynomial degree: -pd = 1 -lregfit = loess(tread ~ tlen, weights = twts, span = alp, family ="symmetric", degree = pd); - -mu = lregfit$fitted; -mu[mu<=0] = min(exp(-4),mu[mu>0]); # bounded mean function -logmu = log(mu); - - -# compute p-values using N(0,1) for (tread[-cdx] - mu[-cdx]) / sqrt(mu[-cdx]) -nb.pvalue=numeric(length(tread)); -zscore = (tread-mu)/sqrt(mu); -cdx = which(zscore < 4); -nb.pvalue[-cdx] = pnorm(zscore[-cdx],lower.tail=FALSE); - -tread_fit = tread[cdx]; -tlen_fit = tlen[cdx]; -twts_fit = twts[cdx]; - -lregfit = loess(tread_fit ~ tlen_fit, weights = twts_fit, span = alp, family ="symmetric", degree = pd); -mu = lregfit$fitted; -mu[mu<=0] = min(exp(-4),mu[mu>0]); # bounded mean function -logmu = log(mu); - -# negative binomail regression with the known predictor log(mu) - -intercept1 = seq(-1,1,0.5); -intercept2 = seq(-1,1,0.5); -biggest_likelihood = -99999999; -khat = 0; -muhat = 0; -converge_flag = FALSE; -errmsg = paste("Error/warning messages for run using Epsilon:",vglm_epsilon,"and step size:",vglm_step,"."); -options(warn=1); - - -#use default setting first to see if it could converge -nb<-tryCatch({vglm(tread_fit ~ 1, posnegbinomial(), weight = twts_fit, maxit = 200, trace = FALSE, step = vglm_step,offset = logmu,epsilon=vglm_epsilon,silent=FALSE)},warning = function(w){ - #print(paste("typeof warning",typeof(w["message"]))) - warnmsg = strsplit(toString(w["message"])," iteration")[[1]][1]; - if(grepl("convergence not obtained",warnmsg)) - { - newmsg = paste("Convergence not obtained in 200 iterarions"); - return(newmsg); - } - }, error = function(e){ - newmsg = paste("Program failed to converge."); - return(newmsg) - }, finally = { - })# end of try-catch - if (length(nb)>0) - { - if(typeof(nb)=="S4") - { - #print("Converged, get regression class") - if(logLik(nb)>=biggest_likelihood & Coef(nb)["size"]<=100) - { - biggest_likelihood = logLik(nb); - khat <- Coef(nb)["size"]; - muhat <- Coef(nb)["munb"]; - } - - } else if (typeof(nb)=="character") - { - errmsg = paste(errmsg,nb,sep="\n") - } - } - - -if((biggest_likelihood==-99999999) && (khat==0) && (muhat ==0)) -{#try different coef start -for (i in intercept1) -{ - for (j in intercept2) - { - nb<-tryCatch({vglm(tread_fit ~ 1, posnegbinomial(), weight = twts_fit, maxit = 200, trace = FALSE, step = vglm_step,offset = logmu,epsilon=vglm_epsilon,silent=FALSE,coefstart=c(i,j))},warning = function(w){ - warnmsg = strsplit(toString(w["message"])," iteration")[[1]][1]; - if(grepl("convergence not obtained",warnmsg)) - { - newmsg = paste("Using 
coefstart c(",i,",",j,"), convergence not obtained in 200 iterarions"); - return(newmsg); - } - }, error = function(e){ - newmsg = paste("Using coefstart c(",i,",",j,"), program failed to converge."); - return(newmsg) - }, finally = { - })# end of try-catch - if (length(nb)>0) - { - if(typeof(nb)=="S4") - { - #print("Converged, get regression class") - if(logLik(nb)>=biggest_likelihood & Coef(nb)["size"]<=100) - { - biggest_likelihood = logLik(nb); - khat <- Coef(nb)["size"]; - muhat <- Coef(nb)["munb"]; - } - - } else if (typeof(nb)=="character") - { - errmsg = paste(errmsg,nb,sep="\n") - } - } - }# end of loop intercept2 - }# end of loop intercept1 -}#end of try different coefstart - -if((biggest_likelihood==-99999999) && (khat==0) && (muhat ==0)) -{ - #No model converged,exit - errmsg = paste("N:No model converged for this run.",errmsg,sep="\n"); -} else { - # model parameters - errmsg = paste("Y:Coverged for this run.",errmsg,sep="\n"); - write(errmsg,paste(args[1],".Converge.txt",sep="")) - nbsize = khat * mu; - nbmu = muhat * mu; - nb.pvalue[cdx] = pnbinom(tread_fit - 1, size = nbsize, mu = nbmu, lower.tail = FALSE) / (1-dnbinom(0, size = nbsize, mu = nbmu)) - nb.fdr = p.adjust(nb.pvalue,method='BH') - # output - nbdx = nb.fdr <= cut; - #sum(twts[nbdx]); # the number of clusters whose FDR <= cut - # read counts and cluster length of clusters whose FDR <= cut - nb.out = as.data.frame(matrix(c(tread[nbdx],tlen[nbdx],nb.pvalue[nbdx],nb.fdr[nbdx]),sum(nbdx),4)) - names(nb.out) = c("read","length","p","fdr") - outname = paste(args[1],".pipeclip.ztnb",sep="") - write.table(nb.out,outname,sep="\t",quote=F,row.names=F); -}#end of output -write(errmsg,paste(args[1],".pipeclip.ztnblog",sep="")); diff --git a/lib/__init__.py b/lib/__init__.py deleted file mode 100644 index af5a554..0000000 --- a/lib/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -#indicate this folder contains packages -import Alignment -import Enrich -import CLIP -import Mutation2 -import Utils -import OptValidator diff --git a/lib/barcodeRemoval.py b/lib/barcodeRemoval.py deleted file mode 100755 index 95c1adf..0000000 --- a/lib/barcodeRemoval.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/python -# Programmer : beibei.chen@utsouthwestern.edu -# Usage: remove PCR duplicates by comparing the sequences and return fastq file with barcode removed. 
-# Input: fastq file -# Output: fastq file -# Last modified: 28 June 2014 - Beibei Chen - -import sys -import re -import string - -class fastq: - def __init__(self,x): - self.id = x[0] - self.seq = x[1] - self.name = x[2] - self.quality = x[3] - - def __str__(self): - st = self.id+self.seq+self.name+self.quality - return st - -class fqList: - def __init__(self): - self.data = [] - self.unique = {} - - def readFq(self,fh,bl): - st = [] - buf = fh.readlines() - for i in range(len(buf)): - if buf[i][0]=="@" and buf[i+2][0]=="+": - st.append(buf[i]) - st.append(buf[i+1]) - st.append(buf[i+2]) - st.append(buf[i+3].rstrip()) - self.addFq(st) - if not self.unique.has_key(buf[i+1]): - self.addUniqFq(st,bl) - st = [] - - def addFq(self,s): - newFq = fastq(s) - self.data.append(newFq) - - def addUniqFq(self,s,b): - seq_key = s[1] - offset = b - newFq = fastq([s[0],s[1][offset:],s[2],s[3][offset:]]) - self.unique[seq_key] = newFq - -class barcodeRemover: - def __init__(self,infile,barLen): - self.infile = infile - self.barLen = barLen - - def run(self): - myfq = fqList() - myfq.readFq(self.infile,self.barLen) - for item in myfq.unique.values(): - if len(item.seq)>=15: - print item - -def barCodeRemovalMain(infilePath,barLenStr): - try: - infile = open(infilePath,"r+") - except IOError,message: - print >> sys.stderr, "cannot open file",message - sys.exit(1) - try: - barLen = int(barLenStr) - except: - barLen = 5 - barcodeRemover = barcodeRemover(infile,barLen) - barcodeRemover.run() - -def barCodeRemovalMain(): - try: - infile = open(sys.argv[1],"r+") - except IOError,message: - print >> sys.stderr, "cannot open file",message - sys.exit(1) - try: - barLen = int(sys.argv[2]) - except: - barLen = 5 - myBarcodeRemover = barcodeRemover(infile,barLen) - myBarcodeRemover.run() - -if __name__=="__main__": - barCodeRemovalMain() diff --git a/lib/pipeclip.py b/lib/pipeclip.py deleted file mode 100755 index 3b3c4b9..0000000 --- a/lib/pipeclip.py +++ /dev/null @@ -1,110 +0,0 @@ -#Main pipeline connects all the scripts together -#Original Programmer: beibei.chen@utsouthwestern.edu -#Refactored by: eric.roos@utsouthwestern.edu -#Usage: python pipeclip.py input.sam output_prefix match_length mismatch_number pcr_rm fdr_cluster clip_type fdr_mutation species -#Required packages: pysam, ghmm, pybedtools -#Last modification: 18 Sep 2014 - -#from lib import * - -import sys -import argparse -import logging -from subprocess import call -import os -import gc -import CLIP -import Alignment -import Utils -import Enrich -import OptValidator - -gc.enable() - -def prepare_argparser(): - description = "Find mutations" - epilog = "For command line options of each command, type %(prog)s COMMAND -h" - argparser = argparse.ArgumentParser(description=description, epilog = epilog) - argparser.add_argument("-i","--input",dest = "infile", type = str, required = True, help = "input bam file") - argparser.add_argument("-o","--output",dest = "outfile", type = str,required = True, help = "output file, default is stdout") - argparser.add_argument("-l","--matchLength",dest = "matchLength", type = int ,required = True, help = "shorted matched segment length") - argparser.add_argument("-m","--mismatch",dest = "mismatch", type = int,required = True, help = "maximum mismatch number") - argparser.add_argument("-c","--clipType",dest = "clipType", type = int,required = True, help = "CLIP type (0)HITS-CLIP; (1)PAR-4SU; (2)PAR-6SG; (3)iCLIP", choices=[0,1,2,3]) - argparser.add_argument("-r","--rmdup",dest = "dupRemove", type = int,required = True, 
help = "Remove PCR duplicate (0)No removal; (1)Remove by read start; (2)Remove by sequence; ", choices=[0,1,2]) - argparser.add_argument("-M","--fdrMutation",dest = "fdrMutation", type = float,required = True, help = "FDR for reliable mutations") - argparser.add_argument("-C","--fdrCluster",dest = "fdrCluster", type = float,required = True, help = "FDR for enriched clusters") - argparser.add_argument("-s","--species",dest = "species", type = str,required = True, help = "Species [\"mm10\",\"hg19\"]",choices=["mm10","hg19"]) - return(argparser) - -def runPipeClip(infile,outputPrefix,matchLength,mismatch,rmdup,fdrEnrichedCluster,clipType,fdrReliableMutation,species): - myClip = CLIP.CLIP(infile,outputPrefix) - logging.info("Start to run") - if myClip.testInput():#check input - logging.info("Input file OK,start to run PIPE-CLIP") - logging.info("Species info %s" % species) - if myClip.readfile(): - myClip.filter(matchLength,mismatch,clipType,rmdup,outputPrefix) - myClip.printMutations() - if len(myClip.clusters)>0: - logging.info("Get enriched clusters") - gc.collect() - status = Enrich.clusterEnrich(myClip,fdrEnrichedCluster) - if status: - logging.info("Found %d enriched clusters" % myClip.sigClusterCount) - myClip.printEnrichedClusters() - else: - logging.error("There is no enriched cluster found. Exit program") - sys.exit(1) - else: - logging.error("There is no clusters found. Please check input.Exit program.") - sys.exit(1) - - if len(myClip.mutations.keys())>0: - logging.info("Get reliable mutations") - Enrich.mutationEnrich(myClip,fdrReliableMutation) - myClip.printReliableMutations() - else: - logging.warning("There is no mutation found in this BAM file.") - #Start to get crosslinking sites - if myClip.sigClusterCount > 0 and myClip.sigMutationCount>0: - logging.info("Get cross-linking sites") - myClip.getCrosslinking() - if (len(myClip.crosslinking.keys())>0): - outfilelist = myClip.printCrosslinkingSites() - myClip.printCrosslinkingMutations() - else: - logging.warning("There is no crosslinking found. May be caused by no reliable mutations in enriched clusters. Print out enriched clusters instead.") - outfilelist = myClip.printEnrichClusters() - else: - if myClip.sigClusterCount <= 0: - logging.error("There is no enriched clusters for this sample, please check your input file. Exit.") - sys.exit(2) - elif myClip.sigMutationCount <=0: - logging.warning("There is no reliable mutations found. PIPE-CLIP will provide enriched clusters as crosslinking candidates.") - outfilelist = myClip.printEnrichClusters() - #annotation if possible - #logging.debug(outfilelist) - if species in ["mm10","mm9","hg19"]: - for name in outfilelist: - #logging.debug("Start to do annotation for %s" % name) - Utils.annotation(name,species) - else: - print >> sys.stderr, "File corruputed, program exit." 
- sys.exit(0) - - - -if __name__=="__main__": - arg_parser = prepare_argparser() - args = arg_parser.parse_args() - OptValidator.opt_validate() - infile = args.infile # Input SAM/BAM file - outputPrefix = args.outfile # Output prefix - matchLength = args.matchLength # Shorted matched segment length - mismatch = args.mismatch # Maximum mismatch number - rmcode = args.dupRemove - fdrEnrichedCluster = args.fdrCluster # FDR for enriched clusters - clipType =args.clipType # CLIP type (0)HITS-CLIP; (1)PAR-4SU; (2)PAR-6SG; (3)iCLIP - fdrReliableMutation = args.fdrMutation# FDR for reliable mutations - species = args.species # Species ["mm10","hg19"] - runPipeClip(infile,outputPrefix,matchLength,mismatch,rmcode,fdrEnrichedCluster,clipType,fdrReliableMutation,species) diff --git a/pipeclip.py b/pipeclip.py deleted file mode 100755 index e61893e..0000000 --- a/pipeclip.py +++ /dev/null @@ -1,130 +0,0 @@ -#Main pipeline connects all the scripts together -#Original Programmer: beibei.chen@utsouthwestern.edu -#Refactored by: eric.roos@utsouthwestern.edu -#Usage: python pipeclip.py input.sam output_prefix match_length mismatch_number pcr_rm fdr_cluster clip_type fdr_mutation species -#Required packages: pysam, ghmm, pybedtools - - -import sys -import argparse -import logging -import os -from time import gmtime, strftime -from lib import * - -def prepare_argparser(): - description = "Find mutations" - epilog = "For command line options of each command, type %(prog)s COMMAND -h" - argparser = argparse.ArgumentParser(description=description, epilog = epilog) - argparser.add_argument("-i","--input",dest = "infile", type = str, required = True, help = "input bam file") - argparser.add_argument("-t","--control",dest="ctrlfile",type=str,required=False, help = "control bam file") - argparser.add_argument("-o","--output",dest = "outfile", type = str,required = True, help = "output file, default is stdout") - argparser.add_argument("-l","--matchLength",dest = "matchLength", type = int ,required = True, help = "shorted matched segment length") - argparser.add_argument("-m","--mismatch",dest = "mismatch", type = int,required = True, help = "maximum mismatch number") - argparser.add_argument("-c","--clipType",dest = "clipType", type = int,required = True, help = "CLIP type (0)HITS-CLIP; (1)PAR-4SU; (2)PAR-6SG; (3)iCLIP", choices=[0,1,2,3]) - argparser.add_argument("-r","--rmdup",dest = "dupRemove", type = int,required = True, help = "Remove PCR duplicate (0)No removal; (1)Remove by read start; (2)Remove by sequence; ", choices=[0,1,2]) - argparser.add_argument("-M","--fdrMutation",dest = "fdrMutation", type = float,required = True, help = "FDR for reliable mutations") - argparser.add_argument("-C","--fdrCluster",dest = "fdrCluster", type = float,required = True, help = "FDR for enriched clusters") - argparser.add_argument("-s","--species",dest = "species", type = str, help = "Species [\"mm10\",\"hg19\"]",choices=["mm10","hg19"]) - return(argparser) - -def runPipeClip(infile,control,outputPrefix,matchLength,mismatch,rmdup,fdrEnrichedCluster,clipType,fdrReliableMutation,species): - myClip = CLIP.CLIP(infile,outputPrefix) - controlFlag = False - if control != None: - controlClip = CLIP.CLIP(control,outputPrefix+"Control") - logging.info("Start to run") - if myClip.testInput():#check input - logging.info("Input file OK,start to run PIPE-CLIP") - logging.info("Species info %s" % species) - if control != None: #test control file - if controlClip.testInput(): - logging.info("Control file OK. 
Use control in mutation enrichment.") - controlFlag = True - else: - logging.info("Control file format error. Continue without control.") - if myClip.readfile(): - myClip.filter2(matchLength,mismatch,clipType,rmdup) - if controlFlag: - logging.info("Read in control file") - controlClip.readfile() - controlClip.filter2(matchLength,mismatch,clipType,rmdup) - #myClip.printClusters() - #myClip.printMutations() - if myClip.clusterCount>0: - logging.info("Get enriched clusters") - status = Enrich.clusterEnrich_outsource(myClip,fdrEnrichedCluster) - if status: - logging.info("Found %d enriched clusters" % myClip.sigClusterCount) - myClip.printEnrichedClusters() - else: - logging.error("There is no enriched cluster found. Exit program") - sys.exit(1) - else: - logging.error("There is no clusters found. Please check input.Exit program.") - sys.exit(1) - - if myClip.mutationCount>0: - logging.info("Get reliable mutations") - if controlFlag: #use control - Enrich.mutationEnrichWCtrl(myClip,controlClip,fdrReliableMutation) - else: - Enrich.mutationEnrich(myClip,fdrReliableMutation) - logging.info("There are %d reliable mutations" % myClip.sigMutationCount) - myClip.printReliableMutations() - else: - logging.warning("There is no mutation found in this BAM file.") - #Start to get crosslinking sites - if myClip.sigClusterCount > 0 and myClip.sigMutationCount>0: - logging.info("Get cross-linking sites") - myClip.getCrosslinking() - if (len(myClip.crosslinking.keys())>0): - outfilelist = myClip.printCrosslinkingSites() - myClip.printCrosslinkingMutations() - else: - logging.warning("There is no crosslinking found. May be caused by no reliable mutations in enriched clusters. Print out enriched clusters instead.") - outfilelist = myClip.printEnrichClusters() - else: - if myClip.sigClusterCount <= 0: - logging.error("There is no enriched clusters for this sample, please check your input file. Exit.") - sys.exit(2) - elif myClip.sigMutationCount <=0: - logging.warning("There is no reliable mutations found. PIPE-CLIP will provide enriched clusters as crosslinking candidates.") - outfilelist = myClip.printEnrichClusters() - #annotation if possible - if species in ["mm10","mm9","hg19"]: - logging.info("Started to annotate cross-linking sits using HOMER") - for name in outfilelist: - #logging.debug("Start to do annotation for %s" % name) - Utils.annotation(name,species) - #output a status log file - logfile = open(outputPrefix+".pipeclip.summary.log","w") - print >>logfile,"PIPE-CLIP run finished. Parameters are:" - print >> logfile,"Input BAM: %s \nOutput prefix: %s \nMinimum matched length: %d \nMaximum mismatch count: %d \nPCR duplicate removal code: %d \nFDR for enriched clusters: %f \nFDR for reliable mutations: %f" % (infile,outputPrefix,matchLength,mismatch,rmdup,fdrEnrichedCluster,fdrReliableMutation) - print >> logfile, "There are %d mapped reads in input BAM file. After filtering,%d reads left" % (myClip.originalMapped,myClip.filteredAlignment) - print >> logfile, "%d out of %d clusters are enriched." % (myClip.sigClusterCount,len(myClip.clusters)) - print >> logfile, "%d out of %d mutations are reliable." % (myClip.sigMutationCount,myClip.mutationCount) - print >> logfile, "%d crosslinking site candidates are found, with %d supporting reliable mutations." % (len(myClip.crosslinking.keys()),len(myClip.crosslinkingMutations)) - logfile.close() - logging.info("PIPE-CLIP finished the job, please check your results. 
:)") - else: - logging.error("File corruputed, program exit.") - sys.exit(0) - - - -if __name__=="__main__": - arg_parser = prepare_argparser() - args = arg_parser.parse_args() - OptValidator.opt_validate() - infile = args.infile # Input SAM/BAM file - control = args.ctrlfile - outputPrefix = args.outfile # Output prefix - matchLength = args.matchLength # Shorted matched segment length - mismatch = args.mismatch # Maximum mismatch number - rmcode = args.dupRemove - fdrEnrichedCluster = args.fdrCluster # FDR for enriched clusters - clipType =args.clipType # CLIP type (0)HITS-CLIP; (1)PAR-4SU; (2)PAR-6SG; (3)iCLIP - fdrReliableMutation = args.fdrMutation# FDR for reliable mutations - species = args.species # Species ["mm10","hg19"] - runPipeClip(infile,control,outputPrefix,matchLength,mismatch,rmcode,fdrEnrichedCluster,clipType,fdrReliableMutation,species) diff --git a/pipeclip/Alignment.py b/pipeclip/Alignment.py new file mode 100644 index 0000000..51815e6 --- /dev/null +++ b/pipeclip/Alignment.py @@ -0,0 +1,121 @@ +#!/usr/bin/python +# programmer: beibei.chen@utsouthwestern.edu +# Usage: definition of alignment files including BED and BAM + + +from . import Enrich + + +class BED: + def __init__(self, chr, start, stop, name, score, strand): + self.chr = chr + self.start = int(start) + self.stop = int(stop) + self.name = name + self.score = int(score) + self.strand = strand + + def __str__(self): + st = "\t".join( + [ + str(self.chr), + str(self.start), + str(self.stop), + self.name, + str(self.score), + self.strand, + ] + ) + return st + + def merge(self, read): + self.stop = read.stop + self.score += 1 + + def overlap(self, read): + if self.chr == read.chr and self.strand == read.strand: + if self.start <= read.stop and self.stop >= read.start: + return True + else: + return False + + def updateScore(self, s): + self.score = int(s) + + def increaseScore(self): + self.score += 1 + + +class ClusterBed(BED): + def __init__(self, chr, start, stop, name, score, strand): + self.pvalue = 0 + self.qvalue = 0 + self.sig = False + BED.__init__(self, chr, start, stop, name, score, strand) + + +class MutationBed(BED): + def __init__(self, chr, start, stop, name, score, strand, type): + self.type = type # insertion,deletion,type of substitution + self.kvalue = 0 + BED.__init__(self, chr, start, stop, name, score, strand) + self.pvalue = 0 + self.qvalue = 0 + self.sig = False + + def updateK(self, k): + self.kvalue = k + + def __str__(self): + st = BED.__str__(self) + st += "\t" + self.type + "\t" + str(self.kvalue) + return st + + +class CrosslinkingBed(BED): + def __init__( + self, + chr, + start, + stop, + name, + score, + strand, + clusterP, + clusterQ, + mStart, + mName, + mP, + ): + self.fisherP = 0 + self.clusterP = float(clusterP) + self.mutationP = [mP] + self.qvalue = float(clusterQ) + self.mutationStarts = [str(mStart)] + self.mutationNames = [mName] + BED.__init__(self, chr, start, stop, name, score, strand) + + def addMutation(self, mu): # mu is a instance + self.mutationNames.append(mu.name) + self.mutationStarts.append(str(mu.start)) + self.mutationP.append(mu.pvalue) + + def fishertest(self): + fp = Enrich.fisherTest(self.clusterP, self.mutationP) + self.fisherP = fp + + +class wiglist: + def __init__(self): + self.pos = [] + self.value = [] + + def valueByPos(self, p): + try: + return self.value[self.pos.index(p)] + except: + return False + + def update(self, p, v): + self.pos.append(p) + self.value.append(v) diff --git a/pipeclip/CLIP.py b/pipeclip/CLIP.py new file mode 100644 index 
0000000..ee4ec08 --- /dev/null +++ b/pipeclip/CLIP.py @@ -0,0 +1,678 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Define CLIP class.""" + +import datetime +import gc +import gzip +import random + +import pysam + +from . import Alignment, Utils, mutation + +LOGGER = Utils.get_logger(__name__) + +gc.enable() + + +class CLIP: + def __init__(self, fileaddr, prefix): + self.filepath = fileaddr + self.originalBAM = None + self.originalMapped = 0 + self.refInfo = [] + self.posfilteredBAM = None + self.negfilteredBAM = None + self.filteredAlignment = 0 + self.type = 0 + self.outprefix = prefix + self.currentGroupKey = "None" + self.currentGroup = [] # Used in rmdup + self.previousQul = [0, 0, 0] # for rmdup,[matchlen,mapq,mismatch] + self.clusters = [] + self.clusters_plus = [] + self.clusters_minus = [] + self.clusterCount = 0 + self.currentCluster_plus = Alignment.BED("", 0, 0, "", 0, "+") + self.currentCluster_minus = Alignment.BED("", 0, 0, "", 0, "-") + # self.sigClusters = {} + self.mutations = {} # Dictionary of bed instance + self.mutationCount = 0 + self.mutationList = [] + self.sigMutations = {} # same as sigClusters + self.kmpair = {} + self.sigMutationCount = 0 + self.sigClusterCount = 0 + self.coverage = 0 # "reads coverage of this sample" + self.bamheader = None + self.crosslinking = {} + self.crosslinkingMutations = [] + + def __str__(self): + pass + + def testInput(self): + """Test the input file format, modify self.filepath and return bool + Status: True:file is ready to use; False: wrong file, program stop + """ + # test if file has header + try: + self.header = pysam.view("-H", self.filepath) + except: + try: + self.header = pysam.view("-SH", self.filepath) + except: + LOGGER.error( + "Input file does not have header, please check your file. Program quit" + ) + return (False, "None") + # Header test passed, test if it is BAM + try: + infile = gzip.open(self.filepath) + infile.readline(10) + except: # cannot read line, should be sam + LOGGER.info("Input is SAM, converting to BAM...") + bamout = ".".join(self.filepath.split(".")[0:-1]) + "." + "bam" + infile = pysam.Samfile(self.filepath, "r", header=self.header) + # print >> sys.stderr,pysam.view("-SH",infile) + outfile = pysam.Samfile(bamout, "wb", template=infile) + for i in infile.fetch(): + outfile.write(i) + self.filepath = bamout + # Now the infile is BAM,check if it is sorted + if Utils.is_sorted(self.header): + pysam.index(self.filepath) + return True + else: # sort the BAM + LOGGER.info("Input is not sorted, sorting file...") + bamsort = ( + ".".join(self.filepath.split(".")[0:-1]) + "." 
+ "sort.bam" + ) + pysam.sort("-o", bamsort, self.filepath) + pysam.index(bamsort) + self.filepath = bamsort # change input file path + self.header = pysam.view("-H", bamsort) + LOGGER.info("Input file sorted") + # if Utils.is_sorted(self.header): + # print >> sys.stderr, "The file is sorted" + return True + + def readfile(self): + try: + self.originalBAM = pysam.Samfile(self.filepath, "rb") + self.refInfo = list( + zip(self.originalBAM.references, self.originalBAM.lengths) + ) + return True + except IOError: + LOGGER.error("Cannot open input file") + return False + + # def printFilteredReads(self): + # for i in self.filteredAlignment: + # print i + + def printClusters(self): + outfile = open(self.outprefix + ".clusters.bed", "w") + for i in [self.clusters_plus, self.clusters_minus]: + for j in i: + print(j, file=outfile) + outfile.close() + + def printMutations(self): + outfile = open(self.outprefix + ".mutations.bed", "w") + for i in list(self.mutations.values()): + print(i, file=outfile) + outfile.close() + + def printMutations_chr(self): + fhs = {} # file handle dic, key is chr, value is file handle + for chr, chrlen in self.refInfo: + outfile = open(self.outprefix + "." + chr + ".mutations.bed", "w") + fhs[chr] = outfile + for i in self.mutations.values(): + print(i, file=fhs[i.chr]) + # clean up + for fh in fhs.values(): + fh.close() + del fhs + + def printMutationSingleChr(self, chr): + outfile = open(self.outprefix + "." + chr + ".mutations.bed", "w") + for i in self.mutations.values(): + print(i, file=outfile) + # clean up + outfile.close() + + def printReliableMutations(self): + outfile = open(self.outprefix + ".reliableMutations.pipeclip.bed", "w") + header = "#chr\tstart\tstop\tmutation_name\tM_value\tstrand\ttype\tK_value\tp_value\tfdr" + print(header, file=outfile) + self.printEnrichedItem(self.sigMutations, outfile) + + def printEnrichedClusters(self): + outfile = open(self.outprefix + ".enrichedClusters.pipeclip.bed", "w") + header = ( + "#chr\tstart\tstop\tcluster_name\tread_count\tstrand\tp_value\tfdr" + ) + print(header, file=outfile) + self.printReliableList(self.clusters, outfile) + return [self.outprefix + ".enrichedClusters.pipeclip.bed"] + + def printCrosslinkingMutations(self): + outfile = open( + self.outprefix + ".crosslinkingMutations.pipeclip.bed", "w" + ) + header = "#chr\tstart\tstop\tmutation_name\tM_value\tstrand\ttype\tK_value\tp_value\tfdr" + print(header, file=outfile) + self.printReliableList(self.crosslinkingMutations, outfile) + + def printReliableList(self, mylist, fh): + for i in mylist: + if i.sig: + st = i.__str__() + st += "\t" + str(i.pvalue) + "\t" + str(i.qvalue) + print(st, file=fh) + + def printList(self, mylist, fh): + for i in mylist: + print(i, file=fh) + + def printEnrichedItem(self, dic, fh): + for k in list(dic.keys()): + for i in dic[k]: + st = i.__str__() + st += "\t" + str(i.pvalue) + "\t" + str(i.qvalue) + print(st, file=fh) + + def printCrosslinkingSites(self): + output = self.outprefix + header = "#chr\tstart\tstop\tcluster_name\treads_count\tstrand\tcluster_fdr\tcrosslinking_fisherP\tmutation_pos\tmutation_name" + if self.type == 0: # HITS-CLIP, three output + output_del = open( + output + "_deletion_crosslinking.pipeclip.txt", "w" + ) + output_sub = open( + output + "_substitution_crosslinking.pipeclip.txt", "w" + ) + output_ins = open( + output + "_insertion_crosslinking.pipeclip.txt", "w" + ) + print(header, file=output_del) + print(header, file=output_sub) + print(header, file=output_ins) + else: + output_name = open(output 
+ "_crosslinking.pipeclip.txt", "w") + print(header, file=output_name) + for k in list(self.crosslinking.keys()): + st = self.crosslinking[k].__str__() + st += "\t" + "\t".join( + [ + str(self.crosslinking[k].qvalue), + str(self.crosslinking[k].fisherP), + ] + ) + st += "\t" + ",".join(self.crosslinking[k].mutationStarts) + st += "\t" + ",".join(self.crosslinking[k].mutationNames) + if self.type == 0: + output_key = k.split("_")[-1] + if output_key == "Deletion": + print(st, file=output_del) + elif output_key == "Insertion": + print(st, file=output_ins) + elif output_key == "Substitution": + print(st, file=output_sub) + else: + print(st, file=output_name) + + if self.type == 0: + output_del.close() + output_sub.close() + output_ins.close() + return [ + output + "_insertion_crosslinking.pipeclip.txt", + output + "_deletion_crosslinking.pipeclip.txt", + output + "_substitution_crosslinking.pipeclip.txt", + ] + else: + output_name.close() + return [output + "_crosslinking.pipeclip.txt"] + + def updatePreviousQul(self, n, q, m): + self.previousQul[0] = n + self.previousQul[1] = q + self.previousQul[2] = m + + def updateCurrentGroup(self, read, mlen, mis): + """Compare read to current duplication group parameters, determine to + add to, to drop or to replace, make sure duplication group only has + reads with best quality.""" + if ( + mlen >= self.previousQul[0] + and read.mapq >= self.previousQul[1] + and mis <= self.previousQul[2] + ): # consider to append or replace only when read has no worse quality + if ( + mlen > self.previousQul[0] + or read.mapq > self.previousQul[1] + or mis < self.previousQul[2] + ): # read has better quality,replace + self.currentGroup = [read] + self.updatePreviousQul(mlen, read.mapq, mis) + else: + self.currentGroup.append(read) + + def iniDupGroupInfo(self, read, group_key, mlength, mismatch): + self.currentGroupKey = group_key + self.currentGroup = [read] + self.updatePreviousQul(mlength, read.mapq, mismatch) + + def rmdup(self): + """Return random one read of highest quality from list.""" + # print "get one from group" + if len(self.currentGroup) == 1: + # print self.currentGroup[0] + return self.currentGroup[0] + else: + random.seed(1) + index = random.randint(0, len(self.currentGroup) - 1) + # print self.currentGroup[index] + return self.currentGroup[index] + + def updateCluster(self, read): + """Cluster new read to known clusters and update cluster reads + count.""" + strandDic = {"True": "-", "False": "+"} + clusterName = ( + "cluster" + + "_" + + str(len(self.clusters_plus) + len(self.clusters_minus) + 1) + ) + newRead = Alignment.ClusterBed( + self.originalBAM.getrname(read.tid), + read.pos, + read.pos + len(read.seq), + clusterName, + 1, + strandDic[str(read.is_reverse)], + ) + if read.is_reverse: + if self.currentCluster_minus.chr == "": # Initiate cluster + self.currentCluster_minus = newRead + self.clusters_minus.append(self.currentCluster_minus) + self.clusterCount += 1 + else: + if self.currentCluster_minus.overlap(newRead): + self.currentCluster_minus.merge(newRead) + self.clusters_minus[-1] = self.currentCluster_minus + else: # New read is a new cluster + # self.clusters.append(self.currentCluster) + self.currentCluster_minus = newRead + self.clusters_minus.append(self.currentCluster_minus) + self.clusterCount += 1 + else: + if self.currentCluster_plus.chr == "": # Initiate cluster + self.currentCluster_plus = newRead + self.clusters_plus.append(self.currentCluster_plus) + self.clusterCount += 1 + else: + if 
self.currentCluster_plus.overlap(newRead): + self.currentCluster_plus.merge(newRead) + self.clusters_plus[-1] = self.currentCluster_plus + else: # New read is a new cluster + # self.clusters.append(self.currentCluster) + self.currentCluster_plus = newRead + self.clusters_plus.append(self.currentCluster_plus) + self.clusterCount += 1 + + def updateMutation(self, read, mis): + mutations = [] + if self.type == 3: # iclip,find truncation + mutations = mutation.getTruncations(self.originalBAM, read) + else: + mutations = mutation.getMutations(self.originalBAM, read) + if self.type == 1: + mutation_filter = Utils.filterMutations( + mutations, "T->C", True + ) + mutations = mutation_filter + elif self.type == 2: + mutation_filter = Utils.filterMutations( + mutations, "G->A", True + ) + mutations = mutation_filter + # LOGGER.debug("read %s " % read) + if len(mutations) > 0: + for m in mutations: + # print m + self.mutationCount += 1 + m_key = "_".join([m.chr, str(m.start), m.strand, m.type]) + if m_key in self.mutations: + self.mutations[m_key].increaseScore() + else: + self.mutations[m_key] = m + + def updateCLIPinfo(self, read, matchlen, miscount): + """Update sample coverage info, clustering, mutation info.""" + # LOGGER.debug("read %s, cigar %s,mismatch %d" % (read.qname,read.cigar,miscount)) + # update sample coverage info + self.coverage += matchlen + # update cluster info + self.updateCluster(read) + # update mutation info + if miscount > 0: + self.updateMutation(read, miscount) + + def addSigToDic(self, dic, mu): + """Add new mutation into the dictionary.Mutations should be sorted.""" + if mu.chr in dic: + dic[mu.chr].append(mu) + else: + dic[mu.chr] = [mu] + + def getCrosslinking(self): + """Merge enriched clusters and reliable mutations together Call + Enrich.fisherTest() to calculate joint p vlaue.""" + for cluster in self.clusters: + # LOGGER.debug("P value of cluster is %f" % cluster.pvalue) + if cluster.sig and cluster.chr in self.sigMutations: + for mutation in self.sigMutations[cluster.chr]: + # LOGGER.debug("Cluster loc %d,%d ; Mutation loc %d,%d, mutation type %s" % (cluster.start,cluster.stop,mutation.start,mutation.stop,mutation.type)) + if cluster.overlap(mutation): + # LOGGER.debug("Overlapped") + if self.type == 0: # HITS-CLIP + mutation_key = mutation.type.split("->")[0] + if mutation_key in ["A", "C", "G", "T"]: + mutation_key = "Substitution" + cross_key = cluster.name + "_" + mutation_key + else: + cross_key = cluster.name + if cross_key in self.crosslinking: + # LOGGER.debug("Existing mutation pvalue:",self.crosslinking[cluster.name].mutationP) + self.crosslinking[cross_key].addMutation(mutation) + self.crosslinkingMutations.append(mutation) + else: + self.crosslinking[ + cross_key + ] = Alignment.CrosslinkingBed( + cluster.chr, + cluster.start, + cluster.stop, + cluster.name, + cluster.score, + cluster.strand, + cluster.pvalue, + cluster.qvalue, + mutation.start, + mutation.name, + mutation.pvalue, + ) + self.crosslinkingMutations.append(mutation) + # start to calculate fisher test p value + for k in self.crosslinking.keys(): + self.crosslinking[k].fishertest() + + def filter(self, matchLen, mismatch, cliptype, duprm): + """Filter the input BAM file according to parameters. 
+ + Make clusters and mutations at the same time + """ + LOGGER.info("Start to filter alignment using parameters:") + LOGGER.info("match length:%d" % (matchLen)) + LOGGER.info("mismatch count: %d" % (mismatch)) + LOGGER.info("CLIP type:%s" % (str(cliptype))) + LOGGER.info("Rmdup code:%s" % (str(duprm))) + LOGGER.info( + "There are %d reads in origianl input file" + % (self.originalBAM.mapped) + ) + # print >> sys.stderr,zip(self.originalBAM.references,self.originalBAM.lengths) + # sys_info = os.system("free -m") + # LOGGER.debug(sys_info) + # outBAM = pysam.Samfile(outprefix+".filtered.bam","wb",template=self.originalBAM) + self.originalMapped = self.originalBAM.mapped + outBAM_pos = pysam.Samfile( + self.outprefix + ".pos.filtered.bam", + "wb", + template=self.originalBAM, + ) + outBAM_neg = pysam.Samfile( + self.outprefix + ".neg.filtered.bam", + "wb", + template=self.originalBAM, + ) + self.type = cliptype + if cliptype == 3: # make sure there is no rmdup for iCLIP data + duprm = 0 + count = 0 + start_time = datetime.datetime.now() + for alignment in self.originalBAM: + # print "Now processing",alignment.qname + if not alignment.cigar: # reads is unmapped + continue + count += 1 + if count % 500000 == 0: + stop_time = datetime.datetime.now() + LOGGER.debug( + "Processed %d reads in %s" + % (count, str(stop_time - start_time)) + ) + start_time = stop_time + flag, mlen, mis = Utils.readQuaFilter( + alignment, matchLen, mismatch + ) + if flag: + # print "Qualified read" + # print alignment + # print "current Gourp key",self.currentGroupKey + if duprm > 0: + # get group key + if duprm == 1: + groupkey = Utils.rmdupKey_Start(alignment) + elif duprm == 2: + groupkey = Utils.rmdupKey_Seq(alignment) + # check current group + if ( + groupkey == self.currentGroupKey + ): # overlap with current group, update group + self.updateCurrentGroup(alignment, mlen, mis) + else: # get read from current group and discard it, use current read to start a new current group + if ( + self.currentGroupKey != "None" + ): # current group exists + keep = self.rmdup() + # LOGGER.debug("Pop out read to keep %s" % keep) + self.currentGroup = [] + self.filteredAlignment += 1 + flag, mlen, mis = Utils.readQuaFilter( + keep, matchLen, mismatch + ) + self.updateCLIPinfo(keep, mlen, mis) + # outBAM.write(keep) + if keep.is_reverse: + outBAM_neg.write(keep) + else: + outBAM_pos.write(keep) + self.iniDupGroupInfo( + alignment, groupkey, mlen, mis + ) # make new group using current alignment + else: # there is no rmdup + # LOGGER.debug("Good read, update clip info %s" % read.qname) + self.filteredAlignment += 1 + self.updateCLIPinfo(alignment, mlen, mis) + # outBAM.write(alignment) + if alignment.is_reverse: + outBAM_neg.write(alignment) + else: + outBAM_pos.write(alignment) + # clean up the final dupGroup, if rmdup==0, there is no final dupGroup + if len(self.currentGroup) > 0: + keep = self.rmdup() + self.currentGroup = [] + self.filteredAlignment += 1 + flag, mlen, mis = Utils.readQuaFilter(keep, matchLen, mismatch) + self.updateCLIPinfo(keep, mlen, mis) + # outBAM.write(alignment) + if keep.is_reverse: + outBAM_neg.write(keep) + else: + outBAM_pos.write(keep) + # LOGGER CLIP sample information + # outBAM.close() + outBAM_pos.close() + outBAM_neg.close() + # pysam.index(outprefix+".filtered.bam") + pysam.index(self.outprefix + ".pos.filtered.bam") + pysam.index(self.outprefix + ".neg.filtered.bam") + self.posfilteredBAM = self.outprefix + ".pos.filtered.bam" + self.negfilteredBAM = self.outprefix + ".neg.filtered.bam" + 
self.clusters = self.clusters_plus + self.clusters_minus + self.printClusters() + self.printMutations_chr() + # Clean up variables + self.originalBAM = None + self.clusters_plus = None + self.clusters_minus = None + self.mutations = None + gc.collect() + LOGGER.debug( + "After filtering, %d reads left" % (self.filteredAlignment) + ) + LOGGER.debug("There are %d clusters in total" % (self.clusterCount)) + LOGGER.debug("There are %d mutations in total" % (self.mutationCount)) + + def filter2(self, matchLen, mismatch, cliptype, duprm): + """Filter the input BAM file according to parameters. + + Make clusters and mutations at the same time + """ + LOGGER.info("Start to filter alignment using parameters:") + LOGGER.info("match length:%d" % (matchLen)) + LOGGER.info("mismatch count: %d" % (mismatch)) + LOGGER.info("CLIP type:%s" % (str(cliptype))) + LOGGER.info("Rmdup code:%s" % (str(duprm))) + LOGGER.info( + "There are %d reads in origianl input file" + % (self.originalBAM.mapped) + ) + + self.originalMapped = self.originalBAM.mapped + outBAM_pos = pysam.Samfile( + self.outprefix + ".pos.filtered.bam", + "wb", + template=self.originalBAM, + ) + outBAM_neg = pysam.Samfile( + self.outprefix + ".neg.filtered.bam", + "wb", + template=self.originalBAM, + ) + self.type = cliptype + if cliptype == 3: # make sure there is no rmdup for iCLIP data + duprm = 0 + count = 0 + start_time = datetime.datetime.now() + currentChr = "" + for alignment in self.originalBAM: + if not alignment.cigar: # reads is unmapped + continue + c_chr = self.originalBAM.getrname(alignment.tid) + if currentChr == "": + currentChr = c_chr + elif c_chr != currentChr: + # LOGGER.debug("Mutation sites for Current ref %s is %d" % (currentChr,len(self.mutations.keys()))) + self.printMutationSingleChr(currentChr) + self.mutations = None + gc.collect() + self.mutations = {} + currentChr = c_chr + count += 1 + if count % 5000000 == 0: + stop_time = datetime.datetime.now() + LOGGER.debug( + "Processed %d reads in %s" + % (count, str(stop_time - start_time)) + ) + start_time = stop_time + flag, mlen, mis = Utils.readQuaFilter( + alignment, matchLen, mismatch + ) + if flag: + if duprm > 0: + # get group key + if duprm == 1: + groupkey = Utils.rmdupKey_Start(alignment) + elif duprm == 2: + groupkey = Utils.rmdupKey_Seq(alignment) + # check current group + if ( + groupkey == self.currentGroupKey + ): # overlap with current group, update group + self.updateCurrentGroup(alignment, mlen, mis) + else: # get read from current group and discard it, use current read to start a new current group + if ( + self.currentGroupKey != "None" + ): # current group exists + keep = self.rmdup() + # LOGGER.debug("Pop out read to keep %s" % keep) + self.currentGroup = [] + self.filteredAlignment += 1 + flag, mlen, mis = Utils.readQuaFilter( + keep, matchLen, mismatch + ) + self.updateCLIPinfo(keep, mlen, mis) + # outBAM.write(keep) + if keep.is_reverse: + outBAM_neg.write(keep) + else: + outBAM_pos.write(keep) + self.iniDupGroupInfo( + alignment, groupkey, mlen, mis + ) # make new group using current alignment + else: # there is no rmdup + # LOGGER.debug("Good read, update clip info %s" % read.qname) + self.filteredAlignment += 1 + self.updateCLIPinfo(alignment, mlen, mis) + # outBAM.write(alignment) + if alignment.is_reverse: + outBAM_neg.write(alignment) + else: + outBAM_pos.write(alignment) + # clean up the final dupGroup, if rmdup==0, there is no final dupGroup + if len(self.currentGroup) > 0: + keep = self.rmdup() + self.currentGroup = [] + 
self.filteredAlignment += 1 + flag, mlen, mis = Utils.readQuaFilter(keep, matchLen, mismatch) + self.updateCLIPinfo(keep, mlen, mis) + # outBAM.write(alignment) + if keep.is_reverse: + outBAM_neg.write(keep) + else: + outBAM_pos.write(keep) + # clean up current mutation hash + self.printMutationSingleChr(currentChr) + self.mutations = None + # LOGGER CLIP sample information + # outBAM.close() + outBAM_pos.close() + outBAM_neg.close() + pysam.index(self.outprefix + ".pos.filtered.bam") + pysam.index(self.outprefix + ".neg.filtered.bam") + self.posfilteredBAM = self.outprefix + ".pos.filtered.bam" + self.negfilteredBAM = self.outprefix + ".neg.filtered.bam" + self.clusters = self.clusters_plus + self.clusters_minus + self.printClusters() + # Clean up variables + self.originalBAM = None + self.clusters_plus = None + self.clusters_minus = None + self.mutations = None + gc.collect() + LOGGER.debug( + "After filtering, %d reads left" % (self.filteredAlignment) + ) + LOGGER.debug("There are %d clusters in total" % (self.clusterCount)) + LOGGER.debug("There are %d mutations in total" % (self.mutationCount)) diff --git a/pipeclip/Enrich.py b/pipeclip/Enrich.py new file mode 100644 index 0000000..171d271 --- /dev/null +++ b/pipeclip/Enrich.py @@ -0,0 +1,484 @@ +#!/usr/bin/python +# Programmer : beibei.chen@utsouthwestern.edu +# Usage: Get reliable mutations using binomial distribution +# Input: Filtered BAM, reads coverage (generated by SAMFilter.py), mutation file +# Output: BED +# Last modified: 18 Sep.2013 + + +import datetime +import gc +import math +import os +import subprocess +import sys +from collections import Counter +from pathlib import Path + +import pysam +import rpy2.robjects as robject +from rpy2.robjects.packages import importr + +from . import Alignment, Utils + +LOGGER = Utils.get_logger(__name__) + +gc.enable() + +stats = importr("stats") + + +def freqRank(readCount, rev=False): + key = sorted(list(readCount.keys()), reverse=rev) + r_rank = {} + rank = 0 + for i in key: + rank += readCount[i] + r_rank[i] = rank + return r_rank + + +def BH(pvalue, pRank, N): + a = N / float(pRank) + q = a * pvalue + qva = max(pvalue, q) + return qva + + +# @profile +def KMvalue(posmapfile, negmapfile, mufile): + """Calculate K(coverage) value for each mutation location Mutations are + already unique.""" + km = [] # store mutations with updated k value + km_pair = {} # Dic of count tuples of (k,m),key:"K_M" + count = 0 + LOGGER.debug("make wig %s" % str(datetime.datetime.now())) + poswig = Utils.makeWig(posmapfile) + negwig = Utils.makeWig(negmapfile) + start_time = datetime.datetime.now() + LOGGER.debug("finish making wig %s" % str(start_time)) + for item in mufile: + count += 1 + if count % 5000 == 0: + stop_time = datetime.datetime.now() + LOGGER.debug( + "Counting K-M for %d mutation sites, using %s" + % (count, str(stop_time - start_time)) + ) + start_time = stop_time + st = [] + strand = item.strand + M = item.score + K = 0 + # LOGGER.debug("Time begin to pileup is %s" % (str(datetime.datetime.now()))) + if strand == "+": + try: + K = poswig[item.chr][str(item.start)] + except: + continue + + elif strand == "-": + try: + K = negwig[item.chr][str(item.start)] + except: + continue + if K >= M: + item.updateK(K) + # print item + # LOGGER.debug("K value for item %s is %d" % (item, K)) + pair_name = str(K) + "_" + str(M) + if pair_name in km_pair: + km_pair[pair_name] += 1 + else: + km_pair[pair_name] = 1 + # km.append(item) + gc.collect() + return km_pair + + +def KMvalue_test(clip, mutations, 
chr, chrlen): + """Calculate K(coverage) value for each mutation location Mutations are + already unique.""" + km = [] # store mutations with updated k value + count = 0 + # LOGGER.debug("make wig %s" % str(datetime.datetime.now())) + posBAM = pysam.Samfile(clip.posfilteredBAM, "rb") + negBAM = pysam.Samfile(clip.negfilteredBAM, "rb") + start_time = datetime.datetime.now() + poswig = Utils.makeWigByChr(posBAM, chr) + negwig = Utils.makeWigByChr(negBAM, chr) + stop_time = datetime.datetime.now() + # LOGGER.debug("Finished making wig for %s using %s" % (chr,str(stop_time-start_time))) + start_time = stop_time + for item in mutations: + count += 1 + if count % 100000 == 0: + stop_time = datetime.datetime.now() + LOGGER.debug( + "Counting K-M for %d mutation sites, using %s" + % (count, str(stop_time - start_time)) + ) + start_time = stop_time + st = [] + M = item.score + K = 0 + strand = item.strand + if strand == "+": + try: + K = poswig[item.start] + except: + # log.warning("Did not find mutation in poswig") + continue + elif strand == "-": + try: + K = negwig[item.start] + except: + continue + if K >= M: + item.updateK(K) + pair_name = str(K) + "_" + str(M) + if pair_name in clip.kmpair: + clip.kmpair[pair_name] += 1 + else: + clip.kmpair[pair_name] = 1 + posBAM.close() + negBAM.close() + + +def uniq(b): # b is a list + uniqElements = [] + for i in b: + if uniqElements.count(i) == 0: + uniqElements.append(i) + uniqElements.sort() + return uniqElements + + +def mutationEnrich(clip, threshold=0.01): + coverage = clip.coverage * 1.0 + totalMuCount = clip.mutationCount + mutations = [] + total_test = 0 + for chr, chrlen in clip.refInfo: + # LOGGER.debug(chr) + try: + mufile = open(clip.outprefix + "." + chr + ".mutations.bed") + except: + LOGGER.info( + "Cannot open mutation file %s , move on." + % (clip.outprefix + "." 
+ chr + ".mutations.bed") + ) + continue + for record in mufile: + total_test += 1 + info = record.rstrip().split("\t") + new_mu = Alignment.MutationBed( + info[0], + int(info[1]), + int(info[2]), + info[3], + int(info[4]), + info[5], + info[6], + ) + mutations.append(new_mu) + try: + pass + # os.remove(clip.outprefix+"."+chr+".mutations.bed") + except: + pass + LOGGER.debug(len(mutations)) + KMvalue_test( + clip, mutations, chr, chrlen + ) # check after doing KM, if clip.mutations changed + try: + os.remove(clip.posfilteredBAM) + os.remove(clip.negfilteredBAM) + os.remove(clip.posfilteredBAM + ".bai") + os.remove(clip.negfilteredBAM + ".bai") + except: + pass + del clip.posfilteredBAM + del clip.negfilteredBAM + gc.collect() # LOGGER.info("Finished K-M counting, starting fitting.") + + R = robject.r + reliableList = [] + P = totalMuCount / coverage + km_p = {} # store km and corresponding p value + pvalues = [] + for k in clip.kmpair: # KM_test: + parameters = k.split("_") + p = R.pbinom(int(parameters[1]) - 1, int(parameters[0]), P, False)[0] + pvalues.append(p) + km_p[k] = p + pCount = dict(Counter(pvalues)) + pRank = freqRank(pCount, True) + pqDic = {} + for i in list(pRank.keys()): + try: + p_rank = pRank[i] + q_value = BH(i, p_rank, total_test) + pqDic[i] = q_value + except: + print("Cannot find p value in dictionary", file=sys.stderr) + continue + count = 0 + for mu in mutations: + name = str(mu.kvalue) + "_" + str(mu.score) + try: + mu.pvalue = km_p[name] + except: + # LOGGER.debug("Problem with %s" % mu) + continue + mu.qvalue = pqDic[mu.pvalue] + if mu.qvalue <= threshold: + count += 1 + new_mutationName = "Mutation_" + str(count) + mu.name = new_mutationName + mu.sig = True + clip.sigMutationCount += 1 + clip.addSigToDic(clip.sigMutations, mu) + + mutations = None + gc.collect() + + +def mutationEnrichWCtrl(clip, ctrlclip, threshold=0.01): + coverage = ctrlclip.coverage * 1.0 + totalMuCount = ctrlclip.mutationCount + mutations = [] + total_test = 0 + for chr, chrlen in clip.refInfo: + try: + mufile = open(clip.outprefix + "." + chr + ".mutations.bed") + except: + LOGGER.info( + "Cannot open mutation file %s , move on." + % (clip.outprefix + "." + chr + ".mutations.bed") + ) + continue + for record in mufile: + total_test += 1 + info = record.rstrip().split("\t") + new_mu = Alignment.MutationBed( + info[0], + int(info[1]), + int(info[2]), + info[3], + int(info[4]), + info[5], + info[6], + ) + mutations.append(new_mu) + try: + os.remove(clip.outprefix + "." + chr + ".mutations.bed") + os.remove(ctrlclip.outprefix + "." 
+ chr + ".mutations.bed") + except: + pass + KM_test = KMvalue_test( + clip, mutations, chr, chrlen + ) # check after doing KM, if clip.mutations changed + try: + os.remove(clip.posfilteredBAM) + os.remove(clip.negfilteredBAM) + os.remove(clip.posfilteredBAM + ".bai") + os.remove(clip.negfilteredBAM + ".bai") + except: + pass + del clip.posfilteredBAM + del clip.negfilteredBAM + gc.collect() # LOGGER.info("Finished K-M counting, starting fitting.") + + R = robject.r + reliableList = [] + P = totalMuCount / coverage + km_p = {} # store km and corresponding p value + pvalues = [] + for k in clip.kmpair: # KM_test: + parameters = k.split("_") + p = R.pbinom(int(parameters[1]) - 1, int(parameters[0]), P, False)[0] + pvalues.append(p) + km_p[k] = p + pCount = dict(Counter(pvalues)) + pRank = freqRank(pCount, True) + pqDic = {} + for i in list(pRank.keys()): + try: + p_rank = pRank[i] + q_value = BH(i, p_rank, total_test) + pqDic[i] = q_value + except: + print("Cannot find p value in dictionary", file=sys.stderr) + continue + count = 0 + for mu in mutations: + name = str(mu.kvalue) + "_" + str(mu.score) + try: + mu.pvalue = km_p[name] + except: + # LOGGER.debug("Problem with %s" % mu) + continue + mu.qvalue = pqDic[mu.pvalue] + if mu.qvalue <= threshold: + count += 1 + new_mutationName = "Mutation_" + str(count) + mu.name = new_mutationName + mu.sig = True + clip.sigMutationCount += 1 + clip.addSigToDic(clip.sigMutations, mu) + + mutations = None + gc.collect() + + +def clusterEnrich(clip, threshold=0.01): + cluster_filename = ( + clip.outprefix + ".clusters.bed" + ) # clip.filepath.split("/")[-1].split(".")[0] + # Call R code and get result + epsilon = [0.01, 0.15, 0.1] + step = [0.1, 0.08, 0.05] + for index in range(len(epsilon)): + e = epsilon[index] + s = step[index] + r_script = Path(__file__).parent.resolve() / "ZTNB_tryCatch.R" + r_args = [ + "Rscript", + r_script, + cluster_filename, + str(threshold), + str(e), + str(s), + ] + gc.collect() + p = subprocess.Popen(r_args) + stdout_value = p.communicate()[0] + try: + r_output_log = open(cluster_filename + ".pipeclip.ztnblog", "r") + # LOGGER.debug("Log file opened") + flag = r_output_log.read(1) + if flag == "Y": # converged + break + elif flag == "N": + continue + except: + LOGGER.info( + "No log file was produced by R code, continue regression using other parameters anyway." + ) + continue + + # check ztnb file + try: + enrich_parameter = open(cluster_filename + ".pipeclip.ztnb", "r") + except IOError as message: + LOGGER.error("Cannot open ztnb result file") + return False + nbDic = {} + for item in enrich_parameter: + buf = item.rstrip().split("\t") + if buf[0] != "#": + nb_key = "_".join(buf[0:2]) # reads_length as key + # LOGGER.debug("NB_key %s" % nb_key) + if nb_key not in nbDic: + nbDic[nb_key] = (buf[2], buf[3]) # pvalue and qvalue + # LOGGER.info("There are %d read-length pairs" % (len(nbDic.keys()))) + if len(list(nbDic.keys())) == 0: + LOGGER.error("There are no read-length pairs found by ZTNB. 
Exit.") + return False + else: + for i in range(len(clip.clusters)): + r_key = ( + str(clip.clusters[i].score) + + "_" + + str(clip.clusters[i].stop - clip.clusters[i].start) + ) + # LOGGER.debug("Keys from clip.clusters,%s" % r_key) + if r_key in nbDic: + clip.clusters[i].pvalue = nbDic[r_key][0] + clip.clusters[i].qvalue = nbDic[r_key][1] + clip.clusters[i].sig = True + clip.sigClusterCount += 1 + nbDic = None + try: + os.remove(cluster_filename) + except: + pass + if clip.sigClusterCount == 0: + return False + else: + return True + + +def clusterEnrich_outsource(clip, threshold=0.01): + cluster_filename = clip.outprefix + ".clusters.bed" + # Call R code and get result + epsilon = [0.01, 0.15, 0.1] + step = [0.1, 0.08, 0.05] + for e in epsilon: + for s in step: + r_script = Path(__file__).parent.resolve() / "ZTNB_tryCatch.R" + r_args = [ + "Rscript", + r_script, + cluster_filename, + str(threshold), + str(e), + str(s), + ] + p = subprocess.Popen(r_args) + stdout_value = p.communicate()[0] + + # check ztnb file + try: + enrich_parameter = open(cluster_filename + ".pipeclip.ztnb", "r") + except IOError: + LOGGER.error("Cannot open ztnb result file") + return False + nbDic = {} + for item in enrich_parameter: + buf = item.rstrip().split("\t") + if buf[0] != "#": + nb_key = "_".join(buf[0:2]) # reads_length as key + # LOGGER.debug("NB_key %s" % nb_key) + if nb_key not in nbDic: + nbDic[nb_key] = (buf[2], buf[3]) # pvalue and qvalue + # LOGGER.info("There are %d read-length pairs" % (len(nbDic.keys()))) + if len(list(nbDic.keys())) == 0: + LOGGER.error("There are no read-length pairs found by ZTNB. Exit.") + return False + else: + for i in range(len(clip.clusters)): + r_key = ( + str(clip.clusters[i].score) + + "_" + + str(clip.clusters[i].stop - clip.clusters[i].start) + ) + # LOGGER.debug("Keys from clip.clusters,%s" % r_key) + if r_key in nbDic: + clip.clusters[i].pvalue = nbDic[r_key][0] + clip.clusters[i].qvalue = nbDic[r_key][1] + clip.clusters[i].sig = True + clip.sigClusterCount += 1 + nbDic = None + try: + os.remove(cluster_filename) + except: + pass + if clip.sigClusterCount == 0: + return False + else: + return True + + +def fisherTest(clusterp, mutationp): + R = robject.r + min_mp = min(mutationp) + product = clusterp * min_mp + if product == 0: + fps = 0 + else: + xsq = -2 * math.log(clusterp * min_mp) + fp = R.pchisq(xsq, **{"df": 4, "lower.tail": False, "log.p": True})[0] + fps = math.exp(fp) + return fps diff --git a/pipeclip/Utils.py b/pipeclip/Utils.py new file mode 100644 index 0000000..754e446 --- /dev/null +++ b/pipeclip/Utils.py @@ -0,0 +1,152 @@ +#!/usr/bin/python +# programmer: beibei.chen@utsouthwestern.edu +# Usage: definition of utilities to handel pysam classes + +import logging +import subprocess +import sys + +from . 
diff --git a/pipeclip/Utils.py b/pipeclip/Utils.py
new file mode 100644
index 0000000..754e446
--- /dev/null
+++ b/pipeclip/Utils.py
@@ -0,0 +1,152 @@
+#!/usr/bin/python
+# programmer: beibei.chen@utsouthwestern.edu
+# Usage: definition of utilities to handle pysam classes
+
+import logging
+import subprocess
+import sys
+
+from . import Alignment
+
+try:
+    assert sys.version_info > (3, 6)
+except AssertionError:
+    raise RuntimeError("Requires Python 3.6+!")
+
+
+def get_logger(name: str) -> logging.Logger:
+    """global logging."""
+    logger: logging.Logger = logging.getLogger(name)
+    if not logger.handlers:
+        handler: logging.StreamHandler = logging.StreamHandler()
+        formatter: logging.Formatter = logging.Formatter(
+            "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"
+        )
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+        # logger.setLevel(logging.DEBUG)
+        logger.setLevel(logging.INFO)
+    return logger
+
+
+LOGGER: logging.Logger = get_logger(__name__)
+
+
+def is_sorted(header):
+    """Take a BAM header and check whether this file is sorted.
+    Return: True: sorted, False: unsorted
+    The header variable is a string of header lines.
+    """
+    for row in header.split("\n"):
+        buf = row.rstrip().split("\t")
+        if buf[0] == "@HD":
+            if buf[2] in ["SO:unsorted", "SO:unknown"]:
+                return False
+            elif buf[2] in ["SO:coordinate", "SO:queryname"]:
+                return True
+    # If no HD header contains SO info, return False
+    return False
+
+
+def readQuaFilter(read, mlen, mis):
+    matchlen = 0
+    mismatch = 0
+    # LOGGER.debug("read %s,cigar %s,tags %s" % (read.qname,read.cigar,read.tags))
+    try:
+        for i in read.cigar:
+            # LOGGER.debug(i)
+            if i[0] == 0:
+                matchlen += i[1]
+            elif i[0] == 1:
+                mismatch += i[1]
+        for j in read.tags:
+            if j[0] == "NM":
+                mismatch += j[1]
+                break
+        # LOGGER.debug("matchlen %d,mismatch %d" % (matchlen,mismatch))
+    except Exception:
+        # LOGGER.debug("Problematic read %s" % (read))
+        pass
+    if matchlen >= mlen and mismatch <= mis:
+        return (True, matchlen, mismatch)
+    else:
+        return (False, 0, 0)
+
+
+def rmdupKey_Start(read):
+    # group duplicates by reference, alignment start and strand
+    k = str(read.tid) + ":" + str(read.pos) + ":" + str(read.is_reverse)
+    return k
+
+
+def rmdupKey_Seq(read):
+    # group duplicates by position, strand and the read sequence itself
+    k = (
+        str(read.tid)
+        + ":"
+        + str(read.pos)
+        + ":"
+        + str(read.is_reverse)
+        + ":"
+        + read.seq
+    )
+    return k
+
+
+def filterMutations(mlist, feature, keep=True):
+    # keep=True retains mutations matching the feature; keep=False drops them
+    newList = []
+    for i in mlist:
+        if i.type == feature:
+            if keep:
+                newList.append(i)
+        else:
+            if not keep:
+                newList.append(i)
+    return newList
+
+
+def annotation(fileaddr, species):
+    """Call Homer annotation script."""
+    outfile = open(fileaddr + ".anno", "w")
+    args = ["annotatePeaks.pl", fileaddr, species]
+    p = subprocess.Popen(args, stdout=outfile)
+    stdout_value = p.communicate()[0]
+    outfile.close()
+
+
+def makeWig(bamfile):
+    wig = {}
+    for r in bamfile.references:
+        wig[r] = {}
+        for pileupColumn in bamfile.pileup(r):
+            wig[r][str(pileupColumn.pos)] = pileupColumn.n
+    return wig
+
+
+def makeWigByChr(bamfile, chr):
+    wig = {}
+    for pileupColumn in bamfile.pileup(chr):
+        wig[pileupColumn.pos] = pileupColumn.n
+    return wig
+
+
+def makeWigListByChr(bamfile, chr):
+    wig = Alignment.wiglist()
+    for pileupColumn in bamfile.pileup(chr):
+        wig.update(pileupColumn.pos, pileupColumn.n)
+    return wig
+
+
+def makeWigListByChr_array(bamfile, chr, chrlen):
+    wig = []
+    for _ in range(chrlen):
+        wig.append(0)
+    for pileupColumn in bamfile.pileup(chr):
+        wig[pileupColumn.pos] = pileupColumn.n
+    return wig
+
+
+def bisort(alist, b):
+    """List is a list of bed instances, b is also an instance."""
+    if isinstance(alist[0], Alignment.BED) and isinstance(b, Alignment.BED):
+        pass
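The `makeWig*` helpers above turn pysam pileups into per-position coverage maps, which is where `KMvalue_test` later reads the K value from. A minimal sketch of the same idea, assuming a coordinate-sorted and indexed BAM (the file name is hypothetical; `.pos` and `.n` are the legacy pysam aliases used throughout this codebase, spelled `reference_pos` and `nsegments` in newer pysam):

```python
import pysam

# hypothetical input; any coordinate-sorted, indexed BAM works
bam = pysam.AlignmentFile("example.sorted.bam", "rb")

coverage = {}  # position -> number of reads piled up there
for col in bam.pileup("chr1"):
    coverage[col.pos] = col.n
bam.close()

# K for a mutation at position p on chr1 is then coverage.get(p, 0);
# M is the mutant-read count carried in the mutation BED's score column.
print(coverage.get(10000, 0))
```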
diff --git a/pipeclip/ZTNB_tryCatch.R b/pipeclip/ZTNB_tryCatch.R
new file mode 100644
index 0000000..70f629c
--- /dev/null
+++ b/pipeclip/ZTNB_tryCatch.R
@@ -0,0 +1,188 @@
+#!/usr/bin/env Rscript
+# -*- coding: utf-8 -*-
+
+# Requires at least R 3.0 and package VGAM plus dependencies
+# Programmers: jonghyun.yun@utsouthwestern.edu and beibei.chen@utsouthwestern.edu
+# Usage: fit all the read-count and length combinations of enriched clusters
+# Input: BED file with the score column holding the read count
+# Output: table with read count, length, p value and FDR
+# Last modified: 20 Sep 2014
+
+# command line parameters:
+# 1: input file name; two output files, input.ztnb and input.ztnblog, are created
+# 2: FDR cutoff
+# 3: regression epsilon
+# 4: regression step
+
+required_packages <- c("MASS", "VGAM")
+for (p in required_packages) {
+  if (!require(p, character.only = TRUE)) {
+    install.packages(p, repos = "https://cloud.r-project.org")
+    suppressMessages(require(p, character.only = TRUE))
+  }
+}
+
+args <- commandArgs(TRUE)
+data <- read.table(args[1], sep = "\t")
+len <- data[, 3] - data[, 2]
+read <- data[, 5]
+cut <- as.numeric(as.character(args[2]))
+vglm_epsilon <- as.numeric(as.character(args[3]))
+vglm_step <- as.numeric(as.character(args[4]))
+# tabulate the data
+wts <- table(read, len)
+temp_coln <- as.integer(names(wts[1, ]))
+temp_rown <- as.integer(names(wts[, 1]))
+drow <- length(temp_rown)
+dcol <- length(temp_coln)
+rown <- matrix(rep(temp_rown, dcol), drow, dcol)
+coln <- t(matrix(rep(temp_coln, drow), dcol, drow))
+wdx <- which(wts > 0)
+tread <- rown[wdx]
+tlen <- coln[wdx]
+twts <- wts[wdx]
+# local polynomial regression: read ~ len
+# smoothing parameter:
+alp <- 0.95
+# polynomial degree:
+pd <- 1
+lregfit <- loess(
+  tread ~ tlen,
+  weights = twts,
+  span = alp,
+  family = "symmetric",
+  degree = pd
+)
+mu <- lregfit$fitted
+mu[mu <= 0] <- min(exp(-4), mu[mu > 0])
+# bounded mean function
+logmu <- log(mu)
+# compute p-values using N(0,1) for (tread[-cdx] - mu[-cdx]) / sqrt(mu[-cdx])
+nb.pvalue <- numeric(length(tread))
+zscore <- (tread - mu) / sqrt(mu)
+cdx <- which(zscore < 4)
+nb.pvalue[-cdx] <- pnorm(zscore[-cdx], lower.tail = FALSE)
+tread_fit <- tread[cdx]
+tlen_fit <- tlen[cdx]
+twts_fit <- twts[cdx]
+lregfit <- loess(
+  tread_fit ~ tlen_fit,
+  weights = twts_fit,
+  span = alp,
+  family = "symmetric",
+  degree = pd
+)
+mu <- lregfit$fitted
+mu[mu <= 0] <- min(exp(-4), mu[mu > 0])
+# bounded mean function
+logmu <- log(mu)
+# negative binomial regression with the known predictor log(mu)
+
+intercept1 <- seq(-1, 1, 0.5)
+intercept2 <- seq(-1, 1, 0.5)
+biggest_likelihood <- -99999999
+khat <- 0
+muhat <- 0
+converge_flag <- FALSE
+errmsg <- paste("Error/warning messages for run using Epsilon:", vglm_epsilon, "and step size:", vglm_step, ".")
+options(warn = 1)
+# use the default settings first to see if the fit converges
+nb <- tryCatch(
+  {
+    vglm(tread_fit ~ 1, posnegbinomial(), weight = twts_fit, maxit = 200, trace = FALSE, step = vglm_step, offset = logmu, epsilon = vglm_epsilon, silent = FALSE)
+  },
+  warning = function(w) {
+    warnmsg <- strsplit(toString(w["message"]), " iteration")[[1]][1]
+    if (grepl("convergence not obtained", warnmsg)) {
+      newmsg <- paste("Convergence not obtained in 200 iterations")
+      return(newmsg)
+    }
+  },
+  error = function(e) {
+    newmsg <- paste("Program failed to converge.")
+    return(newmsg)
+  },
+  finally = {
+  }
+) # end of tryCatch
+if (length(nb) > 0) {
+  if (typeof(nb) == "S4") {
+    # converged: got a regression object
+    if (logLik(nb) >= biggest_likelihood & Coef(nb)["size"] <= 100) {
+      biggest_likelihood <- logLik(nb)
+      khat <- Coef(nb)["size"]
+      muhat <- Coef(nb)["munb"]
+    }
+  } else if (typeof(nb) == "character") {
+    errmsg <- paste(errmsg, nb, sep = "\n")
+  }
+}
+
+
+if ((biggest_likelihood == -99999999) && (khat == 0) && (muhat == 0)) { # try different coefficient starts
+  for (i in intercept1)
+  {
+    for (j in intercept2)
+    {
+      nb <- tryCatch(
+        {
+          vglm(tread_fit ~ 1, posnegbinomial(), weight = twts_fit, maxit = 200, trace = FALSE, step = vglm_step, offset = logmu, epsilon = vglm_epsilon, silent = FALSE, coefstart = c(i, j))
+        },
+        warning = function(w) {
+          warnmsg <- strsplit(toString(w["message"]), " iteration")[[1]][1]
+          if (grepl("convergence not obtained", warnmsg)) {
+            newmsg <- paste("Using coefstart c(", i, ",", j, "), convergence not obtained in 200 iterations")
+            return(newmsg)
+          }
+        },
+        error = function(e) {
+          newmsg <- paste("Using coefstart c(", i, ",", j, "), program failed to converge.")
+          return(newmsg)
+        },
+        finally = {
+        }
+      ) # end of tryCatch
+      if (length(nb) > 0) {
+        if (typeof(nb) == "S4") {
+          if (logLik(nb) >= biggest_likelihood & Coef(nb)["size"] <= 100) {
+            biggest_likelihood <- logLik(nb)
+            khat <- Coef(nb)["size"]
+            muhat <- Coef(nb)["munb"]
+          }
+        } else if (typeof(nb) == "character") {
+          errmsg <- paste(errmsg, nb, sep = "\n")
+        }
+      }
+    } # end of loop intercept2
+  } # end of loop intercept1
+} # end of trying different coefstart values
+
+if ((biggest_likelihood == -99999999) && (khat == 0) && (muhat == 0)) {
+  # no model converged; exit
+  errmsg <- paste("N:No model converged for this run.", errmsg, sep = "\n")
+} else {
+  # model parameters
+  errmsg <- paste("Y:Converged for this run.", errmsg, sep = "\n")
+  write(errmsg, paste(args[1], ".Converge.txt", sep = ""))
+  nbsize <- khat * mu
+  nbmu <- muhat * mu
+  nb.pvalue[cdx] <- pnbinom(
+    tread_fit - 1,
+    size = nbsize,
+    mu = nbmu,
+    lower.tail = FALSE
+  ) / (1 - dnbinom(0, size = nbsize, mu = nbmu))
+  nb.fdr <- p.adjust(nb.pvalue, method = "BH")
+  # output
+  nbdx <- nb.fdr <= cut
+  # sum(twts[nbdx])  # the number of clusters whose FDR <= cut
+  # read counts and cluster lengths of clusters whose FDR <= cut
+  nb.out <- as.data.frame(matrix(c(tread[nbdx], tlen[nbdx], nb.pvalue[nbdx], nb.fdr[nbdx]), sum(nbdx), 4))
+  names(nb.out) <- c("read", "length", "p", "fdr")
+  outname <- paste(args[1], ".pipeclip.ztnb", sep = "")
+  write.table(nb.out, outname, sep = "\t", quote = F, row.names = F)
+} # end of output
+write(errmsg, paste(args[1], ".pipeclip.ztnblog", sep = ""))
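+
+# Illustrative numbers for the zero-truncated tail probability used above
+# (the size/mu values here are hypothetical, not part of the pipeline):
+#   pnbinom(10 - 1, size = 2, mu = 3, lower.tail = FALSE) /
+#     (1 - dnbinom(0, size = 2, mu = 3))
+# i.e. the probability that a cluster with fitted mean mu collects 10 or
+# more reads, conditional on the cluster containing at least one read.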
diff --git a/pipeclip/__init__.py b/pipeclip/__init__.py
new file mode 100644
index 0000000..2175132
--- /dev/null
+++ b/pipeclip/__init__.py
@@ -0,0 +1,2 @@
+# indicate this folder contains packages
+from . import CLIP, Alignment, Enrich, Utils, mutation
diff --git a/pipeclip/barcode_removal.py b/pipeclip/barcode_removal.py
new file mode 100644
index 0000000..d8de64f
--- /dev/null
+++ b/pipeclip/barcode_removal.py
@@ -0,0 +1,95 @@
+#!/usr/bin/python
+# Programmer : beibei.chen@utsouthwestern.edu
+# Usage: remove PCR duplicates by comparing sequences; returns a FASTQ file with the barcode removed
+# Input: fastq file
+# Output: fastq file
+# Last modified: 28 June 2014 - Beibei Chen
+
+import sys
+
+
+class fastq:
+    """One FASTQ record: four raw lines (id, sequence, '+' line, quality)."""
+
+    def __init__(self, x):
+        self.id = x[0]
+        self.seq = x[1]
+        self.name = x[2]
+        self.quality = x[3]
+
+    def __str__(self):
+        st = self.id + self.seq + self.name + self.quality
+        return st
+
+
+class fqList:
+    def __init__(self):
+        self.data = []
+        self.unique = {}
+
+    def readFq(self, fh, bl):
+        """Read records four lines at a time; keep one record per unique sequence."""
+        buf = fh.readlines()
+        for i in range(0, len(buf) - 3, 4):
+            if buf[i][0] == "@" and buf[i + 2][0] == "+":
+                st = [buf[i], buf[i + 1], buf[i + 2], buf[i + 3].rstrip()]
+                self.addFq(st)
+                if buf[i + 1] not in self.unique:
+                    self.addUniqFq(st, bl)
+
+    def addFq(self, s):
+        newFq = fastq(s)
+        self.data.append(newFq)
+
+    def addUniqFq(self, s, b):
+        # clip the first b bases (the barcode) from sequence and quality
+        seq_key = s[1]
+        offset = b
+        newFq = fastq([s[0], s[1][offset:], s[2], s[3][offset:]])
+        self.unique[seq_key] = newFq
+
+
+class barcodeRemover:
+    def __init__(self, infile, barLen):
+        self.infile = infile
+        self.barLen = barLen
+
+    def run(self):
+        myfq = fqList()
+        myfq.readFq(self.infile, self.barLen)
+        for item in list(myfq.unique.values()):
+            if len(item.seq) >= 15:  # drop reads too short after clipping
+                print(item)
+
+
+def barCodeRemovalMain(infilePath, barLenStr):
+    try:
+        infile = open(infilePath, "r+")
+    except IOError as message:
+        print("cannot open file", message, file=sys.stderr)
+        sys.exit(1)
+    try:
+        barLen = int(barLenStr)
+    except (TypeError, ValueError):
+        barLen = 5
+    remover = barcodeRemover(infile, barLen)
+    remover.run()
+
+
+if __name__ == "__main__":
+    barCodeRemovalMain(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else "5")
diff --git a/pipeclip/main.py b/pipeclip/main.py
new file mode 100644
index 0000000..dcba40a
--- /dev/null
+++ b/pipeclip/main.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2022 Ye Chang yech1990@gmail.com
+# Distributed under terms of the GNU license.
+#
+# Created: 2022-04-26 18:14
+
+"""Fork from:
+
+- Main pipeline connecting all the scripts together
+- Original Programmer: beibei.chen@utsouthwestern.edu
+- Refactored by: eric.roos@utsouthwestern.edu
+- Usage: python pipeclip.py input.sam output_prefix match_length mismatch_number pcr_rm fdr_cluster clip_type fdr_mutation species
+- Required packages: pysam, pybedtools, rpy2
+"""
+
+
+import pathlib
+import sys
+
+import click
+
+from . import CLIP, Enrich, Utils
+
+LOGGER = Utils.get_logger(__name__)
+
+
+@click.command(help="PIPE-CLIP v2.0.10", no_args_is_help=True)
+@click.option(
+    "--infile", "-i", "infile", help="Input bam file.", required=True
+)
+@click.option(
+    "--control", "-t", "control", help="Control bam file.", required=False
+)
+@click.option(
+    "--output",
+    "-o",
+    "output_prefix",
+    help="Output prefix.",
+    required=True,
+)
+@click.option(
+    "--matchLength",
+    "-l",
+    "matchLength",
+    help="Minimum matched segment length.",
+    type=int,
+    required=True,
+)
+@click.option(
+    "--mismatch",
+    "-m",
+    "mismatch",
+    help="Maximum mismatch number.",
+    type=int,
+    required=True,
+)
+@click.option(
+    "--clipType",
+    "-c",
+    "clipType",
+    help="CLIP type: (0) HITS-CLIP; (1) PAR-4SU; (2) PAR-6SG; (3) iCLIP",
+    type=click.IntRange(0, 3),
+    required=True,
+)
+@click.option(
+    "--rmdup",
+    "-r",
+    "rmdup",
+    help="Remove PCR duplicates: (0) no removal; (1) by read start; (2) by sequence",
+    type=click.IntRange(0, 2),
+    required=True,
+)
+@click.option(
+    "--fdrMutation",
+    "-M",
+    "fdrReliableMutation",
+    help="FDR for reliable mutations.",
+    type=float,
+    required=True,
+)
+@click.option(
+    "--fdrCluster",
+    "-C",
+    "fdrEnrichedCluster",
+    help="FDR for enriched clusters.",
+    type=float,
+    required=True,
+)
+@click.option(
+    "--species",
+    "-s",
+    "species",
+    help="Species (hg19, mm9, mm10...)",
+    type=click.Choice(["mm9", "mm10", "hg19"]),
+    required=False,
+)
+def runPipeClip(
+    infile,
+    control,
+    output_prefix,
+    matchLength,
+    mismatch,
+    rmdup,
+    fdrEnrichedCluster,
+    clipType,
+    fdrReliableMutation,
+    species,
+):
+    # make the output directory
+    pathlib.Path(output_prefix).parent.mkdir(parents=True, exist_ok=True)
+    # run the CLIP analysis
+    myClip = CLIP.CLIP(infile, output_prefix)
+    controlFlag = False
+    if control is not None:
+        controlClip = CLIP.CLIP(control, output_prefix + "Control")
+    LOGGER.info("Starting run")
+    if myClip.testInput():  # check input
+        LOGGER.info("Input file OK, starting PIPE-CLIP")
+        LOGGER.info("Species info %s" % species)
+        if control is not None:  # test the control file
+            if controlClip.testInput():
+                LOGGER.info(
+                    "Control file OK. Using control in mutation enrichment."
+                )
+                controlFlag = True
+            else:
+                LOGGER.info(
+                    "Control file format error. Continuing without control."
+                )
+        if myClip.readfile():
+            myClip.filter2(matchLength, mismatch, clipType, rmdup)
+            if controlFlag:
+                LOGGER.info("Reading in control file")
+                controlClip.readfile()
+                controlClip.filter2(matchLength, mismatch, clipType, rmdup)
+            if myClip.clusterCount > 0:
+                LOGGER.info("Getting enriched clusters")
+                status = Enrich.clusterEnrich_outsource(
+                    myClip, fdrEnrichedCluster
+                )
+                if status:
+                    LOGGER.info(
+                        "Found %d enriched clusters" % myClip.sigClusterCount
+                    )
+                    myClip.printEnrichedClusters()
+                else:
+                    LOGGER.error(
+                        "No enriched clusters found. Exiting program."
+                    )
+                    sys.exit(1)
+            else:
+                LOGGER.error(
+                    "No clusters found. Please check the input. Exiting program."
+                )
+                sys.exit(1)
+
+            if myClip.mutationCount > 0:
+                LOGGER.info("Getting reliable mutations")
+                if controlFlag:  # use control
+                    Enrich.mutationEnrichWCtrl(
+                        myClip, controlClip, fdrReliableMutation
+                    )
+                else:
+                    Enrich.mutationEnrich(myClip, fdrReliableMutation)
+                LOGGER.info(
+                    "There are %d reliable mutations" % myClip.sigMutationCount
+                )
+                myClip.printReliableMutations()
+            else:
+                LOGGER.warning("No mutations found in this BAM file.")
+            # start to get crosslinking sites
+            if myClip.sigClusterCount > 0 and myClip.sigMutationCount > 0:
+                LOGGER.info("Getting cross-linking sites")
+                myClip.getCrosslinking()
+                if len(list(myClip.crosslinking.keys())) > 0:
+                    outfilelist = myClip.printCrosslinkingSites()
+                    myClip.printCrosslinkingMutations()
+                else:
+                    LOGGER.warning(
+                        "No crosslinking sites found, possibly because no reliable mutations fall inside enriched clusters. Printing enriched clusters instead."
+                    )
+                    outfilelist = myClip.printEnrichedClusters()
+            else:
+                if myClip.sigClusterCount <= 0:
+                    LOGGER.error(
+                        "There are no enriched clusters for this sample; please check your input file. Exiting."
+                    )
+                    sys.exit(2)
+                elif myClip.sigMutationCount <= 0:
+                    LOGGER.warning(
+                        "No reliable mutations found. PIPE-CLIP will provide enriched clusters as crosslinking candidates."
+                    )
+                    outfilelist = myClip.printEnrichedClusters()
+            # annotate if possible
+            if species in ["mm10", "mm9", "hg19"]:
+                LOGGER.info(
+                    "Starting to annotate crosslinking sites using HOMER"
+                )
+                for name in outfilelist:
+                    Utils.annotation(name, species)
+            # output a status log file
+            logfile = open(output_prefix + ".pipeclip.summary.log", "w")
+            print("PIPE-CLIP run finished. Parameters are:", file=logfile)
+            print(
+                "Input BAM: %s \nOutput prefix: %s \nMinimum matched length: %d \nMaximum mismatch count: %d \nPCR duplicate removal code: %d \nFDR for enriched clusters: %f \nFDR for reliable mutations: %f"
+                % (
+                    infile,
+                    output_prefix,
+                    matchLength,
+                    mismatch,
+                    rmdup,
+                    fdrEnrichedCluster,
+                    fdrReliableMutation,
+                ),
+                file=logfile,
+            )
+            print(
+                "There are %d mapped reads in the input BAM file. After filtering, %d reads remain"
+                % (myClip.originalMapped, myClip.filteredAlignment),
+                file=logfile,
+            )
+            print(
+                "%d out of %d clusters are enriched."
+                % (myClip.sigClusterCount, len(myClip.clusters)),
+                file=logfile,
+            )
+            print(
+                "%d out of %d mutations are reliable."
+                % (myClip.sigMutationCount, myClip.mutationCount),
+                file=logfile,
+            )
+            print(
+                "%d crosslinking site candidates are found, with %d supporting reliable mutations."
+                % (
+                    len(list(myClip.crosslinking.keys())),
+                    len(myClip.crosslinkingMutations),
+                ),
+                file=logfile,
+            )
+            logfile.close()
+            LOGGER.info(
+                "PIPE-CLIP finished the job, please check your results. :)"
+            )
+    else:
+        LOGGER.error("Input file corrupted; exiting.")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    runPipeClip()
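+
+
+# Example invocation of the command defined above (file names and cutoff
+# values are placeholders, not recommendations):
+#   pipeclip -i sample.bam -o results/sample -l 13 -m 1 -c 0 -r 1 \
+#            -M 0.01 -C 0.01 -s hg19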
diff --git a/pipeclip/mutation.py b/pipeclip/mutation.py
new file mode 100644
index 0000000..1cdbda8
--- /dev/null
+++ b/pipeclip/mutation.py
@@ -0,0 +1,347 @@
+#!/usr/bin/python
+# programmer : bbc
+# usage:
+# output in BED format; the score column (formerly the offset of the mutation
+# from the 5' end) now holds the per-read mutation count, which is 1
+
+import copy
+
+from . import Alignment, Utils
+
+LOGGER = Utils.get_logger(__name__)
+
+
+def countMatchNumber(b):
+    """Sum of M (match) segment lengths in a CIGAR list."""
+    m = 0
+    for i in b:
+        if i[0] == 0:
+            m += i[1]
+    return m
+
+
+def countInsertionNumber(b):
+    """Sum of I (insertion) segment lengths in a CIGAR list."""
+    m = 0
+    for i in b:
+        if i[0] == 1:
+            m += i[1]
+    return m
+
+
+def countDeletionNumber(b):
+    """Sum of D (deletion) segment lengths in a CIGAR list."""
+    m = 0
+    for i in b:
+        if i[0] == 2:
+            m += i[1]
+    return m
+
+
+def countMismatch(b):
+    """NM (edit distance) tag value, or 0 if absent."""
+    for i in b:
+        if i[0] == "NM":
+            return i[1]
+    return 0
+
+
+def survey(entry):
+    """Count insertions, deletions and substitutions for one alignment."""
+    mismatchNumber = countMismatch(entry.tags)
+    insertionNumber = countInsertionNumber(entry.cigar)
+    deletionNumber = countDeletionNumber(entry.cigar)
+    substitutionNumber = max(0, mismatchNumber - deletionNumber)
+    return [insertionNumber, deletionNumber, substitutionNumber]
+
+
+def SBeforeFirstM(ci):
+    """Number of soft-clipped bases before the first M (match) in a CIGAR."""
+    for index in range(len(ci)):
+        if ci[index][0] == 0:
+            if index == 0:
+                return 0
+            else:
+                s = 0
+                for i in range(0, index):
+                    if ci[i][0] == 4:
+                        s += ci[i][1]
+                return s
+    return 0  # no M segment found
+
+
+def parseCIGAR(ci):  # calculate the matched-segment length list
+    matchSeg = []
+    preIndex = 0
+    for index in range(len(ci)):
+        if ci[index][0] == 0:  # match tag
+            preIndex = index
+            matchSeg.append(ci[index][1])
+            if index > 0:  # M is not the first tag
+                s = 0
+                for i in range(
+                    preIndex - 1, index
+                ):  # check all the tags before this M
+                    if ci[i][0] == 4:
+                        s += ci[i][1]
+                matchSeg[-1] += s
+            # search for insertions behind (before the next match)
+            for insertionIndex in range(index + 1, len(ci)):
+                inse = 0
+                if ci[insertionIndex][0] == 0:
+                    break
+                elif ci[insertionIndex][0] == 1:  # insertion exists
+                    inse += ci[insertionIndex][1]
+                matchSeg[-1] += inse
+    return matchSeg
+
+
+def parseMD(b):
+    """Tokenize the MD tag into match lengths, mismatch bases and '^' markers."""
+    buf = []
+    for j in b:
+        if j[0] == "MD":
+            md = copy.deepcopy(j[1])
+            num = (
+                md.replace("A", "^")
+                .replace("G", "^")
+                .replace("T", "^")
+                .replace("C", "^")
+                .replace("^^", "^")
+                .split("^")
+            )
+            for c in range(num.count("")):
+                num.remove("")
+            buf = [num[0]]
+
+            counter = 1
+            afterAlpha = 0
+            # walk through the MD string to record mutations and deletions
+            for i in j[1]:
+                if i.isalpha() or i == "^":
+                    buf.append(i)
+                    afterAlpha = 1
+                else:
+                    if afterAlpha and counter <= len(num) - 1:
+                        buf.append(num[counter])
+                        afterAlpha = 0
+                        counter += 1
+            break
+    return buf
+
+
+def insertionLocation(entry, num):  # SAM entry and total insertion count
+    insertionLoc = {}  # key: ref_offset; value: list of seq_offsets
+    ref_offset = 0
+    seq_offset = [0]
+    preInsertion = 0  # used to check whether this is the last insertion tag
+    for i in entry.cigar:
+        if i[0] == 0 or i[0] == 4:  # match or soft clip
+            ref_offset += i[1]
+            seq_offset[-1] += i[1]
+        elif i[0] == 2:  # deletion counts on the reference, not the read
+            ref_offset += i[1]
+        elif i[0] == 1:
+            # record this insertion tag and prepare for the next one
+            preInsertion += i[1]
+            for ins in range(1, i[1]):
+                newloc = seq_offset[-1] + 1
+                seq_offset.append(newloc)
+            st = []
+            for item in seq_offset:
+                st.append(item)
+            insertionLoc[ref_offset] = st
+            if preInsertion == num:  # this is the last insertion tag
+                return insertionLoc
+            else:  # insertions only count on the read sequence
+                seq_index = seq_offset[0] + i[1]
+
+
+def countInsertionBefore(seqLoc, insertLocList):
+    """Number of recorded insertion positions strictly before seqLoc."""
+    if len(insertLocList) == 0:
+        return 0
+    ins = 0
+    for i in insertLocList:
+        if i > seqLoc:
+            return ins
+        else:
+            ins += 1
+    return ins
+
+
+def mutationLocation(entry, insertLoc):
+    """Yield substitution and deletion records (0-based coordinates) for one read."""
+    match = entry
+    myList = match.tags
+    S_count = SBeforeFirstM(
+        match.cigar
+    )  # hard clipping cuts the seq, so it does not count
+    mis = countMismatch(myList)
+    mdlist = parseMD(myList)
+    mdtag = "".join(mdlist)
+    counter = 0
+    if not mdtag.isdigit():  # mismatches or deletions may exist
+        st_seq = 0
+        st_genome = 0
+        offset = 0
+        pre = ":"
+        set_insertion = 0  # recorded insertion numbers already counted
+        for ch in mdlist:
+            if ch.isdigit():
+                st_seq += int(ch)
+                st_genome += int(ch)
+                pre = ch
+            elif ch == "^":
+                st_genome += 1
+                pre = ch
+            elif ch.isalpha():
+                if (not pre == "^") and (pre.isdigit()):
+                    # substitution
+                    origin = ch
+                    index = st_seq + S_count + offset
+                    insertionBefore = countInsertionBefore(index, insertLoc)
+                    loc = st_genome + match.pos + offset  # 0-based
+                    index += insertionBefore - set_insertion
+                    set_insertion = insertionBefore
+
+                    try:
+                        mu = match.seq[index]
+                    except IndexError:
+                        mu = "N"
+                    if mu.upper() != "N":
+                        offset = index - S_count + 1
+                        st_seq = 0
+                        st_genome = 0
+                        if match.is_reverse:
+                            strand = "-"
+                            t = RC([origin, mu])
+                            origin = t[0]
+                            mu = t[1]
+                        else:
+                            strand = "+"
+                        mutation = [
+                            str(loc),
+                            str(loc + 1),
+                            match.qname,
+                            1,  # mutation count rather than offset
+                            strand,
+                            origin + "->" + mu,
+                        ]
+                        yield mutation
+                else:  # this is a deletion
+                    if pre in ["A", "G", "T", "C"]:
+                        st_genome += 1
+                    loc = st_genome + match.pos + offset - 1  # 0-based
+                    if match.is_reverse:
+                        strand = "-"
+                        ch = RC([ch])[0]
+                    else:
+                        strand = "+"
+                    index1 = loc - match.pos
+                    insertionBefore = countInsertionBefore(index1, insertLoc)
+                    index1 += insertionBefore - set_insertion
+                    set_insertion = insertionBefore
+                    mutation = [
+                        str(loc),
+                        str(loc + 1),
+                        match.qname,
+                        1,
+                        strand,
+                        "Deletion->" + ch,
+                    ]
+                    yield mutation
+                pre = ch
+
+    return
+
+
+def RC(strList):
+    """Reverse-complement every string in strList."""
+    rc = []
+    for item in strList:
+        st = ""
+        for chIndex in range(len(item)):
+            rcIndex = len(item) - 1 - chIndex  # walk from the 3' end
+            if item[rcIndex].upper() == "A":
+                st += "T"
+            elif item[rcIndex].upper() == "C":
+                st += "G"
+            elif item[rcIndex].upper() == "T":
+                st += "A"
+            elif item[rcIndex].upper() == "G":
+                st += "C"
+            else:
+                st += "N"
+        rc.append(st)
+    return rc
+
+
+def getMutations(infile, read):
+    """Collect insertion, deletion and substitution records for one read."""
+    mutationList = []
+
+    sur = survey(read)
+    insertion = sur[0]
+    deletion = sur[1]
+    substi = sur[2]
+    insertionSeqLoc = []
+    if insertion > 0:
+        insertionDic = insertionLocation(read, insertion)
+        for k in list(insertionDic.keys()):
+            for loc_index in range(len(insertionDic[k])):
+                insertionSeqLoc.append(insertionDic[k][loc_index])
+                mu = read.seq[insertionDic[k][loc_index]]
+                loc = k + loc_index + read.pos
+                if read.tid >= 0:
+                    chr = infile.getrname(read.tid)
+                    if read.is_reverse:
+                        strand = "-"
+                        mu = RC([mu])[0]
+                    else:
+                        strand = "+"
+                    mutationList.append(
+                        Alignment.MutationBed(
+                            chr,
+                            loc,
+                            loc + 1,
+                            read.qname,
+                            1,
+                            strand,
+                            "Insertion->" + mu,
+                        )
+                    )
+    insertionSeqLoc.sort()
+    if deletion + substi > 0:
+        for mu in mutationLocation(read, insertionSeqLoc):
+            if read.tid >= 0:
+                chr = infile.getrname(read.tid)
+                newMu = Alignment.MutationBed(
+                    chr, mu[0], mu[1], mu[2], mu[3], mu[4], mu[5]
+                )
+                mutationList.append(newMu)
+    return mutationList
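+
+
+# Worked example of the MD-tag bookkeeping above (SAM spec semantics):
+# MD:Z:10A5^AC6 encodes 10 matches, a mismatch where the reference base is A,
+# 5 matches, a deletion of reference bases AC, then 6 matches; parseMD()
+# tokenizes it as ['10', 'A', '5', '^', 'A', 'C', '6'].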
+
+
+def getTruncations(infile, read):
+    """A read has only one truncation site, but to stay consistent with
+    mutations, return it in a one-element list."""
+    if read.is_reverse:
+        strand = "-"
+        start = read.pos + read.alen  # 5' end of a reverse-strand read
+    else:
+        strand = "+"
+        start = read.pos
+    stop = start + 1
+    if read.tid < 0:  # unmapped read: no reference name available
+        return []
+    chr = infile.getrname(read.tid)
+    newTr = Alignment.MutationBed(
+        chr, start, stop, read.qname, 1, strand, "truncation"
+    )
+    return [newTr]
diff --git a/lib/runR1.sh b/pipeclip/runR1.sh
old mode 100755
new mode 100644
similarity index 60%
rename from lib/runR1.sh
rename to pipeclip/runR1.sh
index 7d5a115..be13df2
--- a/lib/runR1.sh
+++ b/pipeclip/runR1.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+
 filename=$1
 pvalue=$2
 #declare -a epsilon
@@ -35,21 +36,21 @@ steps=(0.1 0.08 0.05)
 #Call R function
 r_status="1"
 count=1
-for e in "${epsilon[@]}"
-do
-  for s in "${steps[@]}"
-  do
-    echo "$e,$s"
-    if [ -s "$filename.Converge.txt" ]
-    then
-      echo
-    else
-      #echo "$e,$s"
-      Rscript lib/ZTNB_tryCatch.R $filename $pvalue $e $s
-    fi
-    #echo "$filename.$count"
-    count=$((count+1))
-  done
-done
-
+SCRIPTPATH="$(
+  cd -- "$(dirname "$0")" >/dev/null 2>&1
+  pwd -P
+)"
+for e in "${epsilon[@]}"; do
+  for s in "${steps[@]}"; do
+    echo "$e,$s"
+    if [ -s "$filename.Converge.txt" ]; then
+      echo
+    else
+      #echo "$e,$s"
+      Rscript ${SCRIPTPATH}/ZTNB_tryCatch.R $filename $pvalue $e $s
+    fi
+    #echo "$filename.$count"
+    count=$((count + 1))
+  done
+done
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..de8f360
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,321 @@
+[[package]]
+name = "cffi"
+version = "1.15.0"
+description = "Foreign Function Interface for Python calling C code."
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+pycparser = "*"
+
+[[package]]
+name = "click"
+version = "8.1.2"
+description = "Composable command line interface toolkit"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[[package]]
+name = "colorama"
+version = "0.4.4"
+description = "Cross-platform colored terminal text."
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[[package]]
+name = "jinja2"
+version = "3.1.1"
+description = "A very fast and expressive template engine."
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+MarkupSafe = ">=2.0"
+
+[package.extras]
+i18n = ["Babel (>=2.7)"]
+
+[[package]]
+name = "markupsafe"
+version = "2.1.1"
+description = "Safely add untrusted strings to HTML/XML markup."
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[[package]]
+name = "pybedtools"
+version = "0.9.0"
+description = "Wrapper around BEDTools for bioinformatics work"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+pysam = "*"
+six = "*"
+
+[[package]]
+name = "pycparser"
+version = "2.21"
+description = "C parser in Python"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[[package]]
+name = "pysam"
+version = "0.19.0"
+description = "pysam"
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "pytz"
+version = "2022.1"
+description = "World timezone definitions, modern and historical"
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "pytz-deprecation-shim"
+version = "0.1.0.post0"
+description = "Shims to make deprecation of pytz easier"
+category = "main"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
+
+[package.dependencies]
+tzdata = {version = "*", markers = "python_version >= \"3.6\""}
+
+[[package]]
+name = "rpy2"
+version = "3.5.1"
+description = "Python interface to the R language (embedded R)"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+cffi = ">=1.10.0"
+jinja2 = "*"
+pytz = "*"
+tzlocal = "*"
+
+[package.extras]
+all = ["pytest", "setuptools", "numpy", "pandas"]
+numpy = ["pandas"]
+pandas = ["numpy", "pandas"]
+setup = ["setuptools"]
+test = ["pytest"]
+
+[[package]]
+name = "six"
+version = "1.16.0"
+description = "Python 2 and 3 compatibility utilities"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+
+[[package]]
+name = "tzdata"
+version = "2022.1"
+description = "Provider of IANA time zone data"
+category = "main"
+optional = false
+python-versions = ">=2"
+
+[[package]]
+name = "tzlocal"
+version = "4.2"
+description = "tzinfo object for the local timezone"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+pytz-deprecation-shim = "*"
+tzdata = {version = "*", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+devenv = ["black", "pyroma", "pytest-cov", "zest.releaser"]
+test = ["pytest-mock (>=3.3)", "pytest (>=4.3)"]
+
+[metadata]
+lock-version = "1.1"
+python-versions = "^3.9"
+content-hash = "22b48ea4b7dda9219b9a96f39e66f62dcd11cff3132b9fa698d5c69445fa761e"
+
+[metadata.files]
+cffi = [
+    {file = "cffi-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962"},
+    {file = "cffi-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0"},
+    {file = "cffi-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14"},
+    {file = "cffi-1.15.0-cp27-cp27m-win32.whl", hash = "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474"},
+    {file = "cffi-1.15.0-cp27-cp27m-win_amd64.whl", hash = "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6"},
+    {file = "cffi-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27"},
+    {file = "cffi-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023"},
+    {file = "cffi-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2"},
+    {file = "cffi-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e"},
+    {file = "cffi-1.15.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7"},
+    {file = "cffi-1.15.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3"},
+    {file = "cffi-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c"},
+    {file = "cffi-1.15.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962"},
+    {file = "cffi-1.15.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382"},
+    {file = "cffi-1.15.0-cp310-cp310-win32.whl", hash = "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55"},
+    {file = "cffi-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0"},
+    {file = "cffi-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e"},
+    {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39"},
+    {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc"},
+    {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032"},
+    {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8"},
+    {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605"},
+    {file = "cffi-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e"},
+    {file = "cffi-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc"},
+    {file = "cffi-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636"},
+    {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4"},
+    {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997"},
+    {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b"},
+    {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2"},
+    {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7"},
+    {file = "cffi-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66"},
+    {file = "cffi-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029"},
+    {file = "cffi-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880"},
+    {file = "cffi-1.15.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20"},
+    {file = "cffi-1.15.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024"},
+    {file = "cffi-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e"},
+    {file = "cffi-1.15.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728"},
+    {file = "cffi-1.15.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6"},
+    {file = "cffi-1.15.0-cp38-cp38-win32.whl", hash = "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c"},
+    {file = "cffi-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443"},
+    {file = "cffi-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a"},
+    {file = "cffi-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37"},
+    {file = "cffi-1.15.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a"},
+    {file = "cffi-1.15.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e"},
+    {file = "cffi-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"},
+    {file = "cffi-1.15.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df"},
+    {file = "cffi-1.15.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8"},
+    {file = "cffi-1.15.0-cp39-cp39-win32.whl", hash = "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a"},
+    {file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"},
+    {file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"},
+]
+click = [
+    {file = "click-8.1.2-py3-none-any.whl", hash = "sha256:24e1a4a9ec5bf6299411369b208c1df2188d9eb8d916302fe6bf03faed227f1e"},
+    {file = "click-8.1.2.tar.gz", hash = "sha256:479707fe14d9ec9a0757618b7a100a0ae4c4e236fac5b7f80ca68028141a1a72"},
+]
+colorama = [
+    {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
+    {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
+]
+jinja2 = [
+    {file = "Jinja2-3.1.1-py3-none-any.whl", hash = "sha256:539835f51a74a69f41b848a9645dbdc35b4f20a3b601e2d9a7e22947b15ff119"},
+    {file = "Jinja2-3.1.1.tar.gz", hash = "sha256:640bed4bb501cbd17194b3cace1dc2126f5b619cf068a726b98192a0fde74ae9"},
+]
+markupsafe = [
+    {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"},
+    {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"},
+    {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"},
+    {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"},
+    {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"},
+    {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"},
+    {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"},
+    {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"},
+    {file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"},
+    {file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"},
+    {file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"},
+    {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"},
+    {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"},
+    {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"},
+    {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"},
+    {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"},
+    {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"},
+    {file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"},
+    {file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"},
+    {file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"},
+    {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"},
+    {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"},
+]
+pybedtools = [
+    {file = "pybedtools-0.9.0.tar.gz", hash = "sha256:9267c92cd764173449d9c31baedac0659b4eccc3d7c05e22ec378f86c0fc30a3"},
+]
+pycparser = [
+    {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
+    {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
+]
+pysam = [
+    {file = "pysam-0.19.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:44de1a3af7c7eb5f404d6337f0c9c4ee88c34c2d2fee1a7896ccd8e7d2aa475a"},
+    {file = "pysam-0.19.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0ceb07c6253598ec70fef6ac0c0f7ab0d299562c1a91e737adb07239afba22d6"},
+    {file = "pysam-0.19.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2057f3b8cc20562fd010e7971e83ab78978f17975563a711c94bca583ce8a2d3"},
+    {file = "pysam-0.19.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a1a9ee6cd6dfa50973dcb51cd2245ea7d4d64d4e962d60e5e1a088f7b790e3e"},
+    {file = "pysam-0.19.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b831170ff810bfd1242dbce4ddf8e693e95e39a4332d5903d416233d3d1be50a"},
+    {file = "pysam-0.19.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fa4a5d334986d0696522246820f295cbf6c18dc1b78798f800a2d57d56207789"},
+    {file = "pysam-0.19.0-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ab58d7d9b140e2e8a4918fc00661aa901ba461d9bccd4b499102b0828f2d897e"},
+    {file = "pysam-0.19.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1d964c29fedf55d2a5d095227d19d915c2e0e5e42b1e0bdf7fab30cd1d2a850"},
+    {file = "pysam-0.19.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8c78f9b84f7fe69530eaccf5b7f08636b3419f0017b5050aa7013f1c59d3d654"},
+    {file = "pysam-0.19.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ada92b376717ac4c9c9924a096af9186035115d29a113eaa997d1c020ce421b9"},
+    {file = "pysam-0.19.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:80b8f5b929c7f660b6e1497790a13c61f386b310a31ca54ad6ba110674d11c55"},
+    {file = "pysam-0.19.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30017bed8d002d41f83fa7e10569525c811a8e3860d73a880ee653ef29fd4fbc"},
+    {file = "pysam-0.19.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5e2e465522ba2c34cb96c013a28f9c9db303f8ab1350b4c11cca73677f1e9594"},
+    {file = "pysam-0.19.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2103b4b6d9b0cc0b4aaccf64e87a92bfdabb7dc92810cf84be35ffe78fafa002"},
+    {file = "pysam-0.19.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eaf342b9c71ed83a63237df2000e3bc1f0236165d48fd7240c4c78b66f28d63b"},
+    {file = "pysam-0.19.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc0021b154206edfbaa13515cb67523c76c576b7a670e72a793f2da4f03139f4"},
+    {file = "pysam-0.19.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3101e0fcd2f6accbfa72a554a71baf83f1c096bb0f9045059b3ead35901ce128"},
+    {file = "pysam-0.19.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5118dcabac574638df43739941f0ee20cc4e9197aee4a8f10f195542a13f18e3"},
+    {file = "pysam-0.19.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4fe1f1fae0e559d3412625dc7d4d08b477e80211b3fe5b267ba341a84f78b8be"},
+    {file = "pysam-0.19.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2911e3760dd2b709c686f075146787b8bda35629093352c28a7e52ac00ddaffc"},
+    {file = "pysam-0.19.0.tar.gz", hash = "sha256:dcc052566f9509fd93b2a2664f094a13f016fd60cdd189e05fb4eafa0c89505b"},
+]
+pytz = [
+    {file = "pytz-2022.1-py2.py3-none-any.whl", hash = "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"},
+    {file = "pytz-2022.1.tar.gz", hash = "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7"},
+]
+pytz-deprecation-shim = [
+    {file = "pytz_deprecation_shim-0.1.0.post0-py2.py3-none-any.whl", hash = "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6"},
+    {file = "pytz_deprecation_shim-0.1.0.post0.tar.gz", hash = "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d"},
+]
+rpy2 = [
+    {file = "rpy2-3.5.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:eac176e65610a475bb375292db7fb571066d758b52ad523fe7b8f120947cbcef"},
+    {file = "rpy2-3.5.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:d35e62a4948b5b620cf2b47f6d87dc84a1e9d18f163caa0b98824c228138e24f"},
+    {file = "rpy2-3.5.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:782d2d2a1a0ef2f4b58bd9e56adeab82594281eb23358d5f77a5af12d213375b"},
+    {file = "rpy2-3.5.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:6b79f1e772b9ecdd287d963656e9653f7aca4b7df434d425abcde2cbe2271d19"},
+    {file = "rpy2-3.5.1.tar.gz", hash = "sha256:d35717489bd0754b556202a6b990ebc6f3a1c18c9c23b3a862a46a91fc265805"},
+]
+six = [
+    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
+    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
+]
+tzdata = [
+    {file = "tzdata-2022.1-py2.py3-none-any.whl", hash = "sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9"},
+    {file = "tzdata-2022.1.tar.gz", hash = "sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3"},
+]
+tzlocal = [
+    {file = "tzlocal-4.2-py3-none-any.whl", hash = "sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745"},
+    {file = "tzlocal-4.2.tar.gz", hash = "sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7"},
+]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..35de657
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,25 @@
+[tool.poetry]
+name = "pipeclip"
+version = "2.0.10"
+description = "PIPELINE FOR CLIP SEQ DATA"
+readme = "README.md"
+keywords = ["bioinformatics", "CLIP", "PAR-CLIP", "iCLIP", "RNA-protein interaction", "mutation", "truncation"]
+authors = ["Zhiqun Xie"]
+repository = "https://github.com/y9c/PIPE-CLIP"
+
+[tool.poetry.dependencies]
+python = "^3.6"
+pysam = "^0.19.0"
+pybedtools = "^0.9.0"
+click = "^8.1.2"
+rpy2 = "^3.5.1"
+colorama = "^0.4.4"
+
+[tool.poetry.dev-dependencies]
+
+[tool.poetry.scripts]
+pipeclip = 'pipeclip.main:runPipeClip'
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
diff --git a/test/resources/foo.txt b/test/resources/foo.txt
deleted file mode 100644
index 0527e6b..0000000
--- a/test/resources/foo.txt
+++ /dev/null
@@ -1 +0,0 @@
-This is a test
diff --git a/test/src/foo.txt b/test/src/foo.txt
deleted file mode 100644
index 190a180..0000000
--- a/test/src/foo.txt
+++ /dev/null
@@ -1 +0,0 @@
-123