Skip to content

Commit

Permalink
Merge remote-tracking branch 'ecogenomics/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
Tim Lamberton committed Aug 3, 2015
2 parents 70dc49e + 8e2fd0a commit 4b9e98d
Show file tree
Hide file tree
Showing 27 changed files with 265 additions and 34,166 deletions.
18 changes: 18 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ c/libcfu-0.03/libtool
c/libcfu-0.03/src/Makefile
c/libcfu-0.03/src/*.o
c/libcfu-0.03/src/libcfu.a
c/libcfu-0.03/aclocal.m4
c/libcfu-0.03/config.guess
c/libcfu-0.03/config.sub
c/libcfu-0.03/configure
c/libcfu-0.03/ltmain.sh
c/libcfu-0.03/src/.deps
c/libcfu-0.03/autom4te.cache/*
c/autom4te.cache/
c/htslib-1.2.1/test/*.o
Expand All @@ -45,3 +51,15 @@ c/htslib-1.2.1/test/test-vcf-api
c/htslib-1.2.1/test/test-vcf-sweep
c/htslib-1.2.1/test/test_view
c/htslib-1.2.1/version.h
bamm/libBamM.a
bamm/tests/modelling/contigs.fa
bamm/tests/modelling/contigs.mp.bam
bamm/tests/modelling/contigs.mp.bam.bai
bamm/tests/modelling/contigs.pe.1.bam
bamm/tests/modelling/contigs.pe.1.bam.bai
bamm/tests/modelling/contigs.up.bam
bamm/tests/modelling/contigs.up.bam.bai
bamm/tests/modelling/mp.fa
bamm/tests/modelling/pe.1.fa
bamm/tests/modelling/pe.2.fa
bamm/tests/modelling/up.fa
198 changes: 77 additions & 121 deletions bamm/bamMaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import os
import sys
import tempfile
import shutil

# local imports
from bammExceptions import InvalidParameterSetException
Expand Down Expand Up @@ -86,30 +87,15 @@ def __init__(self,
extraArguments='',
showCommands=False,
quiet = False,
silent=False
silent=False,
tmpdir=tempfile.gettempdir()
):
'''Default constructor.
Initializes a BamScheduler instance with the provided set of properties.
Inputs:
database - full path to fasta file of contigs (may be gzipped),
alignmentAlgorithm - one of BWA's alignment algorithms,
indexAlgorithm - one of BWA's index algorithms,
paired - [fileName pairs], always even numbered in length, in order
[R_A_1, R_A_2, R_B_1, R_B_2, ...]
interleaved - [fileNames], containing interleaved paired reads
singleEnded - [fileNames], containing single ended reads
keptFiles - == True -> indexes for the db already exist,
keepFiles - == True -> don't delete indexes at the end,
outputTam - == True -> you love text files to bits,
numThreads - int, the maximum number of threads to use
maxMemory - string, maximum memory program will use (samtools style)
forceOverwriting - == True -> force overwriting index files,
extraArguments - string, extra args to pass to BWA
showCommands - == True -> show all commands being run
quiet - == True -> suppress output from the mapper
silent - == True -> suppress all output
As per BamMaker
Outputs:
None
Expand Down Expand Up @@ -148,9 +134,9 @@ def __init__(self,
self.extraArguments = extraArguments
self.quiet = quiet
self.silent = silent
self.tmpdir = tmpdir
self.showCommands = showCommands


if self.maxMemory is None:
# default to 2 GB for each thread requested
self.maxMemory = str(self.numThreads*2)+'G'
Expand Down Expand Up @@ -217,7 +203,8 @@ def __init__(self,
extraArguments=self.extraArguments,
quiet=self.quiet,
silent=self.silent,
showCommands=self.showCommands
showCommands=self.showCommands,
tmpdir=self.tmpdir
)
self.BMs.append(BM)

Expand Down Expand Up @@ -250,7 +237,8 @@ def __init__(self,
extraArguments=self.extraArguments,
quiet=self.quiet,
silent=self.silent,
showCommands=self.showCommands
showCommands=self.showCommands,
tmpdir=self.tmpdir
)
self.BMs.append(BM)

Expand Down Expand Up @@ -282,7 +270,8 @@ def __init__(self,
extraArguments=self.extraArguments,
quiet=self.quiet,
silent=self.silent,
showCommands=self.showCommands
showCommands=self.showCommands,
tmpdir=self.tmpdir
)
self.BMs.append(BM)

Expand Down Expand Up @@ -380,7 +369,8 @@ def __init__(self,
extraArguments='',
showCommands=False,
quiet=False,
silent=False
silent=False,
tmpdir=tempfile.gettempdir()
):
'''Default constructor.
Expand All @@ -405,6 +395,8 @@ def __init__(self,
showCommands - == True -> show all commands being run
quiet - == True -> suppress output from the mapper
silent - == True -> suppress all output
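tmpdir - string, directory in which the BAM is sorted before being moved
to its final location (default: tempfile.gettempdir()); a false
value sorts directly to the output location instead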
Outputs:
None
Expand All @@ -418,6 +410,7 @@ def __init__(self,
self.quiet = quiet
self.showCommands = showCommands
self.silent = silent
self.tmpdir = tmpdir

self.errorOutput = ''
if self.quiet or self.silent:
Expand Down Expand Up @@ -584,7 +577,7 @@ def removeDatabase(self):
# main wrapper

def makeBam(self):
'''Use BWA ans samtools to make a BAM/TAM file
'''Use BWA and samtools to make a BAM/TAM file
Inputs:
None
Expand Down Expand Up @@ -628,6 +621,56 @@ def makeBam(self):
self.mem_single_to_sorted_indexed_bam()
else:
self.mem_to_sorted_indexed_bam()

def _sam_to_sorted_and_run(self, cmdline):
'''Given a cmdline that generates a SAM file on stdout, run that through
samtools view |samtools sort
Parameters
----------
cmdline: str
Command that outputs on stdout a SAM file
Returns
-------
Nothing'''

# On some systems where the sorted BAM file goes eventually is on a
# slower disk, and it would be faster to write to tmp and then
# move the sorted file to the correct location upon completion.
# It is maybe faster to do that, but people may run out of disk space
# so don't use temporary directory by default.
cmdline += ' '.join([' | samtools view -SubhF 4 -',
self.errorOutput,
'| samtools sort -m',
self.maxMemory,
'-@',
str(self.numThreads),
'-'])
if self.tmpdir:
with tempfile.NamedTemporaryFile(prefix="bamm_make",
dir=self.tmpdir) as f:
cmdline += ' '+' '.join([f.name,
self.errorOutput])
self._run_cmd(cmdline)

# It would be preferable to use samtools sort -f, but that seems
# broken (at least in 0.1.19) for bam files that get split up.
shutil.move("%s.bam" % f.name,
"%s.bam" % self.outFileName)
else:
# no temporary directory specified, use regular output
cmdline += ' '+' '.join([self.outFileName,
self.errorOutput])
self._run_cmd(cmdline)


def _run_cmd(self, cmd):
if self.showCommands and not self.silent:
print "BamM: Running command: '%s'" % cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)


#---------------------------------------------------------------
# aln algorithm
Expand All @@ -654,11 +697,7 @@ def aln(self, readFile, saiFile):
'>',
saiFile,
self.errorOutput])

if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)
self._run_cmd(cmd)

def sampe(self):
'''call bwa sampe
Expand All @@ -683,11 +722,7 @@ def sampe(self):
'>',
self.outFileName,
self.errorOutput])

if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)
self._run_cmd(cmd)

def samse(self):
'''call bwa samse
Expand All @@ -710,11 +745,7 @@ def samse(self):
'>',
self.outFileName,
self.errorOutput])

if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)
self._run_cmd(cmd)

def sampe_to_sorted_indexed_bam(self):
'''call bwa sampe and sort + index the result
Expand All @@ -736,19 +767,7 @@ def sampe_to_sorted_indexed_bam(self):
self.sai2,
self.readFile1,
self.readFile2])

cmd += ' '.join([' | samtools view -SubhF 4 -',
self.errorOutput,
'| samtools sort -m',
self.maxMemory,
'-',
self.outFileName,
self.errorOutput])

if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)
self._sam_to_sorted_and_run(cmd)
self.samtoolsIndex(self.outFileName)

def samse_to_sorted_indexed_bam(self):
Expand All @@ -769,20 +788,7 @@ def samse_to_sorted_indexed_bam(self):
self.database,
self.sai1,
self.readFile1])

cmd += ' '.join([' | samtools view -SubhF 4 -',
self.errorOutput,
'| samtools sort -m',
self.maxMemory,
'-',
self.outFileName,
self.errorOutput])

if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)

self._sam_to_sorted_and_run(cmd)
self.samtoolsIndex(self.outFileName)

#---------------------------------------------------------------
Expand All @@ -807,20 +813,7 @@ def mem_single_to_sorted_indexed_bam(self):
self.database,
self.readFile1,
self.errorOutput])

cmd += ' '.join([' | samtools view -SubhF 4 -',
self.errorOutput,
' | samtools sort -m',
self.maxMemory,
'-',
self.outFileName,
self.errorOutput])

if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)

self._sam_to_sorted_and_run(cmd)
self.samtoolsIndex(self.outFileName)

def mem_to_sorted_indexed_bam(self):
Expand Down Expand Up @@ -848,19 +841,7 @@ def mem_to_sorted_indexed_bam(self):
bwa_cmd += ' '.join(['-p',self.readFile1])
else:
bwa_cmd += ' '.join([self.readFile1,self.readFile2])

cmd = bwa_cmd + ' '.join([' | samtools view -SubhF 4 -',
self.errorOutput,
' | samtools sort -m',
self.maxMemory,
'-',
self.outFileName,
self.errorOutput])

if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)
self._sam_to_sorted_and_run(bwa_cmd)
self.samtoolsIndex(self.outFileName)

#---------------------------------------------------------------
Expand Down Expand Up @@ -898,11 +879,7 @@ def bwasw(self):
'>',
self.outFileName,
self.errorOutput])

if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)
self._run_cmd(cmd)

def bwasw_to_sorted_indexed_bam(self):
'''call bwasw and sort and index the result
Expand All @@ -925,20 +902,7 @@ def bwasw_to_sorted_indexed_bam(self):

if not self.isSingleEnded:
cmd += ' ' + self.readFile2

cmd += ' '.join([' | samtools view -SubhF 4 -',
self.errorOutput,
'| samtools sort -m',
self.maxMemory,
'-',
self.outFileName,
self.errorOutput])

if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)

self._sam_to_sorted_and_run(cmd)
self.samtoolsIndex(self.outFileName)

#---------------------------------------------------------------
Expand All @@ -957,10 +921,7 @@ def samtoolsIndex(self, sortedBamFile):
cmd = ' '.join(['samtools index',
sortedBamFile+'.bam',
self.errorOutput])
if self.showCommands and not self.silent:
print cmd
sys.stdout.flush()
subprocess.check_call(cmd, shell=True)
self._run_cmd(cmd)

#---------------------------------------------------------------
# utilities
Expand Down Expand Up @@ -1011,8 +972,3 @@ def __str__(self):
(self.database, self.outFileName, suffix, self.numThreads)
str += " Alignment algorithm: %s" % self.alignmentAlgorithm
return str

###############################################################################
###############################################################################
###############################################################################
###############################################################################
1 change: 1 addition & 0 deletions bamm/bamParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from bamFile import BM_bamFile, BM_bamType, BM_fileInfo, BM_coverageType
from bammExceptions import (InvalidNumberOfTypesException,
BAMFileNotFoundException,
BAMIndexNotFoundException,
NoBAMSFoundException)

###############################################################################
Expand Down
Loading

0 comments on commit 4b9e98d

Please sign in to comment.