Skip to content

Commit

Permalink
Merge pull request #37 from MatthieuBeukers/master
Browse files Browse the repository at this point in the history
Updates to unittests.
  • Loading branch information
TDMedina authored Jul 5, 2019
2 parents 87f07e9 + de5a57e commit 408f5e6
Show file tree
Hide file tree
Showing 13 changed files with 396 additions and 273 deletions.
2 changes: 1 addition & 1 deletion OverlapContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def get_number_of_context_reads(self):
def get_context_bam_read_ids(self):
if self.context_bam_reads is None:
return [None]
return [x.get_bam_read_id() for x in self.context_bam_reads]
return list(set([x.get_bam_read_id() for x in self.context_bam_reads]))

# Returns a list of all left positions for all BAM reads.
def get_context_bam_read_starts(self):
Expand Down
74 changes: 24 additions & 50 deletions ParamChecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,18 @@ def __init__(self):
self.log_location = ""
self.variantlist_location = ""

# Check the logging parameter to determine where to write the
# logfile to.
# Check the logging parameter to determine where to write the logfile to.
def check_log(self, logparam):
logloc = "VaSeBuilder.log"

if logparam is not None:
# Check the location of the log file if the --log parameter
# has been set.
# Check the location of the log file if the --log parameter has been set.
if (not(os.path.isfile(logparam)) and (logparam.endswith(".log")
or logparam.endswith(".txt"))):
logloc = logparam

# Check to make sure the provided --log parameter value is
# not a directory. (Directories could be named "something.log").
# not a directory. (Directories could be named "something.log").
if os.path.isdir(logparam):
logloc = logparam + "/VaSeBuilder.log"
self.log_location = logloc
Expand All @@ -49,8 +47,7 @@ def check_folders_exist(self, paramvals, file_exts):
f"Using the folder {foldername} "
"as input folder")

# Check if the supplied value is a folder or not and
# contains any vcf/bam files.
# Check if the supplied value is a folder or not and contains any vcf/bam files.
if not (os.path.isdir(foldername)):
self.vaselogger.warning(f"Folder {foldername} was not found "
"and will therefore be skipped")
Expand All @@ -64,8 +61,7 @@ def check_folders_exist(self, paramvals, file_exts):
existing_folders.append(foldername)
return existing_folders

# Checks whether at least one file with a provided extension (.vcf
# or .bam) is present.
# Checks whether at least one file with a provided extension (.vcf or .bam) is present.
def check_folder_contents(self, folder_to_check, file_exts):
vb_count = 0
for vbfile in os.listdir(folder_to_check):
Expand All @@ -90,95 +86,76 @@ def check_file_exists(self, fileloc):
def is_valid_output_location(self, outfilename):
    """Check whether the folder that should hold an output location exists.

    outfilename: path of the output file/location to validate.
    Returns True if the parent folder of outfilename is an existing
    directory, False otherwise.
    """
    # os.path.dirname() yields "" for a bare name such as "out.txt" and
    # os.path.isdir("") is always False; a bare name lives in the current
    # working directory, so check that instead.
    parent_dir = os.path.dirname(outfilename) or os.curdir
    return os.path.isdir(parent_dir)

# Checks whether the values of the parameters are correct (do
# files/folders exist for example).
# Checks whether the values of the parameters are correct (do files/folders exist for example).
# [Function should perhaps be split into smaller functions]
def check_parameters(self, vase_arg_vals):

# Loop over the provided parameters.
for param in vase_arg_vals:

# If the current parameter is vcfin, check whether there are
# any valid VCF folders to use.
# If the current parameter is donorvcf, check that the file containing the list of donor VCFs exists.
if param == "donorvcf":
if not os.path.isfile(vase_arg_vals["donorvcf"]):
self.vaselogger.critical("No VCF/BCF donor list file found")
return False
self.vcf_filelist = vase_arg_vals["donorvcf"]

# If the current parameter is bamin, check whether there are
# any valid BAM folders to use.
# If the current parameter is donorbam, check that the file containing the list of donor BAMs exists.
if param == "donorbam":
if not os.path.isfile(vase_arg_vals["donorbam"]):
self.vaselogger.critical("No BAM/CRAM donor list file found")
return False
self.bam_filelist = vase_arg_vals["donorbam"]

# If the current parameter is bam, check whether a valid
# BAM file is provided.
# If the current parameter is acceptorbam, check whether a valid BAM file is provided.
if param == "acceptorbam":
if not self.check_file_exists(vase_arg_vals[param]):
self.vaselogger.critical("No valid NIST BAM file supplied "
":(")
self.vaselogger.critical("No valid acceptor/template BAM file supplied :(")
return False
self.acceptorbam = vase_arg_vals[param]

# If the current parameter is valfastq1, check whether a
# valid R1 fastq file is provided.
# If the current parameter is templatefq1, check whether one or more valid R1 fastq files are provided.
if param == "templatefq1":
if not self.check_file_exists(vase_arg_vals[param]):
self.vaselogger.critical("Provided R1 FastQ input file "
"does not exist")
self.vaselogger.critical("No valid R1 FastQ file(s) provided")
return False
self.fastq_in1 = vase_arg_vals[param]

# If the current parameter is valfastq2, check whether a
# valid R2 fastq file is provided.
# If the current parameter is templatefq2, check whether one or more valid R2 fastq files are provided.
if param == "templatefq2":
if not self.check_file_exists(vase_arg_vals[param]):
self.vaselogger.critical("Provided R2 FastQ input file "
"does not exist")
self.vaselogger.critical("No valid R2 FastQ file(s) provided")
return False
self.fastq_in2 = vase_arg_vals[param]

# If the current parameter is out, check whether it is a
# valid output location.
# If the current parameter is out, check whether it is a valid output location.
if param == "out":
if not self.is_valid_output_location(vase_arg_vals[param]):
return False
self.outdir = vase_arg_vals[param]

# If the current parameters is fastqout, check if a name has
# been provided.
# If the current parameter is fastqout, check if a name has been provided.
if param == "fastqout":
self.fastq_out_location = self.get_output_name(vase_arg_vals[param],
"VaSe")
self.fastq_out_location = self.get_output_name(vase_arg_vals[param], "VaSe")

# If the current parameter is varcon, check whether a valid
# output location is provided.
# If the current parameter is varcon, check whether a valid output location is provided.
if param == "varcon":
self.varcon_out_location = self.get_output_name(vase_arg_vals[param],
"varcon.txt")
self.varcon_out_location = self.get_output_name(vase_arg_vals[param], "varcon.txt")

# Checks if the provided variant list file exists
if param == "variantlist":
if vase_arg_vals[param] is not None:
if self.check_file_exists(vase_arg_vals[param]):
self.variantlist_location = vase_arg_vals[param]

# Return the lists of valid VCF and BAM folders that can be used
# by the program.
return True

# Returns thename of the folder name of a parameter value (if the
# parameter value is ).
# Returns the folder belonging to a parameter value: the value itself if it is an existing folder, otherwise its parent folder.
def get_folder_name(self, foldername):
    """Return the folder that belongs to a parameter value.

    foldername: path given as parameter value.
    Returns foldername unchanged when it is an existing directory;
    otherwise (an existing file or a path that does not exist) returns
    its parent directory as given by os.path.dirname().
    """
    # An existing directory is already the answer.
    if os.path.isdir(foldername):
        return foldername
    # Anything else (file or missing path) resolves to its parent folder.
    return os.path.dirname(foldername)

# Returns the name of an output file (is used for parameters
# fastqout, varcon, donorbread and acceptorbread).
# Returns the name of an output file (is used for parameters fastqout, varcon, donorbread and acceptorbread).
def get_output_name(self, outfilename, defaultoutname):
if outfilename is not None:
if "/" in outfilename:
Expand Down Expand Up @@ -208,22 +185,19 @@ def get_first_fastq_in_location(self):
def get_second_fastq_in_location(self):
    """Return the stored value of the second (R2) fastq input, self.fastq_in2."""
    second_fastq = self.fastq_in2
    return second_fastq

# Returns the location(s) and names of the two (R1 and R2) fastq
# input files.
# Returns the location(s) and names of the two (R1 and R2) fastq input files.
def get_fastq_in_locations(self):
    """Return the R1 and R2 fastq inputs as a two-element list [R1, R2]."""
    first_fastq, second_fastq = self.fastq_in1, self.fastq_in2
    return [first_fastq, second_fastq]

# Returns the location to write the output to.
def get_out_dir_location(self):
    """Return the stored output directory, self.outdir."""
    output_dir = self.outdir
    return output_dir

# Returns the location of the FastQ file that will be produced by
# VaSeBuilder.
# Returns the location of the FastQ file that will be produced by VaSeBuilder.
def get_fastq_out_location(self):
    """Return the output FastQ location: self.outdir and
    self.fastq_out_location joined by a single "/"."""
    path_parts = (self.outdir, self.fastq_out_location)
    return "/".join(path_parts)

# Returns the location of file that will contain the variants and
# their context start and stops.
# Returns the location of file that will contain the variants and their context start and stops.
def get_variant_context_out_location(self):
    """Return the variant context output location: self.outdir and
    self.varcon_out_location joined by a single "/"."""
    path_parts = (self.outdir, self.varcon_out_location)
    return "/".join(path_parts)

Expand Down
5 changes: 3 additions & 2 deletions VaSeUtils/UtilParamCheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ def required_params_set(self, utiltorun, paramlist):
if utiltorun in self.required_util_params:
for reqparam in self.required_util_params[utiltorun]:
if paramlist[reqparam] is not None:
if not os.path.isfile(paramlist[reqparam]):
return False
if reqparam != "outfile":
if not os.path.isfile(paramlist[reqparam]):
return False
else:
return False
return True
Expand Down
4 changes: 2 additions & 2 deletions VariantContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def get_number_of_acceptor_reads(self):
def get_acceptor_read_ids(self):
if self.variant_context_areads is None:
return [None]
return [x.get_bam_read_id() for x in self.variant_context_areads]
return list(set([x.get_bam_read_id() for x in self.variant_context_areads]))

# Returns the list of leftmost acceptor read positions.
def get_acceptor_read_starts(self):
Expand Down Expand Up @@ -151,7 +151,7 @@ def get_number_of_donor_reads(self):
# Returns the identifiers of donor reads overlapping with the
# variant context.
def get_donor_read_ids(self):
return [x.get_bam_read_id() for x in self.variant_context_dreads]
return list(set([x.get_bam_read_id() for x in self.variant_context_dreads]))

# Returns the list of variant context donor read starting positions.
def get_donor_read_starts(self):
Expand Down
5 changes: 4 additions & 1 deletion VariantContextFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import statistics
from OverlapContext import OverlapContext
from VariantContext import VariantContext
from ReadIdObject import ReadIdObject


class VariantContextFile:
Expand Down Expand Up @@ -148,9 +149,11 @@ def read_variant_context_file(self, fileloc, samplefilter=None,
chrompass = self.passes_filter(filelinedata[2], chromfilter)

if samplepass and varconpass and chrompass:
acceptor_reads = [ReadIdObject(readid) for readid in filelinedata[11].split(";")]
donor_reads = [ReadIdObject(readid) for readid in filelinedata[12].split(";")]
varcon_obj = VariantContext(filelinedata[0], filelinedata[1], filelinedata[2],
int(filelinedata[3]), int(filelinedata[4]), int(filelinedata[5]),
filelinedata[11].split(";"), filelinedata[12].split(";"))
acceptor_reads, donor_reads)
if filelinedata[0] not in self.variant_contexts:
self.variant_contexts[filelinedata[0]] = varcon_obj
except IOError as ioe:
Expand Down
2 changes: 1 addition & 1 deletion VcfVariant.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,4 @@ def get_variant_id(self):
# ToString method
def to_string(self):
return f"{self.vcf_variant_chrom}\t{self.vcf_variant_start}\t{self.vcf_variant_type}\t{self.vcf_variant_ref}" \
f"\t{self.vcf_variant_alts}"
f"\t{self.vcf_variant_alts}\t{self.vcf_variant_filter}"
3 changes: 1 addition & 2 deletions tests/TestOverlapContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,7 @@ def test_get_number_of_context_reads(self):
f"of context reads should have been {num_of_context_reads_answer}")

def test_get_context_bam_read_ids(self):
context_read_ids_answer = ["HHKY2CCXX160108:1:2122:24160:2522", "HHKY2CCXX160108:1:2122:24160:2522",
"HHKY2CCXX160108:1:2122:24160:2522"]
context_read_ids_answer = ["HHKY2CCXX160108:1:2122:24160:2522"]
self.assertEqual(self.overlap_context.get_context_bam_read_ids(), context_read_ids_answer,
f"The list of context read ids should have been {context_read_ids_answer}")

Expand Down
Loading

0 comments on commit 408f5e6

Please sign in to comment.