From a3d1d49e602265ba80b27894ed38a88c64a4788d Mon Sep 17 00:00:00 2001 From: Frank Austin Nothaft Date: Wed, 25 Jan 2017 00:53:36 -0800 Subject: [PATCH] Clean up some INDEL bits. --- .../genotyping/BiallelicGenotyper.scala | 18 +++++++++++++++++- .../avocado/genotyping/DiscoverVariants.scala | 6 +++++- .../avocado/genotyping/Observer.scala | 10 +++++++--- .../avocado/models/Observation.scala | 18 ++++++++++++++++++ 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala index 3011b521..125c05fa 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala @@ -259,10 +259,26 @@ private[avocado] object BiallelicGenotyper extends Serializable with Logging { } else if (isInsertion(variant)) { val insAllele = variant.getAlternateAllele.tail val insObserved = observed.filter(_._2 == insAllele) + println("for %s observed %s in %s".format(insAllele, observed.mkString(","), read)) if (observed.size == 2 && insObserved.size == 1) { Some((variant, insObserved.head._3.duplicate(Some(false)))) - } else if (observed.forall(_._3.isRef)) { + } else if (!observed.forall(_._3.isRef)) { + val nonRef = observed.filter(!_._3.isRef) + if (nonRef.size == 1) { + val (_, allele, nonRefObs) = nonRef.head + if (allele.length == insAllele.length) { + val matchingBases = allele.zip(insAllele).count(p => p._1 == p._2) + Some((variant, nonRefObs.scale(matchingBases, + insAllele.length, + Some(false)))) + } else { + Some((variant, observed.head._3.nullOut)) + } + } else { + Some((variant, observed.head._3.nullOut)) + } + } else if (read.getEnd != variant.getEnd) { Some((variant, observed.head._3.invert)) } else { Some((variant, observed.head._3.nullOut)) diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoverVariants.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoverVariants.scala index 11a46041..9b3acc4a 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoverVariants.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoverVariants.scala @@ -201,7 +201,11 @@ object DiscoverVariants extends Serializable with Logging { kv } case Insertion(length) => { - val insQuals = qual.substring(idx - 1, idx + length).map(_.toInt - 33).sum / length + val insQuals = if (length > 0) { + qual.substring(idx - 1, idx + length).map(_.toInt - 33).sum / length + } else { + 0 + } val newVar = if (insQuals >= phredThreshold) { Variant.newBuilder .setContigName(contigName) diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/Observer.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/Observer.scala index 144e7839..72e2f92d 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/Observer.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/Observer.scala @@ -131,9 +131,13 @@ private[genotyping] object Observer extends Serializable { // get bases and quals val bases = readSequence.substring(oldReadIdx, readIdx) - val qual = readQualities.substring(oldReadIdx, readIdx) - .map(_.toInt - 33) - .sum / length + val qual = if (length > 0) { + readQualities.substring(oldReadIdx, readIdx) + .map(_.toInt - 33) + .sum / length + } else { + 0 + } // the key is the (site, allele, sampleId) // insertions associate to the site to their left, hence the -1 diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/models/Observation.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/models/Observation.scala index 3cfcd653..eb1be137 100644 --- a/avocado-core/src/main/scala/org/bdgenomics/avocado/models/Observation.scala +++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/models/Observation.scala @@ -91,6 +91,24 @@ case class Observation(alleleForwardStrand: Int, isRef = setRef.getOrElse(isRef)) } + /** + * @return Makes a copy where underlying arrays are not shared. + */ + def scale(num: Int, + denum: Int, + setRef: Option[Boolean] = None): Observation = { + val scaleBy = num.toDouble / denum.toDouble + Observation(alleleForwardStrand, + otherForwardStrand, + squareMapQ, + alleleLogLikelihoods.map(v => v * scaleBy), + otherLogLikelihoods.map(v => v * scaleBy), + alleleCoverage, + otherCoverage, + totalCoverage = totalCoverage, + isRef = setRef.getOrElse(isRef)) + } + /** * @return Returns this observation, but with allele/other swapped. *