From 6908161e046ba794c943c7a1df63cb3b50aa4a5f Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 25 Mar 2022 23:51:39 -0700 Subject: [PATCH] add lots o tests --- .../fulcrumgenomics/bam/api/SamRecord.scala | 116 ++++++----- .../bam/api/SamRecordTest.scala | 180 ++++++++++++++---- 2 files changed, 214 insertions(+), 82 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala b/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala index 7a07d2150..9fb0a3ea4 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala @@ -237,66 +237,46 @@ trait SamRecord { @inline final val transientAttrs: TransientAttrs = new TransientAttrs(this) /** Returns the 1-based reference position for the 1-based position in the read, or [[None]] if the reference does - * not map at the read position (e.g. insertion). + * not map at the read position (i.e. insertion or soft-clipping). + * + * If the requested read position is less than or equal to zero, or greater than the read length [[length()]], then + * an exception will be thrown. + * + * Hard-clipping (`H`) is ignored and not counted as part of the read. * * @param pos the 1-based read position to query * @param returnLastBaseIfInserted if the reference is an insertion, true to returned the previous reference base, * false to return None * */ - @inline final def refPosAtReadPos(pos: Int, returnLastBaseIfInserted: Boolean = false): Option[Int] = if (pos <= 0) None else { + @inline final def refPosAtReadPos(pos: Int, returnLastBaseIfInserted: Boolean = false): Option[Int] = { + require(this.mapped, s"read was not mapped: ${this}") + require(1 <= pos, s"position given '$pos' was less than one: ${this}") + require(pos <= cigar.lengthOnQuery , s"position given '$pos' was longer than the # of read bases '${cigar.lengthOnQuery}': ${this}") + var readPos = 1 var refPos = this.start var elementIndex = 0 val elems = this.cigar.elems - def continue(): Boolean = if (elementIndex >= elems.length) false else { + // skip leading clipping, ignoring hard-clipping + while (elems(elementIndex).operator.isClipping && readPos <= pos) { val elem = elems(elementIndex) - if (pos > CoordMath.getEnd(readPos, elem.lengthOnQuery)) true // current cigar element is before the desired position - else { - if (pos < readPos) readPos = 0 // past the point in the read - else if (elem.operator == CigarOperator.INSERTION) { - if (!returnLastBaseIfInserted) refPos = 0 - else refPos -= 1 - } // in an insertion, no reference position - else refPos += pos - readPos // get the offset - false - } - } - - while (continue()) { - val elem = elems(elementIndex) - refPos += elem.lengthOnTarget - readPos += elem.lengthOnQuery + if (elem.operator == CigarOperator.SOFT_CLIP) readPos += elem.lengthOnQuery elementIndex += 1 } - if (this.start <= refPos && readPos <= this.end) Some(refPos) else None - } - - - /** Returns the 1-based position into the read's bases where the position is mapped either as a match or mismatch, - * [[None]] if the read does not map the position. - * - * @param pos the 1-based reference position to query - * @param returnLastBaseIfDeleted if the reference is a deletion, true to returned the previous read base, false to - * return None - * */ - @inline final def readPosAtRefPos(pos: Int, returnLastBaseIfDeleted: Boolean = false): Option[Int] = { - if (this.unmapped || pos < this.start || this.end < pos) None - else { // overlaps - var readPos = 0 - var refPos = this.start - var elementIndex = 0 - val elems = this.cigar.elems - + // return None if the read was in a leading soft-clip + if (pos < readPos) None else { def continue(): Boolean = if (elementIndex >= elems.length) false else { val elem = elems(elementIndex) - if (pos > CoordMath.getEnd(refPos, elem.lengthOnTarget)) true // current cigar element is before the desired position - else { // overlaps! - if (elem.operator == CigarOperator.DELETION) { // deletion - if (!returnLastBaseIfDeleted) readPos = 0 // don't return a read position, so zero out the current read position - } else { - readPos += pos - refPos + 1 // get the offset + if (pos > CoordMath.getEnd(readPos, elem.lengthOnQuery)) true // current cigar element is before the desired position + else { + if (elem.operator == CigarOperator.INSERTION) { + if (!returnLastBaseIfInserted) refPos = 0 // this cause us to return None later for the position + else refPos -= 1 // in an insertion, no reference position, so use the previous reference position + } + else { + refPos += pos - readPos // get the offset, for soft-clipping this will move refPos past this.end, and so return None } false } @@ -309,8 +289,54 @@ trait SamRecord { elementIndex += 1 } - if (0 < readPos && readPos <= this.length) Some(readPos) else None + if (this.start <= refPos && refPos <= this.end) Some(refPos) else None + } + } + + /** Returns the 1-based position into the read's bases where the position is mapped either as a match or mismatch, + * [[None]] if the read does not map the position. + * + * @param pos the 1-based reference position to query + * @param returnLastBaseIfDeleted if the reference is a deletion, true to returned the previous read base, false to + * return None + * @param returnLastBaseIfSkipped if the reference is a skip, true to returned the previous read base, false to + * return None + * */ + @inline final def readPosAtRefPos(pos: Int, + returnLastBaseIfDeleted: Boolean = false, + returnLastBaseIfSkipped: Boolean = false): Option[Int] = { + require(this.mapped, s"read was not mapped: ${this}") + require(this.start <= pos, s"position given '$pos' was before the start of the alignment '${this.start}': ${this}") + require(pos <= this.end , s"position given '$pos' was past the end of the alignment '${this.end}': ${this}") + + var readPos = 0 + var refPos = this.start + var elementIndex = 0 + val elems = this.cigar.elems + + def continue(): Boolean = if (elementIndex >= elems.length) false else { + val elem = elems(elementIndex) + if (pos > CoordMath.getEnd(refPos, elem.lengthOnTarget)) true // current cigar element is before the desired position + else { // overlaps! + if (elem.operator == CigarOperator.DELETION) { // deletion + if (!returnLastBaseIfDeleted) readPos = 0 // don't return a read position, so zero out the current read position + } else if (elem.operator == CigarOperator.SKIPPED_REGION) { // skip region + if (!returnLastBaseIfSkipped) readPos = 0 // don't return a read position, so zero out the current read position + } else { + readPos += pos - refPos + 1 // get the offset + } + false + } + } + + while (continue()) { + val elem = elems(elementIndex) + refPos += elem.lengthOnTarget + readPos += elem.lengthOnQuery + elementIndex += 1 } + + if (0 < readPos && readPos <= this.length) Some(readPos) else None } diff --git a/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala b/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala index bfc833137..ac46802d7 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala @@ -290,47 +290,153 @@ class SamRecordTest extends UnitSpec with OptionValues { rec2.matesOverlap.value shouldBe true // Mate's start is enclosed by rec, regardless of where mate end is } - "SamRecord.readPosAtRefPos" should "return the correct value" in { - case class ReadPosAtRefPos(cigar: String, refPos: Int, readPos: Option[Int], returnLastBaseIfDeleted: Boolean) - val testCases = Seq( - ReadPosAtRefPos("3S9M", 7, Some(10), false), - ReadPosAtRefPos("3S9M", 0, None, false), - ReadPosAtRefPos("3S9M", -1, None, false), - ReadPosAtRefPos("3S9M", 13, None, false), - ReadPosAtRefPos("4M1D6M", 4, Some(4), false), - ReadPosAtRefPos("4M1D6M", 4, Some(4), true), - ReadPosAtRefPos("4M1D6M", 5, None, false), - ReadPosAtRefPos("4M1D6M", 5, Some(4), true), - ReadPosAtRefPos("4M1I6M", 5, Some(6), false), - ReadPosAtRefPos("4M1I6M", 11, None, false) - ) - testCases.foreach { testCase => - val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value - val readPos = frag.readPosAtRefPos(pos=testCase.refPos, returnLastBaseIfDeleted=testCase.returnLastBaseIfDeleted) - readPos shouldBe testCase.readPos - readPos.getOrElse(0) shouldBe frag.asSam.getReadPositionAtReferencePosition(testCase.refPos, testCase.returnLastBaseIfDeleted) + /** A single test case for [[SamRecord.readPosAtRefPos()]] and/or [[SamRecord.refPosAtReadPos()]]. + * + * If `returnLastBaseIfInserted` is `true`, then [[SamRecord.refPosAtReadPos()]] should not be tested. Similarly, + * if either `returnLastBaseIfDeleted` or `returnLastBaseIfSkipped` is `true`, then [[SamRecord.readPosAtRefPos()]] + * should not be tested. + * + * @param cigar the cigar + * @param readPos the read position; empty if the reference position does not map to a read base + * @param refPos the reference position; epty if the read position doesnot map to a reference base + * @param returnLastBaseIfInserted passed to [[SamRecord.readPosAtRefPos()]] + * @param returnLastBaseIfDeleted passed to [[SamRecord.refPosAtReadPos()]] + * @param returnLastBaseIfSkipped passed to [[SamRecord.refPosAtReadPos()]] + */ + case class TestCase(cigar: String, readPos: Option[Int], refPos: Option[Int], + returnLastBaseIfInserted: Boolean = false, + returnLastBaseIfDeleted: Boolean = false, + returnLastBaseIfSkipped: Boolean = false) { + require(readPos.isDefined || refPos.isDefined) + val doRefPosAtReadPos: Boolean = !returnLastBaseIfDeleted && !returnLastBaseIfSkipped + val doReadPosAtRefPos: Boolean = !returnLastBaseIfInserted + require(doRefPosAtReadPos || doReadPosAtRefPos) + } + object TestCase { + def apply(cigar: String, readPos: Int, refPos: Int): TestCase = { + TestCase(cigar=cigar, readPos=Some(readPos), refPos=Some(refPos)) } } - "SamRecord.refPosAtReadPos" should "return the correct value" in { - case class RefPosAtReadPos(cigar: String, refPos: Option[Int], readPos: Int, returnLastBaseIfInserted: Boolean) - val testCases = Seq( - RefPosAtReadPos("3S9M", Some(7), 10, false), - RefPosAtReadPos("3S9M", None, 0, false), - RefPosAtReadPos("3S9M", None, 13, false), - RefPosAtReadPos("4M1D6M", Some(4), 4, false), - RefPosAtReadPos("4M1D6M", Some(6), 5, false), - RefPosAtReadPos("4M1I6M", None, 5, false), - RefPosAtReadPos("4M1I6M", Some(4), 5, true), - RefPosAtReadPos("4M1I6M", Some(5), 6, false), - ) - testCases.foreach { testCase => - val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value - val readPos = frag.refPosAtReadPos(pos=testCase.readPos, returnLastBaseIfInserted=testCase.returnLastBaseIfInserted) - readPos shouldBe testCase.refPos - if (!testCase.returnLastBaseIfInserted) { - readPos.getOrElse(0) shouldBe frag.asSam.getReferencePositionAtReadPosition(testCase.readPos) + /** Test cases for [[SamRecord.readPosAtRefPos()]] and [[SamRecord.refPosAtReadPos()]]. */ + private val testCases = Seq( + // all mapped bases + TestCase("10M", 10, 10), + TestCase("10M", 1, 1), + // leading soft-clipping + TestCase("3S9M", Some(1), None), // no reference base, start of soft-clipping + TestCase("3S9M", Some(3), None), // no reference base, end of soft-clipping + TestCase("3S9M", 4, 1), // start of aligned bases + TestCase("3S9M", 10, 7), // end of aligned bases + // leading hard-clipping + TestCase("3H9M", 1, 1), // start of aligned bases + TestCase("3H3S9M", 4, 1), // start of aligned bases + TestCase("3H3S9M", Some(3), None), // no reference base, end of soft-clipping + // leading padding + TestCase("3P9M", 1, 1), // start of aligned bases + TestCase("3P9M", 9, 9), // end of aligned bases + // leading skip + TestCase("3N9M", None, Some(1)), // no reference base, start of padding + TestCase("3N9M", None, Some(3)), // no reference base, end of padding + TestCase("3N9M", 1, 4), // start of aligned bases + TestCase("3N9M", 9, 12), // end of aligned bases + // deletions + TestCase("4M1D6M", 4, 4), // before the deletion + TestCase("4M1D6M", None, Some(5)), // in a deletion + TestCase("4M1D6M", 5, 6), // after the deletion + TestCase("4M1D6M", Some(4), Some(5), returnLastBaseIfDeleted=true), // returns the previous mapped base + TestCase("4M2D6M", 4, 4), // before the deletion + TestCase("4M2D6M", None, Some(5)), // in a deletion + TestCase("4M2D6M", None, Some(6)), // in a deletion + TestCase("4M2D6M", Some(4), Some(5), returnLastBaseIfDeleted=true), // returns the previous mapped base + TestCase("4M2D6M", Some(4), Some(6), returnLastBaseIfDeleted=true), // returns the previous mapped base + TestCase("4M2D6M", 5, 7), // after the deletion + TestCase("20M10D20M", 21, 31), // first base of the deletion + // insertions + TestCase("4M1I6M", 4, 4), // before the insertion + TestCase("4M1I6M", Some(5), None), // in an insertion + TestCase("4M1I6M", Some(5), Some(4), returnLastBaseIfInserted=true), // returns the previous mapped base + TestCase("4M1I6M", 6, 5), // after the insertion + TestCase("4M2I6M", 4, 4), // before the insertion + TestCase("4M2I6M", Some(5), None), // in an insertion + TestCase("4M2I6M", Some(6), None), // in an insertion + TestCase("4M2I6M", Some(5), Some(4), returnLastBaseIfInserted=true), // returns the previous mapped base + TestCase("4M2I6M", Some(6), Some(4), returnLastBaseIfInserted=true), // returns the previous mapped base + TestCase("4M2I6M", 7, 5), // after the insertion + TestCase("20M10I20M", 31, 21), // first base of the insertion + // trailing soft-clipping + TestCase("9M3S", 1, 1), // first aligned base + TestCase("9M3S", 9, 9), // last aligned base + TestCase("9M3S", Some(10), None), // no reference base, first base of soft-clipping + TestCase("9M3S", Some(12), None), // no reference base, last base of soft-clipping + // trailing hard-clipping + TestCase("9M3H", 1, 1), // start of aligned bases + TestCase("9M3S3H", 9, 9), // end of aligned bases + TestCase("9M3S3H", Some(10), None), /// no reference base, first base of soft-clipping + TestCase("9M3S3H", Some(12), None), // no reference base, last base of soft-clipping + // trailing padding + TestCase("9M3P", 1, 1), // start of aligned bases + TestCase("9M3P", 9, 9), // end of aligned bases + // trailing skip + TestCase("9M3N", None, Some(10)), // no reference base, start of padding + TestCase("9M3N", None, Some(12)), // no reference base, end of padding + TestCase("9M3N", 9, 9), // end of aligned bases + ) + + "SamRecord.readPosAtRefPos" should "throw an exception when the read position is OOB" in { + val frag1 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M").value + an[Exception] should be thrownBy frag1.readPosAtRefPos(pos=0) + an[Exception] should be thrownBy frag1.readPosAtRefPos(pos=11) + + // request a base in the trailing hard-clipping should throw an exception + val frag2 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M10H").value + an[Exception] should be thrownBy frag2.readPosAtRefPos(pos=11) + + // request a base in the trailing soft-clipping should throw an exception + val frag3 = new SamBuilder(readLength=20).addFrag(start=1, cigar="10M10S").value + an[Exception] should be thrownBy frag3.readPosAtRefPos(pos=11) + } + + // now run the test cases + testCases.filter(_.doReadPosAtRefPos).foreach { testCase => + testCase.refPos.foreach { pos => + it should s"return read position ${testCase.readPos} for reference position ${testCase.refPos} with cigar ${testCase.cigar}" in { + val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value + val readPos = frag.readPosAtRefPos(pos=pos, returnLastBaseIfDeleted=testCase.returnLastBaseIfDeleted, testCase.returnLastBaseIfSkipped) + withClue(testCase) { + readPos shouldBe testCase.readPos + if (!testCase.returnLastBaseIfSkipped) { + readPos.getOrElse(0) shouldBe frag.asSam.getReadPositionAtReferencePosition(pos, testCase.returnLastBaseIfDeleted) + } + } } } } + + "SamRecord.refPosAtReadPos" should "throw an exception when the read position is OOB" in { + val frag1 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M").value + an[Exception] should be thrownBy frag1.refPosAtReadPos(pos=0) + an[Exception] should be thrownBy frag1.refPosAtReadPos(pos=11) + + // request a base in the trailing hard-clipping should throw an exception + val frag2 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M10H").value + an[Exception] should be thrownBy frag2.refPosAtReadPos(pos=11) + } + + // now run the test cases + testCases.filter(_.doRefPosAtReadPos).foreach { testCase => + testCase.readPos.foreach { pos: Int => + it should s"return ref position ${testCase.refPos} for read position ${testCase.readPos} with cigar ${testCase.cigar}" in { + val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value + val refPos = frag.refPosAtReadPos(pos=pos, returnLastBaseIfInserted=testCase.returnLastBaseIfInserted) + withClue(testCase) { + refPos shouldBe testCase.refPos + if (!testCase.returnLastBaseIfInserted) { + refPos.getOrElse(0) shouldBe frag.asSam.getReferencePositionAtReadPosition(pos) + } + } + } + } + } + }