Skip to content

Commit

Permalink
Add unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kcha committed Apr 18, 2019
1 parent 6190e0d commit 655e17d
Show file tree
Hide file tree
Showing 8 changed files with 310 additions and 0 deletions.
17 changes: 17 additions & 0 deletions tests/R/test_apa_id.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
context("Test APA_ID generation")

# The first ID occurs twice, is interrupted by a different ID, then recurs.
x <- c("ENSG00000001", "ENSG00000001", "ENSG00000999", "ENSG00000001")

test_that("A number is added to Ensembl ID", {
  sorted_x <- sort(x)
  single_id <- x[3]

  # Unsorted input: the expected counter restarts after the intervening ID.
  expect_equal(apa_id(x), paste0(x, "_", c(1, 2, 1, 1)))
  # Sorted input: the three identical IDs are numbered consecutively.
  expect_equal(apa_id(sorted_x), paste0(sorted_x, "_", c(1:3, 1)))
  expect_equal(apa_id(single_id), paste0(single_id, "_", "1"))
})

context("Test APA_ID suffix update")

test_that("A suffix is added for single UTR", {
  lone_id <- x[3]
  id_pair <- x[1:2]

  # A lone ID is expected to receive the "S" suffix.
  expect_equal(update_apa_id(lone_id, 5, 10), paste0(lone_id, "_", "S"))
  # Two rows of the same ID are expected to receive "D"/"P" suffixes whose
  # order depends on the numeric arguments.
  expect_equal(
    update_apa_id(id_pair, c(5, 5), c(10, 9)),
    paste0(id_pair, "_", c("D", "P"))
  )
  expect_equal(
    update_apa_id(id_pair, c(5, 5), c(4, 3)),
    paste0(id_pair, "_", c("P", "D"))
  )
})
80 changes: 80 additions & 0 deletions tests/R/test_format_multi_ensembl_ids.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
library(stringr)
context("Test formatting of single-Ensembl IDs")

# Input that contains no comma-separated entries.
id <- "ENSMUST0000011043_ENSMUSG00000111044_mm9_chr1"

test_that("A single Ensembl ID remains unchanged", {
  formatted <- format_multi_ensembl_ids(id)
  expect_equal(formatted, id)
})

test_that("A vector of single Ensembl IDs remains unchanged", {
  ids <- rep(id, 2)
  expect_equal(format_multi_ensembl_ids(ids), ids)
})

context("Test formatting of multi-Ensembl IDs")

# Input: two comma-separated entries that end in the same second field.
# Expected: the first fields joined by a comma, followed by the shared second
# field once, then the unchanged trailing fields.
id <- "ENSMUST00000111043_ENSMUSG00000048482,ENSMUST00000111044_ENSMUSG00000048482_mm9_chr1"
expected <- "ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048482_mm9_chr1"

# The description previously read "A suffix is added for single UTR", which
# was copied from the APA_ID tests and mislabelled this case in test reports.
test_that("A single multi-Ensembl ID is re-formatted", {
  expect_equal(format_multi_ensembl_ids(id), expected)
})

test_that("A vector of multi-Ensembl IDs is re-formatted", {
  expect_equal(format_multi_ensembl_ids(c(id, id)), c(expected, expected))
})

context("Test vector of mixed (single and multi) Ensembl IDs")

# Elements 1 and 3 each hold two comma-separated entries; element 2 holds one.
# The two entries of element 3 end in different second fields.
id <- c(
  "ENSMUST00000111043_ENSMUSG00000048482,ENSMUST00000111044_ENSMUSG00000048482_hg19_chr1",
  "ENSMUST00000100011_ENSMUSG00000048481_hg19_chr2",
  "ENSMUST00000111043_ENSMUSG00000048480,ENSMUST00000111044_ENSMUSG00000048482_hg19_chr1"
)

test_that("A vector of mixed Ensembl IDs", {
  # The single-entry element is expected back unchanged.
  reformatted <- c(
    "ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048482_hg19_chr1",
    id[2],
    "ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048480,ENSMUSG00000048482_hg19_chr1"
  )
  expect_equal(format_multi_ensembl_ids(id), reformatted)
})


context("Test non-Ensembl IDs")

test_that("Non-Ensembl transcript ID is accepted", {
  # Identifiers made of digits, letters and dots instead of Ensembl accessions.
  raw_ids <- c(
    "XY.00000027036_000..2_mm10_chr1",
    "FF.22_1.z,0101_1.z_mm10_chr1",
    "1_.,2_.,3_._mm10_chr1"
  )
  reformatted <- c(
    "XY.00000027036_000..2_mm10_chr1",
    "FF.22,0101_1.z_mm10_chr1",
    "1,2,3_._mm10_chr1"
  )
  expect_equal(format_multi_ensembl_ids(raw_ids), reformatted)
})

test_that("Underscore at beginning of ID will fail", {
  leading_underscore <-
    "_ENSMUST00000111043_ENSMUSG00000048482,ENSMUST00000111044_ENSMUSG00000048482_hg19_chr1"
  expect_error(format_multi_ensembl_ids(leading_underscore))
})

context("Test chromosomes without chr prefix")

test_that("Chromosome without chr prefix is accepted", {
  # The last field is "1", "2" or "z" rather than a "chr"-prefixed name.
  raw_ids <- c(
    "ENSMUST00000111043_ENSMUSG00000048482,ENSMUST00000111044_ENSMUSG00000048482_hg19_1",
    "ENSMUST00000100011_ENSMUSG00000048481_hg19_2",
    "ENSMUST00000111043_ENSMUSG00000048480,ENSMUST00000111044_ENSMUSG00000048482_hg19_z"
  )
  reformatted <- c(
    "ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048482_hg19_1",
    raw_ids[2],
    "ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048480,ENSMUSG00000048482_hg19_z"
  )
  expect_equal(format_multi_ensembl_ids(raw_ids), reformatted)
})

context("Test unk species")

test_that("Non-hg19 and non-mm10 species are allowed as unk", {
  formatted <- format_multi_ensembl_ids("1_0,2_0_unk_chr1")
  expect_equal(formatted, "1,2_0_unk_chr1")
})

test_that("Complex unknown species and non-standard chr is accepted", {
  formatted <- format_multi_ensembl_ids("1_0,2_0_ut_z")
  expect_equal(formatted, "1,2_0_ut_z")
})

30 changes: 30 additions & 0 deletions tests/R/test_get_first_sample_ix.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
context("Test getting the index of the first sample in merged data")

# Mock merged table: eight annotation columns followed by N sample columns.
# The seed is fixed so the random column contents are reproducible.
set.seed(123)
nr <- 10
N <- 3
df <- data.frame(
  APA_ID       = head(letters, nr),
  Ensembl_Gene = sample(letters, nr),
  Gene_Name    = rainbow(nr),
  Chr          = "chr1",
  Start        = round(runif(nr, 1000, 2000)),
  End          = round(runif(nr, 3000, 5000)),
  Strand       = "+",
  Length       = round(runif(nr, 100, 500)),
  SampleA      = rnorm(nr),
  SampleB      = rnorm(nr),
  SampleC      = rnorm(nr)
)

test_that("Gets index of first sample column in data frame", {
  # SampleA is the ninth column of the mock table.
  expect_equal(get_first_sample_ix(df), 9)
})


context("Test getting the number of samples from merged data")

test_that("Can calculate number of samples in data frame", {
  expect_equal(get_num_samples(df), N)
})
55 changes: 55 additions & 0 deletions tests/R/test_separate_ensembl_field.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
context("Test splitting of underscore-delimited input ensembl field")

# Two-row input: row 1 has a single transcript entry, row 2 has two
# comma-separated entries in its Transcript field.
set.seed(123)
df <- data.table(
  Transcript = c(
    "ENSMUST00000027036_Lypla1_mm10_chr1_4844962_4846739_+_utr_4845016_4846739::chr1:4844962-4846739(+)",
    "ENSMUST00000081551_Tcea1,ENSMUST00000165720_Tcea1_mm10_chr1_4896355_4897910_+_utr_4896364_4897910::chr1:4896355-4897910(+)"
  ),
  Length = c(100, 1000),
  SampleA = runif(2),
  SampleB = runif(2),
  SampleC = rnorm(2),
  stringsAsFactors = FALSE
)

# Columns required after splitting: the parsed fields plus every input column.
expected_cols <- union(
  c("Transcript", "Gene", "Chr", "LastExon.Start",
    "LastExon.End", "Strand", "UTR3.Start", "UTR3.End"),
  colnames(df)
)

test_that("'Transcript' field is split into components for 1-row data frame", {
  one_row <- df[1, ]
  # The return value is ignored; the assertions below inspect `one_row` itself.
  separate_ensembl_field(one_row)
  # Disabled checks kept for reference:
  # expect_equal(ncol(one_row), 12)
  # expect_match(one_row$Transcript[1], "^ENS.*\\d$")
  expect_true(all(expected_cols %in% colnames(one_row)))
})

test_that("'Transcript' field is split into components for multi-line data frame", {
  all_rows <- copy(df)
  separate_ensembl_field(all_rows)
  # Disabled check kept for reference:
  # expect_equal(ncol(all_rows), 10)
  expect_is(all_rows$Transcript, "character")
  expect_true(all(expected_cols %in% colnames(all_rows)))
})

context("Test non-Ensembl IDs")

# Same layout as an Ensembl-based Transcript field, but the leading identifiers
# are arbitrary strings containing dots and digits.
set.seed(123)
df <- data.table(
  Transcript = c(
    "XY.00000027036_000..2_mm10_chr1_4844962_4846739_+_utr_4845016_4846739::chr1:4844962-4846739(+)",
    "FF.22_1.z,0101_1.z_mm10_chr1_4896355_4897910_+_utr_4896364_4897910::chr1:4896355-4897910(+)"
  ),
  Length = c(100, 1000),
  SampleA = runif(2),
  SampleB = runif(2),
  SampleC = rnorm(2),
  stringsAsFactors = FALSE
)

test_that("Non-Ensembl transcript ID is accepted", {
  exp.df <- copy(df)
  # The return value is ignored; the assertions below inspect `exp.df` itself.
  separate_ensembl_field(exp.df)
  # The failure message must be passed as `info =`. Given positionally it falls
  # into `...` and is forwarded to the comparison function, not reported.
  expect_equal(exp.df$Gene, c("000..2", "1.z"),
               info = "Genes do not match")
  expect_equal(exp.df$Transcript, c("XY.00000027036", "FF.22,0101"),
               info = "Transcripts do not match")
})

42 changes: 42 additions & 0 deletions tests/python/Row_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import unittest
import sys
from qapa import extract as ex

class RowTestCase(unittest.TestCase):
    """Unit tests for ``ex.Row`` objects built from single-line text records."""

    def setUp(self):
        # Shared fixture: a record that names chrX_random.
        record = '143 ENSMUST00000100750.9 chrX_random - 74026591 74085669 74035416 74079934 4 74026591,74036981,74079908,74085509, 74036494,74037332,74080032,74085669, 0 Mecp2 cmpl cmpl 2,2,0,-1,'
        self.row = ex.Row(record)

    def test_3utr_length(self):
        # For the fixture, the length must equal cdsStart - txStart.
        self.assertEqual(self.row.get_3utr_length(),
                         self.row.cdsStart - self.row.txStart)

    def test_random_chromosome(self):
        self.assertTrue(self.row.is_on_random_chromosome())

    def test_random_chromosome_no_chr(self):
        # Same record as the fixture, but with "X" in place of "chrX_random".
        record = '143 ENSMUST00000100750.9 X - 74026591 74085669 74035416 74079934 4 74026591,74036981,74079908,74085509, 74036494,74037332,74080032,74085669, 0 Mecp2 cmpl cmpl 2,2,0,-1,'
        self.assertFalse(ex.Row(record).is_on_random_chromosome())

    def test_chrY(self):
        # Same record as the fixture, but with "chrY" in place of "chrX_random".
        record = '143 ENSMUST00000100750.9 chrY - 74026591 74085669 74035416 74079934 4 74026591,74036981,74079908,74085509, 74036494,74037332,74080032,74085669, 0 Mecp2 cmpl cmpl 2,2,0,-1,'
        self.assertFalse(ex.Row(record).is_on_random_chromosome())

    def test_extract_last_exon(self):
        last_exon = self.row.extract_last_exon(n=1, min_utr_length=0)
        # Items 1 and 2 of the result are checked as start and end coordinates.
        self.assertEqual(last_exon[1], 74026591, "Start coord not equal")
        self.assertEqual(last_exon[2], 74036494, "End coord not equal")


if __name__ == '__main__':
    # Run the test case when the module is executed as a script.
    unittest.main()
50 changes: 50 additions & 0 deletions tests/python/annotate_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import unittest
import sys
import pybedtools
from qapa import annotate as anno

class AnnotateTestCase(unittest.TestCase):
    """Unit tests for ``anno.extend_feature`` and ``anno.restore_feature``."""

    def setUp(self):
        bed_line = "chrX 74026591 74036494 ENSMUST00000100750_Mecp2 8825 - 74035416 74036494 Mecp2 74026591,74036981,74079908,74085509 74036494,74037332,74080032,74085669"
        self.bed = pybedtools.BedTool(bed_line, from_string=True)

    def test_extend_feature(self):
        extension = 1
        extended = anno.extend_feature(self.bed[0], length=extension)
        # Only the start is expected to move; the end stays put.
        self.assertEqual(extended.start, 74026591 - extension)
        self.assertEqual(extended.end, 74036494)

    def test_gene_at_beginning_of_chr_1(self):
        # The feature already starts at 0, so the start must stay at 0
        # after extending and again after restoring.
        bed_line = "chr17_KI270861v1_alt 0 5793 ENST00000634102_SLC43A2 5631 - 5631 5793 SLC43A2 0,6624,8277,13231,15940,20829,21296,21593,23214,43222,45006,46589,57742,58821 5793,6748,8351,13364,16079,20976,21499,21727,23307,43299,45062,46797,57948,58864"
        features = pybedtools.BedTool(bed_line, from_string=True)
        extension = 24
        extended = anno.extend_feature(features[0], length=extension)
        self.assertEqual(extended.start, 0)
        self.assertEqual(extended.end, 5793)

        restored = anno.restore_feature(extended, length=extension)
        self.assertEqual(restored.start, 0)

    def test_gene_at_beginning_of_chr_2(self):
        # The feature starts exactly `extension` bases in: extending must reach
        # 0 and restoring must return to the original start of 24.
        bed_line = "chr17_KI270861v1_alt 24 5793 ENST00000634102_SLC43A2 5631 - 5631 5793 SLC43A2 24,6624,8277,13231,15940,20829,21296,21593,23214,43222,45006,46589,57742,58821 5793,6748,8351,13364,16079,20976,21499,21727,23307,43299,45062,46797,57948,58864"
        features = pybedtools.BedTool(bed_line, from_string=True)
        extension = 24
        extended = anno.extend_feature(features[0], length=extension)
        self.assertEqual(extended.start, 0)
        self.assertEqual(extended.end, 5793)

        restored = anno.restore_feature(extended, length=extension)
        self.assertEqual(restored.start, 24)


if __name__ == '__main__':
    # Run the test case when the module is executed as a script.
    unittest.main()
28 changes: 28 additions & 0 deletions tests/run_R_tests.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env Rscript

# https://stackoverflow.com/a/29132294
# Collect the top-level function definitions of an R script without running it.
#
# Args:
#   x: Path (or connection) of the R script, passed to parse().
#
# Returns:
#   An expression vector holding only the top-level `name <- function(...)`
#   assignments found in `x`; it is empty when there are none.
source_funcs <- function(x) {
  cmds <- parse(x)
  # vapply() always yields a logical vector, even for an empty script, so the
  # subsetting below cannot receive a list as sapply() would produce.
  is_fun_assign <- vapply(as.list(cmds), function(cmd) {
    # Bare symbols and constants are not calls and cannot be indexed safely.
    if (!is.call(cmd) || length(cmd) != 3L ||
        !identical(cmd[[1]], as.name("<-"))) {
      return(FALSE)
    }
    rhs <- cmd[[3]]
    # Require a real `function` call on the right-hand side, so `a <- b` does
    # not error and `a <- "function"` is not mistaken for a definition.
    is.call(rhs) && identical(rhs[[1]], as.name("function"))
  }, logical(1))
  cmds[is_fun_assign]
}

# Scripts whose function definitions are exercised by the unit tests.
files <- c(
  "../scripts/create_merged_data.R",
  "../scripts/compute_pau.R"
)

# Define only the functions from each script; the rest of the script is not
# evaluated.
for (f in files) {
  cmds <- source_funcs(f)
  eval(cmds)
}

# Packages used by the functions and tests, attached without startup messages.
suppressPackageStartupMessages({
  library(stringr)
  library(data.table)
  library(dplyr)
})
library(testthat)

# Run every test file found in the R/ directory.
test_dir("R/")
8 changes: 8 additions & 0 deletions tests/run_py_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash

# Run every Python unit-test module and report failures via the exit status.
# Previously the script always exited with the status of the final `echo`,
# so a failing test module went unnoticed by callers.
status=0
for i in python/*_test.py
do
    # Quote the path so names with spaces or glob characters stay one argument.
    python "$i" || status=1
    echo -e "\n"
done
exit "$status"

0 comments on commit 655e17d

Please sign in to comment.