-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
310 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
context("Test APA_ID generation") | ||
|
||
x <- c("ENSG00000001", "ENSG00000001", "ENSG00000999", "ENSG00000001") | ||
|
||
test_that("A number is added to Ensembl ID", { | ||
expect_equal(apa_id(x), paste(x, c(1,2,1,1), sep="_")) | ||
expect_equal(apa_id(sort(x)), paste(sort(x), c(1:3,1), sep="_")) | ||
expect_equal(apa_id(x[3]), paste(x[3], "1", sep="_")) | ||
}) | ||
|
||
context("Test APA_ID suffix update") | ||
|
||
test_that("A suffix is added for single UTR", { | ||
expect_equal(update_apa_id(x[3], 5, 10), paste(x[3], "S", sep="_")) | ||
expect_equal(update_apa_id(x[1:2], c(5, 5), c(10, 9)), paste(x[1:2], c("D", "P"), sep="_")) | ||
expect_equal(update_apa_id(x[1:2], c(5, 5), c(4, 3)), paste(x[1:2], c("P", "D"), sep="_")) | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
library(stringr) | ||
context("Test formatting of single-Ensembl IDs") | ||
|
||
id <- "ENSMUST0000011043_ENSMUSG00000111044_mm9_chr1" | ||
|
||
test_that("A single Ensembl ID remains unchanged", { | ||
expect_equal(format_multi_ensembl_ids(id), id) | ||
}) | ||
|
||
test_that("A vector of single Ensembl IDs remains unchanged", { | ||
expect_equal(format_multi_ensembl_ids(c(id, id)), c(id, id)) | ||
}) | ||
|
||
context("Test formatting of multi-Ensembl IDs") | ||
|
||
id <- "ENSMUST00000111043_ENSMUSG00000048482,ENSMUST00000111044_ENSMUSG00000048482_mm9_chr1" | ||
expected <- "ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048482_mm9_chr1" | ||
|
||
# `id` holds two transcript_gene pairs joined by a comma; `expected` is the
# same record with the transcripts listed together ahead of the gene.
test_that("A multi-Ensembl ID is re-formatted", {
  expect_equal(format_multi_ensembl_ids(id), expected)
})
|
||
test_that("A vector of multi-Ensembl IDs is re-formatted", { | ||
expect_equal(format_multi_ensembl_ids(c(id, id)), c(expected, expected)) | ||
}) | ||
|
||
context("Test vector of mixed (single and multi) Ensembl IDs") | ||
|
||
id <- c("ENSMUST00000111043_ENSMUSG00000048482,ENSMUST00000111044_ENSMUSG00000048482_hg19_chr1", | ||
"ENSMUST00000100011_ENSMUSG00000048481_hg19_chr2", | ||
"ENSMUST00000111043_ENSMUSG00000048480,ENSMUST00000111044_ENSMUSG00000048482_hg19_chr1") | ||
|
||
test_that("A vector of mixed Ensembl IDs", { | ||
expect_equal(format_multi_ensembl_ids(id), | ||
c("ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048482_hg19_chr1", | ||
id[2], | ||
"ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048480,ENSMUSG00000048482_hg19_chr1") | ||
) | ||
}) | ||
|
||
|
||
context("Test non-Ensembl IDs") | ||
|
||
test_that("Non-Ensembl transcript ID is accepted", { | ||
id <- c("XY.00000027036_000..2_mm10_chr1", | ||
"FF.22_1.z,0101_1.z_mm10_chr1", | ||
"1_.,2_.,3_._mm10_chr1") | ||
expect_equal(format_multi_ensembl_ids(id), | ||
c("XY.00000027036_000..2_mm10_chr1", | ||
"FF.22,0101_1.z_mm10_chr1", | ||
"1,2,3_._mm10_chr1")) | ||
}) | ||
|
||
test_that("Underscore at beginning of ID will fail", { | ||
expect_error(format_multi_ensembl_ids("_ENSMUST00000111043_ENSMUSG00000048482,ENSMUST00000111044_ENSMUSG00000048482_hg19_chr1")) | ||
}) | ||
|
||
context("Test chromosomes without chr prefix") | ||
test_that("Chromosome without chr prefix is accepted", { | ||
id <- c("ENSMUST00000111043_ENSMUSG00000048482,ENSMUST00000111044_ENSMUSG00000048482_hg19_1", | ||
"ENSMUST00000100011_ENSMUSG00000048481_hg19_2", | ||
"ENSMUST00000111043_ENSMUSG00000048480,ENSMUST00000111044_ENSMUSG00000048482_hg19_z") | ||
expect_equal(format_multi_ensembl_ids(id), | ||
c("ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048482_hg19_1", | ||
id[2], | ||
"ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048480,ENSMUSG00000048482_hg19_z") | ||
) | ||
}) | ||
|
||
context("Test unk species") | ||
test_that("Non-hg19 and non-mm10 species are allowed as unk", { | ||
expect_equal(format_multi_ensembl_ids("1_0,2_0_unk_chr1"), | ||
"1,2_0_unk_chr1") | ||
}) | ||
|
||
test_that("Complex unknown species and non-standard chr is accepted", { | ||
expect_equal(format_multi_ensembl_ids("1_0,2_0_ut_z"), | ||
"1,2_0_ut_z") | ||
}) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
context("Test getting the index of the first sample in merged data") | ||
|
||
set.seed(123) | ||
nr <- 10 | ||
N <- 3 | ||
df <- data.frame(APA_ID = letters[1:nr], | ||
Ensembl_Gene = sample(letters, nr), | ||
Gene_Name = rainbow(nr), | ||
Chr = "chr1", | ||
Start = round(runif(nr, 1000, 2000)), | ||
End = round(runif(nr, 3000, 5000)), | ||
Strand = "+", | ||
Length = round(runif(nr, 100, 500)), | ||
SampleA = rnorm(nr), | ||
SampleB = rnorm(nr), | ||
SampleC = rnorm(nr) | ||
) | ||
|
||
test_that("Gets index of first sample column in data frame", { | ||
expected <- 9 | ||
expect_equal(get_first_sample_ix(df), expected) | ||
}) | ||
|
||
|
||
context("Test getting the number of samples from merged data") | ||
|
||
test_that("Can calculate number of samples in data frame", { | ||
expected <- N | ||
expect_equal(get_num_samples(df), expected) | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
context("Test splitting of underscore-delimited input ensembl field") | ||
|
||
set.seed(123) | ||
df <- data.table(Transcript= | ||
c("ENSMUST00000027036_Lypla1_mm10_chr1_4844962_4846739_+_utr_4845016_4846739::chr1:4844962-4846739(+)", | ||
"ENSMUST00000081551_Tcea1,ENSMUST00000165720_Tcea1_mm10_chr1_4896355_4897910_+_utr_4896364_4897910::chr1:4896355-4897910(+)"), | ||
Length=c(100, 1000), | ||
SampleA=runif(2), | ||
SampleB=runif(2), | ||
SampleC=rnorm(2), | ||
stringsAsFactors=FALSE | ||
) | ||
|
||
test_that("'Transcript' field is split into components for 1-row data frame", { | ||
exp.df <- df[1,] | ||
separate_ensembl_field(exp.df) | ||
# expect_equal(ncol(exp.df), 12) | ||
# expect_match(exp.df$Transcript[1], "^ENS.*\\d$") | ||
expected_cols <- union(c("Transcript", "Gene", "Chr", "LastExon.Start", | ||
"LastExon.End", "Strand", "UTR3.Start", "UTR3.End"), | ||
colnames(df)) | ||
expect_true(all(expected_cols %in% colnames(exp.df))) | ||
}) | ||
|
||
test_that("'Transcript' field is split into components for multi-line data frame", { | ||
exp.df <- copy(df) | ||
separate_ensembl_field(exp.df) | ||
# expect_equal(ncol(exp.df), 10) | ||
expect_is(exp.df$Transcript, "character") | ||
expected_cols <- union(c("Transcript", "Gene", "Chr", "LastExon.Start", | ||
"LastExon.End", "Strand", "UTR3.Start", "UTR3.End"), | ||
colnames(df)) | ||
expect_true(all(expected_cols %in% colnames(exp.df))) | ||
}) | ||
|
||
context("Test non-Ensembl IDs") | ||
set.seed(123) | ||
df <- data.table(Transcript= | ||
c("XY.00000027036_000..2_mm10_chr1_4844962_4846739_+_utr_4845016_4846739::chr1:4844962-4846739(+)", | ||
"FF.22_1.z,0101_1.z_mm10_chr1_4896355_4897910_+_utr_4896364_4897910::chr1:4896355-4897910(+)"), | ||
Length=c(100, 1000), | ||
SampleA=runif(2), | ||
SampleB=runif(2), | ||
SampleC=rnorm(2), | ||
stringsAsFactors=FALSE | ||
) | ||
|
||
test_that("Non-Ensembl transcript ID is accepted", {
  # Work on a copy so the shared fixture `df` stays untouched:
  # separate_ensembl_field() is called with its return value discarded and the
  # table is inspected afterwards, so it appears to modify its argument.
  exp.df <- copy(df)
  separate_ensembl_field(exp.df)
  # Failure messages must be passed as `info`; an unnamed third argument is
  # forwarded to the comparison function instead of being reported.
  expect_equal(exp.df$Gene, c("000..2", "1.z"), info = "Genes do not match")
  expect_equal(exp.df$Transcript, c("XY.00000027036", "FF.22,0101"),
               info = "Transcripts do not match")
})
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import unittest | ||
import sys | ||
from qapa import extract as ex | ||
|
||
class RowTestCase(unittest.TestCase):
    """Tests for ``ex.Row`` constructed from a single annotation line."""

    def setUp(self):
        # Shared fixture: an example record located on "chrX_random".
        self.row = ex.Row('143 ENSMUST00000100750.9 chrX_random - 74026591 74085669 74035416 74079934 4 74026591,74036981,74079908,74085509, 74036494,74037332,74080032,74085669, 0 Mecp2 cmpl cmpl 2,2,0,-1,')

    def test_3utr_length(self):
        """The reported 3'UTR length equals cdsStart - txStart for the fixture."""
        row = self.row
        self.assertEqual(row.get_3utr_length(), row.cdsStart - row.txStart)

    def test_random_chromosome(self):
        """The fixture's chromosome is reported as a random chromosome."""
        self.assertTrue(self.row.is_on_random_chromosome())

    def test_random_chromosome_no_chr(self):
        """A bare "X" chromosome name is not reported as random."""
        line = '143 ENSMUST00000100750.9 X - 74026591 74085669 74035416 74079934 4 74026591,74036981,74079908,74085509, 74036494,74037332,74080032,74085669, 0 Mecp2 cmpl cmpl 2,2,0,-1,'
        self.assertFalse(ex.Row(line).is_on_random_chromosome())

    def test_chrY(self):
        """"chrY" is not reported as random."""
        line = '143 ENSMUST00000100750.9 chrY - 74026591 74085669 74035416 74079934 4 74026591,74036981,74079908,74085509, 74036494,74037332,74080032,74085669, 0 Mecp2 cmpl cmpl 2,2,0,-1,'
        self.assertFalse(ex.Row(line).is_on_random_chromosome())

    def test_extract_last_exon(self):
        """Elements 1 and 2 of the extracted record are the expected coordinates."""
        last_exon = self.row.extract_last_exon(n=1, min_utr_length=0)
        self.assertEqual(last_exon[1], 74026591, "Start coord not equal")
        self.assertEqual(last_exon[2], 74036494, "End coord not equal")
|
||
|
||
if __name__ == '__main__': | ||
#print sys.argv[0] | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import unittest | ||
import sys | ||
import pybedtools | ||
from qapa import annotate as anno | ||
|
||
class AnnotateTestCase(unittest.TestCase):
    """Tests for ``anno.extend_feature`` and ``anno.restore_feature``."""

    def setUp(self):
        # Shared fixture: a one-record BedTool built from an inline string.
        self.bed = pybedtools.BedTool(
            "chrX 74026591 74036494 ENSMUST00000100750_Mecp2 8825 - 74035416 74036494 Mecp2 74026591,74036981,74079908,74085509 74036494,74037332,74080032,74085669",
            from_string=True,
        )

    def test_extend_feature(self):
        """Extending by one base lowers the start by one and keeps the end."""
        pad = 1
        extended = anno.extend_feature(self.bed[0], length=pad)
        self.assertEqual(extended.start, 74026591 - pad)
        self.assertEqual(extended.end, 74036494)

    def test_gene_at_beginning_of_chr_1(self):
        """A feature starting at 0 stays at 0 after extending and restoring."""
        pad = 24
        record = "chr17_KI270861v1_alt 0 5793 ENST00000634102_SLC43A2 5631 - 5631 5793 SLC43A2 0,6624,8277,13231,15940,20829,21296,21593,23214,43222,45006,46589,57742,58821 5793,6748,8351,13364,16079,20976,21499,21727,23307,43299,45062,46797,57948,58864"
        first = pybedtools.BedTool(record, from_string=True)[0]

        extended = anno.extend_feature(first, length=pad)
        self.assertEqual(extended.start, 0)
        self.assertEqual(extended.end, 5793)

        restored = anno.restore_feature(extended, length=pad)
        self.assertEqual(restored.start, 0)

    def test_gene_at_beginning_of_chr_2(self):
        """A feature starting at 24 extends to 0 and is restored back to 24."""
        pad = 24
        record = "chr17_KI270861v1_alt 24 5793 ENST00000634102_SLC43A2 5631 - 5631 5793 SLC43A2 24,6624,8277,13231,15940,20829,21296,21593,23214,43222,45006,46589,57742,58821 5793,6748,8351,13364,16079,20976,21499,21727,23307,43299,45062,46797,57948,58864"
        first = pybedtools.BedTool(record, from_string=True)[0]

        extended = anno.extend_feature(first, length=pad)
        self.assertEqual(extended.start, 0)
        self.assertEqual(extended.end, 5793)

        restored = anno.restore_feature(extended, length=pad)
        self.assertEqual(restored.start, 24)
|
||
|
||
|
||
|
||
if __name__ == '__main__': | ||
#print sys.argv[0] | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
# https://stackoverflow.com/a/29132294 | ||
# Parse an R script and keep only its top-level function definitions.
#
# The file is parsed but not evaluated, so none of the script's other
# top-level statements run.
#
# x: path to (or connection for) the R source file, as accepted by parse().
#
# Returns an expression vector holding only the top-level statements of the
# form `name <- function(...) ...`; evaluate it with eval() to define them.
source_funcs <- function(x) {
  cmds <- parse(x)
  # Top-level statements may be bare symbols or constants, and the right-hand
  # side of an assignment may be a symbol (`a <- b`); subsetting those with
  # `[[` errors, so check is.call() before looking inside.
  is_fun_assignment <- function(cmd) {
    is.call(cmd) &&
      identical(cmd[[1]], as.name("<-")) &&
      length(cmd) >= 3 &&
      is.call(cmd[[3]]) &&
      identical(cmd[[3]][[1]], as.name("function"))
  }
  # vapply() always yields a logical vector, also for an empty file, so the
  # subsetting below stays valid.
  assign.funs <- vapply(cmds, is_fun_assignment, logical(1))
  cmds[assign.funs]
}
|
||
files <- c("../scripts/create_merged_data.R", | ||
"../scripts/compute_pau.R") | ||
for (f in files) { | ||
cmds <- source_funcs(f) | ||
eval(cmds) | ||
} | ||
|
||
suppressPackageStartupMessages(library(stringr)) | ||
suppressPackageStartupMessages(library(data.table)) | ||
suppressPackageStartupMessages(library(dplyr)) | ||
library(testthat) | ||
test_dir("R/") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/bin/bash | ||
|
||
# Run every Python unit-test module and exit non-zero if any of them fails,
# so callers (CI, make) can detect a broken test run.
status=0
for i in python/*_test.py
do
    # An unmatched glob is left as the literal pattern; skip it instead of
    # handing a non-existent path to the interpreter.
    [ -e "$i" ] || continue
    # Quote the path so names containing spaces or glob characters survive.
    python "$i" || status=1
    echo -e "\n"
done
exit "$status"
|