-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsnpInfo.R
86 lines (74 loc) · 4.49 KB
/
snpInfo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
snpInfo <- function(chr="chr7", start=29616705, end=29629223, accession=NULL, mutType=NULL) {
if ( exists("fetchSnp") ){
}else{
source("fetchSnp.R")
}
if (exists("snp.lst")){
}else{
snp.lst <- read.table("./data/snp.RData.lst", head=T, as.is=T, sep="\t")
}
snp.info <- fetchSnp(chr=chr, start=start, end=end,
accession = accession, mutType = mutType)
chr.size <- chrInfo$size[chrInfo$chr == chr]
start <- max(0, start)
end <- min(end, chr.size)
start <- as.numeric(start)
end <- as.numeric(end)
reg.gr <- IRanges::IRanges(start, end)
snp.lst.chr <- snp.lst[snp.lst$chr==chr, ]
snp.lst.gr <- IRanges::IRanges(start=snp.lst.chr$start, end=snp.lst.chr$end)
snp.fls <- snp.lst.chr$file[unique(S4Vectors::queryHits(IRanges::findOverlaps(snp.lst.gr, reg.gr)))]
snpeff.fls <- gsub("snp", "snpeff", snp.fls)
snpeff.fls.lst <- lapply(snpeff.fls, function(x){
load(x)
return(snpeff)
})
snpeff <- do.call(rbind, snpeff.fls.lst)
snpeff <- snpeff[order(as.numeric(snpeff[, 1])), ]
snpeff.info <- snpeff[snpeff[,1] %in% rownames(snp.info[[1]]), , drop=FALSE]
snpeff.info[,"eff"] <- gsub("IT", "Intergenic", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("IR", "Intron", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("IG", "Start_gained", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("IL", "Start_lost", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("SG", "Stop_gained", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("SL", "Stop_lost", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("Up", "Upstream", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("Dn", "Downstream", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("U3", "three_prime_UTR", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("U5", "five_prime_UTR", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("SSA", "Splice_site_acceptor", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("SSD", "Splice_site_donor", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("NSC", "Non_synonymous_coding", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("NSS", "Non_synonymous_start", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("SC", "Synonymous_coding", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("SS", "Synonymous_stop", snpeff.info[,"eff"])
snpeff.info[,"eff"] <- gsub("IA", "Intergenic", snpeff.info[,"eff"])
colnames(snpeff.info) <- c("snpID", "reference", "alternative", "effect")
if (!is.null(mutType) && length(mutType)>=1 && length(mutType)!=16) {
snpeff.info <- cbind(snpeff.info, eff="")
snpeff.info[,"eff"][grepl("Intergenic", snpeff.info[,"effect"])] <- "Intergenic"
snpeff.info[,"eff"][grepl("Intron", snpeff.info[,"effect"])] <- "Intron"
snpeff.info[,"eff"][grepl("Start_gained", snpeff.info[,"effect"])] <- "Start_gained"
snpeff.info[,"eff"][grepl("Start_lost", snpeff.info[,"effect"])] <- "Start_lost"
snpeff.info[,"eff"][grepl("Stop_gained", snpeff.info[,"effect"])] <- "Stop_gained"
snpeff.info[,"eff"][grepl("Stop_lost", snpeff.info[,"effect"])] <- "Stop_lost"
snpeff.info[,"eff"][grepl("Upstream", snpeff.info[,"effect"])] <- "Upstream"
snpeff.info[,"eff"][grepl("Downstream", snpeff.info[,"effect"])] <- "Downstream"
snpeff.info[,"eff"][grepl("three_prime_UTR", snpeff.info[,"effect"])] <- "three_prime_UTR"
snpeff.info[,"eff"][grepl("five_prime_UTR", snpeff.info[,"effect"])] <- "five_prime_UTR"
snpeff.info[,"eff"][grepl("Splice_site_acceptor", snpeff.info[,"effect"])] <- "Splice_site_acceptor"
snpeff.info[,"eff"][grepl("Splice_site_donor", snpeff.info[,"effect"])] <- "Splice_site_donor"
snpeff.info[,"eff"][grepl("Non_synonymous_coding", snpeff.info[,"effect"])] <- "Non_synonymous_coding"
snpeff.info[,"eff"][grepl("Non_synonymous_start", snpeff.info[,"effect"])] <- "Non_synonymous_start"
snpeff.info[,"eff"][grepl("Synonymous_coding", snpeff.info[,"effect"])] <- "Synonymous_coding"
snpeff.info[,"eff"][grepl("Synonymous_stop", snpeff.info[,"effect"])] <- "Synonymous_stop"
snpeff.info[,"eff"][grepl("Intergenic", snpeff.info[,"effect"])] <- "Intergenic"
snpeff.info <- snpeff.info[snpeff.info[, "eff"] %in% mutType, , drop=FALSE]
snpeff.info <- snpeff.info[, c("snpID", "reference", "alternative", "effect") , drop=FALSE]
}
snp.allele <- as.data.frame(snp.info[[2]], stringsAsFactors=FALSE)
snp.allele$snpID <- rownames(snp.allele)
rownames(snp.allele) <- NULL
dat.res <- merge(snp.allele, snpeff.info, by="snpID")
return(list(snp.info, dat.res))
}