-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Misordered UTR Lengths #3
Comments
Hmm. This looks like it has to do with the underlying GENCODE transcripts, which have alternative splicing that leads to longer UTRs. However, because the Spc24.1 and Spc24.2 transcript ends were fewer than 200 nt apart, they get merged in the final quantification, so we lose the information on which one we are really talking about. We may need to flag such genes. |
Here is more specific code to pull them out:
# List non-IPA transcripts of multi-UTR genes whose "distal" flag (highest
# numeric transcript_id suffix within the gene) disagrees with their "longest"
# flag (UTR length equal to the gene's maximum non-IPA UTR length).
#
# NOTE(review): the filter below now uses the elementwise `|` and the declared
# column name `ncelltypes`. The earlier form (`||` with `ncelltype`) did not
# test the condition per row, so any previously pasted output may contain rows
# with utr_usage_no_ipa < 0.1 and ncelltypes == 0 that this version drops.
read_tsv(
  "../atlas/data/utrs/20201111-txs_utr_metadata_lengths.tsv",
  # Compact column spec: `_` skips a column; the 11 kept columns are named
  # below, in order.
  col_types = "ccc____l_l__d___d____d_c_il",
  col_names = c(
    "transcript_id", "gene_id", "gene_symbol", "is_expressed", "is_ipa",
    "utr_usage_no_ipa", "ncelltypes", "n_utrs", "utr_type", "utr_length",
    "improper"
  ),
  # Custom col_names are supplied, so skip the first line of the file.
  skip = 1
) %>%
  filter(
    utr_type == "multi",
    is_expressed,
    # Elementwise OR: keep a row if either condition holds for that row.
    (utr_usage_no_ipa >= 0.1) | (ncelltypes > 0)
  ) %>%
  mutate(
    # Trailing digits of the transcript id, e.g. "Spc24.3" -> 3.
    utr_pos = as.integer(str_extract(transcript_id, "[0-9]+$")),
    # Treat a missing UTR length as 0 so the comparisons below are never NA.
    utr_length = ifelse(is.na(utr_length), 0, utr_length)
  ) %>%
  group_by(gene_id) %>%
  mutate(
    is_distal = utr_pos == max(utr_pos),
    # IPA transcripts count as length 0 when finding the gene's longest UTR.
    is_longest = utr_length == max(ifelse(is_ipa, 0, utr_length)),
    is_consistent = is_distal == is_longest
  ) %>%
  filter(!is_consistent, !is_ipa) %>%
  ungroup() %>%
  select(
    transcript_id, gene_symbol, utr_usage_no_ipa, ncelltypes, n_utrs,
    utr_length, is_distal, is_longest
  ) %>%
  print(n = Inf)
Output
# A tibble: 189 x 8
transcript_id gene_symbol utr_usage_no_ipa ncelltypes n_utrs utr_length is_distal is_longest
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl>
1 9130019O22Rik.1 9130019O22Rik 0.800 12 2 2312 FALSE TRUE
2 9130019O22Rik.2 9130019O22Rik 0.200 12 2 2040 TRUE FALSE
3 Abcg3.1 Abcg3 0.106 4 2 909 FALSE TRUE
4 Abcg3.3 Abcg3 0.894 7 2 896 TRUE FALSE
5 Abhd17b.2 Abhd17b 0.611 71 3 1351 FALSE TRUE
6 Abhd17b.3 Abhd17b 0.336 71 3 1323 TRUE FALSE
7 Adgrl2.7 Adgrl2 0.338 34 5 2065 FALSE TRUE
8 Adgrl2.8 Adgrl2 0.0124 0 5 1594 TRUE FALSE
9 Ampd1.1 Ampd1 0.271 2 2 811 FALSE TRUE
10 Ampd1.2 Ampd1 0.729 3 2 25 TRUE FALSE
11 Arhgef33.4 Arhgef33 0.125 0 2 508 FALSE TRUE
12 Arhgef33.5 Arhgef33 0.875 0 2 356 TRUE FALSE
13 Arrb2.2 Arrb2 0.929 55 2 540 FALSE TRUE
14 Arrb2.3 Arrb2 0.0706 4 2 94 TRUE FALSE
15 Bola2.2 Bola2 0.972 79 2 416 FALSE TRUE
16 Bola2.4 Bola2 0.0276 1 2 407 TRUE FALSE
17 Brsk2.6 Brsk2 0.117 4 2 2358 FALSE TRUE
18 Brsk2.7 Brsk2 0.883 6 2 2018 TRUE FALSE
19 Bub3.2 Bub3 0.00817 0 2 2442 FALSE TRUE
20 Bub3.4 Bub3 0.211 79 2 1007 TRUE FALSE
21 Ccdc159.1 Ccdc159 0.541 0 2 183 FALSE TRUE
22 Ccdc159.2 Ccdc159 0.459 0 2 75 TRUE FALSE
23 Cd59b.3 Cd59b 0.931 29 2 202 FALSE TRUE
24 Cd59b.4 Cd59b 0.0694 13 2 182 TRUE FALSE
25 Cdh18.2 Cdh18 0.673 0 2 189 FALSE TRUE
26 Cdh18.3 Cdh18 0.327 0 2 3 TRUE FALSE
27 Cpne6.1 Cpne6 0.118 2 2 345 FALSE TRUE
28 Cpne6.2 Cpne6 0.882 2 2 241 TRUE FALSE
29 Csnk2a2.5 Csnk2a2 0.830 24 3 5396 FALSE TRUE
30 Csnk2a2.6 Csnk2a2 0.0333 2 3 2343 TRUE FALSE
31 Ctbp1.3 Ctbp1 0.00835 0 2 2195 FALSE TRUE
32 Ctbp1.4 Ctbp1 0.946 97 2 780 TRUE FALSE
33 Dapk1.1 Dapk1 0.000868 0 2 937 FALSE TRUE
34 Dapk1.3 Dapk1 0.911 35 2 855 TRUE FALSE
35 Esrp1.1 Esrp1 0.00200 0 2 2062 FALSE TRUE
36 Esrp1.6 Esrp1 0.789 7 2 1523 TRUE FALSE
37 Exosc9.2 Exosc9 0.0110 0 2 348 FALSE TRUE
38 Exosc9.3 Exosc9 0.741 41 2 223 TRUE FALSE
39 Fam171a1.1 Fam171a1 0.312 33 2 2424 FALSE TRUE
40 Fam171a1.2 Fam171a1 0.688 33 2 1299 TRUE FALSE
41 Fam71e1.2 Fam71e1 0.397 8 2 461 FALSE TRUE
42 Fam71e1.3 Fam71e1 0.603 9 2 119 TRUE FALSE
43 Fdx1.5 Fdx1 0.961 72 2 3888 FALSE TRUE
44 Fdx1.6 Fdx1 0.0390 1 2 501 TRUE FALSE
45 Gm10719.1 Gm10719 0.455 0 2 0 FALSE TRUE
46 Gm10719.2 Gm10719 0.455 0 2 0 FALSE TRUE
47 Gm11168.3 Gm11168 0.288 0 2 0 FALSE TRUE
48 Gm14403.1 Gm14403 0.346 0 2 3 FALSE TRUE
49 Gm525.1 Gm525 0.434 2 2 308 FALSE TRUE
50 Gm525.2 Gm525 0.566 2 2 177 TRUE FALSE
51 Gng5.2 Gng5 0.954 55 2 2165 FALSE TRUE
52 Gng5.4 Gng5 0.0332 2 2 256 TRUE FALSE
53 Gpr137b.1 Gpr137b 0.633 45 2 2324 FALSE TRUE
54 Gpr137b.2 Gpr137b 0.367 45 2 1695 TRUE FALSE
55 Gps2.1 Gps2 0.103 27 2 161 FALSE TRUE
56 Gps2.2 Gps2 0.897 67 2 91 TRUE FALSE
57 Grn.2 Grn 0.884 82 2 340 FALSE TRUE
58 Grn.3 Grn 0.114 51 2 232 TRUE FALSE
59 Grpel2.2 Grpel2 0.729 48 3 3601 FALSE TRUE
60 Grpel2.3 Grpel2 0.111 31 3 3342 TRUE FALSE
61 Gtf3a.2 Gtf3a 0.839 29 2 324 FALSE TRUE
62 Gtf3a.3 Gtf3a 0.161 24 2 82 TRUE FALSE
63 H2-Q7.1 H2-Q7 0.124 21 2 429 FALSE TRUE
64 H2-Q7.2 H2-Q7 0.876 38 2 423 TRUE FALSE
65 Hcfc2.3 Hcfc2 0.248 14 2 2104 FALSE TRUE
66 Hcfc2.4 Hcfc2 0.752 15 2 1119 TRUE FALSE
67 Hspb1.1 Hspb1 0.0181 4 2 448 FALSE TRUE
68 Hspb1.2 Hspb1 0.982 33 2 136 TRUE FALSE
69 Hyal1.3 Hyal1 0.0627 8 2 1567 FALSE TRUE
70 Hyal1.4 Hyal1 0.937 44 2 1240 TRUE FALSE
71 Icam4.1 Icam4 0.502 16 2 242 FALSE TRUE
72 Icam4.2 Icam4 0.498 16 2 104 TRUE FALSE
73 Ifnar2.2 Ifnar2 0.262 42 2 1625 FALSE TRUE
74 Ifnar2.3 Ifnar2 0.738 42 2 1178 TRUE FALSE
75 Il21r.2 Il21r 0.235 48 2 970 FALSE TRUE
76 Il21r.3 Il21r 0.765 68 2 582 TRUE FALSE
77 Immt.1 Immt 0.345 0 2 153 FALSE TRUE
78 Immt.5 Immt 0.655 0 2 37 TRUE FALSE
79 Iscu.3 Iscu 0.0198 0 2 464 FALSE TRUE
80 Iscu.4 Iscu 0.945 40 2 373 TRUE FALSE
81 Kank3.1 Kank3 0.344 31 2 512 FALSE TRUE
82 Kank3.2 Kank3 0.656 34 2 196 TRUE FALSE
83 Katna1.2 Katna1 0.0403 7 2 326 FALSE TRUE
84 Katna1.3 Katna1 0.960 37 2 164 TRUE FALSE
85 Kcnn4.2 Kcnn4 0.0311 1 2 577 FALSE TRUE
86 Kcnn4.3 Kcnn4 0.940 15 2 496 TRUE FALSE
87 Klhl2.2 Klhl2 0.112 30 2 2324 FALSE TRUE
88 Klhl2.4 Klhl2 0.888 41 2 1274 TRUE FALSE
89 Larp1b.3 Larp1b 0.0559 4 3 2162 FALSE TRUE
90 Larp1b.5 Larp1b 0.417 50 3 1108 TRUE FALSE
91 Lrch4.4 Lrch4 0.125 11 2 952 FALSE TRUE
92 Lrch4.5 Lrch4 0.875 36 2 233 TRUE FALSE
93 Mcoln1.1 Mcoln1 0.156 18 2 283 FALSE TRUE
94 Mcoln1.2 Mcoln1 0.844 24 2 185 TRUE FALSE
95 Mon1a.1 Mon1a 0.0455 1 2 1173 FALSE TRUE
96 Mon1a.2 Mon1a 0.954 23 2 140 TRUE FALSE
97 Mrpl15.6 Mrpl15 0.956 38 2 2192 FALSE TRUE
98 Mrpl15.7 Mrpl15 0.0444 3 2 1249 TRUE FALSE
99 Msrb1.1 Msrb1 0.0432 3 2 837 FALSE TRUE
100 Msrb1.2 Msrb1 0.957 31 2 472 TRUE FALSE
101 N6amt1.2 N6amt1 0.265 68 2 1400 FALSE TRUE
102 N6amt1.3 N6amt1 0.735 76 2 1174 TRUE FALSE
103 Nfatc4.2 Nfatc4 0.484 2 2 1105 FALSE TRUE
104 Nfatc4.3 Nfatc4 0.502 2 2 322 TRUE FALSE
105 Nop56.2 Nop56 0.0889 32 3 826 FALSE TRUE
106 Nop56.3 Nop56 0.734 63 3 93 TRUE FALSE
107 Per1.2 Per1 0.0923 15 2 806 FALSE TRUE
108 Per1.3 Per1 0.908 47 2 604 TRUE FALSE
109 Pgbd1.1 Pgbd1 0.468 4 2 1780 FALSE TRUE
110 Pgbd1.2 Pgbd1 0.532 5 2 1503 TRUE FALSE
111 Phf3.1 Phf3 0.194 45 3 2606 FALSE TRUE
112 Phf3.3 Phf3 0.138 47 3 1463 TRUE FALSE
113 Pigh.1 Pigh 0.431 41 2 2341 FALSE TRUE
114 Pigh.2 Pigh 0.569 41 2 1743 TRUE FALSE
115 Plch2.4 Plch2 0.548 6 2 1618 FALSE TRUE
116 Plch2.5 Plch2 0.449 6 2 550 TRUE FALSE
117 Plekhn1.2 Plekhn1 0.00156 0 2 993 FALSE TRUE
118 Plekhn1.4 Plekhn1 0.435 23 2 257 TRUE FALSE
119 Plpbp.4 Plpbp 0.904 66 3 5254 FALSE TRUE
120 Plpbp.5 Plpbp 0.0157 2 3 2721 TRUE FALSE
121 Ppp1r14b.2 Ppp1r14b 0.0292 1 2 616 FALSE TRUE
122 Ppp1r14b.4 Ppp1r14b 0.971 48 2 277 TRUE FALSE
123 Ppp2r3d.4 Ppp2r3d 0.653 8 3 2459 FALSE TRUE
124 Ppp2r3d.6 Ppp2r3d 0.194 5 3 0 TRUE FALSE
125 Psmd8.2 Psmd8 0.381 92 2 879 FALSE TRUE
126 Psmd8.5 Psmd8 0.619 92 2 425 TRUE FALSE
127 Pus3.1 Pus3 0.196 17 2 983 FALSE TRUE
128 Pus3.3 Pus3 0.0330 0 2 485 TRUE FALSE
129 Rell2.3 Rell2 0.171 1 2 771 FALSE TRUE
130 Rell2.4 Rell2 0.829 1 2 451 TRUE FALSE
131 Rfx5.3 Rfx5 0.886 16 2 3413 FALSE TRUE
132 Rfx5.4 Rfx5 0.0422 0 2 2196 TRUE FALSE
133 Rif1.1 Rif1 0.588 31 2 1694 FALSE TRUE
134 Rif1.2 Rif1 0.412 31 2 1183 TRUE FALSE
135 Rnd3.1 Rnd3 0.0864 11 3 2125 FALSE TRUE
136 Rnd3.3 Rnd3 0.801 33 3 1859 TRUE FALSE
137 Rnf19a.2 Rnf19a 0.0605 0 2 1509 FALSE TRUE
138 Rnf19a.3 Rnf19a 0.856 50 2 1439 TRUE FALSE
139 Rnf19b.2 Rnf19b 0.173 2 2 218 FALSE TRUE
140 Rnf19b.3 Rnf19b 0.827 2 2 76 TRUE FALSE
141 Rp1.2 Rp1 0.872 0 2 3 FALSE TRUE
142 Rp2.2 Rp2 0.543 23 2 3121 FALSE TRUE
143 Rp2.3 Rp2 0.457 23 2 102 TRUE FALSE
144 Saxo2.2 Saxo2 0.511 2 2 1638 FALSE TRUE
145 Saxo2.3 Saxo2 0.489 1 2 1507 TRUE FALSE
146 Smarca1.2 Smarca1 0.682 5 2 1504 FALSE TRUE
147 Smarca1.3 Smarca1 0.318 5 2 763 TRUE FALSE
148 Spc24.2 Spc24 0.0596 17 2 1281 FALSE TRUE
149 Spc24.3 Spc24 0.940 100 2 738 TRUE FALSE
150 Ss18l2.2 Ss18l2 0.0304 3 2 2112 FALSE TRUE
151 Ss18l2.3 Ss18l2 0.970 50 2 272 TRUE FALSE
152 Stx4a.1 Stx4a 0.227 80 2 351 FALSE TRUE
153 Stx4a.2 Stx4a 0.773 80 2 287 TRUE FALSE
154 Tcf25.6 Tcf25 0.0833 24 2 2893 FALSE TRUE
155 Tcf25.7 Tcf25 0.917 79 2 683 TRUE FALSE
156 Tpm1.8 Tpm1 0.108 30 2 1240 FALSE TRUE
157 Tpm1.9 Tpm1 0.892 65 2 772 TRUE FALSE
158 Tpp2.2 Tpp2 0.586 51 2 878 FALSE TRUE
159 Tpp2.3 Tpp2 0.414 51 2 810 TRUE FALSE
160 Trim30b.3 Trim30b 0.220 9 3 3404 FALSE TRUE
161 Trim30b.4 Trim30b 0.235 9 3 1773 TRUE FALSE
162 Trmt112.1 Trmt112 0.0227 3 2 301 FALSE TRUE
163 Trmt112.2 Trmt112 0.977 85 2 193 TRUE FALSE
164 Tsga10.1 Tsga10 0.824 0 2 810 FALSE TRUE
165 Tsga10.6 Tsga10 0.176 0 2 724 TRUE FALSE
166 Txndc9.6 Txndc9 0.858 83 2 2808 FALSE TRUE
167 Txndc9.9 Txndc9 0.0150 0 2 2476 TRUE FALSE
168 Vps50.2 Vps50 0.863 23 2 1936 FALSE TRUE
169 Vps50.3 Vps50 0.137 18 2 755 TRUE FALSE
170 Wdr19.1 Wdr19 0.485 1 2 2197 FALSE TRUE
171 Wdr19.3 Wdr19 0.515 1 2 321 TRUE FALSE
172 Wdr33.2 Wdr33 0.110 21 2 1378 FALSE TRUE
173 Wdr33.6 Wdr33 0.890 38 2 405 TRUE FALSE
174 Zbbx.1 Zbbx 0.257 0 2 65 FALSE TRUE
175 Zbbx.3 Zbbx 0.743 0 2 59 TRUE FALSE
176 Zfp101.1 Zfp101 0.222 17 2 5785 FALSE TRUE
177 Zfp101.3 Zfp101 0.778 24 2 690 TRUE FALSE
178 Zfp329.2 Zfp329 0.413 25 2 3624 FALSE TRUE
179 Zfp329.3 Zfp329 0.587 25 2 1532 TRUE FALSE
180 Zfp595.4 Zfp595 0.181 20 3 2067 FALSE TRUE
181 Zfp595.5 Zfp595 0.107 9 3 1946 TRUE FALSE
182 Zfp616.1 Zfp616 0.5 0 2 99 FALSE TRUE
183 Zfp616.2 Zfp616 0.5 0 2 3 TRUE FALSE
184 Zfp672.1 Zfp672 0.128 15 2 1700 FALSE TRUE
185 Zfp672.3 Zfp672 0.872 32 2 966 TRUE FALSE
186 Zfp931.2 Zfp931 0.862 14 2 117 FALSE TRUE
187 Zfp931.4 Zfp931 0.138 8 2 3 TRUE FALSE
188 Zkscan7.1 Zkscan7 0.0941 1 2 2790 FALSE TRUE
189 Zkscan7.2 Zkscan7 0.906 2 2 2497 TRUE FALSE |
The UTRs for the Spc24 gene appear to have switched UTR lengths.
The text was updated successfully, but these errors were encountered: