# load_argos.R
suppressPackageStartupMessages({
library(readr)
library(dplyr)
library(purrr)
library(jsonlite)
})
source("create_tables.R")
# Turn a table into a named list of its transposed entries.
#
# tbl: table passed to transpose() (purrr is attached at the top of this file).
# col: extractor handed to map(); the value it pulls from each transposed
#      entry becomes that entry's name.
#
# Returns the transposed list, named by the extracted `col` values.
tibble_to_named_list <- function(tbl, col) {
  rows <- transpose(tbl)
  row_keys <- unlist(map(rows, col))
  names(rows) <- row_keys
  rows
}
# Look up `key` in a named list, falling back to a default when it is absent.
#
# ll:      named list (anything supporting names() and `[[`).
# key:     single name to look up; compared against names(ll) with %in%.
# default: value returned when `key` is not among names(ll). Defaults to NULL,
#          which is what the function returned before this argument existed,
#          so existing two-argument calls behave exactly as before.
#
# Returns ll[[key]] when `key` is present, otherwise `default`.
get_with_default <- function(ll, key, default = NULL) {
  if (key %in% names(ll)) {
    ll[[key]]
  } else {
    default
  }
}
# Return the distinct sample IDs listed in a directory's clinical sample file.
#
# odir: directory containing "data_clinical_sample.txt".
#
# Returns the distinct values of the SAMPLE_ID column. Every column is read
# as character, so the result is a character vector.
get_sample_list <- function(odir) {
  # Read from the directory that was passed in. The previous body used `pdir`,
  # which is neither a parameter nor a local of this function, so the call
  # only worked when a global `pdir` happened to exist.
  read_tsv(
    file.path(odir, "data_clinical_sample.txt"),
    comment = "#",
    col_types = cols(.default = "c"),
    progress = FALSE
  ) %>%
    distinct(SAMPLE_ID) %>%
    pull(SAMPLE_ID)
}
# Load the per-sample clinical, mutation, copy-number and fusion tables for
# an argos/helix run.
#
# inputs must be a list (or any object supporting `$`) with the properties
#   - portal_dir   => Path to argos/helix portal directory
#   - analysis_dir => Path to argos/helix analysis directory
#   - tumor_id     => Tumor sample ID; used as SAMPLE_ID in the pairing table
#   - normal_id    => Normal sample ID; "_" is replaced by "-" and a leading
#                     "s-" is stripped before it is used as NORMAL_ID
#
# Returns a list named by SAMPLE_ID with one entry per row of the clinical
# sample table (joined to the patient table and the pairing table). Each
# entry is additionally given MAF, CNV and Fusions tables when a table for
# that sample ID exists.
load_argos <- function(inputs) {
  pdir <- inputs$portal_dir
  adir <- inputs$analysis_dir

  # Split a table into one tibble per Tumor_Sample_Barcode and name each
  # piece by its barcode.
  split_by_barcode <- function(tbl) {
    pieces <- group_split(tbl, Tumor_Sample_Barcode)
    names(pieces) <- unlist(map(pieces, \(x) x$Tumor_Sample_Barcode[1]))
    pieces
  }

  dpt <- read_tsv(
    file.path(pdir, "data_clinical_patient.txt"),
    comment = "#",
    col_types = cols(.default = "?", SEX = "character"),
    show_col_types = TRUE
  )

  # making default=character breaks other things
  # NOTE(review): the dots in the file patterns below are unescaped, so they
  # match any character -- confirm whether literal dots were intended.
  maf <- read_tsv(
    fs::dir_ls(adir, regexp = ".muts.maf$"),
    comment = "#",
    col_types = cols(
      .default = "?",
      Chromosome = "character",
      Tumor_Seq_Allele2 = "character"
    )
  )

  # Derive the variant allele frequency when the MAF does not provide it,
  # preferring the reported depth over alt + ref counts.
  if (!"t_var_freq" %in% names(maf)) {
    if ("t_depth" %in% names(maf)) {
      maf$t_var_freq <- maf$t_alt_count / maf$t_depth
    } else {
      maf$t_var_freq <- maf$t_alt_count / (maf$t_alt_count + maf$t_ref_count)
    }
  }

  normal_id <- gsub("^s-", "", gsub("_", "-", inputs$normal_id))

  # pairingTable= maf %>%
  #   distinct(SAMPLE_ID=Tumor_Sample_Barcode,NORMAL_ID=Matched_Norm_Sample_Barcode) %>%
  #   mutate(NORMAL_ID=gsub("_","-",NORMAL_ID) %>% gsub("^s-","",.))
  # If the output has 0 mutations, the MAF-derived pairing above doesn't
  # work, so the pairing is built from `inputs` instead.
  pairingTable <- tribble(
    ~SAMPLE_ID, ~NORMAL_ID,
    inputs$tumor_id, normal_id
  )

  maf <- split_by_barcode(maf)

  sampleTbl <- read_tsv(
    file.path(pdir, "data_clinical_sample.txt"),
    comment = "#"
  ) %>%
    left_join(dpt, by = "PATIENT_ID")

  # Natural join on the columns the two tables share. grepl()/ifelse() are
  # already vectorised, so no rowwise() pass is needed.
  sampleTbl <- left_join(sampleTbl, pairingTable) %>%
    mutate(MATCHED = ifelse(grepl("POOLED", NORMAL_ID), "UnMatched", "Matched"))

  sampleData <- tibble_to_named_list(sampleTbl, "SAMPLE_ID")

  fusions <- read_tsv(file.path(pdir, "data_fusions.txt"), comment = "#") %>%
    split_by_barcode()

  # Drop the final "_<suffix>" from each barcode before grouping.
  cnv <- read_tsv(fs::dir_ls(adir, regexp = ".gene.cna.txt"), comment = "#") %>%
    mutate(Tumor_Sample_Barcode = gsub("_[^_]*$", "", Tumor_Sample_Barcode)) %>%
    split_by_barcode()

  # Attach the per-sample tables; a NULL lookup leaves the field unset.
  for (si in names(sampleData)) {
    # sampleData[[si]]$pMAF=get_with_default(pmaf,si)
    sampleData[[si]]$MAF <- get_with_default(maf, si)
    sampleData[[si]]$CNV <- get_with_default(cnv, si)
    sampleData[[si]]$Fusions <- get_with_default(fusions, si)
  }

  sampleData
}