Skip to content

Commit

Permalink
Making some changes to the latest version.
Browse files Browse the repository at this point in the history
Removing legacy/harmonized from folder structure

Fixing #573
Fixing #574

Needs more checking with CNV and CPTAC-3 projects
  • Loading branch information
Tiago Silva authored and Tiago Silva committed May 18, 2023
1 parent d0a43d3 commit b2bc706
Show file tree
Hide file tree
Showing 11 changed files with 232 additions and 196 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ importFrom(dplyr,mutate_all)
importFrom(dplyr,pull)
importFrom(dplyr,row_number)
importFrom(dplyr,slice)
importFrom(dplyr,summarise)
importFrom(grDevices,dev.list)
importFrom(grDevices,dev.off)
importFrom(grDevices,pdf)
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CHANGES IN VERSION 2.29.1
-------------------------

* Removing support for the legacy archive since it will be shut down by GDC soon.
* When saving files, the legacy/harmonized folder prefix is no longer included in the path.

CHANGES IN VERSION 2.21.1
-------------------------
Expand Down
36 changes: 21 additions & 15 deletions R/clinical.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ TCGAquery_MatchedCoupledSampleTypes <- function(barcode,typesample){
#' @export
#' @importFrom data.table rbindlist as.data.table
#' @importFrom jsonlite fromJSON
#' @importFrom dplyr summarise
#' @examples
#' clinical <- GDCquery_clinic(
#' project = "TCGA-ACC",
Expand Down Expand Up @@ -348,7 +349,10 @@ GDCquery_clinic <- function(
# we will collapse them into one single row
# concatenating all columns using ;
aux <- x %>% dplyr::group_by(submitter_id) %>%
summarise(across(everything(),~ paste(unique(.), collapse = ";")))
dplyr::summarise(
across(everything(),~ paste(unique(.), collapse = ";"))
)

aux$treatments <- list(dplyr::bind_rows(x$treatments))
aux
}
Expand Down Expand Up @@ -440,7 +444,7 @@ GDCquery_clinic <- function(
#' query <- GDCquery(
#' project = "TCGA-COAD",
#' data.category = "Clinical",
#' file.type = "xml",
#' data.format = "bcr xml",
#' barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")
#' )
#' GDCdownload(query)
Expand All @@ -452,7 +456,7 @@ GDCquery_clinic <- function(
#' query <- GDCquery(
#' project = "TCGA-COAD",
#' data.category = "Biospecimen",
#' file.type = "xml",
#' data.format = "bcr xml",
#' data.type = "Biospecimen Supplement",
#' barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")
#' )
Expand Down Expand Up @@ -503,9 +507,9 @@ GDCprepare_clinic <- function(
}

# Get all the clinical xml files
source <- "harmonized"

files <- file.path(
query$results[[1]]$project, source,
query$results[[1]]$project,
gsub(" ","_",query$results[[1]]$data_category),
gsub(" ","_",query$results[[1]]$data_type),
gsub(" ","_",query$results[[1]]$file_id),
Expand Down Expand Up @@ -586,16 +590,18 @@ GDCprepare_clinic <- function(
message("Updating days_to_last_followup and vital_status from follow_up information using last entry")
followup <- parseFollowup(files,xpath,clinical.info)

followup_last <- followup %>% dplyr::group_by(bcr_patient_barcode) %>% dplyr::summarise(
days_to_last_followup = max(as.numeric(days_to_last_followup),na.rm = TRUE),
vital_status = vital_status[
ifelse(
any(followup$days_to_last_followup %in% ""),
which(followup$days_to_last_followup %in% ""),
which.max(days_to_last_followup)
)
]
)
followup_last <- followup %>%
dplyr::group_by(bcr_patient_barcode) %>%
dplyr::summarise(
days_to_last_followup = max(as.numeric(days_to_last_followup),na.rm = TRUE),
vital_status = vital_status[
ifelse(
any(followup$days_to_last_followup %in% ""),
which(followup$days_to_last_followup %in% ""),
which.max(days_to_last_followup)
)
]
)
clin$days_to_last_followup <- followup_last$days_to_last_followup[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)]
clin$vital_status <- followup_last$vital_status[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)]
}
Expand Down
4 changes: 1 addition & 3 deletions R/download.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ GDCdownload <- function(
stop("We can only download one data type. Please use data.type argument in GDCquery to filter results.")
}

source <- "harmonized"

dir.create(directory, showWarnings = FALSE, recursive = TRUE)
for(proj in unique(unlist(query$project))){
message("Downloading data for project ", proj)
Expand All @@ -77,7 +75,7 @@ GDCdownload <- function(

path <- unique(
file.path(
proj, source,
proj,
gsub(" ","_", results$data_category),
gsub(" ","_",results$data_type))
)
Expand Down
Loading

0 comments on commit b2bc706

Please sign in to comment.