Skip to content

Commit

Permalink
Add Aneuploidy Score computation #374
Browse files Browse the repository at this point in the history
  • Loading branch information
ShixiangWang committed Aug 2, 2021
1 parent ed57a90 commit faf0fa1
Show file tree
Hide file tree
Showing 9 changed files with 411 additions and 74 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export(enquo)
export(enquos)
export(enrich_component_strand_bias)
export(expr)
export(get_Aneuploidy_score)
export(get_adj_p)
export(get_bayesian_result)
export(get_cn_freq_table)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# sigminer 2.0.4

- Implemented Cohen-Sharir method-like Aneuploidy Score.
- Enhanced error handling in `show_sig_feature_corrplot()` (#376).
- Fixed INDEL classification.
- Fixed end position determination in `read_vcf()`.
Expand Down
74 changes: 0 additions & 74 deletions R/get.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,80 +275,6 @@ get_LengthFraction <- function(CN_data,
segTab[, c(colnames(arm_data)[-1], "flag") := NULL]
segTab


# .annot_fun <- function(chrom, start, end, p_start, p_end, p_length, q_start, q_end, q_length, total_size) {
# if (end <= p_end & start >= p_start) {
# ## 1L
# location <- paste0(sub("chr", "", chrom), "p")
# annotation <- "short arm"
# fraction <- (end - start + 1) / (p_end - p_start + 1)
# } else if (end <= q_end &
# start >= q_start) {
# ## 2L
# location <- paste0(sub("chr", "", chrom), "q")
# annotation <- "long arm"
# fraction <- (end - start + 1) / (q_end - q_start + 1)
# } else if (start >= p_start &
# start <= p_end &
# end >= q_start & end <= q_end) {
# ## 3L
# location <- paste0(sub("chr", "", chrom), "pq") # across p and q arm
# annotation <- "across short and long arm"
# fraction <- 2 * ((end - start + 1) / total_size)
# } else if (start < p_end & end < q_start) {
# ## 4L
# location <- paste0(sub("chr", "", chrom), "p")
# annotation <- "short arm intersect with centromere region"
# # only calculate region does not intersect
# fraction <- (end - start + 1 - (end - p_end)) / (p_end - p_start + 1)
# } else if (start > p_end &
# start < q_start & end > q_start) {
# ## 5L
# location <- paste0(sub("chr", "", chrom), "q")
# annotation <- "long arm intersect with centromere region"
# # only calculate region does not intersect
# fraction <- (end - start + 1 - (start - q_start)) / (q_end - q_start + 1)
# } else {
# ## 6L
# location <- paste0(sub("chr", "", chrom), "pq") # suppose as pq
# annotation <- "segment locate in centromere region"
# fraction <- 2 * ((end - start + 1) / total_size)
# }
#
# dplyr::tibble(location = location, annotation = annotation, fraction = fraction)
# }
#
# annot_fun <- function(chrom, start, end, p_start, p_end, p_length, q_start,
# q_end, q_length, total_size, .pb = NULL) {
# if (.pb$i < .pb$n) .pb$tick()$print()
# .annot_fun(
# chrom, start, end, p_start, p_end, p_length, q_start,
# q_end, q_length, total_size
# )
# }
#
# pb <- progress_estimated(nrow(segTab), 0)
#
# annot <- purrr::pmap_df(
# list(
# chrom = segTab$chromosome,
# start = segTab$start,
# end = segTab$end,
# p_start = segTab$p_start,
# p_end = segTab$p_end,
# p_length = segTab$p_length,
# q_start = segTab$q_start,
# q_end = segTab$q_end,
# q_length = segTab$q_length,
# total_size = segTab$total_size
# ), annot_fun,
# .pb = pb
# )
#
# cbind(
# data.table::as.data.table(segTab)[, colnames(arm_data)[-1] := NULL],
# data.table::as.data.table(annot)
# )
}


Expand Down
105 changes: 105 additions & 0 deletions R/get_Aneuploidy_score.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#' Get Aneuploidy Score from Copy Number Profile
#'
#' This implements a Cohen-Sharir method (see reference) like "Aneuploidy Score" computation.
#' You can read the source code to see how it works. Basically, it follows
#' the logic of Cohen-Sharir method but with some difference in detail implementation.
#' Their results should be counterpart, but with no data validation for now.
#' **Please raise an issue if you find problem/bugs in this function**.
#'
#' @inheritParams read_copynumber
#' @param data a CopyNumber object or a `data.frame` containing at least
#' 'chromosome', 'start', 'end', 'segVal', 'sample' these columns.
#' @param ploidy_df default is `NULL`, compute ploidy by segment-size weighted copy number
#' aross autosome, see [get_cn_ploidy]. You can also provide a `data.frame` with 'sample'
#' and 'ploidy' columns.
#' @references
#' - Cohen-Sharir, Y., McFarland, J. M., Abdusamad, M., Marquis, C., Bernhard, S. V., Kazachkova, M., ... & Ben-David, U. (2021). Aneuploidy renders cancer cells vulnerable to mitotic checkpoint inhibition. Nature, 1-6.
#' - Logic reference: <https://github.com/quevedor2/aneuploidy_score/>.
#'
#' @return A `data.frame`
#' @export
#'
#' @examples
#' # Load copy number object
#' load(system.file("extdata", "toy_copynumber.RData",
#' package = "sigminer", mustWork = TRUE
#' ))
#'
#' df <- get_Aneuploidy_score(cn)
#' df
#'
#' df2 <- get_Aneuploidy_score(cn@data)
#' df2
#'
#' df3 <- get_Aneuploidy_score(cn@data,
#' ploidy_df = get_cn_ploidy(cn@data)
#' )
#' df3
#' @testexamples
#' expect_equal(nrow(df), 10L)
#' expect_equal(nrow(df2), 10L)
#' expect_equal(nrow(df3), 10L)
get_Aneuploidy_score <- function(data, ploidy_df = NULL, genome_build = "hg19") {
stopifnot(is.data.frame(data) | inherits(data, "CopyNumber"))
if (is.data.frame(data)) {
nc_cols <- c("chromosome", "start", "end", "segVal", "sample")
if (!all(nc_cols %in% colnames(data))) {
stop("Invalid input, it must contain columns: ", paste(nc_cols, collapse = " "))
}
annot <- get_LengthFraction(data,
genome_build = genome_build,
seg_cols = c("chromosome", "start", "end", "segVal")
)
} else {
if (nrow(data@annotation) == 0L) {
annot <- get_LengthFraction(data@data,
genome_build = data@genome_build,
seg_cols = c("chromosome", "start", "end", "segVal")
)
} else {
annot <- data@annotation
}
data <- data@data
}

if (is.null(ploidy_df)) {
ploidy_df <- get_cn_ploidy(data)
} else {
ploidy_df <- data.table::as.data.table(ploidy_df)
}

annot <- merge(annot, ploidy_df, by = "sample", all.x = TRUE)
annot$type <- sub("[0-9]+", "", annot$location)

# For segments spans the centromere, remove it if length < 50%, i.e. 1 here.
# We assign average weights to all arms.
df <- annot[!(type == "pq" & round(fraction, 2) < 1) & (chromosome %in% paste0("chr", 1:22)), ] %>%
dplyr::mutate(
fraction = ifelse(type == "pq", .data$fraction / 2, .data$fraction),
type = ifelse(type == "pq", "p,q", type)
) %>%
tidyr::separate_rows(type, sep = ",") %>%
data.table::as.data.table()

df <- df[, .(flag = as.integer(round(sum(segVal * fraction) - ploidy[1]))),
# overage = round(sum(fraction), 3)),
by = .(sample, chromosome, type)
]

df %>%
dplyr::mutate(
flag = dplyr::case_when(
.data$flag > 1 ~ 1L,
.data$flag < -1 ~ -1L,
TRUE ~ .data$flag
),
cytoband = paste(.data$chromosome, .data$type, sep = "-")
) %>%
dplyr::select(-c("chromosome", "type")) %>%
tidyr::pivot_wider(id_cols = "sample", names_from = "cytoband", values_from = "flag", values_fill = 0L) %>%
dplyr::mutate(AScore = rowSums(abs(.[, -1]))) %>%
dplyr::select(c("sample", "AScore", dplyr::everything())) %>%
data.table::as.data.table()
}

utils::globalVariables(c("ploidy"))
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ reference:
contents:
- transform_seg_table
- get_cn_ploidy
- get_Aneuploidy_score
- scoring
- show_cn_profile
- show_cn_circos
Expand Down
Loading

0 comments on commit faf0fa1

Please sign in to comment.