diff --git a/NAMESPACE b/NAMESPACE index 5e27d465..89bd2ea9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ export(enquo) export(enquos) export(enrich_component_strand_bias) export(expr) +export(get_Aneuploidy_score) export(get_adj_p) export(get_bayesian_result) export(get_cn_freq_table) diff --git a/NEWS.md b/NEWS.md index 15209a9d..6040afea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # sigminer 2.0.4 +- Implemented Cohen-Sharir method-like Aneuploidy Score. - Enhanced error handling in `show_sig_feature_corrplot()` (#376). - Fixed INDEL classification. - Fixed end position determination in `read_vcf()`. diff --git a/R/get.R b/R/get.R index 95867d96..b3c21681 100644 --- a/R/get.R +++ b/R/get.R @@ -275,80 +275,6 @@ get_LengthFraction <- function(CN_data, segTab[, c(colnames(arm_data)[-1], "flag") := NULL] segTab - - # .annot_fun <- function(chrom, start, end, p_start, p_end, p_length, q_start, q_end, q_length, total_size) { - # if (end <= p_end & start >= p_start) { - # ## 1L - # location <- paste0(sub("chr", "", chrom), "p") - # annotation <- "short arm" - # fraction <- (end - start + 1) / (p_end - p_start + 1) - # } else if (end <= q_end & - # start >= q_start) { - # ## 2L - # location <- paste0(sub("chr", "", chrom), "q") - # annotation <- "long arm" - # fraction <- (end - start + 1) / (q_end - q_start + 1) - # } else if (start >= p_start & - # start <= p_end & - # end >= q_start & end <= q_end) { - # ## 3L - # location <- paste0(sub("chr", "", chrom), "pq") # across p and q arm - # annotation <- "across short and long arm" - # fraction <- 2 * ((end - start + 1) / total_size) - # } else if (start < p_end & end < q_start) { - # ## 4L - # location <- paste0(sub("chr", "", chrom), "p") - # annotation <- "short arm intersect with centromere region" - # # only calculate region does not intersect - # fraction <- (end - start + 1 - (end - p_end)) / (p_end - p_start + 1) - # } else if (start > p_end & - # start < q_start & end > q_start) { - # ## 5L - # location <- paste0(sub("chr", "", chrom), "q") - # annotation <- "long arm intersect with centromere region" - # # only calculate region does not intersect - # fraction <- (end - start + 1 - (start - q_start)) / (q_end - q_start + 1) - # } else { - # ## 6L - # location <- paste0(sub("chr", "", chrom), "pq") # suppose as pq - # annotation <- "segment locate in centromere region" - # fraction <- 2 * ((end - start + 1) / total_size) - # } - # - # dplyr::tibble(location = location, annotation = annotation, fraction = fraction) - # } - # - # annot_fun <- function(chrom, start, end, p_start, p_end, p_length, q_start, - # q_end, q_length, total_size, .pb = NULL) { - # if (.pb$i < .pb$n) .pb$tick()$print() - # .annot_fun( - # chrom, start, end, p_start, p_end, p_length, q_start, - # q_end, q_length, total_size - # ) - # } - # - # pb <- progress_estimated(nrow(segTab), 0) - # - # annot <- purrr::pmap_df( - # list( - # chrom = segTab$chromosome, - # start = segTab$start, - # end = segTab$end, - # p_start = segTab$p_start, - # p_end = segTab$p_end, - # p_length = segTab$p_length, - # q_start = segTab$q_start, - # q_end = segTab$q_end, - # q_length = segTab$q_length, - # total_size = segTab$total_size - # ), annot_fun, - # .pb = pb - # ) - # - # cbind( - # data.table::as.data.table(segTab)[, colnames(arm_data)[-1] := NULL], - # data.table::as.data.table(annot) - # ) } diff --git a/R/get_Aneuploidy_score.R b/R/get_Aneuploidy_score.R new file mode 100644 index 00000000..b8258332 --- /dev/null +++ b/R/get_Aneuploidy_score.R @@ -0,0 +1,105 @@ +#' Get Aneuploidy Score from Copy Number Profile +#' +#' This implements a Cohen-Sharir method (see reference) like "Aneuploidy Score" computation. +#' You can read the source code to see how it works. Basically, it follows +#' the logic of Cohen-Sharir method but with some difference in detail implementation. +#' Their results should be counterpart, but with no data validation for now. +#' **Please raise an issue if you find problem/bugs in this function**. +#' +#' @inheritParams read_copynumber +#' @param data a CopyNumber object or a `data.frame` containing at least +#' 'chromosome', 'start', 'end', 'segVal', 'sample' these columns. +#' @param ploidy_df default is `NULL`, compute ploidy by segment-size weighted copy number +#' aross autosome, see [get_cn_ploidy]. You can also provide a `data.frame` with 'sample' +#' and 'ploidy' columns. +#' @references +#' - Cohen-Sharir, Y., McFarland, J. M., Abdusamad, M., Marquis, C., Bernhard, S. V., Kazachkova, M., ... & Ben-David, U. (2021). Aneuploidy renders cancer cells vulnerable to mitotic checkpoint inhibition. Nature, 1-6. +#' - Logic reference: . +#' +#' @return A `data.frame` +#' @export +#' +#' @examples +#' # Load copy number object +#' load(system.file("extdata", "toy_copynumber.RData", +#' package = "sigminer", mustWork = TRUE +#' )) +#' +#' df <- get_Aneuploidy_score(cn) +#' df +#' +#' df2 <- get_Aneuploidy_score(cn@data) +#' df2 +#' +#' df3 <- get_Aneuploidy_score(cn@data, +#' ploidy_df = get_cn_ploidy(cn@data) +#' ) +#' df3 +#' @testexamples +#' expect_equal(nrow(df), 10L) +#' expect_equal(nrow(df2), 10L) +#' expect_equal(nrow(df3), 10L) +get_Aneuploidy_score <- function(data, ploidy_df = NULL, genome_build = "hg19") { + stopifnot(is.data.frame(data) | inherits(data, "CopyNumber")) + if (is.data.frame(data)) { + nc_cols <- c("chromosome", "start", "end", "segVal", "sample") + if (!all(nc_cols %in% colnames(data))) { + stop("Invalid input, it must contain columns: ", paste(nc_cols, collapse = " ")) + } + annot <- get_LengthFraction(data, + genome_build = genome_build, + seg_cols = c("chromosome", "start", "end", "segVal") + ) + } else { + if (nrow(data@annotation) == 0L) { + annot <- get_LengthFraction(data@data, + genome_build = data@genome_build, + seg_cols = c("chromosome", "start", "end", "segVal") + ) + } else { + annot <- data@annotation + } + data <- data@data + } + + if (is.null(ploidy_df)) { + ploidy_df <- get_cn_ploidy(data) + } else { + ploidy_df <- data.table::as.data.table(ploidy_df) + } + + annot <- merge(annot, ploidy_df, by = "sample", all.x = TRUE) + annot$type <- sub("[0-9]+", "", annot$location) + + # For segments spans the centromere, remove it if length < 50%, i.e. 1 here. + # We assign average weights to all arms. + df <- annot[!(type == "pq" & round(fraction, 2) < 1) & (chromosome %in% paste0("chr", 1:22)), ] %>% + dplyr::mutate( + fraction = ifelse(type == "pq", .data$fraction / 2, .data$fraction), + type = ifelse(type == "pq", "p,q", type) + ) %>% + tidyr::separate_rows(type, sep = ",") %>% + data.table::as.data.table() + + df <- df[, .(flag = as.integer(round(sum(segVal * fraction) - ploidy[1]))), + # overage = round(sum(fraction), 3)), + by = .(sample, chromosome, type) + ] + + df %>% + dplyr::mutate( + flag = dplyr::case_when( + .data$flag > 1 ~ 1L, + .data$flag < -1 ~ -1L, + TRUE ~ .data$flag + ), + cytoband = paste(.data$chromosome, .data$type, sep = "-") + ) %>% + dplyr::select(-c("chromosome", "type")) %>% + tidyr::pivot_wider(id_cols = "sample", names_from = "cytoband", values_from = "flag", values_fill = 0L) %>% + dplyr::mutate(AScore = rowSums(abs(.[, -1]))) %>% + dplyr::select(c("sample", "AScore", dplyr::everything())) %>% + data.table::as.data.table() +} + +utils::globalVariables(c("ploidy")) diff --git a/_pkgdown.yml b/_pkgdown.yml index 8a87bab7..4606e49a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -146,6 +146,7 @@ reference: contents: - transform_seg_table - get_cn_ploidy + - get_Aneuploidy_score - scoring - show_cn_profile - show_cn_circos diff --git a/docs/reference/get_Aneuploidy_score.html b/docs/reference/get_Aneuploidy_score.html new file mode 100644 index 00000000..e24309fb --- /dev/null +++ b/docs/reference/get_Aneuploidy_score.html @@ -0,0 +1,220 @@ + + + + + + + + +Get Aneuploidy Score from Copy Number Profile — get_Aneuploidy_score • sigminer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+
+ + +
+

This implements a Cohen-Sharir method (see reference) like "Aneuploidy Score" computation. +You can read the source code to see how it works. Basically, it follows +the logic of Cohen-Sharir method but with some difference in detail implementation. +Their results should be counterpart, but with no data validation for now. +Please raise an issue if you find problem/bugs in this function.

+
+ +
get_Aneuploidy_score(data, ploidy_df = NULL, genome_build = "hg19")
+ +

Arguments

+ + + + + + + + + + + + + + +
data

a CopyNumber object or a data.frame containing at least +'chromosome', 'start', 'end', 'segVal', 'sample' these columns.

ploidy_df

default is NULL, compute ploidy by segment-size weighted copy number +aross autosome, see get_cn_ploidy. You can also provide a data.frame with 'sample' +and 'ploidy' columns.

genome_build

genome build version, should be 'hg19', 'hg38', 'mm9' or 'mm10'.

+ +

Value

+ +

A data.frame

+

References

+ + +
    +
  • Cohen-Sharir, Y., McFarland, J. M., Abdusamad, M., Marquis, C., Bernhard, S. V., Kazachkova, M., ... & Ben-David, U. (2021). Aneuploidy renders cancer cells vulnerable to mitotic checkpoint inhibition. Nature, 1-6.

  • +
  • Logic reference: https://github.com/quevedor2/aneuploidy_score/.

  • +
+ + +

Examples

+
# Load copy number object
+load(system.file("extdata", "toy_copynumber.RData",
+  package = "sigminer", mustWork = TRUE
+))
+
+df <- get_Aneuploidy_score(cn)
+df
+
+df2 <- get_Aneuploidy_score(cn@data)
+df2
+
+df3 <- get_Aneuploidy_score(cn@data,
+  ploidy_df = get_cn_ploidy(cn@data)
+)
+df3
+
+
+ +
+ + + +
+ + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 47a9437c..5224fc64 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -798,6 +798,12 @@

Get Ploidy from Absolute Copy Number Profile

+ +

get_Aneuploidy_score()

+ +

Get Aneuploidy Score from Copy Number Profile

+ +

scoring()

diff --git a/man/get_Aneuploidy_score.Rd b/man/get_Aneuploidy_score.Rd new file mode 100644 index 00000000..85eabfee --- /dev/null +++ b/man/get_Aneuploidy_score.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_Aneuploidy_score.R +\name{get_Aneuploidy_score} +\alias{get_Aneuploidy_score} +\title{Get Aneuploidy Score from Copy Number Profile} +\usage{ +get_Aneuploidy_score(data, ploidy_df = NULL, genome_build = "hg19") +} +\arguments{ +\item{data}{a CopyNumber object or a \code{data.frame} containing at least +'chromosome', 'start', 'end', 'segVal', 'sample' these columns.} + +\item{ploidy_df}{default is \code{NULL}, compute ploidy by segment-size weighted copy number +aross autosome, see \link{get_cn_ploidy}. You can also provide a \code{data.frame} with 'sample' +and 'ploidy' columns.} + +\item{genome_build}{genome build version, should be 'hg19', 'hg38', 'mm9' or 'mm10'.} +} +\value{ +A \code{data.frame} +} +\description{ +This implements a Cohen-Sharir method (see reference) like "Aneuploidy Score" computation. +You can read the source code to see how it works. Basically, it follows +the logic of Cohen-Sharir method but with some difference in detail implementation. +Their results should be counterpart, but with no data validation for now. +\strong{Please raise an issue if you find problem/bugs in this function}. +} +\examples{ +# Load copy number object +load(system.file("extdata", "toy_copynumber.RData", + package = "sigminer", mustWork = TRUE +)) + +df <- get_Aneuploidy_score(cn) +df + +df2 <- get_Aneuploidy_score(cn@data) +df2 + +df3 <- get_Aneuploidy_score(cn@data, + ploidy_df = get_cn_ploidy(cn@data) +) +df3 +} +\references{ +\itemize{ +\item Cohen-Sharir, Y., McFarland, J. M., Abdusamad, M., Marquis, C., Bernhard, S. V., Kazachkova, M., ... & Ben-David, U. (2021). Aneuploidy renders cancer cells vulnerable to mitotic checkpoint inhibition. Nature, 1-6. +\item Logic reference: \url{https://github.com/quevedor2/aneuploidy_score/}. +} +} diff --git a/tests/testthat/test-roxytest-testexamples-get_Aneuploidy_score.R b/tests/testthat/test-roxytest-testexamples-get_Aneuploidy_score.R new file mode 100644 index 00000000..bf1cce53 --- /dev/null +++ b/tests/testthat/test-roxytest-testexamples-get_Aneuploidy_score.R @@ -0,0 +1,26 @@ +# Generated by roxytest: Do not edit by hand! + +# File R/get_Aneuploidy_score.R: @testexamples + +test_that("Function get_Aneuploidy_score() @ L42", { + + # Load copy number object + load(system.file("extdata", "toy_copynumber.RData", + package = "sigminer", mustWork = TRUE + )) + + df <- get_Aneuploidy_score(cn) + df + + df2 <- get_Aneuploidy_score(cn@data) + df2 + + df3 <- get_Aneuploidy_score(cn@data, + ploidy_df = get_cn_ploidy(cn@data) + ) + df3 + expect_equal(nrow(df), 10L) + expect_equal(nrow(df2), 10L) + expect_equal(nrow(df3), 10L) +}) +