From 56cce49dd63ff619274a6cdbaa59e2be3c44e2e8 Mon Sep 17 00:00:00 2001 From: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:06:58 +0200 Subject: [PATCH] Update tree agglomeration (#656) --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS | 3 +++ R/agglomerate.R | 33 +++++++++++++++----------- R/getPrevalence.R | 39 +++++++++++++++++++++++++++++++ man/getPrevalence.Rd | 9 +++++++ tests/testthat/test-5prevalence.R | 15 +++++++++++- 7 files changed, 87 insertions(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 055ef6702..eeff2a4e9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mia Type: Package -Version: 1.15.3 +Version: 1.15.4 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", diff --git a/NAMESPACE b/NAMESPACE index 1ad38d083..b8c444f21 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -347,6 +347,7 @@ importFrom(SummarizedExperiment,colData) importFrom(SummarizedExperiment,rowData) importFrom(SummarizedExperiment,rowRanges) importFrom(TreeSummarizedExperiment,changeTree) +importFrom(TreeSummarizedExperiment,subsetByLeaf) importFrom(ape,as.phylo) importFrom(ape,bind.tree) importFrom(ape,collapse.singles) diff --git a/NEWS b/NEWS index 46361a3e8..053a02399 100644 --- a/NEWS +++ b/NEWS @@ -156,3 +156,6 @@ computation + transformAssay can apply transformation to altExp + Added CSS transformation + In agglomerateByVariable, splitOn and getDominant, use 'group' to specify grouping variable. + +Changes in version 1.15.x ++ subsetBy*: added update.tree argument diff --git a/R/agglomerate.R b/R/agglomerate.R index 1527be4ce..bb95f9fe5 100644 --- a/R/agglomerate.R +++ b/R/agglomerate.R @@ -23,9 +23,6 @@ #' regarded as empty. (Default: \code{c(NA, "", " ", "\t")}). They will be #' removed if \code{na.rm = TRUE} before agglomeration. #' -#' @param update.tree \code{Logical scalar}. Should -#' \code{rowTree()} also be agglomerated? (Default: \code{FALSE}) -#' #' @param agglomerateTree Deprecated. Use \code{update.tree} instead. #' #' @param agglomerate.tree Deprecated. Use \code{update.tree} instead. @@ -450,16 +447,18 @@ setMethod( # Agglomerate all rowTrees found in TreeSE object. Get tips that represent # rows and remove all others. +#' @importFrom TreeSummarizedExperiment subsetByLeaf .agglomerate_trees <- function(x, by = 1, ...){ # Get right functions based on direction tree_names_FUN <- switch( by, "1" = rowTreeNames, "2" = colTreeNames, stop(".")) links_FUN <- switch(by, "1" = rowLinks, "2" = colLinks, stop(".")) tree_FUN <- switch(by, "1" = rowTree, "2" = colTree, stop(".")) - # Get right argument names for changeTree call + # Get right argument names for subsetByLeaf call args_names <- switch( - by, "1" = c("x", "rowTree", "rowNodeLab", "whichRowTree"), - "2" = c("x", "colTree", "colNodeLab", "whichColTree"), + by, + "1" = c("x", "rowLeaf", "whichRowTree", "updateTree"), + "2" = c("x", "colLeaf", "whichColTree", "updateTree"), stop(".")) # Get names of trees and links between trees and rows tree_names <- tree_names_FUN(x) @@ -475,11 +474,9 @@ setMethod( # Get names of nodes that are preserved links_temp <- links_temp[["nodeLab"]] # Agglomerate the tree - tree <- .prune_tree(tree, links_temp, ...) - # Change the tree with agglomerated version - args <- list(x, tree, links_temp, name) + args <- list(x, links_temp, name, TRUE) names(args) <- args_names - x <- do.call(changeTree, args) + x <- do.call(subsetByLeaf, args) } } return(x) @@ -507,12 +504,22 @@ setMethod( # even after pruning; these rows have still child-nodes that represent # other rows. # Suppress warning: drop all tips of the tree: returning NULL - suppressWarnings( - tree <- drop.tip( + tree <- tryCatch({ + drop.tip( tree, remove_tips, trim.internal = FALSE, collapse.singles = FALSE) - ) + }, warning = function(w) { + # Do nothing on warning + }, error = function(e) { + # Try to prune by also pruning internal nodes. Sometimes that is the + # case; we need to trim also internal nodes in order to prune + # leaf. + drop.tip( + tree, remove_tips, + trim.internal = TRUE, + collapse.singles = FALSE) + }) # If all tips were dropped, the result is NULL --> stop loop if( is.null(tree) ){ warning("Pruning resulted to empty tree.", call. = FALSE) diff --git a/R/getPrevalence.R b/R/getPrevalence.R index 0496db069..fca790187 100644 --- a/R/getPrevalence.R +++ b/R/getPrevalence.R @@ -28,6 +28,9 @@ #' #' @param na.rm \code{Logical scalar}. Should NA values be omitted when calculating #' prevalence? (Default: \code{TRUE}) +#' +#' @param update.tree \code{Logical scalar}. Should +#' \code{rowTree()} also be agglomerated? (Default: \code{FALSE}) #' #' @param ... additional arguments #' \itemize{ @@ -444,6 +447,24 @@ setMethod("subsetByPrevalent", signature = c(x = "SummarizedExperiment"), } ) +#' @rdname getPrevalence +#' @export +setMethod("subsetByPrevalent", signature = c(x = "TreeSummarizedExperiment"), + function(x, update.tree = FALSE, ...){ + # Check that update.tree is logical value + if( !.is_a_bool(update.tree) ){ + stop("'update.tree' must be TRUE or FALSE.", call. = FALSE) + } + # + x <- callNextMethod(x, ...) + # Agglomerate tree if specified + if( update.tree ){ + x <- .agglomerate_trees(x, ...) + } + return(x) + } +) + ############################# subsetByRare ################################# #' @rdname getPrevalence @@ -462,6 +483,24 @@ setMethod("subsetByRare", signature = c(x = "SummarizedExperiment"), } ) +#' @rdname getPrevalence +#' @export +setMethod("subsetByRare", signature = c(x = "TreeSummarizedExperiment"), + function(x, update.tree = FALSE, ...){ + # Check that update.tree is logical value + if( !.is_a_bool(update.tree) ){ + stop("'update.tree' must be TRUE or FALSE.", call. = FALSE) + } + # + x <- callNextMethod(x, ...) + # Agglomerate tree if specified + if( update.tree ){ + x <- .agglomerate_trees(x, ...) + } + return(x) + } +) + ############################# getPrevalentAbundance ############################ #' @rdname getPrevalence diff --git a/man/getPrevalence.Rd b/man/getPrevalence.Rd index 11c42860f..370cb3a6d 100644 --- a/man/getPrevalence.Rd +++ b/man/getPrevalence.Rd @@ -12,8 +12,10 @@ \alias{getRare,SummarizedExperiment-method} \alias{subsetByPrevalent} \alias{subsetByPrevalent,SummarizedExperiment-method} +\alias{subsetByPrevalent,TreeSummarizedExperiment-method} \alias{subsetByRare} \alias{subsetByRare,SummarizedExperiment-method} +\alias{subsetByRare,TreeSummarizedExperiment-method} \alias{getPrevalentAbundance} \alias{getPrevalentAbundance,ANY-method} \alias{getPrevalentAbundance,SummarizedExperiment-method} @@ -81,10 +83,14 @@ subsetByPrevalent(x, ...) \S4method{subsetByPrevalent}{SummarizedExperiment}(x, rank = NULL, ...) +\S4method{subsetByPrevalent}{TreeSummarizedExperiment}(x, update.tree = FALSE, ...) + subsetByRare(x, ...) \S4method{subsetByRare}{SummarizedExperiment}(x, rank = NULL, ...) +\S4method{subsetByRare}{TreeSummarizedExperiment}(x, update.tree = FALSE, ...) + getPrevalentAbundance( x, assay.type = assay_name, @@ -148,6 +154,9 @@ calculation. (Default: \code{"counts"})} \item{prevalence}{Prevalence threshold (in 0 to 1). The required prevalence is strictly greater by default. To include the limit, set \code{include.lowest} to \code{TRUE}.} + +\item{update.tree}{\code{Logical scalar}. Should +\code{rowTree()} also be agglomerated? (Default: \code{FALSE})} } \value{ \code{subsetPrevalent} and \code{subsetRareFeatures} return subset of diff --git a/tests/testthat/test-5prevalence.R b/tests/testthat/test-5prevalence.R index 8d977fd59..a77ea05d2 100644 --- a/tests/testthat/test-5prevalence.R +++ b/tests/testthat/test-5prevalence.R @@ -310,6 +310,13 @@ test_that("subsetByPrevalent", { alias <- subsetByPrevalent(gp_null, detection=5, prevalence = 0.33, rank = "Phylum") alias <- unname(assay(alias, "counts")) expect_equal(alias, pr2) + + # Check that tree subsetting works + expect_error(subsetByPrevalent(GlobalPatterns, update.tree = 1)) + expect_error(subsetByPrevalent(GlobalPatterns, update.tree = NULL)) + expect_error(subsetByPrevalent(GlobalPatterns, update.tree = c(TRUE, FALSE))) + tse_sub <- subsetByPrevalent(GlobalPatterns, prevalence = 0.4, rank = "Genus", update.tree = TRUE) + expect_equal(length(rowTree(tse_sub)$tip.label), nrow(tse_sub)) }) @@ -371,7 +378,13 @@ test_that("subsetByRare", { alias <- subsetByRare(gp_null, detection=5, prevalence = 0.33, rank = "Phylum") alias <- unname(assay(alias, "counts")) expect_equal(alias, pr2) - + + # Check that tree subsetting works + expect_error(subsetByRare(GlobalPatterns, update.tree = 1)) + expect_error(subsetByRare(GlobalPatterns, update.tree = NULL)) + expect_error(subsetByRare(GlobalPatterns, update.tree = c(TRUE, FALSE))) + tse_sub <- subsetByRare(GlobalPatterns, rank = "Class", update.tree = TRUE) + expect_equal(length(rowTree(tse_sub)$tip.label), nrow(tse_sub)) }) test_that("agglomerateByPrevalence", {