diff --git a/NAMESPACE b/NAMESPACE index c1bb49521..154782d03 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ export(getPrevalentFeatures) export(getPrevalentTaxa) export(getRareFeatures) export(getRareTaxa) +export(getTaxonomyRanks) export(getTopFeatures) export(getTopTaxa) export(getUniqueFeatures) @@ -78,6 +79,7 @@ export(runJSD) export(runNMDS) export(runOverlap) export(runUnifrac) +export(setTaxonomyRanks) export(splitByRanks) export(splitOn) export(subsampleCounts) @@ -308,6 +310,7 @@ importFrom(tibble,rownames_to_column) importFrom(tibble,tibble) importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) +importFrom(utils,assignInMyNamespace) importFrom(utils,combn) importFrom(utils,head) importFrom(utils,read.delim) diff --git a/NEWS b/NEWS index b052af1cd..e97cded4f 100644 --- a/NEWS +++ b/NEWS @@ -104,3 +104,4 @@ Changes in version 1.11.x + cluster: Overwrite old results instead of failing + getPrevalence: bugfix, if assay contains NA values, it does not end up to NA anymore. + getExperimentCrossCorrelation fix: enable using of sampleMap in MAE. ++ Implemented the setTaxonomyRanks function to specify which ranks are recognized as taxonomy ranks. diff --git a/R/loadFromMetaphlan.R b/R/loadFromMetaphlan.R index 0db66b343..dccf00a28 100644 --- a/R/loadFromMetaphlan.R +++ b/R/loadFromMetaphlan.R @@ -89,7 +89,7 @@ NULL loadFromMetaphlan <- function( - file, colData = sample_meta, sample_meta = NULL, phy_tree = NULL, ...){ + file, colData = sample_meta, sample_meta = NULL, phy_tree = NULL,...){ ################################ Input check ################################ if(!.is_non_empty_string(file)){ stop("'file' must be a single character value.", @@ -137,6 +137,8 @@ loadFromMetaphlan <- function( altExp(tse, rank) <- se_objects[[rank]] } } + # Set taxonomy ranks using .set_taxonomy_ranks + .set_ranks_based_on_rowdata(tse,...) # Load sample meta data if it is provided if( !is.null(colData) ) { @@ -392,3 +394,23 @@ loadFromMetaphlan <- function( } return(data) } +.set_ranks_based_on_rowdata <- function(tse,...){ + # Get ranks from rowData + ranks <- colnames(rowData(tse)) + # Ranks must be character columns + is_char <- lapply(rowData(tse), function(x) is.character(x) || is.factor(x)) + is_char <- unlist(is_char) + ranks <- ranks[ is_char ] + # rowData is empty, cannot set ranks + if( length(ranks) == 0 ){ + warning( + "Ranks cannot be set. rowData(x) does not include columns ", + "specifying character values.", call. = FALSE) + return(NULL) + } + # Finally, set ranks and give message + tse <- setTaxonomyRanks(ranks) + message("TAXONOMY_RANKS set to: '", paste0(ranks, collapse = "', '"), "'") + return(NULL) +} + diff --git a/R/taxonomy.R b/R/taxonomy.R index cc8d473c1..97fc0e6ca 100644 --- a/R/taxonomy.R +++ b/R/taxonomy.R @@ -71,7 +71,8 @@ #' (default: \code{FALSE}) #' #' @param ... optional arguments not used currently. -#' +#' +#' @param ranks Avector of ranks to be set #' @details #' Taxonomic information from the \code{IdTaxa} function of \code{DECIPHER} #' package are returned as a special class. With \code{as(taxa,"DataFrame")} @@ -116,6 +117,17 @@ #' mapTaxonomy(GlobalPatterns, taxa = "Escherichia") #' # returns information on a single output #' mapTaxonomy(GlobalPatterns, taxa = "Escherichia",to="Family") +#' +#' # setTaxonomyRanks +#' tse <- GlobalPatterns +#' colnames(rowData(tse))[1] <- "TAXA1" +#' +#' setTaxonomyRanks(colnames(rowData(tse))) +#' # Taxonomy ranks set to: taxa1 phylum class order family genus species +#' +#' # getTaxonomyRanks is to get/check if the taxonomic ranks is set to "TAXA1" +#' getTaxonomyRanks() +#' NULL #' @rdname taxonomy-methods @@ -197,6 +209,32 @@ setMethod("checkTaxonomy", signature = c(x = "SummarizedExperiment"), } ) +#' @rdname taxonomy-methods +#' @importFrom utils assignInMyNamespace +#' @aliases checkTaxonomy +#' @export +# Function to set taxonomy ranks +setTaxonomyRanks <- function(ranks) { + ranks <- tolower(ranks) + # Check if rank is a character vector with length >= 1 + if (!is.character(ranks) || length(ranks) < 1 + || any(ranks == "" | ranks == " " | ranks == "\t" | ranks == "-" | ranks == "_") + || any(grepl("\\s{2,}", ranks))) { + stop("Input 'rank' should be a character vector with non-empty strings, + no spaces, tabs, hyphens, underscores, and non-continuous spaces." + , call. = FALSE) + } + #Replace default value of mia::TAXONOMY_RANKS + assignInMyNamespace("TAXONOMY_RANKS", ranks) +} + +#' @rdname taxonomy-methods +#' @export +# Function to get taxonomy ranks +getTaxonomyRanks <- function() { + return(TAXONOMY_RANKS) +} + .check_taxonomic_rank <- function(rank, x){ if(length(rank) != 1L){ stop("'rank' must be a single character value.",call. = FALSE) diff --git a/man/taxonomy-methods.Rd b/man/taxonomy-methods.Rd index d224bfa10..2e9c2a6b3 100644 --- a/man/taxonomy-methods.Rd +++ b/man/taxonomy-methods.Rd @@ -10,6 +10,8 @@ \alias{taxonomyRankEmpty,SummarizedExperiment-method} \alias{checkTaxonomy} \alias{checkTaxonomy,SummarizedExperiment-method} +\alias{setTaxonomyRanks} +\alias{getTaxonomyRanks} \alias{getTaxonomyLabels} \alias{getTaxonomyLabels,SummarizedExperiment-method} \alias{mapTaxonomy} @@ -43,6 +45,10 @@ checkTaxonomy(x, ...) \S4method{checkTaxonomy}{SummarizedExperiment}(x) +setTaxonomyRanks(ranks) + +getTaxonomyRanks() + getTaxonomyLabels(x, ...) \S4method{getTaxonomyLabels}{SummarizedExperiment}( @@ -74,6 +80,8 @@ removed if \code{na.rm = TRUE} before agglomeration} \item{...}{optional arguments not used currently.} +\item{ranks}{Avector of ranks to be set} + \item{with_rank}{\code{TRUE} or \code{FALSE}: Should the level be add as a suffix? For example: "Phylum:Crenarchaeota" (default: \code{with_rank = FALSE})} @@ -170,6 +178,17 @@ mapTaxonomy(GlobalPatterns) mapTaxonomy(GlobalPatterns, taxa = "Escherichia") # returns information on a single output mapTaxonomy(GlobalPatterns, taxa = "Escherichia",to="Family") + +# setTaxonomyRanks +tse <- GlobalPatterns +colnames(rowData(tse))[1] <- "TAXA1" + +setTaxonomyRanks(colnames(rowData(tse))) +# Taxonomy ranks set to: taxa1 phylum class order family genus species + +# getTaxonomyRanks is to get/check if the taxonomic ranks is set to "TAXA1" +getTaxonomyRanks() + } \seealso{ \code{\link[=agglomerate-methods]{agglomerateByRank}},