diff --git a/DESCRIPTION b/DESCRIPTION index a3dc1737..5c1403fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: TreeDist Type: Package Title: Calculate and Map Distances Between Phylogenetic Trees -Version: 2.8.0.9001 +Version: 2.8.0.9002 Authors@R: c(person("Martin R.", "Smith", email = "martin.smith@durham.ac.uk", role = c("aut", "cre", "cph", "prg"), diff --git a/NEWS.md b/NEWS.md index 9a357b88..616d3e70 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,14 @@ -# TreeDist 2.8.0.9001 (development) +# TreeDist 2.8.0.9002 (2024-09-03) - `VisualizeMatching()` allows more control over output format, and returns - the matching ([#124](https://github.com/ms609/TreeDist/issues/124)) -- `SpectralEigens()` returns correct eigenvalues (smallest was overlooked) -- `SpectralEigens()` handles values of `nEig` larger than the input + the matching ([#124](https://github.com/ms609/TreeDist/issues/124)). + +- `DistanceFromMedian(Average = median)` allows calculation of MAD. + +- `SpectralEigens()` returns correct eigenvalues (smallest was overlooked). + +- `SpectralEigens()` handles values of `nEig` larger than the input. + # TreeDist 2.8.0 (2024-07-25) diff --git a/R/cluster_stats.R b/R/cluster_stats.R index 8690e3a5..412df9ed 100644 --- a/R/cluster_stats.R +++ b/R/cluster_stats.R @@ -81,7 +81,7 @@ SumOfVars <- SumOfVariances #' distance from the centroid to points in each cluster. #' @examples MeanCentroidDistance(points, cluster) #' @export -MeanCentroidDistance <- function(x, cluster = 1) { +MeanCentroidDistance <- function(x, cluster = 1, Average = mean) { if (is.null(dim(x))) { warning(paste0("`x` lacks dimensions. ", "Did you subset without specifying `drop = FALSE`?")) @@ -90,7 +90,8 @@ MeanCentroidDistance <- function(x, cluster = 1) { # Return: vapply(seq_along(unique(cluster)), - function(i) .MeanCentroidDist(x[cluster == i, , drop = FALSE]), + function(i) .MeanCentroidDist(x[cluster == i, , drop = FALSE], + Average = Average), numeric(1)) } @@ -102,37 +103,41 @@ MeanCentDist <- MeanCentroidDistance #' @export MeanCentroidDist <- MeanCentroidDistance -.MeanCentroidDist <- function(x) { +.MeanCentroidDist <- function(x, Average = mean) { recentred <- t(t(x) - apply(x, 2, mean)) # Return: - mean(sqrt(rowSums(recentred ^ 2))) + Average(sqrt(rowSums(recentred ^ 2))) } #' @rdname cluster-statistics +#' @param Average Function to use to summarize distances. Defaults to `mean`; +#' specifying `median` returns a value akin to the median absolute divergence +#' (see [`mad`]). #' @return `DistanceFromMedian()` returns a numeric specifying the mean distance #' of each point (except the median) from the median point of its cluster. #' @examples DistanceFromMedian(points, cluster) #' @export -DistanceFromMedian <- function(x, cluster = 1) UseMethod("DistanceFromMedian") +DistanceFromMedian <- function(x, cluster = 1, Average = mean) + UseMethod("DistanceFromMedian") #' @rdname cluster-statistics #' @export DistFromMed <- DistanceFromMedian #' @export -DistanceFromMedian.dist <- function(x, cluster = 1) { +DistanceFromMedian.dist <- function(x, cluster = 1, Average = mean) { d <- as.matrix(x) vapply(seq_along(unique(cluster)), function(i) { .DistanceFromMedian.dist( - d[cluster == i, cluster == i, drop = FALSE]) + d[cluster == i, cluster == i, drop = FALSE], Average = Average) }, numeric(1)) } #' @export -DistanceFromMedian.numeric <- function(x, cluster = 1) { +DistanceFromMedian.numeric <- function(x, cluster = 1, Average = mean) { if (is.null(dim(x))) { warning(paste0("`x` lacks dimensions. ", "Did you subset without specifying `drop = FALSE`?")) @@ -141,22 +146,23 @@ DistanceFromMedian.numeric <- function(x, cluster = 1) { # Return: vapply(seq_along(unique(cluster)), - function(i) .DistanceFromMedian(x[cluster == i, , drop = FALSE]), + function(i) .DistanceFromMedian(x[cluster == i, , drop = FALSE], + Average = Average), numeric(1)) } -.DistanceFromMedian <- function(x) { +.DistanceFromMedian <- function(x, Average = mean) { if (dim(x)[[1]] > 1) { - .DistanceFromMedian.dist(as.matrix(dist(x))) + .DistanceFromMedian.dist(as.matrix(dist(x)), Average = Average) } else { NA_real_ } } -.DistanceFromMedian.dist <- function(d) { +.DistanceFromMedian.dist <- function(d, Average = mean) { if (dim(d)[[1]] > 1) { medPoint <- which.min(unname(colSums(d))) - mean(d[medPoint, -medPoint]) + Average(d[medPoint, -medPoint]) } else { NA_real_ } @@ -167,27 +173,28 @@ DistanceFromMedian.numeric <- function(x, cluster = 1) { #' point within a cluster to its nearest neighbour. #' @examples MeanNN(points, cluster) #' @export -MeanNN <- function(x, cluster = 1) UseMethod("MeanNN") +MeanNN <- function(x, cluster = 1, Average = mean) UseMethod("MeanNN") #' @export -MeanNN.dist <- function(x, cluster = 1) { +MeanNN.dist <- function(x, cluster = 1, Average = mean) { d <- as.matrix(x) diag(d) <- NA_real_ vapply(seq_along(unique(cluster)), - function(i) .MeanNN.dist(d[cluster == i, cluster == i, drop = FALSE]), + function(i) .MeanNN.dist(d[cluster == i, cluster == i, drop = FALSE], + Average = Average), numeric(1)) } -.MeanNN.dist <- function(x) { +.MeanNN.dist <- function(x, Average = mean) { if (dim(x)[[1]] > 1) { - mean(apply(x, 1, min, na.rm = TRUE)) + Average(apply(x, 1, min, na.rm = TRUE)) } else { NA_real_ } } #' @export -MeanNN.numeric <- function(x, cluster = 1) { +MeanNN.numeric <- function(x, cluster = 1, Average = mean) { if (is.null(dim(x))) { warning(paste0("`x` lacks dimensions. ", "Did you subset without specifying `drop = FALSE`?")) @@ -196,17 +203,17 @@ MeanNN.numeric <- function(x, cluster = 1) { # Return: vapply(seq_along(unique(cluster)), - function(i) .MeanNN(x[cluster == i, , drop = FALSE]), + function(i) .MeanNN(x[cluster == i, , drop = FALSE], Average = Average), numeric(1)) } -.MeanNN <- function(x) { +.MeanNN <- function(x, Average = mean) { if (dim(x)[[1]] > 1) { distances <- as.matrix(dist(x)) diag(distances) <- NA_real_ # Return: - mean(apply(distances, 1, min, na.rm = TRUE)) + Average(apply(distances, 1, min, na.rm = TRUE)) } else { # Return: diff --git a/man/cluster-statistics.Rd b/man/cluster-statistics.Rd index 14386843..eb997329 100644 --- a/man/cluster-statistics.Rd +++ b/man/cluster-statistics.Rd @@ -20,17 +20,17 @@ SumOfVariances(x, cluster = 1) SumOfVars(x, cluster = 1) -MeanCentroidDistance(x, cluster = 1) +MeanCentroidDistance(x, cluster = 1, Average = mean) -MeanCentDist(x, cluster = 1) +MeanCentDist(x, cluster = 1, Average = mean) -MeanCentroidDist(x, cluster = 1) +MeanCentroidDist(x, cluster = 1, Average = mean) -DistanceFromMedian(x, cluster = 1) +DistanceFromMedian(x, cluster = 1, Average = mean) -DistFromMed(x, cluster = 1) +DistFromMed(x, cluster = 1, Average = mean) -MeanNN(x, cluster = 1) +MeanNN(x, cluster = 1, Average = mean) MeanMSTEdge(x, cluster = 1) } @@ -41,6 +41,10 @@ distances between each pair of points.} \item{cluster}{Optional integer vector specifying the cluster or group to which each row in \code{x} belongs.} + +\item{Average}{Function to use to summarize distances. Defaults to \code{mean}; +specifying \code{median} returns a value akin to the median absolute divergence +(see \code{\link{mad}}).} } \value{ \code{SumOfRanges()} returns a numeric specifying the sum of ranges diff --git a/tests/testthat/test-cluster_stats.R b/tests/testthat/test-cluster_stats.R index 7c82db25..142065c3 100644 --- a/tests/testthat/test-cluster_stats.R +++ b/tests/testthat/test-cluster_stats.R @@ -47,6 +47,12 @@ test_that("DistanceFromMedian()", { sum(0.6, 0.4, 0.2, 0.2, 0.4, 0.6, 0.8) / 7) ) + expect_equal(DistanceFromMedian(points, cluster, median), + c(median(c(2, 2, 4)), + NA_real_, + median(c(0.6, 0.4, 0.2, 0.2, 0.4, 0.6, 0.8))) + ) + expect_equal( sapply(1:3, function(i) DistFromMed(points[cluster == i, , drop = FALSE])), DistanceFromMedian(points, cluster)