Skip to content

Commit

Permalink
2.8.0.9002: Median distance from median (=MAD)
Browse files Browse the repository at this point in the history
  • Loading branch information
ms609 committed Sep 3, 2024
1 parent 5cb5f3e commit 277636a
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 33 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: TreeDist
Type: Package
Title: Calculate and Map Distances Between Phylogenetic Trees
Version: 2.8.0.9001
Version: 2.8.0.9002
Authors@R: c(person("Martin R.", "Smith",
email = "[email protected]",
role = c("aut", "cre", "cph", "prg"),
Expand Down
13 changes: 9 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# TreeDist 2.8.0.9001 (development)
# TreeDist 2.8.0.9002 (2024-09-03)

- `VisualizeMatching()` allows more control over output format, and returns
the matching ([#124](https://github.com/ms609/TreeDist/issues/124))
- `SpectralEigens()` returns correct eigenvalues (smallest was overlooked)
- `SpectralEigens()` handles values of `nEig` larger than the input
the matching ([#124](https://github.com/ms609/TreeDist/issues/124)).

- `DistanceFromMedian(Average = median)` allows calculation of MAD.

- `SpectralEigens()` returns correct eigenvalues (smallest was overlooked).

- `SpectralEigens()` handles values of `nEig` larger than the input.


# TreeDist 2.8.0 (2024-07-25)

Expand Down
51 changes: 29 additions & 22 deletions R/cluster_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ SumOfVars <- SumOfVariances
#' distance from the centroid to points in each cluster.
#' @examples MeanCentroidDistance(points, cluster)
#' @export
MeanCentroidDistance <- function(x, cluster = 1) {
MeanCentroidDistance <- function(x, cluster = 1, Average = mean) {
if (is.null(dim(x))) {
warning(paste0("`x` lacks dimensions. ",
"Did you subset without specifying `drop = FALSE`?"))
Expand All @@ -90,7 +90,8 @@ MeanCentroidDistance <- function(x, cluster = 1) {

# Return:
vapply(seq_along(unique(cluster)),
function(i) .MeanCentroidDist(x[cluster == i, , drop = FALSE]),
function(i) .MeanCentroidDist(x[cluster == i, , drop = FALSE],
Average = Average),
numeric(1))
}

Expand All @@ -102,37 +103,41 @@ MeanCentDist <- MeanCentroidDistance
#' @export
MeanCentroidDist <- MeanCentroidDistance

.MeanCentroidDist <- function(x) {
.MeanCentroidDist <- function(x, Average = mean) {
recentred <- t(t(x) - apply(x, 2, mean))

# Return:
mean(sqrt(rowSums(recentred ^ 2)))
Average(sqrt(rowSums(recentred ^ 2)))
}

#' @rdname cluster-statistics
#' @param Average Function to use to summarize distances. Defaults to `mean`;
#' specifying `median` returns a value akin to the median absolute divergence
#' (see [`mad`]).
#' @return `DistanceFromMedian()` returns a numeric specifying the mean distance
#' of each point (except the median) from the median point of its cluster.
#' @examples DistanceFromMedian(points, cluster)
#' @export
DistanceFromMedian <- function(x, cluster = 1) UseMethod("DistanceFromMedian")
DistanceFromMedian <- function(x, cluster = 1, Average = mean)
UseMethod("DistanceFromMedian")

#' @rdname cluster-statistics
#' @export
DistFromMed <- DistanceFromMedian

#' @export
DistanceFromMedian.dist <- function(x, cluster = 1) {
DistanceFromMedian.dist <- function(x, cluster = 1, Average = mean) {
d <- as.matrix(x)
vapply(seq_along(unique(cluster)),
function(i) {
.DistanceFromMedian.dist(
d[cluster == i, cluster == i, drop = FALSE])
d[cluster == i, cluster == i, drop = FALSE], Average = Average)
},
numeric(1))
}

#' @export
DistanceFromMedian.numeric <- function(x, cluster = 1) {
DistanceFromMedian.numeric <- function(x, cluster = 1, Average = mean) {
if (is.null(dim(x))) {
warning(paste0("`x` lacks dimensions. ",
"Did you subset without specifying `drop = FALSE`?"))
Expand All @@ -141,22 +146,23 @@ DistanceFromMedian.numeric <- function(x, cluster = 1) {

# Return:
vapply(seq_along(unique(cluster)),
function(i) .DistanceFromMedian(x[cluster == i, , drop = FALSE]),
function(i) .DistanceFromMedian(x[cluster == i, , drop = FALSE],
Average = Average),
numeric(1))
}

.DistanceFromMedian <- function(x) {
.DistanceFromMedian <- function(x, Average = mean) {
if (dim(x)[[1]] > 1) {
.DistanceFromMedian.dist(as.matrix(dist(x)))
.DistanceFromMedian.dist(as.matrix(dist(x)), Average = Average)
} else {
NA_real_
}
}

.DistanceFromMedian.dist <- function(d) {
.DistanceFromMedian.dist <- function(d, Average = mean) {
if (dim(d)[[1]] > 1) {
medPoint <- which.min(unname(colSums(d)))
mean(d[medPoint, -medPoint])
Average(d[medPoint, -medPoint])
} else {
NA_real_
}
Expand All @@ -167,27 +173,28 @@ DistanceFromMedian.numeric <- function(x, cluster = 1) {
#' point within a cluster to its nearest neighbour.
#' @examples MeanNN(points, cluster)
#' @export
MeanNN <- function(x, cluster = 1) UseMethod("MeanNN")
MeanNN <- function(x, cluster = 1, Average = mean) UseMethod("MeanNN")

#' @export
MeanNN.dist <- function(x, cluster = 1) {
MeanNN.dist <- function(x, cluster = 1, Average = mean) {
d <- as.matrix(x)
diag(d) <- NA_real_
vapply(seq_along(unique(cluster)),
function(i) .MeanNN.dist(d[cluster == i, cluster == i, drop = FALSE]),
function(i) .MeanNN.dist(d[cluster == i, cluster == i, drop = FALSE],
Average = Average),
numeric(1))
}

.MeanNN.dist <- function(x) {
.MeanNN.dist <- function(x, Average = mean) {
if (dim(x)[[1]] > 1) {
mean(apply(x, 1, min, na.rm = TRUE))
Average(apply(x, 1, min, na.rm = TRUE))
} else {
NA_real_
}
}

#' @export
MeanNN.numeric <- function(x, cluster = 1) {
MeanNN.numeric <- function(x, cluster = 1, Average = mean) {
if (is.null(dim(x))) {
warning(paste0("`x` lacks dimensions. ",
"Did you subset without specifying `drop = FALSE`?"))
Expand All @@ -196,17 +203,17 @@ MeanNN.numeric <- function(x, cluster = 1) {

# Return:
vapply(seq_along(unique(cluster)),
function(i) .MeanNN(x[cluster == i, , drop = FALSE]),
function(i) .MeanNN(x[cluster == i, , drop = FALSE], Average = Average),
numeric(1))
}

.MeanNN <- function(x) {
.MeanNN <- function(x, Average = mean) {
if (dim(x)[[1]] > 1) {
distances <- as.matrix(dist(x))
diag(distances) <- NA_real_

# Return:
mean(apply(distances, 1, min, na.rm = TRUE))
Average(apply(distances, 1, min, na.rm = TRUE))

} else {
# Return:
Expand Down
16 changes: 10 additions & 6 deletions man/cluster-statistics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions tests/testthat/test-cluster_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ test_that("DistanceFromMedian()", {
sum(0.6, 0.4, 0.2, 0.2, 0.4, 0.6, 0.8) / 7)
)

expect_equal(DistanceFromMedian(points, cluster, median),
c(median(c(2, 2, 4)),
NA_real_,
median(c(0.6, 0.4, 0.2, 0.2, 0.4, 0.6, 0.8)))
)

expect_equal(
sapply(1:3, function(i) DistFromMed(points[cluster == i, , drop = FALSE])),
DistanceFromMedian(points, cluster)
Expand Down

0 comments on commit 277636a

Please sign in to comment.