From be62301694cd661c24684bf05405be87fbacbc5b Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:23:41 +0100 Subject: [PATCH] Report matched scores --- NEWS.md | 4 +- R/tree_distance.R | 17 +++- tests/testthat/Rplots.pdf | Bin tests/testthat/test-VisualizeMatching.R | 13 ++- tests/testthat/test-tree_distance.R | 80 ++++++++++-------- tests/testthat/test-tree_distance_utilities.R | 35 +++++--- 6 files changed, 87 insertions(+), 62 deletions(-) delete mode 100644 tests/testthat/Rplots.pdf diff --git a/NEWS.md b/NEWS.md index 423045cb6..9a357b883 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,7 @@ # TreeDist 2.8.0.9001 (development) -- `VisualizeMatching()` allows more control over output format - ([#124](https://github.com/ms609/TreeDist/issues/124)) +- `VisualizeMatching()` allows more control over output format, and returns + the matching ([#124](https://github.com/ms609/TreeDist/issues/124)) - `SpectralEigens()` returns correct eigenvalues (smallest was overlooked) - `SpectralEigens()` handles values of `nEig` larger than the input diff --git a/R/tree_distance.R b/R/tree_distance.R index 86c88614a..568cbb7ed 100644 --- a/R/tree_distance.R +++ b/R/tree_distance.R @@ -19,12 +19,14 @@ #' specified pair scorer. If `reportMatching = TRUE`, attribute also list: #' #' - `matching`: which split in `splits2` is optimally matched to each split in -#' `split1` (`NA` if not matched); -#' -#' - `pairScores`: Calculated scores for each possible matching of each split. +#' `split1` (`NA` if not matched); #' #' - `matchedSplits`: Textual representation of each match +#' +#' - `matchedScores`: Scores for matched split. #' +#' - `pairScores`: Calculated scores for each possible matching of each split. +#' #' @keywords internal #' @template MRS #' @encoding UTF-8 @@ -54,7 +56,6 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer, nTip = nTip, ...)[["score"]] } } - attr(ret, "pairScores") <- pairScores if (!is.null(attr(splits1, "tip.label"))) { matched1 <- !is.na(matching) @@ -68,6 +69,14 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer, pairScores[matrix(c(matched1, matched2), ncol = 2L)] > 0 } else rep(TRUE, length(matched1))) } + + attr(ret, "matchedScores") <- vapply( + seq_along(matching), + function(i) pairScores[i, matching[[i]]], + vector(mode(pairScores), 1) + ) + + attr(ret, "pairScores") <- pairScores } # Return: ret diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/tests/testthat/test-VisualizeMatching.R b/tests/testthat/test-VisualizeMatching.R index b465463e4..ad9646727 100644 --- a/tests/testthat/test-VisualizeMatching.R +++ b/tests/testthat/test-VisualizeMatching.R @@ -18,19 +18,18 @@ test_that("VisualizeMatching() works", { skip_if(packageVersion("graphics") < "4.3") skip_if(packageVersion("vdiffr") < "1.0") - TestVM <- function() { + vdiffr::expect_doppelganger("Test VM", TestVM <- function() { VisualizeMatching(MutualClusteringInfo, tree1, tree2, setPar = TRUE, precision = 3, matchZeros = FALSE, - Plot = plot.phylo) - } - vdiffr::expect_doppelganger("Test VM", TestVM) + Plot = plot.phylo) -> x + expect_equal(sum(attr(x, "matchedScores")), x[[1]]) + }) - TestVMr <- function() { + vdiffr::expect_doppelganger("Test VMr", function() { VisualizeMatching(MutualClusteringInfo, tree1, tree2r, setPar = TRUE, precision = 3, matchZeros = TRUE, Plot = plot.phylo, cex = 1.5) - } - vdiffr::expect_doppelganger("Test VMr", TestVMr) + }) vdiffr::expect_doppelganger("Visualize MCI matching", function() { par(mfrow = c(2, 2), mar = rep(0.1, 4), cex = 1.5) diff --git a/tests/testthat/test-tree_distance.R b/tests/testthat/test-tree_distance.R index 94a9ea9fa..6f916d2f7 100644 --- a/tests/testthat/test-tree_distance.R +++ b/tests/testthat/test-tree_distance.R @@ -1,24 +1,26 @@ -# Labels in different order to confound as.Splits -treeSym8 <- ape::read.tree(text="((e, (f, (g, h))), (((a, b), c), d));") -treeBal8 <- ape::read.tree(text="(((e, f), (g, h)), ((a, b), (c, d)));") -treeOpp8 <- ape::read.tree(text="(((a, f), (c, h)), ((g, b), (e, d)));") -treesSBO8 <- structure(list(treeSym8, treeBal8, treeOpp8), - class = "multiPhylo") -treesSSBB8 <- structure(list(treeSym8, treeSym8, treeBal8, treeBal8), - class = "multiPhylo") - -treeCat8 <- ape::read.tree(text="((((h, g), f), e), (d, (c, (b, a))));") -treeTac8 <- ape::read.tree(text="((((e, c), g), a), (h, (b, (d, f))));") -treeStar8 <- ape::read.tree(text="(e, c, g, h, b, a, d, f);") - -treeAb.Cdefgh <- ape::read.tree(text="((a, b), (c, d, e, f, g, h));") -treeAbc.Defgh <- ape::read.tree(text="((a, b, c), (d, e, f, g, h));") -treeAcd.Befgh <- ape::read.tree(text="((a, c, d), (b, e, f, g, h));") -treeAbcd.Efgh <- ape::read.tree(text="((a, b, c, d), (e, f, g, h));") -treeTwoSplits <- ape::read.tree(text="(((a, b), c, d), (e, f, g, h));") - -testTrees <- c(treesSBO8, treeCat8, treeTac8, treeStar8, treeAb.Cdefgh, - treeAbc.Defgh, treeAbcd.Efgh, treeAcd.Befgh, treeTwoSplits) +{ + # Labels in different order to confound as.Splits + treeSym8 <- ape::read.tree(text="((e, (f, (g, h))), (((a, b), c), d));") + treeBal8 <- ape::read.tree(text="(((e, f), (g, h)), ((a, b), (c, d)));") + treeOpp8 <- ape::read.tree(text="(((a, f), (c, h)), ((g, b), (e, d)));") + treesSBO8 <- structure(list(treeSym8, treeBal8, treeOpp8), + class = "multiPhylo") + treesSSBB8 <- structure(list(treeSym8, treeSym8, treeBal8, treeBal8), + class = "multiPhylo") + + treeCat8 <- ape::read.tree(text="((((h, g), f), e), (d, (c, (b, a))));") + treeTac8 <- ape::read.tree(text="((((e, c), g), a), (h, (b, (d, f))));") + treeStar8 <- ape::read.tree(text="(e, c, g, h, b, a, d, f);") + + treeAb.Cdefgh <- ape::read.tree(text="((a, b), (c, d, e, f, g, h));") + treeAbc.Defgh <- ape::read.tree(text="((a, b, c), (d, e, f, g, h));") + treeAcd.Befgh <- ape::read.tree(text="((a, c, d), (b, e, f, g, h));") + treeAbcd.Efgh <- ape::read.tree(text="((a, b, c, d), (e, f, g, h));") + treeTwoSplits <- ape::read.tree(text="(((a, b), c, d), (e, f, g, h));") + + testTrees <- c(treesSBO8, treeCat8, treeTac8, treeStar8, treeAb.Cdefgh, + treeAbc.Defgh, treeAbcd.Efgh, treeAcd.Befgh, treeTwoSplits) +} test_that("Split compatibility is correctly established", { expect_true(SplitsCompatible(as.logical(c(0,0,1,1,0)), @@ -644,14 +646,14 @@ test_that("Matchings are correct", { }) test_that("Matching Split Distance is correctly calculated", { - expect_equal(0L, MatchingSplitDistance(treeSym8, treeSym8)) - expect_equal(0L, MatchingSplitDistance(treeStar8, treeSym8)) - expect_equal(0L, MatchingSplitDistance(treeStar8, treeStar8)) + expect_equal(MatchingSplitDistance(treeSym8, treeSym8), 0L) + expect_equal(MatchingSplitDistance(treeStar8, treeSym8), 0L) + expect_equal(MatchingSplitDistance(treeStar8, treeStar8), 0L) match0 <- MatchingSplitDistance(treeStar8, treeStar8, reportMatching = TRUE) - expect_equal(rep(0L, 4), c(match0, vapply(attributes(match0), length, 0)), - ignore_attr = TRUE) - expect_equal(1L, MatchingSplitDistance(treeAb.Cdefgh, treeAbc.Defgh)) - expect_equal(2L, MatchingSplitDistance(treeAb.Cdefgh, treeAbcd.Efgh)) + expect_equal(c(match0, vapply(attributes(match0), length, 0)), + rep(0L, 5), ignore_attr = TRUE) + expect_equal(MatchingSplitDistance(treeAb.Cdefgh, treeAbc.Defgh), 1L) + expect_equal(MatchingSplitDistance(treeAb.Cdefgh, treeAbcd.Efgh), 2L) splitAB <- as.Splits(c(rep(TRUE, 2), rep(FALSE, 7))) splitABC <- as.Splits(c(rep(TRUE, 3), rep(FALSE, 6))) @@ -660,20 +662,26 @@ test_that("Matching Split Distance is correctly calculated", { splitABCDE <- as.Splits(c(rep(TRUE, 5), rep(FALSE, 4))) splitAI <- as.Splits(c(TRUE, rep(FALSE, 7), TRUE)) - expect_equal(2L, MatchingSplitDistanceSplits(splitAB, splitAI)) - expect_equal(2L, MatchingSplitDistanceSplits(splitAB, splitABCD)) - expect_equal(3L, MatchingSplitDistanceSplits(splitAB, splitABCDE)) - expect_equal(4L, MatchingSplitDistanceSplits(splitABC, splitAEF)) + expect_equal(MatchingSplitDistanceSplits(splitAB, splitAI), 2L) + expect_equal(MatchingSplitDistanceSplits(splitAB, splitABCD), 2L) + expect_equal(MatchingSplitDistanceSplits(splitAB, splitABCDE), 3L) + expect_equal(MatchingSplitDistanceSplits(splitABC, splitAEF), 4L) expect_equal(MatchingSplitDistanceSplits(splitABC, splitAEF), MatchingSplitDistanceSplits(splitAEF, splitABC)) # Invariant to tree description order - sq_pectinate <- ape::read.tree(text="((((((1, 2), 3), 4), 5), 6), (7, (8, (9, (10, 11)))));") - shuffle1 <- ape::read.tree(text="(((((1, 5), 2), 6), (3, 4)), ((8, (7, 9)), (10, 11)));") - shuffle2 <- ape::read.tree(text="(((8, (7, 9)), (10, 11)), ((((1, 5), 2), 6), (3, 4)));") + sq_pectinate <- ape::read.tree( + text = "((((((1, 2), 3), 4), 5), 6), (7, (8, (9, (10, 11)))));" + ) + shuffle1 <- ape::read.tree( + text = "(((((1, 5), 2), 6), (3, 4)), ((8, (7, 9)), (10, 11)));" + ) + shuffle2 <- ape::read.tree( + text = "(((8, (7, 9)), (10, 11)), ((((1, 5), 2), 6), (3, 4)));" + ) expect_equal(MatchingSplitDistance(shuffle1, sq_pectinate), MatchingSplitDistance(sq_pectinate, shuffle1)) - expect_equal(0L, MatchingSplitDistance(shuffle1, shuffle2)) + expect_equal(MatchingSplitDistance(shuffle1, shuffle2), 0L) expect_equal(MatchingSplitDistance(shuffle1, sq_pectinate), MatchingSplitDistance(shuffle2, sq_pectinate)) }) diff --git a/tests/testthat/test-tree_distance_utilities.R b/tests/testthat/test-tree_distance_utilities.R index ce7b5bf25..63b0a4738 100644 --- a/tests/testthat/test-tree_distance_utilities.R +++ b/tests/testthat/test-tree_distance_utilities.R @@ -128,14 +128,21 @@ test_that("Matches are reported", { splits1 <- as.Splits(tree1) splits2 <- as.Splits(tree2, tree1) + .ExpectAtOK <- function(at) { + expect_equal(4L, length(at)) + expect_equal(names(at), + c("matching", "matchedSplits", "matchedScores", "pairScores")) + } + Test <- function(Func, relaxed = FALSE, ...) { + at <- attributes(Func(tree1, tree2, reportMatching = TRUE, ...)) - expect_equal(3L, length(at)) + .ExpectAtOK(at) matchedSplits <- match(splits1, splits2) if (relaxed) { expect_equal(matchedSplits[!is.na(matchedSplits)], - as.integer(at$matching[c(1, 3, 5)])) + as.integer(at[["matching"]][c(1, 3, 5)])) } else { cs <- CompatibleSplits(splits1, splits2) cs[, matchedSplits] <- FALSE @@ -144,16 +151,17 @@ test_that("Matches are reported", { expect_equal(matchedSplits, as.integer(at$matching)) } - ghSplit <- at$matchedSplits[ + ghSplit <- at[["matchedSplits"]][ match(as.Splits(c(rep(FALSE, 6), TRUE, TRUE), letters[1:8]), splits1[[which(!is.na(matchedSplits))]])] - expect_equal("g h | a b c d e f => g h | a b c d e f", ghSplit) + expect_equal(ghSplit, "g h | a b c d e f => g h | a b c d e f") at <- attributes(Func(treeSym8, treeTwoSplits, reportMatching = TRUE, ...)) - expect_equal(3L, length(at)) - expect_equal(match(as.Splits(treeSym8), as.Splits(treeTwoSplits, treeSym8)), - as.integer(at$matching)) - expect_equal("a b | e f g h c d => a b | e f g h c d", at$matchedSplits[[2]]) + .ExpectAtOK(at) + expect_equal(as.integer(at[["matching"]]), + match(as.Splits(treeSym8), as.Splits(treeTwoSplits, treeSym8))) + expect_equal(at[["matchedSplits"]][[2]], + "a b | e f g h c d => a b | e f g h c d") } Test(SharedPhylogeneticInfo) @@ -173,14 +181,15 @@ test_that("Matches are reported", { # Matching Split Distance matches differently: at <- attributes(MatchingSplitDistance(treeSym8, treeBal8, reportMatching = TRUE)) - expect_equal(3L, length(at)) - expect_equal(c(1:3, 5:4), as.integer(at$matching)) - expect_equal("a b | e f g h c d => a b | e f g h c d", at$matchedSplits[[5]]) + .ExpectAtOK(at) + expect_equal(as.integer(at$matching), c(1:3, 5:4)) + expect_equal(at[["matchedSplits"]][[5]], + "a b | e f g h c d => a b | e f g h c d") # Zero match: expect_true(attr(SharedPhylogeneticInfo( - ape::read.tree(text="((a, b), (c, d));"), - ape::read.tree(text="((a, c), (b, d));"), + ape::read.tree(text = "((a, b), (c, d));"), + ape::read.tree(text = "((a, c), (b, d));"), reportMatching = TRUE), "matchedSplits") %in% c( "a b | c d .. a c | b d",