From b3b35216e021f4606a856a333ee3b27450c24914 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Mon, 25 Sep 2023 12:15:11 +0100 Subject: [PATCH 01/14] BigTreeDist --- DESCRIPTION | 4 +- NAMESPACE | 54 +++++++-------- R/Information.R | 16 ++--- R/RcppExports.R | 26 ++++---- R/cluster_stats.R | 2 +- R/plot.R | 6 +- R/shiny.R | 4 +- R/tree_distance_info.R | 10 +-- R/tree_distance_kendall-colijn.R | 30 ++++----- R/tree_distance_mast.R | 8 +-- R/tree_distance_msd.R | 4 +- R/tree_distance_nni.R | 10 +-- R/tree_distance_nye.R | 6 +- R/tree_distance_path.R | 12 ++-- R/tree_distance_rf.R | 12 ++-- R/tree_distance_spr.R | 2 +- R/tree_distance_utilities.R | 8 +-- R/tree_information.R | 12 ++-- R/trustworthiness.R | 2 +- R/zzz.R | 2 +- inst/treespace/app.R | 4 +- man/AllSplitPairings.Rd | 4 +- ...Dist-package.Rd => BigTreeDist-package.Rd} | 66 +++++++++---------- man/CompareAll.Rd | 2 +- man/JaccardRobinsonFoulds.Rd | 10 +-- man/KendallColijn.Rd | 22 +++---- man/LAPJV.Rd | 4 +- man/MASTSize.Rd | 4 +- man/MSTSegments.Rd | 2 +- man/MapTrees.Rd | 4 +- man/MappingQuality.Rd | 2 +- man/MatchingSplitDistance.Rd | 6 +- man/MeilaVariationOfInformation.Rd | 4 +- man/NNIDist.Rd | 6 +- man/NyeSimilarity.Rd | 6 +- man/PathDist.Rd | 12 ++-- man/Robinson-Foulds.Rd | 8 +-- man/SPRDist.Rd | 2 +- man/SplitEntropy.Rd | 6 +- man/SplitSharedInformation.Rd | 4 +- man/StartParallel.Rd | 12 ++-- man/TreeDistance.Rd | 26 ++++---- man/TreeInfo.Rd | 10 +-- man/VisualizeMatching.Rd | 2 +- man/kmeanspp.Rd | 6 +- memcheck/examples.R | 2 +- memcheck/tests.R | 2 +- memcheck/vignettes.R | 2 +- src/RcppExports.cpp | 54 +++++++-------- src/day_1985.cpp | 8 +-- src/information.h | 2 +- src/nni_distance.cpp | 6 +- src/tree_distance_functions.cpp | 2 +- src/tree_distances.cpp | 10 +-- src/tree_distances.h | 2 +- 55 files changed, 277 insertions(+), 277 deletions(-) rename man/{TreeDist-package.Rd => BigTreeDist-package.Rd} (63%) diff --git a/DESCRIPTION b/DESCRIPTION index 
b6925f4ce..1a63de0a0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,4 +1,4 @@ -Package: TreeDist +Package: BigTreeDist Type: Package Title: Calculate and Map Distances Between Phylogenetic Trees Version: 2.6.3.9001 @@ -77,7 +77,7 @@ Suggests: vdiffr (>= 1.0.0), LinkingTo: Rcpp, - TreeTools, + BigTreeTools, RdMacros: Rdpack VignetteBuilder: knitr Config/Needs/check: rcmdcheck diff --git a/NAMESPACE b/NAMESPACE index 0ec990df9..a870892a3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -118,33 +118,33 @@ export(TreeDistPlot) export(TreeDistance) export(TreesConsistentWithTwoSplits) export(VisualizeMatching) +importFrom(BigTreeTools,AllAncestors) +importFrom(BigTreeTools,KeepTip) +importFrom(BigTreeTools,LnRooted.int) +importFrom(BigTreeTools,LnTreesMatchingSplit) +importFrom(BigTreeTools,Log2Rooted.int) +importFrom(BigTreeTools,Log2TreesMatchingSplit) +importFrom(BigTreeTools,Log2Unrooted) +importFrom(BigTreeTools,Log2Unrooted.int) +importFrom(BigTreeTools,MSTEdges) +importFrom(BigTreeTools,MSTLength) +importFrom(BigTreeTools,NRooted) +importFrom(BigTreeTools,NSplits) +importFrom(BigTreeTools,NTip) +importFrom(BigTreeTools,PectinateTree) +importFrom(BigTreeTools,Postorder) +importFrom(BigTreeTools,Preorder) +importFrom(BigTreeTools,RenumberTips) +importFrom(BigTreeTools,RootOnNode) +importFrom(BigTreeTools,SplitInformation) +importFrom(BigTreeTools,SplitsInBinaryTree) +importFrom(BigTreeTools,TipLabels) +importFrom(BigTreeTools,TipsInSplits) +importFrom(BigTreeTools,TreeIsRooted) +importFrom(BigTreeTools,TreesMatchingSplit) +importFrom(BigTreeTools,as.ClusterTable) +importFrom(BigTreeTools,as.Splits) importFrom(Rdpack,reprompt) -importFrom(TreeTools,AllAncestors) -importFrom(TreeTools,KeepTip) -importFrom(TreeTools,LnRooted.int) -importFrom(TreeTools,LnTreesMatchingSplit) -importFrom(TreeTools,Log2Rooted.int) -importFrom(TreeTools,Log2TreesMatchingSplit) -importFrom(TreeTools,Log2Unrooted) -importFrom(TreeTools,Log2Unrooted.int) -importFrom(TreeTools,MSTEdges) 
-importFrom(TreeTools,MSTLength) -importFrom(TreeTools,NRooted) -importFrom(TreeTools,NSplits) -importFrom(TreeTools,NTip) -importFrom(TreeTools,PectinateTree) -importFrom(TreeTools,Postorder) -importFrom(TreeTools,Preorder) -importFrom(TreeTools,RenumberTips) -importFrom(TreeTools,RootOnNode) -importFrom(TreeTools,SplitInformation) -importFrom(TreeTools,SplitsInBinaryTree) -importFrom(TreeTools,TipLabels) -importFrom(TreeTools,TipsInSplits) -importFrom(TreeTools,TreeIsRooted) -importFrom(TreeTools,TreesMatchingSplit) -importFrom(TreeTools,as.ClusterTable) -importFrom(TreeTools,as.Splits) importFrom(ape,Nnode.phylo) importFrom(ape,drop.tip) importFrom(ape,edgelabels) @@ -182,4 +182,4 @@ importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,var) importFrom(utils,combn) -useDynLib(TreeDist, .registration = TRUE) +useDynLib(BigTreeDist, .registration = TRUE) diff --git a/R/Information.R b/R/Information.R index 41492d9be..b907e9da9 100644 --- a/R/Information.R +++ b/R/Information.R @@ -47,7 +47,7 @@ #' @references \insertAllCited{} #' #' @family information functions -#' @importFrom TreeTools Log2TreesMatchingSplit Log2Unrooted +#' @importFrom BigTreeTools Log2TreesMatchingSplit Log2Unrooted #' @export SplitSharedInformation <- function(n, A1, A2 = A1) { Log2Unrooted(n) + @@ -57,7 +57,7 @@ SplitSharedInformation <- function(n, A1, A2 = A1) { } #' @describeIn SplitSharedInformation Different information between two splits. -#' @importFrom TreeTools SplitInformation +#' @importFrom BigTreeTools SplitInformation #' @export SplitDifferentInformation <- function(n, A1, A2 = A1) { Log2TreesMatchingSplit(A1, n - A1) + @@ -69,10 +69,10 @@ SplitDifferentInformation <- function(n, A1, A2 = A1) { #' Use variation of clustering information to compare pairs of splits #' #' Compare a pair of splits viewed as clusterings of taxa, using the variation -#' of clustering information proposed by \insertCite{Meila2007}{TreeDist}. 
+#' of clustering information proposed by \insertCite{Meila2007}{BigTreeDist}. #' #' This is equivalent to the mutual clustering information -#' \insertCite{Vinh2010}{TreeDist}. +#' \insertCite{Vinh2010}{BigTreeDist}. #' For the total information content, multiply the VoI by the number of leaves. #' #' @template split12Params @@ -259,7 +259,7 @@ SplitEntropy <- function(split1, split2 = split1) { #' @describeIn SplitSharedInformation Number of trees consistent with two #' splits. -#' @importFrom TreeTools TreesMatchingSplit NRooted +#' @importFrom BigTreeTools TreesMatchingSplit NRooted #' @export TreesConsistentWithTwoSplits <- function(n, A1, A2 = A1) { @@ -306,7 +306,7 @@ TreesConsistentWithTwoSplits <- function(n, A1, A2 = A1) { #' @describeIn SplitSharedInformation Natural logarithm of #' `TreesConsistentWithTwoSplits()`. -#' @importFrom TreeTools LnTreesMatchingSplit LnRooted.int +#' @importFrom BigTreeTools LnTreesMatchingSplit LnRooted.int #' @export LnTreesConsistentWithTwoSplits <- function(n, A1, A2 = A1) { smallSplit <- min(A1, A2) @@ -326,7 +326,7 @@ LnTreesConsistentWithTwoSplits <- function(n, A1, A2 = A1) { #' @describeIn SplitSharedInformation Base two logarithm of #' `TreesConsistentWithTwoSplits()`. -#' @importFrom TreeTools Log2TreesMatchingSplit Log2Rooted.int +#' @importFrom BigTreeTools Log2TreesMatchingSplit Log2Rooted.int #' @export Log2TreesConsistentWithTwoSplits <- function(n, A1, A2 = A1) { smallSplit <- min(A1, A2) @@ -346,7 +346,7 @@ Log2TreesConsistentWithTwoSplits <- function(n, A1, A2 = A1) { #' @describeIn SplitSharedInformation Base 2 logarithm of #' `TreesConsistentWithTwoSplits()`. 
-#' @importFrom TreeTools Log2TreesMatchingSplit Log2Rooted.int +#' @importFrom BigTreeTools Log2TreesMatchingSplit Log2Rooted.int #' @export Log2TreesConsistentWithTwoSplits <- function(n, A1, A2 = A1) { smallSplit <- min(A1, A2) diff --git a/R/RcppExports.R b/R/RcppExports.R index 4453a0ceb..100603e31 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -2,54 +2,54 @@ # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 COMCLUST <- function(trees) { - .Call(`_TreeDist_COMCLUST`, trees) + .Call(`_BigTreeDist_COMCLUST`, trees) } consensus_info <- function(trees, phylo, p) { - .Call(`_TreeDist_consensus_info`, trees, phylo, p) + .Call(`_BigTreeDist_consensus_info`, trees, phylo, p) } robinson_foulds_all_pairs <- function(tables) { - .Call(`_TreeDist_robinson_foulds_all_pairs`, tables) + .Call(`_BigTreeDist_robinson_foulds_all_pairs`, tables) } lapjv <- function(x, maxX) { - .Call(`_TreeDist_lapjv`, x, maxX) + .Call(`_BigTreeDist_lapjv`, x, maxX) } cpp_mast <- function(edge1, edge2, nTip) { - .Call(`_TreeDist_cpp_mast`, edge1, edge2, nTip) + .Call(`_BigTreeDist_cpp_mast`, edge1, edge2, nTip) } cpp_nni_distance <- function(edge1, edge2, nTip) { - .Call(`_TreeDist_cpp_nni_distance`, edge1, edge2, nTip) + .Call(`_BigTreeDist_cpp_nni_distance`, edge1, edge2, nTip) } cpp_robinson_foulds_distance <- function(x, y, nTip) { - .Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip) + .Call(`_BigTreeDist_cpp_robinson_foulds_distance`, x, y, nTip) } cpp_robinson_foulds_info <- function(x, y, nTip) { - .Call(`_TreeDist_cpp_robinson_foulds_info`, x, y, nTip) + .Call(`_BigTreeDist_cpp_robinson_foulds_info`, x, y, nTip) } cpp_matching_split_distance <- function(x, y, nTip) { - .Call(`_TreeDist_cpp_matching_split_distance`, x, y, nTip) + .Call(`_BigTreeDist_cpp_matching_split_distance`, x, y, nTip) } cpp_jaccard_similarity <- function(x, y, nTip, k, allowConflict) { - .Call(`_TreeDist_cpp_jaccard_similarity`, x, y, nTip, k, allowConflict) + 
.Call(`_BigTreeDist_cpp_jaccard_similarity`, x, y, nTip, k, allowConflict) } cpp_msi_distance <- function(x, y, nTip) { - .Call(`_TreeDist_cpp_msi_distance`, x, y, nTip) + .Call(`_BigTreeDist_cpp_msi_distance`, x, y, nTip) } cpp_mutual_clustering <- function(x, y, nTip) { - .Call(`_TreeDist_cpp_mutual_clustering`, x, y, nTip) + .Call(`_BigTreeDist_cpp_mutual_clustering`, x, y, nTip) } cpp_shared_phylo <- function(x, y, nTip) { - .Call(`_TreeDist_cpp_shared_phylo`, x, y, nTip) + .Call(`_BigTreeDist_cpp_shared_phylo`, x, y, nTip) } diff --git a/R/cluster_stats.R b/R/cluster_stats.R index c3ac14f4a..15834d188 100644 --- a/R/cluster_stats.R +++ b/R/cluster_stats.R @@ -255,7 +255,7 @@ MeanMSTEdge.numeric <- function(x, cluster = 1) { numeric(1)) } -#' @importFrom TreeTools MSTLength +#' @importFrom BigTreeTools MSTLength .MeanMSTEdge <- function(x) { n <- dim(x)[1] # Return: diff --git a/R/plot.R b/R/plot.R index dc8cfe8e7..20195328f 100644 --- a/R/plot.R +++ b/R/plot.R @@ -104,7 +104,7 @@ TreeDistPlot <- function(tr, title = NULL, bold = NULL, leaveRoom = FALSE, #' @importFrom ape nodelabels edgelabels plot.phylo #' @importFrom colorspace qualitative_hcl sequential_hcl #' @importFrom graphics par -#' @importFrom TreeTools as.Splits +#' @importFrom BigTreeTools as.Splits #' #' @examples #' tree1 <- TreeTools::BalancedTree(6) @@ -270,7 +270,7 @@ VisualizeMatching <- function(Func, tree1, tree2, setPar = TRUE, #' #' To identify strain in a multidimensional scaling of distances, it can be #' useful to plot a minimum spanning tree -#' \insertCite{Gower1966,SmithSpace}{TreeDist}. Colouring each edge of the +#' \insertCite{Gower1966,SmithSpace}{BigTreeDist}. Colouring each edge of the #' tree according to its strain can identify areas where the mapping is #' stretched or compressed. #' @@ -323,7 +323,7 @@ MSTSegments <- function(mapping, mstEnds, ...) { #' early in `palette` assigned to edges in which the ratio of mapped #' distance to original distance is small. 
#' @importFrom grDevices hcl.colors -#' @importFrom TreeTools MSTEdges +#' @importFrom BigTreeTools MSTEdges #' @export StrainCol <- function(distances, mapping, mstEnds = MSTEdges(distances), palette = rev(hcl.colors(256L, "RdYlBu"))) { diff --git a/R/shiny.R b/R/shiny.R index f9bb6f34d..d4825b1bf 100644 --- a/R/shiny.R +++ b/R/shiny.R @@ -95,7 +95,7 @@ #' according to the methods used; it would be appropriate to cite and briefly #' discuss these studies in any publication using figures generated using #' this application. The application itself can be cited using -#' \insertCite{SmithDist,SmithSpace;textual}{TreeDist} +#' \insertCite{SmithDist,SmithSpace;textual}{BigTreeDist} #' #' #' @seealso @@ -112,7 +112,7 @@ #' @importFrom shinyjs useShinyjs #' @export MapTrees <- function() { - appDir <- system.file("treespace", package = "TreeDist") + appDir <- system.file("treespace", package = "BigTreeDist") if (appDir == "") { stop("Could not find example directory. Try re-installing 'TreeDist'.", call. = FALSE) diff --git a/R/tree_distance_info.R b/R/tree_distance_info.R index 95d7d1b91..22fa1b4ca 100644 --- a/R/tree_distance_info.R +++ b/R/tree_distance_info.R @@ -19,7 +19,7 @@ #' The returned tree similarity measures state the amount of information, #' in bits, that the splits in two trees hold in common #' when they are optimally matched, following -#' \insertCite{SmithDist;textual}{TreeDist}. +#' \insertCite{SmithDist;textual}{BigTreeDist}. #' The complementary tree distance measures state how much information is #' different in the splits of two trees, under an optimal matching. #' Where trees contain different tips, tips present in one tree but not the @@ -33,7 +33,7 @@ #' [a separate vignette](https://ms609.github.io/TreeDist/articles/information.html). 
#' #' Using the mutual (clustering) information -#' \insertCite{Meila2007,Vinh2010}{TreeDist} of two splits to quantify their +#' \insertCite{Meila2007,Vinh2010}{BigTreeDist} of two splits to quantify their #' similarity gives rise to the Mutual Clustering Information measure #' (`MutualClusteringInfo()`, `MutualClusteringInfoSplits()`); #' the entropy distance gives the Clustering Information Distance @@ -61,7 +61,7 @@ #' subtract the similarity score from a maximum value. In order to generate #' distance _metrics_, these functions subtract the similarity twice from the #' total information content (SPI, MSI) or entropy (MCI) of all the splits in -#' both trees \insertCite{SmithDist}{TreeDist}. +#' both trees \insertCite{SmithDist}{BigTreeDist}. #' #' # Normalization #' @@ -70,7 +70,7 @@ #' The maximum **distance** is the sum of the information content or entropy of #' each split in each tree; the maximum **similarity** is half this value. #' (See Vinh _et al._ (2010, table 3) and -#' \insertCite{SmithDist;textual}{TreeDist} for +#' \insertCite{SmithDist;textual}{BigTreeDist} for #' alternative normalization possibilities.) #' #' Note that a distance value of one (= similarity of zero) will seldom be @@ -254,7 +254,7 @@ ClusteringInfoDist <- ClusteringInfoDistance #' @param samples Integer specifying how many samplings to obtain; #' accuracy of estimate increases with `sqrt(samples)`. #' @importFrom stats sd -#' @importFrom TreeTools as.Splits +#' @importFrom BigTreeTools as.Splits #' @export ExpectedVariation <- function(tree1, tree2, samples = 1e+4) { info1 <- SplitwiseInfo(tree1) diff --git a/R/tree_distance_kendall-colijn.R b/R/tree_distance_kendall-colijn.R index 65f04a1a5..30751d5fd 100644 --- a/R/tree_distance_kendall-colijn.R +++ b/R/tree_distance_kendall-colijn.R @@ -16,10 +16,10 @@ #' #' An analogous distance can be created from any vector representation of a #' tree. 
-#' The split size vector metric \insertCite{SmithSpace}{TreeDist} is an attempt +#' The split size vector metric \insertCite{SmithSpace}{BigTreeDist} is an attempt #' to mimic the Kendall Colijn metric in situations where the position of #' the root should not be afforded special significance; and the path distance -#' \insertCite{Steel1993}{TreeDist} is a familiar alternative whose underlying +#' \insertCite{Steel1993}{BigTreeDist} is a familiar alternative whose underlying #' vector measures the distance of the last common ancestor of each pair #' of leaves from the leaves themselves, i.e. the length of the path from one #' leaf to another. @@ -27,22 +27,22 @@ #' #' None of these vector-based methods performs as well as other tree distances #' in measuring similarities in the relationships implied by a pair of trees -#' \insertCite{SmithDist}{TreeDist}; in particular, the Kendall Colijn +#' \insertCite{SmithDist}{BigTreeDist}; in particular, the Kendall Colijn #' metric is strongly influenced by tree balance, and may not be appropriate -#' for a suite of common applications \insertCite{SmithSpace}{TreeDist}. +#' for a suite of common applications \insertCite{SmithSpace}{BigTreeDist}. #' #' @template tree12ListParams #' @param Vector Function converting a tree to a numeric vector. #' #' `KCVector`, the default, returns the number of edges between the common #' ancestor of each pair of leaves and the root of the tree -#' \insertCite{@per @Kendall2016}{TreeDist}. +#' \insertCite{@per @Kendall2016}{BigTreeDist}. #' #' `PathVector` returns the number of edges between each pair of leaves -#' \insertCite{@per @Steel1993}{TreeDist}. +#' \insertCite{@per @Steel1993}{BigTreeDist}. #' #' `SplitVector` returns the size of the smallest split that contains each -#' pair of leaves (per \insertCite{SmithSpace;nobrackets}{TreeDist}). +#' pair of leaves (per \insertCite{SmithSpace;nobrackets}{BigTreeDist}). 
#' #' @templateVar returns `KendallColijn()` returns #' @template distReturn @@ -128,9 +128,9 @@ KendallColijn <- function(tree1, tree2 = NULL, Vector = KCVector) { .EuclideanDistance <- function(x) sqrt(sum(x * x)) #' @describeIn KendallColijn Creates a vector that characterises a rooted tree, -#' as described in \insertCite{Kendall2016;textual}{TreeDist}. +#' as described in \insertCite{Kendall2016;textual}{BigTreeDist}. #' @param tree A tree of class \code{\link[ape:read.tree]{phylo}}. -#' @importFrom TreeTools AllAncestors Preorder +#' @importFrom BigTreeTools AllAncestors Preorder #' @importFrom utils combn #' @export KCVector <- function(tree) { @@ -154,8 +154,8 @@ KCVector <- function(tree) { #' @describeIn KendallColijn Creates a vector reporting the number of edges #' between each pair of leaves, per the path metric of -#' \insertCite{Steel1993;textual}{TreeDist}. -#' @importFrom TreeTools AllAncestors Preorder +#' \insertCite{Steel1993;textual}{BigTreeDist}. +#' @importFrom BigTreeTools AllAncestors Preorder #' @importFrom utils combn #' @export PathVector <- function(tree) { @@ -188,8 +188,8 @@ PathVector <- function(tree) { #' @describeIn KendallColijn Creates a vector reporting the smallest split #' containing each pair of leaves, per the metric proposed in -#' \insertCite{SmithSpace;textual}{TreeDist}. -#' @importFrom TreeTools as.Splits +#' \insertCite{SmithSpace;textual}{BigTreeDist}. 
+#' @importFrom BigTreeTools as.Splits #' @export SplitVector <- function(tree) { tipLabel <- tree$tip.label @@ -216,12 +216,12 @@ SplitVector <- function(tree) { #' @examples #' KCDiameter(trees) #' KCDiameter(4) -#' @importFrom TreeTools PectinateTree +#' @importFrom BigTreeTools PectinateTree #' @rdname KendallColijn #' @export KCDiameter <- function(tree) UseMethod("KCDiameter") -#' @importFrom TreeTools NTip +#' @importFrom BigTreeTools NTip #' @export KCDiameter.phylo <- function(tree) { KCDiameter.numeric(NTip(tree)) diff --git a/R/tree_distance_mast.R b/R/tree_distance_mast.R index 77ee4653e..79573016e 100644 --- a/R/tree_distance_mast.R +++ b/R/tree_distance_mast.R @@ -1,11 +1,11 @@ #' Maximum Agreement Subtree size #' #' Calculate the size or phylogenetic information content -#' \insertCite{Steel2006}{TreeDist} +#' \insertCite{Steel2006}{BigTreeDist} #' of the maximum agreement subtree between two phylogenetic trees, i.e. #' the largest tree that can be obtained from both `tree1` and `tree2` by #' deleting, but not rearranging, leaves, using the algorithm of -#' \insertCite{Valiente2009;textual}{TreeDist}. +#' \insertCite{Valiente2009;textual}{BigTreeDist}. #' #' Implemented for trees with up to 4096 tips. Contact the maintainer if you #' need to process larger trees. @@ -53,7 +53,7 @@ MASTSize <- function(tree1, tree2 = tree1, rooted = TRUE) { } #' @importFrom ape drop.tip -#' @importFrom TreeTools Postorder RenumberTips TreeIsRooted RootOnNode +#' @importFrom BigTreeTools Postorder RenumberTips TreeIsRooted RootOnNode .MASTSizeSingle <- function(tree1, tree2, rooted = TRUE, tipLabels = tree1$tip.label, ...) { @@ -99,7 +99,7 @@ MASTSize <- function(tree1, tree2 = tree1, rooted = TRUE) { #' @rdname MASTSize #' @return `MASTInfo()` returns a vector or matrix listing the phylogenetic #' information content, in bits, of the maximum agreement subtree. 
-#' @importFrom TreeTools Log2Rooted.int Log2Unrooted.int +#' @importFrom BigTreeTools Log2Rooted.int Log2Unrooted.int #' @export MASTInfo <- function(tree1, tree2 = tree1, rooted = TRUE) { size <- MASTSize(tree1, tree2, rooted = rooted) diff --git a/R/tree_distance_msd.R b/R/tree_distance_msd.R index 181f3af1f..7a7ed5a79 100644 --- a/R/tree_distance_msd.R +++ b/R/tree_distance_msd.R @@ -18,7 +18,7 @@ #' A normalization value or function must be provided in order to return a #' normalized value. If you are aware of a generalised formula, please #' let me know by -#' \href{https://github.com/ms609/TreeDist/issues/new}{creating a GitHub issue} +#' \href{https://github.com/ms609/BigTreeDist/issues/new}{creating a GitHub issue} #' so that it can be implemented. #' #' @examples @@ -52,7 +52,7 @@ MatchingSplitDistance <- function(tree1, tree2 = NULL, normalize = FALSE, #' @rdname MatchingSplitDistance #' @inheritParams SharedPhylogeneticInfoSplits -#' @useDynLib TreeDist, .registration = TRUE +#' @useDynLib BigTreeDist, .registration = TRUE #' @export MatchingSplitDistanceSplits <- function(splits1, splits2, nTip = attr(splits1, "nTip"), diff --git a/R/tree_distance_nni.R b/R/tree_distance_nni.R index b96857d36..169d9b081 100644 --- a/R/tree_distance_nni.R +++ b/R/tree_distance_nni.R @@ -1,7 +1,7 @@ #' Approximate Nearest Neighbour Interchange distance #' -#' Use the approach of \insertCite{Li1996;textual}{TreeDist} to approximate the -#' Nearest Neighbour Interchange distance \insertCite{Robinson1971}{TreeDist} +#' Use the approach of \insertCite{Li1996;textual}{BigTreeDist} to approximate the +#' Nearest Neighbour Interchange distance \insertCite{Robinson1971}{BigTreeDist} #' between phylogenetic trees. #' #' In brief, this approximation algorithm works by identifying edges in one @@ -19,7 +19,7 @@ #' NNI operations, and provides a loose upper bound on the NNI score. 
#' The maximum number of moves for an _n_-leaf tree #' ([OEIS A182136](https://oeis.org/A182136)) can be calculated exactly for -#' small trees \insertCite{Fack2002}{TreeDist}; this provides a tighter upper +#' small trees \insertCite{Fack2002}{BigTreeDist}; this provides a tighter upper #' bound, but is unavailable for _n_ > 12. #' `NNIDiameter()` reports the limits on this bound. #' @@ -70,7 +70,7 @@ NNIDist <- function(tree1, tree2 = tree1) { .TreeDistance(.NNIDistSingle, tree1, tree2) } -#' @importFrom TreeTools Postorder RenumberTips +#' @importFrom BigTreeTools Postorder RenumberTips #' @importFrom ape Nnode.phylo .NNIDistSingle <- function(tree1, tree2, nTip, ...) { tree2 <- RenumberTips(tree2, tree1$tip.label) @@ -151,7 +151,7 @@ NNIDiameter.numeric <- function(tree) { ) } -#' @importFrom TreeTools NTip +#' @importFrom BigTreeTools NTip #' @export NNIDiameter.phylo <- function(tree) { NNIDiameter(NTip(tree)) diff --git a/R/tree_distance_nye.R b/R/tree_distance_nye.R index 74510c349..0cb5958bf 100644 --- a/R/tree_distance_nye.R +++ b/R/tree_distance_nye.R @@ -64,7 +64,7 @@ #' @family tree distances #' #' @encoding UTF-8 -#' @importFrom TreeTools NSplits SplitsInBinaryTree +#' @importFrom BigTreeTools NSplits SplitsInBinaryTree #' @export NyeSimilarity <- function(tree1, tree2 = NULL, similarity = TRUE, normalize = FALSE, @@ -128,7 +128,7 @@ NyeSplitSimilarity <- function(splits1, splits2, #' By default, conflicting splits may be paired. #' #' Note that the settings `k = 1, allowConflict = TRUE, similarity = TRUE` -#' give the similarity metric of \insertCite{Nye2006;textual}{TreeDist}; +#' give the similarity metric of \insertCite{Nye2006;textual}{BigTreeDist}; #' a slightly faster implementation of this metric is available as #' [`NyeSimilarity()`]. 
#' @@ -179,7 +179,7 @@ NyeSplitSimilarity <- function(splits1, splits2, #' @family tree distances #' #' @encoding UTF-8 -#' @importFrom TreeTools NSplits +#' @importFrom BigTreeTools NSplits #' @export JaccardRobinsonFoulds <- function(tree1, tree2 = NULL, k = 1L, allowConflict = TRUE, similarity = FALSE, diff --git a/R/tree_distance_path.R b/R/tree_distance_path.R index 7fcbee512..72839b40a 100644 --- a/R/tree_distance_path.R +++ b/R/tree_distance_path.R @@ -9,15 +9,15 @@ #' #' The path distance is calculated by tabulating the cladistic difference (= #' topological distance) between each pair of tips in each tree. -#' A precursor to the path distance \insertCite{Farris1969}{TreeDist} +#' A precursor to the path distance \insertCite{Farris1969}{BigTreeDist} #' took the mean squared #' difference between the elements of each tree's tabulation (Farris, 1973); #' the method used here is that proposed by -#' \insertCite{Steel1993;textual}{TreeDist}, which takes the square root of this +#' \insertCite{Steel1993;textual}{BigTreeDist}, which takes the square root of this #' sum. #' Other precursor measures are described in -#' \insertCite{Williams1971;textual}{TreeDist} and -#' \insertCite{Phipps1971;textual}{TreeDist}. +#' \insertCite{Williams1971;textual}{BigTreeDist} and +#' \insertCite{Phipps1971;textual}{BigTreeDist}. #' #' If a root node is present, trees are treated as rooted. #' To avoid counting the root edge twice, use `UnrootTree(tree)` before @@ -25,7 +25,7 @@ #' #' Use of the path distance is discouraged as it emphasizes #' shallow relationships at the expense of deeper (and arguably more -#' fundamental) relationships \insertCite{Farris1973}{TreeDist}. +#' fundamental) relationships \insertCite{Farris1973}{BigTreeDist}. 
#' #' @template tree12ListParams #' @@ -54,7 +54,7 @@ #' @template MRS #' @family tree distances #' @importFrom phangorn path.dist -#' @importFrom TreeTools Postorder +#' @importFrom BigTreeTools Postorder #' @export PathDist <- function(tree1, tree2 = NULL) { if (inherits(tree1, "phylo")) { diff --git a/R/tree_distance_rf.R b/R/tree_distance_rf.R index 6205e648d..471d57141 100644 --- a/R/tree_distance_rf.R +++ b/R/tree_distance_rf.R @@ -2,15 +2,15 @@ #' content #' #' Calculate the Robinson–Foulds distance -#' \insertCite{Robinson1981}{TreeDist}, or +#' \insertCite{Robinson1981}{BigTreeDist}, or #' the equivalent similarity measure, with options to #' (i) annotate matched splits; #' (ii) weight splits according to their phylogenetic information content -#' \insertCite{SmithDist}{TreeDist}. +#' \insertCite{SmithDist}{BigTreeDist}. #' Whilst slower to calculate, information theoretic modifications of the #' Robinson–Foulds distance (see [`TreeDistance()`]) #' are better suited to most use cases -#' \insertCite{SmithDist,SmithSpace}{TreeDist}. +#' \insertCite{SmithDist,SmithSpace}{BigTreeDist}. #' #' Note that if `reportMatching = TRUE`, the `pairScores` attribute returns #' a logical matrix specifying whether each pair of splits is identical. @@ -22,7 +22,7 @@ #' tree distance, because their similarity is less remarkable. #' #' Rapid comparison between multiple pairs of trees employs the -#' \insertCite{Day1985;textual}{TreeDist} linear-time algorithm. +#' \insertCite{Day1985;textual}{BigTreeDist} linear-time algorithm. 
#' #' @inheritParams TreeDistance #' @param similarity Logical specifying whether to report the result as a tree @@ -96,7 +96,7 @@ InfoRobinsonFouldsSplits <- function(splits1, splits2, } #' @rdname Robinson-Foulds -#' @importFrom TreeTools NSplits as.ClusterTable +#' @importFrom BigTreeTools NSplits as.ClusterTable #' @export RobinsonFoulds <- function(tree1, tree2 = NULL, similarity = FALSE, normalize = FALSE, reportMatching = FALSE) { @@ -129,7 +129,7 @@ RobinsonFoulds <- function(tree1, tree2 = NULL, similarity = FALSE, #' @describeIn Robinson-Foulds Matched splits, intended for use with #' [`VisualizeMatching()`]. #' @param \dots Not used. -#' @importFrom TreeTools NSplits +#' @importFrom BigTreeTools NSplits #' @export RobinsonFouldsMatching <- function(tree1, tree2, similarity = FALSE, normalize = FALSE, ...) { diff --git a/R/tree_distance_spr.R b/R/tree_distance_spr.R index 7ca0ab328..7eb1ddc38 100644 --- a/R/tree_distance_spr.R +++ b/R/tree_distance_spr.R @@ -39,7 +39,7 @@ #' #' @family tree distances #' @importFrom phangorn SPR.dist -#' @importFrom TreeTools Postorder +#' @importFrom BigTreeTools Postorder #' @export SPRDist <- function(tree1, tree2 = NULL, symmetric = TRUE) { if (inherits(tree1, "phylo")) { diff --git a/R/tree_distance_utilities.R b/R/tree_distance_utilities.R index 8b16d67ff..941c6d7b5 100644 --- a/R/tree_distance_utilities.R +++ b/R/tree_distance_utilities.R @@ -8,7 +8,7 @@ #' #' @template MRS #' @keywords internal -#' @importFrom TreeTools as.Splits TipLabels +#' @importFrom BigTreeTools as.Splits TipLabels #' @importFrom utils combn #' @export CalculateTreeDistance <- function(Func, tree1, tree2 = NULL, @@ -207,7 +207,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL, #' Calculate distance between trees, or lists of trees #' @template MRS -#' @importFrom TreeTools TipLabels +#' @importFrom BigTreeTools TipLabels #' @param checks Logical specifying whether to perform basic sanity checks to #' avoid crashes in C++. 
#' @keywords internal @@ -411,7 +411,7 @@ CompareAll <- function(x, Func, FUN.VALUE = Func(x[[1]], x[[1]], ...), #' @param \dots Additional parameters to `InfoInTree()` or `how`. #' @keywords internal #' @template MRS -#' @importFrom TreeTools KeepTip TipLabels +#' @importFrom BigTreeTools KeepTip TipLabels #' @export NormalizeInfo <- function(unnormalized, tree1, tree2, InfoInTree, infoInBoth = NULL, how = TRUE, Combine = "+", ...) { @@ -465,7 +465,7 @@ NormalizeInfo <- function(unnormalized, tree1, tree2, InfoInTree, } # We only call this function when not all trees contain identical leaf sets -#' @importFrom TreeTools KeepTip TipLabels +#' @importFrom BigTreeTools KeepTip TipLabels .SharedOnly <- function(tree1, tree2, lab1 = TipLabels(tree1), lab2 = TipLabels(tree2)) { diff --git a/R/tree_information.R b/R/tree_information.R index 0c747268f..27772abc5 100644 --- a/R/tree_information.R +++ b/R/tree_information.R @@ -16,7 +16,7 @@ #' phylogenetic information content, used in [`SplitwiseInfo()`]. #' In essence, it asks, given a split that subdivides the leaves of a tree into #' two partitions, how easy it is to predict which partition a randomly drawn -#' leaf belongs to \insertCite{@Meila2007; @Vinh2010}{TreeDist}. +#' leaf belongs to \insertCite{@Meila2007; @Vinh2010}{BigTreeDist}. #' #' #' Formally, the entropy of a split _S_ that divides _n_ leaves into two @@ -73,21 +73,21 @@ #' #' #' As entropy measures the bits required to transmit the cluster label of each -#' leaf \insertCite{@@Vinh2010: p. 2840}{TreeDist}, the information content of +#' leaf \insertCite{@@Vinh2010: p. 2840}{BigTreeDist}, the information content of #' a split is its entropy multiplied by the number of leaves. #' #' @section Phylogenetic information: #' #' Phylogenetic information expresses the information content of a split #' in terms of the probability that a uniformly selected tree will contain it -#' \insertCite{Thorley1998}{TreeDist}. +#' \insertCite{Thorley1998}{BigTreeDist}. 
#' #' @section Consensus information: #' #' The information content of splits in a consensus tree is calculated by #' interpreting support values (i.e. the proportion of trees containing #' each split in the consensus) as probabilities that the true tree contains -#' that split, following \insertCite{SmithCons;textual}{TreeDist}. +#' that split, following \insertCite{SmithCons;textual}{BigTreeDist}. #' #' @return `SplitwiseInfo()`, `ClusteringInfo()` and `ClusteringEntropy()` #' return the splitwise information content of the tree -- or of each split @@ -165,7 +165,7 @@ SplitwiseInfo.multiPhylo <- function(x, p = NULL, sum = TRUE) { #' @export SplitwiseInfo.list <- SplitwiseInfo.multiPhylo -#' @importFrom TreeTools Log2Rooted.int Log2Unrooted.int TipsInSplits +#' @importFrom BigTreeTools Log2Rooted.int Log2Unrooted.int TipsInSplits #' @export SplitwiseInfo.Splits <- function(x, p = NULL, sum = TRUE) { nTip <- attr(x, "nTip") @@ -232,7 +232,7 @@ SplitwiseInfo.NULL <- function(x, p = NULL, sum = TRUE) NULL #' tree <- ape::read.tree(text = "(a, b, (c, (d, e, (f, g)0.8))0.9);") #' ClusteringInfo(tree) #' ClusteringInfo(tree, TRUE) -#' @importFrom TreeTools as.Splits +#' @importFrom BigTreeTools as.Splits #' @rdname TreeInfo #' @export ClusteringEntropy <- function(x, p = NULL, sum = TRUE) { diff --git a/R/trustworthiness.R b/R/trustworthiness.R index 72ef3862c..bf148e197 100644 --- a/R/trustworthiness.R +++ b/R/trustworthiness.R @@ -21,7 +21,7 @@ #' Faithfulness of mapped distances #' #' `MappingQuality()` calculates the trustworthiness and continuity -#' of mapped distances \insertCite{Venna2001,Kaski2003}{TreeDist}. +#' of mapped distances \insertCite{Venna2001,Kaski2003}{BigTreeDist}. 
#' Trustworthiness measures, on a scale from 0--1, #' the degree to which points that are nearby in a mapping are truly close #' neighbours; continuity, the extent to which points that are truly nearby diff --git a/R/zzz.R b/R/zzz.R index 826528eaf..10a928603 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,6 +1,6 @@ .onUnload <- function(libpath) { StopParallel(quietly = TRUE) - library.dynam.unload("TreeDist", libpath) + library.dynam.unload("BigTreeDist", libpath) } ## Reminders when releasing for CRAN diff --git a/inst/treespace/app.R b/inst/treespace/app.R index 07709d8db..5799b7867 100644 --- a/inst/treespace/app.R +++ b/inst/treespace/app.R @@ -16,8 +16,8 @@ suppressPackageStartupMessages({ library("shiny", exclude = "runExample") library("shinyjs", exclude = "runExample") }) -library("TreeTools", quietly = TRUE) -library("TreeDist") +library("BigTreeTools", quietly = TRUE) +library("BigTreeDist") if (!requireNamespace("cluster", quietly = TRUE)) install.packages("cluster") if (!requireNamespace("protoclust", quietly = TRUE)) { diff --git a/man/AllSplitPairings.Rd b/man/AllSplitPairings.Rd index 507dc77f4..eeaff261d 100644 --- a/man/AllSplitPairings.Rd +++ b/man/AllSplitPairings.Rd @@ -21,8 +21,8 @@ values should be divided by four. } \description{ Calculate the variation of clustering information -\insertCite{Meila2007}{TreeDist} for each possible pairing of -non-trivial splits on \emph{n} leaves \insertCite{SmithDist}{TreeDist}, +\insertCite{Meila2007}{BigTreeDist} for each possible pairing of +non-trivial splits on \emph{n} leaves \insertCite{SmithDist}{BigTreeDist}, tabulating the number of pairings with each similarity. 
} \examples{ diff --git a/man/TreeDist-package.Rd b/man/BigTreeDist-package.Rd similarity index 63% rename from man/TreeDist-package.Rd rename to man/BigTreeDist-package.Rd index 59a89b1fc..ae9945529 100644 --- a/man/TreeDist-package.Rd +++ b/man/BigTreeDist-package.Rd @@ -1,28 +1,28 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/TreeDist-package.R +% Please edit documentation in R/BigTreeDist-package.R \docType{package} \encoding{UTF-8} -\name{TreeDist-package} -\alias{TreeDist} -\alias{TreeDist-package} -\title{TreeDist: Distances between Phylogenetic Trees} +\name{BigTreeDist-package} +\alias{BigTreeDist} +\alias{BigTreeDist-package} +\title{BigTreeDist: Distances between Phylogenetic Trees} \description{ -'TreeDist' is an R package that implements a suite of metrics that quantify the +'BigTreeDist' is an R package that implements a suite of metrics that quantify the topological distance between pairs of unweighted phylogenetic trees. It also includes a simple "Shiny" application to allow the visualization of distance-based tree spaces, and functions to calculate the information content of trees and splits. } \details{ -"TreeDist" primarily employs metrics in the category of +"BigTreeDist" primarily employs metrics in the category of "generalized Robinson–Foulds distances": they are based on comparing splits (bipartitions) between trees, and thus reflect the relationship data within trees, with no reference to branch lengths. Detailed documentation and usage instructions are -\href{https://ms609.github.io/TreeDist/}{available online} or in the vignettes. +\href{https://ms609.github.io/BigTreeDist/}{available online} or in the vignettes. 
\subsection{Generalized RF distances}{ -The \href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html}{Robinson–Foulds distance} +The \href{https://ms609.github.io/BigTreeDist/articles/Robinson-Foulds.html}{Robinson–Foulds distance} simply tallies the number of non-trivial splits (sometimes inaccurately termed clades, nodes or edges) that occur in both trees -- any splits that are not perfectly identical contributes one point to the distance score of zero, @@ -30,7 +30,7 @@ however similar or different they are. By overlooking potential similarities between almost-identical splits, this conservative approach has undesirable properties. -\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html}{"Generalized" RF metrics} +\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html}{"Generalized" RF metrics} generate \emph{matchings} that pair each split in one tree with a similar split in the other. Each pair of splits is assigned a similarity score; the sum of these scores in @@ -39,7 +39,7 @@ the optimal matching then quantifies the similarity between two trees. Different ways of calculating the the similarity between a pair of splits lead to different tree distance metrics, implemented in the functions below: \itemize{ -\item \href{https://ms609.github.io/TreeDist/reference/TreeDistance.html}{\code{MutualClusteringInfo()}}, \href{https://ms609.github.io/TreeDist/reference/TreeDistance.html}{\code{SharedPhylogeneticInfo()}} +\item \href{https://ms609.github.io/BigTreeDist/reference/TreeDistance.html}{\code{MutualClusteringInfo()}}, \href{https://ms609.github.io/BigTreeDist/reference/TreeDistance.html}{\code{SharedPhylogeneticInfo()}} \itemize{ \item Smith (2020) scores matchings based on the amount of information that one partition contains about the other. 
The Mutual Phylogenetic @@ -47,35 +47,35 @@ Information assigns zero similarity to split pairs that cannot both exist on a single tree; The Mutual Clustering Information metric is more forgiving, and exhibits more desirable behaviour; it is the recommended metric for tree comparison. -(Its complement, \href{https://ms609.github.io/TreeDist/reference/TreeDistance.html}{\code{ClusteringInfoDistance()}}, returns a tree +(Its complement, \href{https://ms609.github.io/BigTreeDist/reference/TreeDistance.html}{\code{ClusteringInfoDistance()}}, returns a tree distance.) } -\item \href{https://ms609.github.io/TreeDist/reference/NyeSimilarity.html}{\code{NyeSimilarity()}} +\item \href{https://ms609.github.io/BigTreeDist/reference/NyeSimilarity.html}{\code{NyeSimilarity()}} \itemize{ \item Nye \emph{et al.} (2006) score matchings according to the size of the largest split that is consistent with both of them, normalized against the Jaccard index. This approach is extended by Böcker \emph{et al}. (2013) with the Jaccard–Robinson–Foulds metric (function -\href{https://ms609.github.io/TreeDist/reference/JaccardRobinsonFoulds.html}{\code{JaccardRobinsonFoulds()}}). +\href{https://ms609.github.io/BigTreeDist/reference/JaccardRobinsonFoulds.html}{\code{JaccardRobinsonFoulds()}}). } -\item \href{https://ms609.github.io/TreeDist/reference/MatchingSplitDistance.html}{\code{MatchingSplitDistance()}} +\item \href{https://ms609.github.io/BigTreeDist/reference/MatchingSplitDistance.html}{\code{MatchingSplitDistance()}} \itemize{ \item Bogdanowicz and Giaro (2012) and Lin \emph{et al.} (2012) independently proposed counting the number of "mismatched" leaves in a pair of splits. -\href{https://ms609.github.io/TreeDist/reference/TreeDistance.html}{\code{MatchingSplitInfoDistance()}} +\href{https://ms609.github.io/BigTreeDist/reference/TreeDistance.html}{\code{MatchingSplitInfoDistance()}} provides an information-based equivalent (Smith 2020). 
} } The package also implements the variation of the path distance proposed by Kendal and Colijn (2016) (function -\href{https://ms609.github.io/TreeDist/reference/KendallColijn.html}{\code{KendallColijn()}}), +\href{https://ms609.github.io/BigTreeDist/reference/KendallColijn.html}{\code{KendallColijn()}}), approximations of the Nearest-Neighbour Interchange (NNI) distance (function -\href{https://ms609.github.io/TreeDist/reference/NNIDist.html}{\code{NNIDist()}}; +\href{https://ms609.github.io/BigTreeDist/reference/NNIDist.html}{\code{NNIDist()}}; following Li \emph{et al.} (1996)), and calculates the size (function -\href{https://ms609.github.io/TreeDist/reference/MASTSize.html}{\code{MASTSize()}}) and +\href{https://ms609.github.io/BigTreeDist/reference/MASTSize.html}{\code{MASTSize()}}) and information content (function -\href{https://ms609.github.io/TreeDist/reference/MASTSize.html}{\code{MASTInfo()}}) of the +\href{https://ms609.github.io/BigTreeDist/reference/MASTSize.html}{\code{MASTInfo()}}) of the Maximum Agreement Subtree. For an implementation of the Tree Bisection and Reconnection (TBR) distance, see @@ -87,30 +87,30 @@ Map tree spaces and readily visualize mapped landscapes, avoiding common analytical pitfalls (Smith, forthcoming), using the inbuilt graphical user interface: -\if{html}{\out{
}}\preformatted{TreeDist::MapTrees() +\if{html}{\out{
}}\preformatted{BigTreeDist::MapTrees() }\if{html}{\out{
}} Serious analysts should consult the -\href{https://ms609.github.io/TreeDist/articles/treespace.html}{vignette} +\href{https://ms609.github.io/BigTreeDist/articles/treespace.html}{vignette} for a command-line interface. } \references{ \itemize{ -\item \insertRef{Bocker2013}{TreeDist} -\item \insertRef{Bogdanowicz2012}{TreeDist} -\item \insertRef{Kendall2016}{TreeDist} -\item \insertRef{Li1996}{TreeDist} -\item \insertRef{Lin2012}{TreeDist} -\item \insertRef{Nye2006}{TreeDist} -\item \insertRef{SmithDist}{TreeDist} -\item \insertRef{SmithSpace}{TreeDist} +\item \insertRef{Bocker2013}{BigTreeDist} +\item \insertRef{Bogdanowicz2012}{BigTreeDist} +\item \insertRef{Kendall2016}{BigTreeDist} +\item \insertRef{Li1996}{BigTreeDist} +\item \insertRef{Lin2012}{BigTreeDist} +\item \insertRef{Nye2006}{BigTreeDist} +\item \insertRef{SmithDist}{BigTreeDist} +\item \insertRef{SmithSpace}{BigTreeDist} } } \seealso{ Further documentation is available in the -\href{https://ms609.github.io/TreeDist/articles/}{package vignettes}, visible from -R using \code{vignette(package = "TreeDist")}. +\href{https://ms609.github.io/BigTreeDist/articles/}{package vignettes}, visible from +R using \code{vignette(package = "BigTreeDist")}. Other R packages implementing tree distance functions include: \itemize{ @@ -121,7 +121,7 @@ Other R packages implementing tree distance functions include: } \item \href{https://cran.r-project.org/package=phangorn}{phangorn} \itemize{ -\item \code{treedist()}: Path, Robinson–Foulds and approximate SPR distances. +\item \code{treedist()}: Path, Robinson–Foulds and approximate SPR distances. } \item \href{https://ms609.github.io/Quartet/}{Quartet}: Triplet and Quartet distances, using the tqDist algorithm. diff --git a/man/CompareAll.Rd b/man/CompareAll.Rd index 92b760bb0..4412c1020 100644 --- a/man/CompareAll.Rd +++ b/man/CompareAll.Rd @@ -10,7 +10,7 @@ CompareAll(x, Func, FUN.VALUE = Func(x[[1]], x[[1]], ...), ...)
\item{x}{List of trees, in the format expected by \code{Func()}.} \item{Func}{distance function returning distance between two trees, -e.g. \code{\link[phangorn:treedist]{path.dist()}}.} +e.g. \code{\link[phangorn:treedist]{path.dist()}}.} \item{FUN.VALUE}{Format of output of \code{Func()}, to be passed to \code{\link[=vapply]{vapply()}}. If unspecified, calculated by running \code{Func(x[[1]], x[[1]])}.} diff --git a/man/JaccardRobinsonFoulds.Rd b/man/JaccardRobinsonFoulds.Rd index 32a143ed3..bcde46e8e 100644 --- a/man/JaccardRobinsonFoulds.Rd +++ b/man/JaccardRobinsonFoulds.Rd @@ -69,9 +69,9 @@ or \code{splits1} and \code{splits2}. } \description{ Calculate the -\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric}{Jaccard–Robinson–Foulds metric} -\insertCite{Bocker2013}{TreeDist}, a -\href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds metric}. +\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric}{Jaccard–Robinson–Foulds metric} +\insertCite{Bocker2013}{BigTreeDist}, a +\href{https://ms609.github.io/BigTreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds metric}. } \details{ In short, the Jaccard–Robinson–Foulds @@ -82,12 +82,12 @@ Matchings are scored according to the size of the largest split that is consistent with both of them, normalized against the Jaccard index, and raised to an arbitrary exponent. A more detailed explanation is provided in the -\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric}{vignettes}. +\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric}{vignettes}. By default, conflicting splits may be paired.
Note that the settings \verb{k = 1, allowConflict = TRUE, similarity = TRUE} -give the similarity metric of \insertCite{Nye2006;textual}{TreeDist}; +give the similarity metric of \insertCite{Nye2006;textual}{BigTreeDist}; a slightly faster implementation of this metric is available as \code{\link[=NyeSimilarity]{NyeSimilarity()}}. diff --git a/man/KendallColijn.Rd b/man/KendallColijn.Rd index cbea16de3..1bc147fea 100644 --- a/man/KendallColijn.Rd +++ b/man/KendallColijn.Rd @@ -29,13 +29,13 @@ or lists of such trees to undergo pairwise comparison. Where implemented, \code{KCVector}, the default, returns the number of edges between the common ancestor of each pair of leaves and the root of the tree -\insertCite{@per @Kendall2016}{TreeDist}. +\insertCite{@per @Kendall2016}{BigTreeDist}. \code{PathVector} returns the number of edges between each pair of leaves -\insertCite{@per @Steel1993}{TreeDist}. +\insertCite{@per @Steel1993}{BigTreeDist}. \code{SplitVector} returns the size of the smallest split that contains each -pair of leaves (per \insertCite{SmithSpace;nobrackets}{TreeDist}).} +pair of leaves (per \insertCite{SmithSpace;nobrackets}{BigTreeDist}).} \item{tree}{A tree of class \code{\link[ape:read.tree]{phylo}}.} } @@ -68,32 +68,32 @@ of the squares of the differences between the vectors. An analogous distance can be created from any vector representation of a tree. -The split size vector metric \insertCite{SmithSpace}{TreeDist} is an attempt +The split size vector metric \insertCite{SmithSpace}{BigTreeDist} is an attempt to mimic the Kendall Colijn metric in situations where the position of the root should not be afforded special significance; and the path distance -\insertCite{Steel1993}{TreeDist} is a familiar alternative whose underlying +\insertCite{Steel1993}{BigTreeDist} is a familiar alternative whose underlying vector measures the distance of the last common ancestor of each pair of leaves from the leaves themselves, i.e. 
the length of the path from one leaf to another. None of these vector-based methods performs as well as other tree distances in measuring similarities in the relationships implied by a pair of trees -\insertCite{SmithDist}{TreeDist}; in particular, the Kendall Colijn +\insertCite{SmithDist}{BigTreeDist}; in particular, the Kendall Colijn metric is strongly influenced by tree balance, and may not be appropriate -for a suite of common applications \insertCite{SmithSpace}{TreeDist}. +for a suite of common applications \insertCite{SmithSpace}{BigTreeDist}. } \section{Functions}{ \itemize{ \item \code{KCVector()}: Creates a vector that characterises a rooted tree, -as described in \insertCite{Kendall2016;textual}{TreeDist}. +as described in \insertCite{Kendall2016;textual}{BigTreeDist}. \item \code{PathVector()}: Creates a vector reporting the number of edges between each pair of leaves, per the path metric of -\insertCite{Steel1993;textual}{TreeDist}. +\insertCite{Steel1993;textual}{BigTreeDist}. \item \code{SplitVector()}: Creates a vector reporting the smallest split containing each pair of leaves, per the metric proposed in -\insertCite{SmithSpace;textual}{TreeDist}. +\insertCite{SmithSpace;textual}{BigTreeDist}. }} \examples{ @@ -120,7 +120,7 @@ KCDiameter(4) \insertAllCited{} } \seealso{ -\href{https://CRAN.R-project.org/package=treespace/vignettes/introduction.html}{\code{treespace::treeDist}} +\href{https://CRAN.R-project.org/package=treespace/vignettes/introduction.html}{\code{treespace::treeDist}} is a more sophisticated, if more cumbersome, implementation that supports lambda > 0, i.e. use of edge lengths in tree comparison. diff --git a/man/LAPJV.Rd b/man/LAPJV.Rd index 561d00326..d1277d0e9 100644 --- a/man/LAPJV.Rd +++ b/man/LAPJV.Rd @@ -15,7 +15,7 @@ optimal matching; and \code{matching}, the columns matched to each row of the matrix in turn.
} \description{ -Use the algorithm of \insertCite{Jonker1987;textual}{TreeDist} to solve the +Use the algorithm of \insertCite{Jonker1987;textual}{BigTreeDist} to solve the \href{http://www.assignmentproblems.com/doc/LSAPIntroduction.pdf}{Linear Sum Assignment Problem}. } \details{ @@ -23,7 +23,7 @@ The Linear Assignment Problem seeks to match each row of a matrix with a column, such that the cost of the matching is minimized. The Jonker & Volgenant approach is a faster alternative to the Hungarian -algorithm \insertCite{Munkres1957}{TreeDist}, which is implemented in +algorithm \insertCite{Munkres1957}{BigTreeDist}, which is implemented in \code{clue::solve_LSAP()}. Note: the JV algorithm expects integers. In order to apply the function diff --git a/man/MASTSize.Rd b/man/MASTSize.Rd index d4f4d4253..fa2885036 100644 --- a/man/MASTSize.Rd +++ b/man/MASTSize.Rd @@ -24,11 +24,11 @@ information content, in bits, of the maximum agreement subtree. } \description{ Calculate the size or phylogenetic information content -\insertCite{Steel2006}{TreeDist} +\insertCite{Steel2006}{BigTreeDist} of the maximum agreement subtree between two phylogenetic trees, i.e. the largest tree that can be obtained from both \code{tree1} and \code{tree2} by deleting, but not rearranging, leaves, using the algorithm of -\insertCite{Valiente2009;textual}{TreeDist}. +\insertCite{Valiente2009;textual}{BigTreeDist}. } \details{ Implemented for trees with up to 4096 tips. Contact the maintainer if you diff --git a/man/MSTSegments.Rd b/man/MSTSegments.Rd index 3df43f766..276421de8 100644 --- a/man/MSTSegments.Rd +++ b/man/MSTSegments.Rd @@ -39,7 +39,7 @@ distance to original distance is small. \description{ To identify strain in a multidimensional scaling of distances, it can be useful to plot a minimum spanning tree -\insertCite{Gower1966,SmithSpace}{TreeDist}. Colouring each edge of the +\insertCite{Gower1966,SmithSpace}{BigTreeDist}. 
Colouring each edge of the tree according to its strain can identify areas where the mapping is stretched or compressed. } diff --git a/man/MapTrees.Rd b/man/MapTrees.Rd index 0c1013160..edce6910a 100644 --- a/man/MapTrees.Rd +++ b/man/MapTrees.Rd @@ -100,7 +100,7 @@ A list of references employed when constructing the tree space is populated according to the methods used; it would be appropriate to cite and briefly discuss these studies in any publication using figures generated using this application. The application itself can be cited using -\insertCite{SmithDist,SmithSpace;textual}{TreeDist} +\insertCite{SmithDist,SmithSpace;textual}{BigTreeDist} } \references{ @@ -108,7 +108,7 @@ this application. The application itself can be cited using } \seealso{ Full detail of tree space analysis in R is provided in the accompanying -\href{https://ms609.github.io/TreeDist/articles/treespace.html}{vignette}. +\href{https://ms609.github.io/BigTreeDist/articles/treespace.html}{vignette}. Other tree space functions: \code{\link{MSTSegments}()}, diff --git a/man/MappingQuality.Rd b/man/MappingQuality.Rd index 9beaf5093..fd1ef39c9 100644 --- a/man/MappingQuality.Rd +++ b/man/MappingQuality.Rd @@ -24,7 +24,7 @@ containing the entries: \code{Trustworthiness}, \code{Continuity}, \code{TxC} } \description{ \code{MappingQuality()} calculates the trustworthiness and continuity -of mapped distances \insertCite{Venna2001,Kaski2003}{TreeDist}. +of mapped distances \insertCite{Venna2001,Kaski2003}{BigTreeDist}. Trustworthiness measures, on a scale from 0--1, the degree to which points that are nearby in a mapping are truly close neighbours; continuity, the extent to which points that are truly nearby diff --git a/man/MatchingSplitDistance.Rd b/man/MatchingSplitDistance.Rd index 30ec70627..1da70590f 100644 --- a/man/MatchingSplitDistance.Rd +++ b/man/MatchingSplitDistance.Rd @@ -51,8 +51,8 @@ or \code{splits1} and \code{splits2}. 
} \description{ Calculate the -\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#matching-split-distance}{Matching Split Distance} -\insertCite{Bogdanowicz2012,Lin2012}{TreeDist} for unrooted binary trees. +\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html#matching-split-distance}{Matching Split Distance} +\insertCite{Bogdanowicz2012,Lin2012}{BigTreeDist} for unrooted binary trees. } \details{ Trees need not contain identical leaves; scores are based on the leaves that @@ -65,7 +65,7 @@ with \code{setdiff(TipLabels(tree1), TipLabels(tree2))}. A normalization value or function must be provided in order to return a normalized value. If you are aware of a generalised formula, please let me know by -\href{https://github.com/ms609/TreeDist/issues/new}{creating a GitHub issue} +\href{https://github.com/ms609/BigTreeDist/issues/new}{creating a GitHub issue} so that it can be implemented. } diff --git a/man/MeilaVariationOfInformation.Rd b/man/MeilaVariationOfInformation.Rd index d96988487..c1d8cc472 100644 --- a/man/MeilaVariationOfInformation.Rd +++ b/man/MeilaVariationOfInformation.Rd @@ -24,11 +24,11 @@ measured in bits. } \description{ Compare a pair of splits viewed as clusterings of taxa, using the variation -of clustering information proposed by \insertCite{Meila2007}{TreeDist}. +of clustering information proposed by \insertCite{Meila2007}{BigTreeDist}. } \details{ This is equivalent to the mutual clustering information -\insertCite{Vinh2010}{TreeDist}. +\insertCite{Vinh2010}{BigTreeDist}. For the total information content, multiply the VoI by the number of leaves. } \examples{ diff --git a/man/NNIDist.Rd b/man/NNIDist.Rd index a0d8bc8fe..3691257a8 100644 --- a/man/NNIDist.Rd +++ b/man/NNIDist.Rd @@ -53,8 +53,8 @@ where \emph{n} is the number of leaves, and \emph{N} the number of internal node i.e. \deqn{n - 2}{_n_ − 2}. 
} \description{ -Use the approach of \insertCite{Li1996;textual}{TreeDist} to approximate the -Nearest Neighbour Interchange distance \insertCite{Robinson1971}{TreeDist} +Use the approach of \insertCite{Li1996;textual}{BigTreeDist} to approximate the +Nearest Neighbour Interchange distance \insertCite{Robinson1971}{BigTreeDist} between phylogenetic trees. } \details{ @@ -73,7 +73,7 @@ analogue of the mergesort algorithm. This takes at most \emph{n} log \emph{n} + NNI operations, and provides a loose upper bound on the NNI score. The maximum number of moves for an \emph{n}-leaf tree (\href{https://oeis.org/A182136}{OEIS A182136}) can be calculated exactly for -small trees \insertCite{Fack2002}{TreeDist}; this provides a tighter upper +small trees \insertCite{Fack2002}{BigTreeDist}; this provides a tighter upper bound, but is unavailable for \emph{n} > 12. \code{NNIDiameter()} reports the limits on this bound. diff --git a/man/NyeSimilarity.Rd b/man/NyeSimilarity.Rd index 003147169..5d98944a9 100644 --- a/man/NyeSimilarity.Rd +++ b/man/NyeSimilarity.Rd @@ -68,14 +68,14 @@ or \code{splits1} and \code{splits2}. } \description{ \code{NyeSimilarity()} and \code{NyeSplitSimilarity()} implement the -\href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds} -tree comparison metric of \insertCite{Nye2006;textual}{TreeDist}. +\href{https://ms609.github.io/BigTreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds} +tree comparison metric of \insertCite{Nye2006;textual}{BigTreeDist}. In short, this finds the optimal matching that pairs each branch from one tree with a branch in the second, where matchings are scored according to the size of the largest split that is consistent with both of them, normalized against the Jaccard index. 
A more detailed account is available in the -\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#nye-et-al--tree-similarity-metric}{vignettes}. +\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html#nye-et-al--tree-similarity-metric}{vignettes}. } \details{ The measure is defined as a similarity score. If \code{similarity = FALSE}, the diff --git a/man/PathDist.Rd b/man/PathDist.Rd index f8a7478c2..f53c03c20 100644 --- a/man/PathDist.Rd +++ b/man/PathDist.Rd @@ -21,21 +21,21 @@ Calculate the path distance between rooted or unrooted trees. } \details{ This function is a wrapper for the function -\code{\link[phangorn:treedist]{path.dist()}} in the phangorn package. +\code{\link[phangorn:treedist]{path.dist()}} in the phangorn package. It pre-processes trees to ensure that their internal representation does not cause the \code{path.dist()} function to crash R. The path distance is calculated by tabulating the cladistic difference (= topological distance) between each pair of tips in each tree. -A precursor to the path distance \insertCite{Farris1969}{TreeDist} +A precursor to the path distance \insertCite{Farris1969}{BigTreeDist} took the mean squared difference between the elements of each tree's tabulation (Farris, 1973); the method used here is that proposed by -\insertCite{Steel1993;textual}{TreeDist}, which takes the square root of this +\insertCite{Steel1993;textual}{BigTreeDist}, which takes the square root of this sum. Other precursor measures are described in -\insertCite{Williams1971;textual}{TreeDist} and -\insertCite{Phipps1971;textual}{TreeDist}. +\insertCite{Williams1971;textual}{BigTreeDist} and +\insertCite{Phipps1971;textual}{BigTreeDist}. If a root node is present, trees are treated as rooted. To avoid counting the root edge twice, use \code{UnrootTree(tree)} before @@ -43,7 +43,7 @@ calculating the path distance.
Use of the path distance is discouraged as it emphasizes shallow relationships at the expense of deeper (and arguably more -fundamental) relationships \insertCite{Farris1973}{TreeDist}. +fundamental) relationships \insertCite{Farris1973}{BigTreeDist}. } \examples{ library("TreeTools") diff --git a/man/Robinson-Foulds.Rd b/man/Robinson-Foulds.Rd index b68033e41..f71621281 100644 --- a/man/Robinson-Foulds.Rd +++ b/man/Robinson-Foulds.Rd @@ -86,15 +86,15 @@ or \code{splits1} and \code{splits2}. } \description{ Calculate the Robinson–Foulds distance -\insertCite{Robinson1981}{TreeDist}, or +\insertCite{Robinson1981}{BigTreeDist}, or the equivalent similarity measure, with options to (i) annotate matched splits; (ii) weight splits according to their phylogenetic information content -\insertCite{SmithDist}{TreeDist}. +\insertCite{SmithDist}{BigTreeDist}. Whilst slower to calculate, information theoretic modifications of the Robinson–Foulds distance (see \code{\link[=TreeDistance]{TreeDistance()}}) are better suited to most use cases -\insertCite{SmithDist,SmithSpace}{TreeDist}. +\insertCite{SmithDist,SmithSpace}{BigTreeDist}. } \details{ Note that if \code{reportMatching = TRUE}, the \code{pairScores} attribute returns @@ -107,7 +107,7 @@ to be identical by chance alone make a smaller contribution to overall tree distance, because their similarity is less remarkable. Rapid comparison between multiple pairs of trees employs the -\insertCite{Day1985;textual}{TreeDist} linear-time algorithm. +\insertCite{Day1985;textual}{BigTreeDist} linear-time algorithm. } \section{Functions}{ \itemize{ diff --git a/man/SPRDist.Rd b/man/SPRDist.Rd index ff83a9b72..95c9958bc 100644 --- a/man/SPRDist.Rd +++ b/man/SPRDist.Rd @@ -28,7 +28,7 @@ Approximate the Subtree Prune and Regraft (SPR) distance. } \details{ \code{SPRDist()} is a wrapper for the function -\code{\link[phangorn:treedist]{SPR.dist()}} in the phangorn package. 
+\code{\link[phangorn:treedist]{SPR.dist()}} in the phangorn package. It pre-processes trees to ensure that their internal representation does not cause the \code{SPR.dist()} function to crash R, and allows an improved (but slower) symmetric heuristic. diff --git a/man/SplitEntropy.Rd b/man/SplitEntropy.Rd index 4388b2747..217b84a96 100644 --- a/man/SplitEntropy.Rd +++ b/man/SplitEntropy.Rd @@ -27,9 +27,9 @@ Calculate the entropy, joint entropy, entropy distance and information content of two splits, treating each split as a division of \emph{n} leaves into two groups. Further details are available in a -\href{https://ms609.github.io/TreeDist/articles/information.html}{vignette}, -\insertCite{Mackay2003;textual}{TreeDist} and -\insertCite{Meila2007;textual}{TreeDist}. +\href{https://ms609.github.io/BigTreeDist/articles/information.html}{vignette}, +\insertCite{Mackay2003;textual}{BigTreeDist} and +\insertCite{Meila2007;textual}{BigTreeDist}. } \examples{ A <- TRUE diff --git a/man/SplitSharedInformation.Rd b/man/SplitSharedInformation.Rd index 57560887e..c7e6cdd88 100644 --- a/man/SplitSharedInformation.Rd +++ b/man/SplitSharedInformation.Rd @@ -31,7 +31,7 @@ once the splits have been arranged such that \emph{A1} fully overlaps with \emph trees consistent with two splits. \code{SplitSharedInformation()} returns the phylogenetic information that two splits -have in common \insertCite{Meila2007}{TreeDist}, in bits. +have in common \insertCite{Meila2007}{BigTreeDist}, in bits. \code{SplitDifferentInformation()} returns the amount of phylogenetic information distinct to one of the two splits, in bits. @@ -39,7 +39,7 @@ distinct to one of the two splits, in bits. \description{ Calculate the phylogenetic information shared, or not shared, between two splits.
-See the \href{https://ms609.github.io/TreeDist/articles/information.html}{accompanying vignette} +See the \href{https://ms609.github.io/BigTreeDist/articles/information.html}{accompanying vignette} for definitions. } \details{ diff --git a/man/StartParallel.Rd b/man/StartParallel.Rd index f52611992..32dedd4ff 100644 --- a/man/StartParallel.Rd +++ b/man/StartParallel.Rd @@ -24,7 +24,7 @@ StopParallel(quietly = FALSE) } \value{ \code{StartParallel()} and \code{SetParallel()} return the previous value of -\code{options("TreeDist-cluster")}. +\code{options("BigTreeDist-cluster")}. \code{GetParallel()} returns the currently specified cluster. @@ -35,17 +35,17 @@ StopParallel(quietly = FALSE) Accelerate distance calculation by employing multiple \acronym{CPU} workers. } \details{ -"TreeDist" parallelizes the calculation of tree to tree distances via +"BigTreeDist" parallelizes the calculation of tree to tree distances via the \code{\link[=parCapply]{parCapply()}} function, using a user-defined cluster specified in -\code{options("TreeDist-cluster")}. +\code{options("BigTreeDist-cluster")}. -\code{StartParallel()} calls \code{parallel::makeCluster()} and tells "TreeDist" to +\code{StartParallel()} calls \code{parallel::makeCluster()} and tells "BigTreeDist" to use the created cluster. -\code{SetParallel()} tells "TreeDist" to use a pre-existing or user-specified +\code{SetParallel()} tells "BigTreeDist" to use a pre-existing or user-specified cluster. -\code{StopParallel()} stops the current TreeDist cluster. +\code{StopParallel()} stops the current BigTreeDist cluster. } \examples{ if (interactive()) { # Only run in terminal diff --git a/man/TreeDistance.Rd b/man/TreeDistance.Rd index 4f2c28d0b..5b61211e1 100644 --- a/man/TreeDistance.Rd +++ b/man/TreeDistance.Rd @@ -142,21 +142,21 @@ phylogenetic or clustering information that two trees hold in common, as proposed in Smith (2020). 
} \details{ -\href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds distances} +\href{https://ms609.github.io/BigTreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds distances} calculate tree similarity by finding an optimal matching that the similarity between a split on one tree and its pair on a second, considering all possible ways to pair splits between trees (including leaving a split unpaired). The methods implemented here use the concepts of -\href{https://ms609.github.io/TreeDist/articles/information.html}{entropy and information} -\insertCite{Mackay2003}{TreeDist} to assign a similarity score between each +\href{https://ms609.github.io/BigTreeDist/articles/information.html}{entropy and information} +\insertCite{Mackay2003}{BigTreeDist} to assign a similarity score between each pair of splits. The returned tree similarity measures state the amount of information, in bits, that the splits in two trees hold in common when they are optimally matched, following -\insertCite{SmithDist;textual}{TreeDist}. +\insertCite{SmithDist;textual}{BigTreeDist}. The complementary tree distance measures state how much information is different in the splits of two trees, under an optimal matching. Where trees contain different tips, tips present in one tree but not the @@ -166,10 +166,10 @@ hold information in common nor differ regarding these tips). \section{Concepts of information}{ The phylogenetic (Shannon) information content and entropy of a split are defined in -\href{https://ms609.github.io/TreeDist/articles/information.html}{a separate vignette}. +\href{https://ms609.github.io/BigTreeDist/articles/information.html}{a separate vignette}. 
Using the mutual (clustering) information -\insertCite{Meila2007,Vinh2010}{TreeDist} of two splits to quantify their +\insertCite{Meila2007,Vinh2010}{BigTreeDist} of two splits to quantify their similarity gives rise to the Mutual Clustering Information measure (\code{MutualClusteringInfo()}, \code{MutualClusteringInfoSplits()}); the entropy distance gives the Clustering Information Distance @@ -189,7 +189,7 @@ The Matching Split Information measure (\code{MatchingSplitInfo()}, splits as the phylogenetic information content of the most informative split that is consistent with both input splits; \code{MatchingSplitInfoDistance()} is the corresponding measure of tree difference. -(\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html}{More information here}.) +(\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html}{More information here}.) } \section{Conversion to distances}{ @@ -197,7 +197,7 @@ To convert similarity measures to distances, it is necessary to subtract the similarity score from a maximum value. In order to generate distance \emph{metrics}, these functions subtract the similarity twice from the total information content (SPI, MSI) or entropy (MCI) of all the splits in -both trees \insertCite{SmithDist}{TreeDist}. +both trees \insertCite{SmithDist}{BigTreeDist}. } \section{Normalization}{ @@ -206,7 +206,7 @@ ranges from zero to (in principle) one. The maximum \strong{distance} is the sum of the information content or entropy of each split in each tree; the maximum \strong{similarity} is half this value. (See Vinh \emph{et al.} (2010, table 3) and -\insertCite{SmithDist;textual}{TreeDist} for +\insertCite{SmithDist;textual}{BigTreeDist} for alternative normalization possibilities.) Note that a distance value of one (= similarity of zero) will seldom be @@ -231,12 +231,12 @@ To balance memory demands and runtime with flexibility, these functions are implemented for trees with up to 2048 leaves. 
To analyse trees with up to 8192 leaves, you will need to a modified version of \pkg{TreeTools}. -First uninstall \pkg{TreeDist} and \pkg{TreeTools} using \code{remove.packages()}. +First uninstall \pkg{BigTreeDist} and \pkg{TreeTools} using \code{remove.packages()}. Then use \code{devtools::install_github("ms609/TreeTools", ref = "more-leaves")} to install the modified \pkg{TreeTools} package. -Finally, install \pkg{TreeDist} using -\code{devtools::install_github("ms609/TreeDist")}. -(\pkg{TreeDist} will need building from source \emph{after} the modified +Finally, install \pkg{BigTreeDist} using +\code{devtools::install_github("ms609/BigTreeDist")}. +(\pkg{BigTreeDist} will need building from source \emph{after} the modified \pkg{TreeTools} package has been installed, as its code links to values set in the TreeTools source code.) diff --git a/man/TreeInfo.Rd b/man/TreeInfo.Rd index a31234d8e..1162d94f4 100644 --- a/man/TreeInfo.Rd +++ b/man/TreeInfo.Rd @@ -84,7 +84,7 @@ in the splits within a tree". Its approach is complementary to the phylogenetic information content, used in \code{\link[=SplitwiseInfo]{SplitwiseInfo()}}. In essence, it asks, given a split that subdivides the leaves of a tree into two partitions, how easy it is to predict which partition a randomly drawn -leaf belongs to \insertCite{@Meila2007; @Vinh2010}{TreeDist}. +leaf belongs to \insertCite{@Meila2007; @Vinh2010}{BigTreeDist}. Formally, the entropy of a split \emph{S} that divides \emph{n} leaves into two partitions of sizes \emph{a} and \emph{b} is given by @@ -134,7 +134,7 @@ requires fewer than two bits for two leaves, so the entropy is less than of A and B.) As entropy measures the bits required to transmit the cluster label of each -leaf \insertCite{@Vinh2010: p. 2840}{TreeDist}, the information content of +leaf \insertCite{@Vinh2010: p. 2840}{BigTreeDist}, the information content of a split is its entropy multiplied by the number of leaves. 
} @@ -143,7 +143,7 @@ a split is its entropy multiplied by the number of leaves. Phylogenetic information expresses the information content of a split in terms of the probability that a uniformly selected tree will contain it -\insertCite{Thorley1998}{TreeDist}. +\insertCite{Thorley1998}{BigTreeDist}. } \section{Consensus information}{ @@ -152,7 +152,7 @@ in terms of the probability that a uniformly selected tree will contain it The information content of splits in a consensus tree is calculated by interpreting support values (i.e. the proportion of trees containing each split in the consensus) as probabilities that the true tree contains -that split, following \insertCite{SmithCons;textual}{TreeDist}. +that split, following \insertCite{SmithCons;textual}{BigTreeDist}. } \examples{ @@ -203,7 +203,7 @@ ConsensusInfo(trees, "clustering") \seealso{ An introduction to the phylogenetic information content of a split is given in \href{https://ms609.github.io/TreeTools/reference/SplitInformation.html}{\code{SplitInformation()}} -and in a \href{https://ms609.github.io/TreeDist/articles/information.html}{package vignette}. +and in a \href{https://ms609.github.io/BigTreeDist/articles/information.html}{package vignette}. Other information functions: \code{\link{SplitEntropy}()}, diff --git a/man/VisualizeMatching.Rd b/man/VisualizeMatching.Rd index f1edd25fa..3d01d840f 100644 --- a/man/VisualizeMatching.Rd +++ b/man/VisualizeMatching.Rd @@ -43,7 +43,7 @@ similarity of the associated splits (\code{FALSE}).} } \description{ Depict the splits that are matched between two trees using a specified -\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html}{Generalized Robinson–Foulds} +\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html}{Generalized Robinson–Foulds} similarity measure. 
} \details{ diff --git a/man/kmeanspp.Rd b/man/kmeanspp.Rd index e279beb37..ac0b7c219 100644 --- a/man/kmeanspp.Rd +++ b/man/kmeanspp.Rd @@ -18,12 +18,12 @@ chosen} \item{\dots}{additional arguments passed to \code{\link[stats]{kmeans}}} } \description{ -k-means++ clustering \insertCite{Arthur2007}{TreeDist} improves the speed and +k-means++ clustering \insertCite{Arthur2007}{BigTreeDist} improves the speed and accuracy of standard \code{\link[stats]{kmeans}} clustering -\insertCite{Hartigan1979}{TreeDist} by preferring initial cluster centres +\insertCite{Hartigan1979}{BigTreeDist} by preferring initial cluster centres that are far from others. A scalable version of the algorithm has been proposed for larger data sets -\insertCite{Bahmani2012}{TreeDist}, but is not implemented here. +\insertCite{Bahmani2012}{BigTreeDist}, but is not implemented here. } \examples{ # Generate random points diff --git a/memcheck/examples.R b/memcheck/examples.R index 1b3c6eb76..60962292f 100644 --- a/memcheck/examples.R +++ b/memcheck/examples.R @@ -1,5 +1,5 @@ # Code to be run with # R -d "valgrind --tool=memcheck --leak-check=full" --vanilla < tests/thisfile.R # First build and install the package. -library("TreeDist") +library("BigTreeDist") devtools::run_examples() diff --git a/memcheck/tests.R b/memcheck/tests.R index 4456a9d46..c96e9aa52 100644 --- a/memcheck/tests.R +++ b/memcheck/tests.R @@ -1,5 +1,5 @@ # Code to be run with # R -d "valgrind --tool=memcheck --leak-check=full" --vanilla < tests/thisfile.R # First build and install the package. -library("TreeDist") +library("BigTreeDist") devtools::test() diff --git a/memcheck/vignettes.R b/memcheck/vignettes.R index 74430cd1c..1b15e8aa4 100644 --- a/memcheck/vignettes.R +++ b/memcheck/vignettes.R @@ -1,5 +1,5 @@ # Code to be run with # R -d "valgrind --tool=memcheck --leak-check=full" --vanilla < tests/thisfile.R # First build and install the package. 
-library("TreeDist") +library("BigTreeDist") devtools::build_vignettes() diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 1e9f1f8a2..26ff6f8da 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -12,7 +12,7 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); // COMCLUST int COMCLUST(List trees); -RcppExport SEXP _TreeDist_COMCLUST(SEXP treesSEXP) { +RcppExport SEXP _BigTreeDist_COMCLUST(SEXP treesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -23,7 +23,7 @@ END_RCPP } // consensus_info double consensus_info(const List trees, const LogicalVector phylo, const NumericVector p); -RcppExport SEXP _TreeDist_consensus_info(SEXP treesSEXP, SEXP phyloSEXP, SEXP pSEXP) { +RcppExport SEXP _BigTreeDist_consensus_info(SEXP treesSEXP, SEXP phyloSEXP, SEXP pSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -36,7 +36,7 @@ END_RCPP } // robinson_foulds_all_pairs IntegerVector robinson_foulds_all_pairs(List tables); -RcppExport SEXP _TreeDist_robinson_foulds_all_pairs(SEXP tablesSEXP) { +RcppExport SEXP _BigTreeDist_robinson_foulds_all_pairs(SEXP tablesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -47,7 +47,7 @@ END_RCPP } // lapjv List lapjv(NumericMatrix x, NumericVector maxX); -RcppExport SEXP _TreeDist_lapjv(SEXP xSEXP, SEXP maxXSEXP) { +RcppExport SEXP _BigTreeDist_lapjv(SEXP xSEXP, SEXP maxXSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -59,7 +59,7 @@ END_RCPP } // cpp_mast int cpp_mast(IntegerMatrix edge1, IntegerMatrix edge2, IntegerVector nTip); -RcppExport SEXP _TreeDist_cpp_mast(SEXP edge1SEXP, SEXP edge2SEXP, SEXP nTipSEXP) { +RcppExport SEXP _BigTreeDist_cpp_mast(SEXP edge1SEXP, SEXP edge2SEXP, SEXP nTipSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -72,7 +72,7 @@ END_RCPP } // cpp_nni_distance IntegerVector cpp_nni_distance(const IntegerMatrix 
edge1, const IntegerMatrix edge2, const IntegerVector nTip); -RcppExport SEXP _TreeDist_cpp_nni_distance(SEXP edge1SEXP, SEXP edge2SEXP, SEXP nTipSEXP) { +RcppExport SEXP _BigTreeDist_cpp_nni_distance(SEXP edge1SEXP, SEXP edge2SEXP, SEXP nTipSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -85,7 +85,7 @@ END_RCPP } // cpp_robinson_foulds_distance List cpp_robinson_foulds_distance(const RawMatrix x, const RawMatrix y, const IntegerVector nTip); -RcppExport SEXP _TreeDist_cpp_robinson_foulds_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { +RcppExport SEXP _BigTreeDist_cpp_robinson_foulds_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -98,7 +98,7 @@ END_RCPP } // cpp_robinson_foulds_info List cpp_robinson_foulds_info(const RawMatrix x, const RawMatrix y, const IntegerVector nTip); -RcppExport SEXP _TreeDist_cpp_robinson_foulds_info(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { +RcppExport SEXP _BigTreeDist_cpp_robinson_foulds_info(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -111,7 +111,7 @@ END_RCPP } // cpp_matching_split_distance List cpp_matching_split_distance(const RawMatrix x, const RawMatrix y, const IntegerVector nTip); -RcppExport SEXP _TreeDist_cpp_matching_split_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { +RcppExport SEXP _BigTreeDist_cpp_matching_split_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -124,7 +124,7 @@ END_RCPP } // cpp_jaccard_similarity List cpp_jaccard_similarity(const RawMatrix x, const RawMatrix y, const IntegerVector nTip, const NumericVector k, const LogicalVector allowConflict); -RcppExport SEXP _TreeDist_cpp_jaccard_similarity(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP, SEXP kSEXP, SEXP allowConflictSEXP) { +RcppExport SEXP _BigTreeDist_cpp_jaccard_similarity(SEXP 
xSEXP, SEXP ySEXP, SEXP nTipSEXP, SEXP kSEXP, SEXP allowConflictSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -139,7 +139,7 @@ END_RCPP } // cpp_msi_distance List cpp_msi_distance(const RawMatrix x, const RawMatrix y, const IntegerVector nTip); -RcppExport SEXP _TreeDist_cpp_msi_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { +RcppExport SEXP _BigTreeDist_cpp_msi_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -152,7 +152,7 @@ END_RCPP } // cpp_mutual_clustering List cpp_mutual_clustering(const RawMatrix x, const RawMatrix y, const IntegerVector nTip); -RcppExport SEXP _TreeDist_cpp_mutual_clustering(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { +RcppExport SEXP _BigTreeDist_cpp_mutual_clustering(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -165,7 +165,7 @@ END_RCPP } // cpp_shared_phylo List cpp_shared_phylo(const RawMatrix x, const RawMatrix y, const IntegerVector nTip); -RcppExport SEXP _TreeDist_cpp_shared_phylo(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { +RcppExport SEXP _BigTreeDist_cpp_shared_phylo(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -178,23 +178,23 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_TreeDist_COMCLUST", (DL_FUNC) &_TreeDist_COMCLUST, 1}, - {"_TreeDist_consensus_info", (DL_FUNC) &_TreeDist_consensus_info, 3}, - {"_TreeDist_robinson_foulds_all_pairs", (DL_FUNC) &_TreeDist_robinson_foulds_all_pairs, 1}, - {"_TreeDist_lapjv", (DL_FUNC) &_TreeDist_lapjv, 2}, - {"_TreeDist_cpp_mast", (DL_FUNC) &_TreeDist_cpp_mast, 3}, - {"_TreeDist_cpp_nni_distance", (DL_FUNC) &_TreeDist_cpp_nni_distance, 3}, - {"_TreeDist_cpp_robinson_foulds_distance", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_distance, 3}, - {"_TreeDist_cpp_robinson_foulds_info", (DL_FUNC) 
&_TreeDist_cpp_robinson_foulds_info, 3}, - {"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3}, - {"_TreeDist_cpp_jaccard_similarity", (DL_FUNC) &_TreeDist_cpp_jaccard_similarity, 5}, - {"_TreeDist_cpp_msi_distance", (DL_FUNC) &_TreeDist_cpp_msi_distance, 3}, - {"_TreeDist_cpp_mutual_clustering", (DL_FUNC) &_TreeDist_cpp_mutual_clustering, 3}, - {"_TreeDist_cpp_shared_phylo", (DL_FUNC) &_TreeDist_cpp_shared_phylo, 3}, + {"_BigTreeDist_COMCLUST", (DL_FUNC) &_BigTreeDist_COMCLUST, 1}, + {"_BigTreeDist_consensus_info", (DL_FUNC) &_BigTreeDist_consensus_info, 3}, + {"_BigTreeDist_robinson_foulds_all_pairs", (DL_FUNC) &_BigTreeDist_robinson_foulds_all_pairs, 1}, + {"_BigTreeDist_lapjv", (DL_FUNC) &_BigTreeDist_lapjv, 2}, + {"_BigTreeDist_cpp_mast", (DL_FUNC) &_BigTreeDist_cpp_mast, 3}, + {"_BigTreeDist_cpp_nni_distance", (DL_FUNC) &_BigTreeDist_cpp_nni_distance, 3}, + {"_BigTreeDist_cpp_robinson_foulds_distance", (DL_FUNC) &_BigTreeDist_cpp_robinson_foulds_distance, 3}, + {"_BigTreeDist_cpp_robinson_foulds_info", (DL_FUNC) &_BigTreeDist_cpp_robinson_foulds_info, 3}, + {"_BigTreeDist_cpp_matching_split_distance", (DL_FUNC) &_BigTreeDist_cpp_matching_split_distance, 3}, + {"_BigTreeDist_cpp_jaccard_similarity", (DL_FUNC) &_BigTreeDist_cpp_jaccard_similarity, 5}, + {"_BigTreeDist_cpp_msi_distance", (DL_FUNC) &_BigTreeDist_cpp_msi_distance, 3}, + {"_BigTreeDist_cpp_mutual_clustering", (DL_FUNC) &_BigTreeDist_cpp_mutual_clustering, 3}, + {"_BigTreeDist_cpp_shared_phylo", (DL_FUNC) &_BigTreeDist_cpp_shared_phylo, 3}, {NULL, NULL, 0} }; -RcppExport void R_init_TreeDist(DllInfo *dll) { +RcppExport void R_init_BigTreeDist(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } diff --git a/src/day_1985.cpp b/src/day_1985.cpp index 4cdc640ec..ba5cee75b 100644 --- a/src/day_1985.cpp +++ b/src/day_1985.cpp @@ -4,10 +4,10 @@ using namespace Rcpp; #include "tree_distances.h" /* includes */ 
#include "information.h" -#include /* for root_on_node() */ -#include /* for root_on_node() */ -#include /* for ClusterTable() */ -using TreeTools::ClusterTable; +#include /* for root_on_node() */ +#include /* for root_on_node() */ +#include /* for ClusterTable() */ +using BigTreeTools::ClusterTable; #include /* for array */ #include /* for bitset */ diff --git a/src/information.h b/src/information.h index 6e0d95cbd..e186d44cb 100644 --- a/src/information.h +++ b/src/information.h @@ -2,7 +2,7 @@ #define _TREEDIST_INFO_H #include /* for log2() */ -#include /* for CT_MAX_LEAVES */ +#include /* for CT_MAX_LEAVES */ #include "ints.h" /* for int16 */ diff --git a/src/nni_distance.cpp b/src/nni_distance.cpp index bbc88d79f..ad6e9c712 100644 --- a/src/nni_distance.cpp +++ b/src/nni_distance.cpp @@ -1,13 +1,13 @@ #include -#include +#include #include #include "tree_distances.h" using namespace Rcpp; -using TreeTools::SplitList; -using TreeTools::powers_of_two; +using BigTreeTools::SplitList; +using BigTreeTools::powers_of_two; #define PARENT1(i) edge1(i, 0) #define PARENT2(i) edge2(i, 0) diff --git a/src/tree_distance_functions.cpp b/src/tree_distance_functions.cpp index 75a5ef839..cdc5b7e81 100644 --- a/src/tree_distance_functions.cpp +++ b/src/tree_distance_functions.cpp @@ -1,5 +1,5 @@ #include -#include /* for SL_MAX_TIPS */ +#include /* for SL_MAX_TIPS */ #include /* for log2() */ diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 51b38238c..8df39eb01 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -1,15 +1,15 @@ -#include +#include #include #include /* for unique_ptr, make_unique */ #include #include "tree_distances.h" using namespace Rcpp; -using TreeTools::SplitList; -using TreeTools::bitcounts; -using TreeTools::count_bits; +using BigTreeTools::SplitList; +using BigTreeTools::bitcounts; +using BigTreeTools::count_bits; -TREETOOLS_SPLITLIST_INIT +BIGTREETOOLS_SPLITLIST_INIT // [[Rcpp::export]] List cpp_robinson_foulds_distance 
(const RawMatrix x, const RawMatrix y, diff --git a/src/tree_distances.h b/src/tree_distances.h index c4182f3d5..2d2e4ef9a 100644 --- a/src/tree_distances.h +++ b/src/tree_distances.h @@ -1,7 +1,7 @@ #ifndef _TREEDIST_TREE_DISTANCES_H #define _TREEDIST_TREE_DISTANCES_H -#include +#include #include #include /* for numeric_limits */ From 547ef0d2075a8bf76be5cf65861c8ef975dcb3e1 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:12:58 +0100 Subject: [PATCH 02/14] Big* --- tests/testthat/test-parallel.R | 2 +- tests/testthat/test-plot.R | 6 +++--- tests/testthat/test-tree_distance.R | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/testthat/test-parallel.R b/tests/testthat/test-parallel.R index 92f092bc2..0e2d7d305 100644 --- a/tests/testthat/test-parallel.R +++ b/tests/testthat/test-parallel.R @@ -1,5 +1,5 @@ test_that("Parallelization works", { - library("TreeTools") + library("BigTreeTools") trees <- as.phylo(0:20, 20) suppressMessages({ diff --git a/tests/testthat/test-plot.R b/tests/testthat/test-plot.R index 05a2d3677..623e79d10 100644 --- a/tests/testthat/test-plot.R +++ b/tests/testthat/test-plot.R @@ -3,7 +3,7 @@ library("TreeTools") test_that("TreeDistPlot() warns", { expect_warning( expect_warning( - expect_null(TreeDist::TreeDistPlot(PectinateTree(8))), + expect_null(BigTreeDist::TreeDistPlot(PectinateTree(8))), "Leaves.*must be labelled with integers"), "fewer than 2 tips" # From plot.phylo: I don't understand why! 
) @@ -13,13 +13,13 @@ test_that("TreeDistPlot() works", { tr <- PectinateTree(1:11) tr$edge.width <- rep(1:2, 10) Test1 <- function() { - TreeDist::TreeDistPlot(tr, title = "Test", + BigTreeDist::TreeDistPlot(tr, title = "Test", bold = c(2, 4, 6), leaveRoom = TRUE, prune = 1, graft = 10) } Test2 <- function() { - TreeDist::TreeDistPlot(tr, title="Crop tightly", + BigTreeDist::TreeDistPlot(tr, title="Crop tightly", bold = c(2, 4, 6), prune = 11, graft = 10, leaveRoom = FALSE) } diff --git a/tests/testthat/test-tree_distance.R b/tests/testthat/test-tree_distance.R index 462489380..b85d3920b 100644 --- a/tests/testthat/test-tree_distance.R +++ b/tests/testthat/test-tree_distance.R @@ -603,37 +603,37 @@ test_that("Matchings are correct", { } - Test(TreeDist:::cpp_robinson_foulds_distance, + Test(BigTreeDist:::cpp_robinson_foulds_distance, list(NA, 2, NA, 3, NA, NA, 5, NA), list(NA, 2, 4, NA, 7, NA) ) - Test(TreeDist:::cpp_robinson_foulds_info, + Test(BigTreeDist:::cpp_robinson_foulds_info, list(NA, 2, NA, 3, NA, NA, 5, NA), list(NA, 2, 4, NA, 7, NA) ) - Test(TreeDist:::cpp_matching_split_distance, + Test(BigTreeDist:::cpp_matching_split_distance, list(1, 2, 4, 3, NA, NA, 5, 6), list(1, 2, 5, 4, 7, 6) ) - Test(TreeDist:::cpp_jaccard_similarity, + Test(BigTreeDist:::cpp_jaccard_similarity, list(NA, 2, 1, 3, 4, 6, 5, NA), list(3, 2, 4, 5, 7, 6), k = 2, allowConflict = TRUE) - Test(TreeDist:::cpp_jaccard_similarity, + Test(BigTreeDist:::cpp_jaccard_similarity, list(NA, 2, 1, 3, NA, 6, 5, 4), list(3, 2, 4, 1, 7, 6), k = 2, allowConflict = FALSE) - Test(TreeDist:::cpp_msi_distance, + Test(BigTreeDist:::cpp_msi_distance, list(NA, 2, 1, 4, 3, 6, 5, NA), list(3, 2, c(4, 5), c(4, 5), c(6, 7), c(7, 6)) ) - Test(TreeDist:::cpp_shared_phylo, + Test(BigTreeDist:::cpp_shared_phylo, list(NA, 2, 4, 3, 1, 6, 5, NA), list(5, 2, 4, 3, 7, 6) ) - Test(TreeDist:::cpp_mutual_clustering, + Test(BigTreeDist:::cpp_mutual_clustering, list(4, 2, NA, 3, 6, NA, 5, 1), list(8, 2, 4, 5, 7, 1) ) From 
944fd7febee56948c7403fa2ce1f9dceef47f518 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:13:07 +0100 Subject: [PATCH 03/14] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1a63de0a0..2a2ce5213 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -48,7 +48,7 @@ Imports: Rdpack (>= 0.7), shiny, shinyjs, - TreeTools (>= 1.7.2), + BigTreeTools (>= 1.7.2), Suggests: bookdown, cluster, From 090a6d98aa77f59c66fac00ecb65c1382cf2e6ef Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:13:19 +0100 Subject: [PATCH 04/14] Big --- tests/testthat.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat.R b/tests/testthat.R index 497eca5c9..fc8ad6570 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,4 @@ library("testthat", warn.conflicts = FALSE) -library("TreeDist") +library("BigTreeDist") test_check("TreeDist") From 9d03d8bfbb71007654255ebb24b4f2b371112b73 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:18:29 +0100 Subject: [PATCH 05/14] Big* --- ...eeDist-package.R => BigTreeDist-package.R} | 0 R/Information.R | 10 ++-- R/kmeanspp.R | 6 +- R/lap.R | 4 +- man/BigTreeDist-package.Rd | 58 +++++++++---------- man/CompareAll.Rd | 2 +- man/JaccardRobinsonFoulds.Rd | 8 +-- man/KendallColijn.Rd | 2 +- man/MapTrees.Rd | 2 +- man/MatchingSplitDistance.Rd | 4 +- man/NyeSimilarity.Rd | 6 +- man/PathDist.Rd | 2 +- man/SPRDist.Rd | 2 +- man/SplitEntropy.Rd | 2 +- man/SplitSharedInformation.Rd | 2 +- man/StartParallel.Rd | 12 ++-- man/TreeDistance.Rd | 18 +++--- man/TreeInfo.Rd | 2 +- man/VisualizeMatching.Rd | 2 +- 19 files changed, 72 insertions(+), 72 deletions(-) rename R/{TreeDist-package.R => BigTreeDist-package.R} (100%) diff --git a/R/TreeDist-package.R b/R/BigTreeDist-package.R similarity index 100% 
rename from R/TreeDist-package.R rename to R/BigTreeDist-package.R diff --git a/R/Information.R b/R/Information.R index b907e9da9..3edff2ec5 100644 --- a/R/Information.R +++ b/R/Information.R @@ -22,7 +22,7 @@ #' trees consistent with two splits. #' #' `SplitSharedInformation()` returns the phylogenetic information that two splits -#' have in common \insertCite{Meila2007}{TreeDist}, in bits. +#' have in common \insertCite{Meila2007}{BigTreeDist}, in bits. #' #' `SplitDifferentInformation()` returns the amount of phylogenetic information #' distinct to one of the two splits, in bits. @@ -146,8 +146,8 @@ MeilaMutualInformation <- function(split1, split2) { #' Variation of information for all split pairings #' #' Calculate the variation of clustering information -#' \insertCite{Meila2007}{TreeDist} for each possible pairing of -#' non-trivial splits on _n_ leaves \insertCite{SmithDist}{TreeDist}, +#' \insertCite{Meila2007}{BigTreeDist} for each possible pairing of +#' non-trivial splits on _n_ leaves \insertCite{SmithDist}{BigTreeDist}, #' tabulating the number of pairings with each similarity. #' #' @param n Integer specifying the number of leaves in a tree. @@ -211,8 +211,8 @@ AllSplitPairings <- memoise(function(n) { #' two groups. #' Further details are available in a #' [vignette](https://ms609.github.io/TreeDist/articles/information.html), -#' \insertCite{Mackay2003;textual}{TreeDist} and -#' \insertCite{Meila2007;textual}{TreeDist}. +#' \insertCite{Mackay2003;textual}{BigTreeDist} and +#' \insertCite{Meila2007;textual}{BigTreeDist}. 
#' #' @template split12Params #' diff --git a/R/kmeanspp.R b/R/kmeanspp.R index 0628de62b..d1c03206d 100644 --- a/R/kmeanspp.R +++ b/R/kmeanspp.R @@ -1,11 +1,11 @@ #' k-means++ clustering #' -#' k-means++ clustering \insertCite{Arthur2007}{TreeDist} improves the speed and +#' k-means++ clustering \insertCite{Arthur2007}{BigTreeDist} improves the speed and #' accuracy of standard \code{\link[stats]{kmeans}} clustering -#' \insertCite{Hartigan1979}{TreeDist} by preferring initial cluster centres +#' \insertCite{Hartigan1979}{BigTreeDist} by preferring initial cluster centres #' that are far from others. #' A scalable version of the algorithm has been proposed for larger data sets -#' \insertCite{Bahmani2012}{TreeDist}, but is not implemented here. +#' \insertCite{Bahmani2012}{BigTreeDist}, but is not implemented here. #' #' @param x Numeric matrix of data, or an object that can be coerced to such a #' matrix (such as a numeric vector or a data frame with all numeric columns). diff --git a/R/lap.R b/R/lap.R index 6182ed63f..5bf47e071 100644 --- a/R/lap.R +++ b/R/lap.R @@ -1,6 +1,6 @@ #' Solve linear assignment problem using LAPJV #' -#' Use the algorithm of \insertCite{Jonker1987;textual}{TreeDist} to solve the +#' Use the algorithm of \insertCite{Jonker1987;textual}{BigTreeDist} to solve the #' [Linear Sum Assignment Problem]( #' http://www.assignmentproblems.com/doc/LSAPIntroduction.pdf). #' @@ -8,7 +8,7 @@ #' column, such that the cost of the matching is minimized. #' #' The Jonker & Volgenant approach is a faster alternative to the Hungarian -#' algorithm \insertCite{Munkres1957}{TreeDist}, which is implemented in +#' algorithm \insertCite{Munkres1957}{BigTreeDist}, which is implemented in #' `clue::solve_LSAP()`. #' #' Note: the JV algorithm expects integers. 
In order to apply the function diff --git a/man/BigTreeDist-package.Rd b/man/BigTreeDist-package.Rd index ae9945529..23a2d906a 100644 --- a/man/BigTreeDist-package.Rd +++ b/man/BigTreeDist-package.Rd @@ -5,24 +5,24 @@ \name{BigTreeDist-package} \alias{BigTreeDist} \alias{BigTreeDist-package} -\title{BigTreeDist: Distances between Phylogenetic Trees} +\title{TreeDist: Distances between Phylogenetic Trees} \description{ -'BigTreeDist' is an R package that implements a suite of metrics that quantify the +'TreeDist' is an R package that implements a suite of metrics that quantify the topological distance between pairs of unweighted phylogenetic trees. It also includes a simple "Shiny" application to allow the visualization of distance-based tree spaces, and functions to calculate the information content of trees and splits. } \details{ -"BigTreeDist" primarily employs metrics in the category of +"TreeDist" primarily employs metrics in the category of "generalized Robinson–Foulds distances": they are based on comparing splits (bipartitions) between trees, and thus reflect the relationship data within trees, with no reference to branch lengths. Detailed documentation and usage instructions are -\href{https://ms609.github.io/BigTreeDist/}{available online} or in the vignettes. +\href{https://ms609.github.io/TreeDist/}{available online} or in the vignettes. \subsection{Generalized RF distances}{ -The \href{https://ms609.github.io/BigTreeDist/articles/Robinson-Foulds.html}{Robinson–Foulds distance} +The \href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html}{Robinson–Foulds distance} simply tallies the number of non-trivial splits (sometimes inaccurately termed clades, nodes or edges) that occur in both trees -- any splits that are not perfectly identical contributes one point to the distance score of zero, @@ -30,7 +30,7 @@ however similar or different they are. 
By overlooking potential similarities between almost-identical splits, this conservative approach has undesirable properties. -\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html}{"Generalized" RF metrics} +\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html}{"Generalized" RF metrics} generate \emph{matchings} that pair each split in one tree with a similar split in the other. Each pair of splits is assigned a similarity score; the sum of these scores in @@ -39,7 +39,7 @@ the optimal matching then quantifies the similarity between two trees. Different ways of calculating the the similarity between a pair of splits lead to different tree distance metrics, implemented in the functions below: \itemize{ -\item \href{https://ms609.github.io/BigTreeDist/reference/TreeDistance.html}{\code{MutualClusteringInfo()}}, \href{https://ms609.github.io/BigTreeDist/reference/TreeDistance.html}{\code{SharedPhylogeneticInfo()}} +\item \href{https://ms609.github.io/TreeDist/reference/TreeDistance.html}{\code{MutualClusteringInfo()}}, \href{https://ms609.github.io/TreeDist/reference/TreeDistance.html}{\code{SharedPhylogeneticInfo()}} \itemize{ \item Smith (2020) scores matchings based on the amount of information that one partition contains about the other. The Mutual Phylogenetic @@ -47,35 +47,35 @@ Information assigns zero similarity to split pairs that cannot both exist on a single tree; The Mutual Clustering Information metric is more forgiving, and exhibits more desirable behaviour; it is the recommended metric for tree comparison. -(Its complement, \href{https://ms609.github.io/BigTreeDist/reference/TreeDistance.html}{\code{ClusteringInfoDistance()}}, returns a tree +(Its complement, \href{https://ms609.github.io/TreeDist/reference/TreeDistance.html}{\code{ClusteringInfoDistance()}}, returns a tree distance.) 
} -\item \href{https://ms609.github.io/BigTreeDist/reference/NyeSimilarity.html}{\code{NyeSimilarity()}} +\item \href{https://ms609.github.io/TreeDist/reference/NyeSimilarity.html}{\code{NyeSimilarity()}} \itemize{ \item Nye \emph{et al.} (2006) score matchings according to the size of the largest split that is consistent with both of them, normalized against the Jaccard index. This approach is extended by Böcker \emph{et al}. (2013) with the Jaccard–Robinson–Foulds metric (function -\href{https://ms609.github.io/BigTreeDist/reference/JaccardRobinsonFoulds.html}{\code{JaccardRobinsonFoulds()}}). +\href{https://ms609.github.io/TreeDist/reference/JaccardRobinsonFoulds.html}{\code{JaccardRobinsonFoulds()}}). } -\item \href{https://ms609.github.io/BigTreeDist/reference/MatchingSplitDistance.html}{\code{MatchingSplitDistance()}} +\item \href{https://ms609.github.io/TreeDist/reference/MatchingSplitDistance.html}{\code{MatchingSplitDistance()}} \itemize{ \item Bogdanowicz and Giaro (2012) and Lin \emph{et al.} (2012) independently proposed counting the number of "mismatched" leaves in a pair of splits. -\href{https://ms609.github.io/BigTreeDist/reference/TreeDistance.html}{\code{MatchingSplitInfoDistance()}} +\href{https://ms609.github.io/TreeDist/reference/TreeDistance.html}{\code{MatchingSplitInfoDistance()}} provides an information-based equivalent (Smith 2020). 
} } The package also implements the variation of the path distance proposed by Kendal and Colijn (2016) (function -\href{https://ms609.github.io/BigTreeDist/reference/KendallColijn.html}{\code{KendallColijn()}}), +\href{https://ms609.github.io/TreeDist/reference/KendallColijn.html}{\code{KendallColijn()}}), approximations of the Nearest-Neighbour Interchange (NNI) distance (function -\href{https://ms609.github.io/BigTreeDist/reference/NNIDist.html}{\code{NNIDist()}}; +\href{https://ms609.github.io/TreeDist/reference/NNIDist.html}{\code{NNIDist()}}; following Li \emph{et al.} (1996)), and calculates the size (function -\href{https://ms609.github.io/BigTreeDist/reference/MASTSize.html}{\code{MASTSize()}}) and +\href{https://ms609.github.io/TreeDist/reference/MASTSize.html}{\code{MASTSize()}}) and information content (function -\href{https://ms609.github.io/BigTreeDist/reference/MASTSize.html}{\code{MASTInfo()}}) of the +\href{https://ms609.github.io/TreeDist/reference/MASTSize.html}{\code{MASTInfo()}}) of the Maximum Agreement Subtree. For an implementation of the Tree Bisection and Reconnection (TBR) distance, see @@ -87,30 +87,30 @@ Map tree spaces and readily visualize mapped landscapes, avoiding common analytical pitfalls (Smith, forthcoming), using the inbuilt graphical user interface: -\if{html}{\out{
}}\preformatted{BigTreeDist::MapTrees() +\if{html}{\out{
}}\preformatted{TreeDist::MapTrees() }\if{html}{\out{
}} Serious analysts should consult the -\href{https://ms609.github.io/BigTreeDist/articles/treespace.html}{vignette} +\href{https://ms609.github.io/TreeDist/articles/treespace.html}{vignette} for a command-line interface. } \references{ \itemize{ -\item \insertRef{Bocker2013}{BigTreeDist} -\item \insertRef{Bogdanowicz2012}{BigTreeDist} -\item \insertRef{Kendall2016}{BigTreeDist} -\item \insertRef{Li1996}{BigTreeDist} -\item \insertRef{Lin2012}{BigTreeDist} -\item \insertRef{Nye2006}{BigTreeDist} -\item \insertRef{SmithDist}{BigTreeDist} -\item \insertRef{SmithSpace}{BigTreeDist} +\item \insertRef{Bocker2013}{TreeDist} +\item \insertRef{Bogdanowicz2012}{TreeDist} +\item \insertRef{Kendall2016}{TreeDist} +\item \insertRef{Li1996}{TreeDist} +\item \insertRef{Lin2012}{TreeDist} +\item \insertRef{Nye2006}{TreeDist} +\item \insertRef{SmithDist}{TreeDist} +\item \insertRef{SmithSpace}{TreeDist} } } \seealso{ Further documentation is available in the -\href{https://ms609.github.io/BigTreeDist/articles/}{package vignettes}, visible from -R using \code{vignette(package = "BigTreeDist")}. +\href{https://ms609.github.io/TreeDist/articles/}{package vignettes}, visible from +R using \code{vignette(package = "TreeDist")}. Other R packages implementing tree distance functions include: \itemize{ @@ -121,7 +121,7 @@ Other R packages implementing tree distance functions include: } \item \href{https://cran.r-project.org/package=phangorn}{phangorn} \itemize{ -\item \code{BigTreeDist()}: Path, Robinson–Foulds and approximate SPR distances. +\item \code{treedist()}: Path, Robinson–Foulds and approximate SPR distances. } \item \href{https://ms609.github.io/Quartet/}{Quartet}: Triplet and Quartet distances, using the tqDist algorithm. diff --git a/man/CompareAll.Rd b/man/CompareAll.Rd index 4412c1020..92b760bb0 100644 --- a/man/CompareAll.Rd +++ b/man/CompareAll.Rd @@ -10,7 +10,7 @@ CompareAll(x, Func, FUN.VALUE = Func(x[[1]], x[[1]], ...), ...) 
\item{x}{List of trees, in the format expected by \code{Func()}.} \item{Func}{distance function returning distance between two trees, -e.g. \code{\link[phangorn:BigTreeDist]{path.dist()}}.} +e.g. \code{\link[phangorn:treedist]{path.dist()}}.} \item{FUN.VALUE}{Format of output of \code{Func()}, to be passed to \code{\link[=vapply]{vapply()}}. If unspecified, calculated by running \code{Func(x[[1]], x[[1]])}.} diff --git a/man/JaccardRobinsonFoulds.Rd b/man/JaccardRobinsonFoulds.Rd index bcde46e8e..5b7774b09 100644 --- a/man/JaccardRobinsonFoulds.Rd +++ b/man/JaccardRobinsonFoulds.Rd @@ -69,9 +69,9 @@ or \code{splits1} and \code{splits2}. } \description{ Calculate the -\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric}{Jaccard–Robinson–Foulds metric} -\insertCite{Bocker2013}{BigTreeDist}, a -\href{https://ms609.github.io/BigTreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds metric}. +\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric}{Jaccard–Robinson–Foulds metric} +\insertCite{Bocker2013}{TreeDist}, a +\href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds metric}. } \details{ In short, the Jaccard–Robinson–Foulds @@ -82,7 +82,7 @@ Matchings are scored according to the size of the largest split that is consistent with both of them, normalized against the Jaccard index, and raised to an arbitrary exponent. A more detailed explanation is provided in the -\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric}{vignettes}. +\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric}{vignettes}. By default, conflicting splits may be paired. 
diff --git a/man/KendallColijn.Rd b/man/KendallColijn.Rd index 1bc147fea..ceac963d3 100644 --- a/man/KendallColijn.Rd +++ b/man/KendallColijn.Rd @@ -120,7 +120,7 @@ KCDiameter(4) \insertAllCited{} } \seealso{ -\href{https://CRAN.R-project.org/package=treespace/vignettes/introduction.html}{\code{treespace::BigTreeDist}} +\href{https://CRAN.R-project.org/package=treespace/vignettes/introduction.html}{\code{treespace::treeDist}} is a more sophisticated, if more cumbersome, implementation that supports lambda > 0, i.e. use of edge lengths in tree comparison. diff --git a/man/MapTrees.Rd b/man/MapTrees.Rd index edce6910a..87a0809b2 100644 --- a/man/MapTrees.Rd +++ b/man/MapTrees.Rd @@ -108,7 +108,7 @@ this application. The application itself can be cited using } \seealso{ Full detail of tree space analysis in R is provided in the accompanying -\href{https://ms609.github.io/BigTreeDist/articles/treespace.html}{vignette}. +\href{https://ms609.github.io/TreeDist/articles/treespace.html}{vignette}. Other tree space functions: \code{\link{MSTSegments}()}, diff --git a/man/MatchingSplitDistance.Rd b/man/MatchingSplitDistance.Rd index 1da70590f..068bde479 100644 --- a/man/MatchingSplitDistance.Rd +++ b/man/MatchingSplitDistance.Rd @@ -51,8 +51,8 @@ or \code{splits1} and \code{splits2}. } \description{ Calculate the -\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html#matching-split-distance}{Matching Split Distance} -\insertCite{Bogdanowicz2012,Lin2012}{BigTreeDist} for unrooted binary trees. +\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#matching-split-distance}{Matching Split Distance} +\insertCite{Bogdanowicz2012,Lin2012}{TreeDist} for unrooted binary trees. 
} \details{ Trees need not contain identical leaves; scores are based on the leaves that diff --git a/man/NyeSimilarity.Rd b/man/NyeSimilarity.Rd index 5d98944a9..003147169 100644 --- a/man/NyeSimilarity.Rd +++ b/man/NyeSimilarity.Rd @@ -68,14 +68,14 @@ or \code{splits1} and \code{splits2}. } \description{ \code{NyeSimilarity()} and \code{NyeSplitSimilarity()} implement the -\href{https://ms609.github.io/BigTreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds} -tree comparison metric of \insertCite{Nye2006;textual}{BigTreeDist}. +\href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds} +tree comparison metric of \insertCite{Nye2006;textual}{TreeDist}. In short, this finds the optimal matching that pairs each branch from one tree with a branch in the second, where matchings are scored according to the size of the largest split that is consistent with both of them, normalized against the Jaccard index. A more detailed account is available in the -\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html#nye-et-al--tree-similarity-metric}{vignettes}. +\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#nye-et-al--tree-similarity-metric}{vignettes}. } \details{ The measure is defined as a similarity score. If \code{similarity = FALSE}, the diff --git a/man/PathDist.Rd b/man/PathDist.Rd index f53c03c20..2a28e6640 100644 --- a/man/PathDist.Rd +++ b/man/PathDist.Rd @@ -21,7 +21,7 @@ Calculate the path distance between rooted or unrooted trees. } \details{ This function is a wrapper for the function -\code{\link[phangorn:BigTreeDist]{path.dist()}} in the phangorn package. +\code{\link[phangorn:treedist]{path.dist()}} in the phangorn package. It pre-processes trees to ensure that their internal representation does not cause the \code{path.dist()} function to crash R. 
diff --git a/man/SPRDist.Rd b/man/SPRDist.Rd index 95c9958bc..ff83a9b72 100644 --- a/man/SPRDist.Rd +++ b/man/SPRDist.Rd @@ -28,7 +28,7 @@ Approximate the Subtree Prune and Regraft (SPR) distance. } \details{ \code{SPRDist()} is a wrapper for the function -\code{\link[phangorn:BigTreeDist]{SPR.dist()}} in the phangorn package. +\code{\link[phangorn:treedist]{SPR.dist()}} in the phangorn package. It pre-processes trees to ensure that their internal representation does not cause the \code{SPR.dist()} function to crash R, and allows an improved (but slower) symmetric heuristic. diff --git a/man/SplitEntropy.Rd b/man/SplitEntropy.Rd index 217b84a96..36d66efb1 100644 --- a/man/SplitEntropy.Rd +++ b/man/SplitEntropy.Rd @@ -27,7 +27,7 @@ Calculate the entropy, joint entropy, entropy distance and information content of two splits, treating each split as a division of \emph{n} leaves into two groups. Further details are available in a -\href{https://ms609.github.io/BigTreeDist/articles/information.html}{vignette}, +\href{https://ms609.github.io/TreeDist/articles/information.html}{vignette}, \insertCite{Mackay2003;textual}{BigTreeDist} and \insertCite{Meila2007;textual}{BigTreeDist}. } diff --git a/man/SplitSharedInformation.Rd b/man/SplitSharedInformation.Rd index c7e6cdd88..e97137e1f 100644 --- a/man/SplitSharedInformation.Rd +++ b/man/SplitSharedInformation.Rd @@ -39,7 +39,7 @@ distinct to one of the two splits, in bits. \description{ Calculate the phylogenetic information shared, or not shared, between two splits. -See the \href{https://ms609.github.io/BigTreeDist/articles/information.html}{accompanying vignette} +See the \href{https://ms609.github.io/TreeDist/articles/information.html}{accompanying vignette} for definitions. 
} \details{ diff --git a/man/StartParallel.Rd b/man/StartParallel.Rd index 32dedd4ff..f52611992 100644 --- a/man/StartParallel.Rd +++ b/man/StartParallel.Rd @@ -24,7 +24,7 @@ StopParallel(quietly = FALSE) } \value{ \code{StartParallel()} and \code{SetParallel()} return the previous value of -\code{options("BigTreeDist-cluster")}. +\code{options("TreeDist-cluster")}. \code{GetParallel()} returns the currently specified cluster. @@ -35,17 +35,17 @@ StopParallel(quietly = FALSE) Accelerate distance calculation by employing multiple \acronym{CPU} workers. } \details{ -"BigTreeDist" parallelizes the calculation of tree to tree distances via +"TreeDist" parallelizes the calculation of tree to tree distances via the \code{\link[=parCapply]{parCapply()}} function, using a user-defined cluster specified in -\code{options("BigTreeDist-cluster")}. +\code{options("TreeDist-cluster")}. -\code{StartParallel()} calls \code{parallel::makeCluster()} and tells "BigTreeDist" to +\code{StartParallel()} calls \code{parallel::makeCluster()} and tells "TreeDist" to use the created cluster. -\code{SetParallel()} tells "BigTreeDist" to use a pre-existing or user-specified +\code{SetParallel()} tells "TreeDist" to use a pre-existing or user-specified cluster. -\code{StopParallel()} stops the current BigTreeDist cluster. +\code{StopParallel()} stops the current TreeDist cluster. } \examples{ if (interactive()) { # Only run in terminal diff --git a/man/TreeDistance.Rd b/man/TreeDistance.Rd index 5b61211e1..fffb9f4d6 100644 --- a/man/TreeDistance.Rd +++ b/man/TreeDistance.Rd @@ -142,15 +142,15 @@ phylogenetic or clustering information that two trees hold in common, as proposed in Smith (2020). 
} \details{ -\href{https://ms609.github.io/BigTreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds distances} +\href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds distances} calculate tree similarity by finding an optimal matching that the similarity between a split on one tree and its pair on a second, considering all possible ways to pair splits between trees (including leaving a split unpaired). The methods implemented here use the concepts of -\href{https://ms609.github.io/BigTreeDist/articles/information.html}{entropy and information} -\insertCite{Mackay2003}{BigTreeDist} to assign a similarity score between each +\href{https://ms609.github.io/TreeDist/articles/information.html}{entropy and information} +\insertCite{Mackay2003}{TreeDist} to assign a similarity score between each pair of splits. The returned tree similarity measures state the amount of information, @@ -166,7 +166,7 @@ hold information in common nor differ regarding these tips). \section{Concepts of information}{ The phylogenetic (Shannon) information content and entropy of a split are defined in -\href{https://ms609.github.io/BigTreeDist/articles/information.html}{a separate vignette}. +\href{https://ms609.github.io/TreeDist/articles/information.html}{a separate vignette}. Using the mutual (clustering) information \insertCite{Meila2007,Vinh2010}{BigTreeDist} of two splits to quantify their @@ -189,7 +189,7 @@ The Matching Split Information measure (\code{MatchingSplitInfo()}, splits as the phylogenetic information content of the most informative split that is consistent with both input splits; \code{MatchingSplitInfoDistance()} is the corresponding measure of tree difference. -(\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html}{More information here}.) 
+(\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html}{More information here}.) } \section{Conversion to distances}{ @@ -231,12 +231,12 @@ To balance memory demands and runtime with flexibility, these functions are implemented for trees with up to 2048 leaves. To analyse trees with up to 8192 leaves, you will need to a modified version of \pkg{TreeTools}. -First uninstall \pkg{BigTreeDist} and \pkg{TreeTools} using \code{remove.packages()}. +First uninstall \pkg{TreeDist} and \pkg{TreeTools} using \code{remove.packages()}. Then use \code{devtools::install_github("ms609/TreeTools", ref = "more-leaves")} to install the modified \pkg{TreeTools} package. -Finally, install \pkg{BigTreeDist} using -\code{devtools::install_github("ms609/BigTreeDist")}. -(\pkg{BigTreeDist} will need building from source \emph{after} the modified +Finally, install \pkg{TreeDist} using +\code{devtools::install_github("ms609/TreeDist")}. +(\pkg{TreeDist} will need building from source \emph{after} the modified \pkg{TreeTools} package has been installed, as its code links to values set in the TreeTools source code.) diff --git a/man/TreeInfo.Rd b/man/TreeInfo.Rd index 1162d94f4..9df72fe63 100644 --- a/man/TreeInfo.Rd +++ b/man/TreeInfo.Rd @@ -203,7 +203,7 @@ ConsensusInfo(trees, "clustering") \seealso{ An introduction to the phylogenetic information content of a split is given in \href{https://ms609.github.io/TreeTools/reference/SplitInformation.html}{\code{SplitInformation()}} -and in a \href{https://ms609.github.io/BigTreeDist/articles/information.html}{package vignette}. +and in a \href{https://ms609.github.io/TreeDist/articles/information.html}{package vignette}. 
Other information functions: \code{\link{SplitEntropy}()}, diff --git a/man/VisualizeMatching.Rd b/man/VisualizeMatching.Rd index 3d01d840f..f1edd25fa 100644 --- a/man/VisualizeMatching.Rd +++ b/man/VisualizeMatching.Rd @@ -43,7 +43,7 @@ similarity of the associated splits (\code{FALSE}).} } \description{ Depict the splits that are matched between two trees using a specified -\href{https://ms609.github.io/BigTreeDist/articles/Generalized-RF.html}{Generalized Robinson–Foulds} +\href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html}{Generalized Robinson–Foulds} similarity measure. } \details{ From d217a47a536a7209ee6f2b2b284ffbda6869e797 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:20:23 +0100 Subject: [PATCH 06/14] Big* --- tests/testthat.R | 2 +- vignettes/Generalized-RF.Rmd | 2 +- vignettes/Robinson-Foulds.Rmd | 2 +- vignettes/Using-TreeDist.Rmd | 4 ++-- vignettes/compare-treesets.Rmd | 2 +- vignettes/different-leaves.Rmd | 2 +- vignettes/information.Rmd | 4 ++-- vignettes/landscapes.Rmd | 4 ++-- vignettes/treespace.Rmd | 8 ++++---- vignettes/using-distances.Rmd | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/testthat.R b/tests/testthat.R index fc8ad6570..b84daabe5 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,4 @@ library("testthat", warn.conflicts = FALSE) library("BigTreeDist") -test_check("TreeDist") +test_check("BigTreeDist") diff --git a/vignettes/Generalized-RF.Rmd b/vignettes/Generalized-RF.Rmd index 90815f4d3..e4deb3876 100644 --- a/vignettes/Generalized-RF.Rmd +++ b/vignettes/Generalized-RF.Rmd @@ -12,7 +12,7 @@ vignette: > ```{r init, message=FALSE, warning=FALSE, echo = FALSE} library('ape') -library('TreeDist') +library('BigTreeDist') tree1 <- read.tree(text='((A, B), ((C, (D, E)), (F, (G, (H, I)))));') tree2 <- read.tree(text='((A, B), ((C, D, (E, I)), (F, (G, H))));') AtoJ <- read.tree(text='(((((A, B), C), D), E), (F, (G, (H, (I, 
J)))));') diff --git a/vignettes/Robinson-Foulds.Rmd b/vignettes/Robinson-Foulds.Rmd index 9ea5b7f33..1782edbc1 100644 --- a/vignettes/Robinson-Foulds.Rmd +++ b/vignettes/Robinson-Foulds.Rmd @@ -12,7 +12,7 @@ vignette: > ```{r init, echo=FALSE, warning=FALSE, message=FALSE} library('ape') -library('TreeDist') +library('BigTreeDist') standardMargin <- c(0.4, 0.4, 0.8, 0.4) cbPalette8 <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") diff --git a/vignettes/Using-TreeDist.Rmd b/vignettes/Using-TreeDist.Rmd index 86458f6f3..4d8fd388c 100644 --- a/vignettes/Using-TreeDist.Rmd +++ b/vignettes/Using-TreeDist.Rmd @@ -33,7 +33,7 @@ First we'll install the package. We can either install the stable version from the CRAN repository: ```r -install.packages('TreeDist') +install.packages('BigTreeDist') ``` or the development version, from GitHub -- which will contain the latest @@ -46,7 +46,7 @@ devtools::install_github('ms609/TreeDist') Then we'll load the package in to R's working environment: ```{r load-package, message=FALSE} -library('TreeDist') +library('BigTreeDist') ``` Now the package's functions are available within R. 
diff --git a/vignettes/compare-treesets.Rmd b/vignettes/compare-treesets.Rmd index b835a0872..7a4d52eb5 100644 --- a/vignettes/compare-treesets.Rmd +++ b/vignettes/compare-treesets.Rmd @@ -54,7 +54,7 @@ styles <- c(1, 2) # Select plotting colours / symbols treeStyle <- rep(styles, c(length(batch1), length(batch2))) # Calculate distances -library("TreeDist") +library("BigTreeDist") distances <- ClusteringInfoDistance(c(batch1, batch2)) # Construct over-simple 2D PCoA mapping diff --git a/vignettes/different-leaves.Rmd b/vignettes/different-leaves.Rmd index 7463d08b3..c94a7eabc 100644 --- a/vignettes/different-leaves.Rmd +++ b/vignettes/different-leaves.Rmd @@ -60,7 +60,7 @@ The information held in common between the trees is thus equal to the information held in common if only the common leaves are retained: ```{r drop-some} -library("TreeDist") +library("BigTreeDist") commonTips <- intersect(TipLabels(balAL), TipLabels(balCL)) # How much information is in tree balCL? diff --git a/vignettes/information.Rmd b/vignettes/information.Rmd index 0409d3297..d7cf065c4 100644 --- a/vignettes/information.Rmd +++ b/vignettes/information.Rmd @@ -85,7 +85,7 @@ These quantities can be calculated using functions in the ```{r} library("TreeTools", quietly = TRUE) -library("TreeDist") +library("BigTreeDist") treesMatchingSplit <- c( AB.CDEF = TreesMatchingSplit(2, 4), ABC.DEF = TreesMatchingSplit(3, 3) @@ -183,7 +183,7 @@ statement from the other; the maximum entropy distance occurs when the two statements are entirely independent. 
```{r mackay-8-1, echo=FALSE, fig.width=4, out.width='50%', fig.height=3, fig.align='center'} -library('TreeDist') +library('BigTreeDist') H <- function(inBracket) { expression(paste(italic('H'), plain('('), italic(inBracket), plain(')'))) } diff --git a/vignettes/landscapes.Rmd b/vignettes/landscapes.Rmd index d3d2bd192..c98845179 100644 --- a/vignettes/landscapes.Rmd +++ b/vignettes/landscapes.Rmd @@ -32,7 +32,7 @@ score: ```{r col-trees-by-score} # Load required libraries library("TreeTools", quietly = TRUE) -library("TreeDist") +library("BigTreeDist") # Generate a set of trees trees <- as.phylo(as.TreeNumber(BalancedTree(16)) + 0:100 - 15, 16) @@ -126,4 +126,4 @@ if (requireNamespace("plotly", quietly = TRUE)) { (Use the mouse to reorient) -# References \ No newline at end of file +# References diff --git a/vignettes/treespace.Rmd b/vignettes/treespace.Rmd index ea2400c32..deee12e7b 100644 --- a/vignettes/treespace.Rmd +++ b/vignettes/treespace.Rmd @@ -37,8 +37,8 @@ the "TreeDist" R package. 
Simply install [R](https://www.r-project.org/) or line: ```r -install.packages("TreeDist") -TreeDist::MapTrees() +install.packages("BigTreeDist") +BigTreeDist::MapTrees() ``` This will allow you to conduct and evaluate basic tree space mappings @@ -76,7 +76,7 @@ The clustering information distance [@SmithDist] is a reliable alternative that is fast to calculate: ```{r calculate-distances, message=FALSE} -library("TreeDist") +library("BigTreeDist") distances <- ClusteringInfoDistance(trees) ``` @@ -301,7 +301,7 @@ two-dimensional tree space plot: ```{r pid, fig.asp = 1, fig.width = 4, fig.align = "center", echo = FALSE, message = FALSE} -library("TreeDist") +library("BigTreeDist") pid_distances <- PhylogeneticInfoDistance(trees) pid_mapping <- cmdscale(pid_distances, k = 6) pid_cluster <- cutree(protoclust(pid_distances), k = 2) diff --git a/vignettes/using-distances.Rmd b/vignettes/using-distances.Rmd index 3638aceaa..83ce4e511 100644 --- a/vignettes/using-distances.Rmd +++ b/vignettes/using-distances.Rmd @@ -37,7 +37,7 @@ Let's work through a simple example using the Nye _et al_. [-@Nye2006] similarity metric to compare two imperfectly-resolved trees. 
```{r, fig.width=6, out.width="90%", fig.align="center"} -library("TreeDist") +library("BigTreeDist") tree1 <- ape::read.tree(text = '(A, ((B, ((C, D), (E, F))), (G, (H, (I, J, K)))));') tree2 <- ape::read.tree(text = '(A, (B, (C, D, E, (J, K)), (F, (G, H, I))));') VisualizeMatching(NyeSimilarity, tree1, tree2, From b98e40a0779a9f825551eef900731a6246232cd0 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:33:43 +0100 Subject: [PATCH 07/14] Big* --- R/BigTreeDist-package.R | 16 ++++++++-------- R/median.R | 2 +- R/plot.R | 2 +- man/BigTreeDist-package.Rd | 16 ++++++++-------- man/MSTSegments.Rd | 2 +- man/median.multiPhylo.Rd | 2 +- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/R/BigTreeDist-package.R b/R/BigTreeDist-package.R index b1a1b4c7f..05f6cf306 100644 --- a/R/BigTreeDist-package.R +++ b/R/BigTreeDist-package.R @@ -112,21 +112,21 @@ #' #' @references #' -#' - \insertRef{Bocker2013}{TreeDist} +#' - \insertRef{Bocker2013}{BigTreeDist} #' -#' - \insertRef{Bogdanowicz2012}{TreeDist} +#' - \insertRef{Bogdanowicz2012}{BigTreeDist} #' -#' - \insertRef{Kendall2016}{TreeDist} +#' - \insertRef{Kendall2016}{BigTreeDist} #' -#' - \insertRef{Li1996}{TreeDist} +#' - \insertRef{Li1996}{BigTreeDist} #' -#' - \insertRef{Lin2012}{TreeDist} +#' - \insertRef{Lin2012}{BigTreeDist} #' -#' - \insertRef{Nye2006}{TreeDist} +#' - \insertRef{Nye2006}{BigTreeDist} #' -#' - \insertRef{SmithDist}{TreeDist} +#' - \insertRef{SmithDist}{BigTreeDist} #' -#' - \insertRef{SmithSpace}{TreeDist} +#' - \insertRef{SmithSpace}{BigTreeDist} #' #' @encoding UTF-8 #' @keywords internal diff --git a/R/median.R b/R/median.R index ae1574a23..a01f8a990 100644 --- a/R/median.R +++ b/R/median.R @@ -56,7 +56,7 @@ #' #' @seealso Consensus methods: #' [`ape::consensus()`], -#' [`TreeTools::ConsensusWithout()`] +#' [`BigTreeTools::ConsensusWithout()`] #' #' @importFrom stats median #' @family tree space functions diff --git a/R/plot.R 
b/R/plot.R index 20195328f..258781aa7 100644 --- a/R/plot.R +++ b/R/plot.R @@ -277,7 +277,7 @@ VisualizeMatching <- function(Func, tree1, tree2, setPar = TRUE, #' @param mapping Two-column matrix giving _x_ and _y_ coordinates of plotted #' points. #' @param mstEnds Two-column matrix identifying rows of `mapping` at end of -#' each edge of the MST, as output by [`TreeTools::MSTEdges()`]. +#' each edge of the MST, as output by [`BigTreeTools::MSTEdges()`]. #' @param distances Matrix or `dist` object giving original distances between #' each pair of points. #' @param palette Vector of colours with which to colour edges. diff --git a/man/BigTreeDist-package.Rd b/man/BigTreeDist-package.Rd index 23a2d906a..f12eb8e0a 100644 --- a/man/BigTreeDist-package.Rd +++ b/man/BigTreeDist-package.Rd @@ -97,14 +97,14 @@ for a command-line interface. \references{ \itemize{ -\item \insertRef{Bocker2013}{TreeDist} -\item \insertRef{Bogdanowicz2012}{TreeDist} -\item \insertRef{Kendall2016}{TreeDist} -\item \insertRef{Li1996}{TreeDist} -\item \insertRef{Lin2012}{TreeDist} -\item \insertRef{Nye2006}{TreeDist} -\item \insertRef{SmithDist}{TreeDist} -\item \insertRef{SmithSpace}{TreeDist} +\item \insertRef{Bocker2013}{BigTreeDist} +\item \insertRef{Bogdanowicz2012}{BigTreeDist} +\item \insertRef{Kendall2016}{BigTreeDist} +\item \insertRef{Li1996}{BigTreeDist} +\item \insertRef{Lin2012}{BigTreeDist} +\item \insertRef{Nye2006}{BigTreeDist} +\item \insertRef{SmithDist}{BigTreeDist} +\item \insertRef{SmithSpace}{BigTreeDist} } } \seealso{ diff --git a/man/MSTSegments.Rd b/man/MSTSegments.Rd index 276421de8..c63682ba9 100644 --- a/man/MSTSegments.Rd +++ b/man/MSTSegments.Rd @@ -19,7 +19,7 @@ StrainCol( points.} \item{mstEnds}{Two-column matrix identifying rows of \code{mapping} at end of -each edge of the MST, as output by \code{\link[TreeTools:MSTEdges]{TreeTools::MSTEdges()}}.} +each edge of the MST, as output by \code{\link[BigTreeTools:MSTEdges]{BigTreeTools::MSTEdges()}}.} 
\item{\dots}{Additional arguments to \code{\link[=segments]{segments()}}.} diff --git a/man/median.multiPhylo.Rd b/man/median.multiPhylo.Rd index 1686df7a5..563b50551 100644 --- a/man/median.multiPhylo.Rd +++ b/man/median.multiPhylo.Rd @@ -77,7 +77,7 @@ median(structure(treeList, class = "multiPhylo")) \seealso{ Consensus methods: \code{\link[ape:consensus]{ape::consensus()}}, -\code{\link[TreeTools:ConsensusWithout]{TreeTools::ConsensusWithout()}} +\code{\link[BigTreeTools:ConsensusWithout]{BigTreeTools::ConsensusWithout()}} Other tree space functions: \code{\link{MSTSegments}()}, From 1004d3e0794f4e864831797c6a585098ea839e76 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Tue, 26 Sep 2023 09:10:34 +0100 Subject: [PATCH 08/14] Big* --- R/tree_distance_info.R | 2 +- R/tree_distance_msd.R | 2 +- R/tree_distance_nye.R | 4 ++-- man/JaccardRobinsonFoulds.Rd | 2 +- man/NyeSimilarity.Rd | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/tree_distance_info.R b/R/tree_distance_info.R index 22fa1b4ca..4fdd2d192 100644 --- a/R/tree_distance_info.R +++ b/R/tree_distance_info.R @@ -13,7 +13,7 @@ #' #' The methods implemented here use the concepts of #' [entropy and information](https://ms609.github.io/TreeDist/articles/information.html) -#' \insertCite{Mackay2003}{TreeDist} to assign a similarity score between each +#' \insertCite{Mackay2003}{BigTreeDist} to assign a similarity score between each #' pair of splits. #' #' The returned tree similarity measures state the amount of information, diff --git a/R/tree_distance_msd.R b/R/tree_distance_msd.R index 7a7ed5a79..fede185ab 100644 --- a/R/tree_distance_msd.R +++ b/R/tree_distance_msd.R @@ -2,7 +2,7 @@ #' #' Calculate the #' [Matching Split Distance](https://ms609.github.io/TreeDist/articles/Generalized-RF.html#matching-split-distance) -#' \insertCite{Bogdanowicz2012,Lin2012}{TreeDist} for unrooted binary trees. 
+#' \insertCite{Bogdanowicz2012,Lin2012}{BigTreeDist} for unrooted binary trees. #' #' Trees need not contain identical leaves; scores are based on the leaves that #' trees hold in common. Check for unexpected differences in tip labelling diff --git a/R/tree_distance_nye.R b/R/tree_distance_nye.R index 0cb5958bf..183caf4cf 100644 --- a/R/tree_distance_nye.R +++ b/R/tree_distance_nye.R @@ -2,7 +2,7 @@ #' #' `NyeSimilarity()` and `NyeSplitSimilarity()` implement the #' [Generalized Robinson–Foulds](https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances) -#' tree comparison metric of \insertCite{Nye2006;textual}{TreeDist}. +#' tree comparison metric of \insertCite{Nye2006;textual}{BigTreeDist}. #' In short, this finds the optimal matching that pairs each branch from #' one tree with a branch in the second, where matchings are scored according to #' the size of the largest split that is consistent with both of them, @@ -112,7 +112,7 @@ NyeSplitSimilarity <- function(splits1, splits2, #' #' Calculate the #' [Jaccard–Robinson–Foulds metric](https://ms609.github.io/TreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric) -#' \insertCite{Bocker2013}{TreeDist}, a +#' \insertCite{Bocker2013}{BigTreeDist}, a #' [Generalized Robinson–Foulds metric](https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances). #' #' In short, the Jaccard–Robinson–Foulds diff --git a/man/JaccardRobinsonFoulds.Rd b/man/JaccardRobinsonFoulds.Rd index 5b7774b09..23c6190ce 100644 --- a/man/JaccardRobinsonFoulds.Rd +++ b/man/JaccardRobinsonFoulds.Rd @@ -70,7 +70,7 @@ or \code{splits1} and \code{splits2}. 
\description{ Calculate the \href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#jaccard-robinson-foulds-metric}{Jaccard–Robinson–Foulds metric} -\insertCite{Bocker2013}{TreeDist}, a +\insertCite{Bocker2013}{BigTreeDist}, a \href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds metric}. } \details{ diff --git a/man/NyeSimilarity.Rd b/man/NyeSimilarity.Rd index 003147169..a836e48be 100644 --- a/man/NyeSimilarity.Rd +++ b/man/NyeSimilarity.Rd @@ -69,7 +69,7 @@ or \code{splits1} and \code{splits2}. \description{ \code{NyeSimilarity()} and \code{NyeSplitSimilarity()} implement the \href{https://ms609.github.io/TreeDist/articles/Robinson-Foulds.html#generalized-robinson-foulds-distances}{Generalized Robinson–Foulds} -tree comparison metric of \insertCite{Nye2006;textual}{TreeDist}. +tree comparison metric of \insertCite{Nye2006;textual}{BigTreeDist}. In short, this finds the optimal matching that pairs each branch from one tree with a branch in the second, where matchings are scored according to the size of the largest split that is consistent with both of them, From c24dc2fd6f12cbad02b305743c0e8b8e6c154876 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Tue, 26 Sep 2023 09:10:47 +0100 Subject: [PATCH 09/14] document() --- man/MatchingSplitDistance.Rd | 2 +- man/TreeDistance.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/man/MatchingSplitDistance.Rd b/man/MatchingSplitDistance.Rd index 068bde479..84d2a180e 100644 --- a/man/MatchingSplitDistance.Rd +++ b/man/MatchingSplitDistance.Rd @@ -52,7 +52,7 @@ or \code{splits1} and \code{splits2}. \description{ Calculate the \href{https://ms609.github.io/TreeDist/articles/Generalized-RF.html#matching-split-distance}{Matching Split Distance} -\insertCite{Bogdanowicz2012,Lin2012}{TreeDist} for unrooted binary trees. 
+\insertCite{Bogdanowicz2012,Lin2012}{BigTreeDist} for unrooted binary trees. } \details{ Trees need not contain identical leaves; scores are based on the leaves that diff --git a/man/TreeDistance.Rd b/man/TreeDistance.Rd index fffb9f4d6..511d38b9c 100644 --- a/man/TreeDistance.Rd +++ b/man/TreeDistance.Rd @@ -150,7 +150,7 @@ between trees (including leaving a split unpaired). The methods implemented here use the concepts of \href{https://ms609.github.io/TreeDist/articles/information.html}{entropy and information} -\insertCite{Mackay2003}{TreeDist} to assign a similarity score between each +\insertCite{Mackay2003}{BigTreeDist} to assign a similarity score between each pair of splits. The returned tree similarity measures state the amount of information, From 6a994b2420c04e8a392311a36a69a78bdbbf54f3 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Tue, 26 Sep 2023 11:21:49 +0100 Subject: [PATCH 10/14] Remotes ms609/TreeTools@more-leaves --- DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 2a2ce5213..bec3fb789 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -78,6 +78,8 @@ Suggests: LinkingTo: Rcpp, BigTreeTools, +Remotes: + ms609/TreeTools@more-leaves RdMacros: Rdpack VignetteBuilder: knitr Config/Needs/check: rcmdcheck From e692c229c569d0dd02c75b7953e409f6b2296a59 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Tue, 26 Sep 2023 11:24:48 +0100 Subject: [PATCH 11/14] Get BigTreeTools --- .github/workflows/memcheck.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/memcheck.yml b/.github/workflows/memcheck.yml index 3b12e207e..51f6cd212 100644 --- a/.github/workflows/memcheck.yml +++ b/.github/workflows/memcheck.yml @@ -87,6 +87,7 @@ jobs: - name: Install dependencies run: | + remotes::install_github("ms609/TreeTools", "more-leaves") remotes::install_deps(dependencies = TRUE) remotes::install_cran("devtools") shell: Rscript {0} From 
d75b71aff62ba08b853e219d9aa581f49aa86335 Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Tue, 26 Sep 2023 14:47:59 +0100 Subject: [PATCH 12/14] TreeTools@more-leaves --- .github/workflows/R-CMD-check.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index 26c82bdc2..bdef8a508 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -93,6 +93,7 @@ jobs: run: | install.packages("pak") install.packages("pkgdepends") + pak::pkg_install("ms609/TreeTools@more-leaves") shell: Rscript {0} - name: pak bug workaround - install TreeDist (CRAN) From 6b048b0e58dbd8d71e7a7f0bb4fa222957ec8b2b Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Wed, 27 Sep 2023 09:38:31 +0100 Subject: [PATCH 13/14] Use int32 --- src/tree_distances.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 8df39eb01..80bdf8835 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -28,26 +28,26 @@ List cpp_robinson_foulds_distance (const RawMatrix x, const RawMatrix y, for (int16 i = a.n_splits; i--; ) matching[i] = NA_INTEGER; splitbit b_complement[SL_MAX_SPLITS][SL_MAX_BINS]; - for (int16 i = b.n_splits; i--; ) { - for (int16 bin = last_bin; bin--; ) { + for (int32 i = b.n_splits; i--; ) { + for (int32 bin = last_bin; bin--; ) { b_complement[i][bin] = ~b.state[i][bin]; } b_complement[i][last_bin] = b.state[i][last_bin] ^ unset_mask; } - for (int16 ai = a.n_splits; ai--; ) { - for (int16 bi = b.n_splits; bi--; ) { + for (int32 ai = a.n_splits; ai--; ) { + for (int32 bi = b.n_splits; bi--; ) { bool all_match = true, all_complement = true; - for (int16 bin = 0; bin != a.n_bins; ++bin) { + for (int32 bin = 0; bin != a.n_bins; ++bin) { if ((a.state[ai][bin] != b.state[bi][bin])) { all_match = false; break; } } if (!all_match) { - for (int16 
bin = 0; bin != a.n_bins; ++bin) { + for (int32 bin = 0; bin != a.n_bins; ++bin) { if ((a.state[ai][bin] != b_complement[bi][bin])) { all_complement = false; break; From b5215c38a64092f0809aaa1cf50dc88e0de63f1b Mon Sep 17 00:00:00 2001 From: SMITH <1695515+ms609@users.noreply.github.com> Date: Wed, 27 Sep 2023 09:52:13 +0100 Subject: [PATCH 14/14] int32 for .state --- src/tree_distances.cpp | 86 +++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index 80bdf8835..8af519f72 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -87,26 +87,26 @@ List cpp_robinson_foulds_info (const RawMatrix x, const RawMatrix y, /* Dynamic allocation 20% faster for 105 tips, but VLA not permitted in C11 */ splitbit b_complement[SL_MAX_SPLITS][SL_MAX_BINS]; - for (int16 i = 0; i != b.n_splits; i++) { - for (int16 bin = 0; bin != last_bin; ++bin) { + for (int32 i = 0; i != b.n_splits; i++) { + for (int32 bin = 0; bin != last_bin; ++bin) { b_complement[i][bin] = ~b.state[i][bin]; } b_complement[i][last_bin] = b.state[i][last_bin] ^ unset_mask; } - for (int16 ai = 0; ai != a.n_splits; ++ai) { - for (int16 bi = 0; bi != b.n_splits; ++bi) { + for (int32 ai = 0; ai != a.n_splits; ++ai) { + for (int32 bi = 0; bi != b.n_splits; ++bi) { bool all_match = true, all_complement = true; - for (int16 bin = 0; bin != a.n_bins; ++bin) { + for (int32 bin = 0; bin != a.n_bins; ++bin) { if ((a.state[ai][bin] != b.state[bi][bin])) { all_match = false; break; } } if (!all_match) { - for (int16 bin = 0; bin != a.n_bins; ++bin) { + for (int32 bin = 0; bin != a.n_bins; ++bin) { if ((a.state[ai][bin] != b_complement[bi][bin])) { all_complement = false; break; @@ -115,7 +115,7 @@ List cpp_robinson_foulds_info (const RawMatrix x, const RawMatrix y, } if (all_match || all_complement) { int16 leaves_in_split = 0; - for (int16 bin = 0; bin != a.n_bins; ++bin) { + for (int32 bin = 0; bin != a.n_bins; 
++bin) { leaves_in_split += count_bits(a.state[ai][bin]); } @@ -154,20 +154,20 @@ List cpp_matching_split_distance (const RawMatrix x, const RawMatrix y, cost** score = new cost*[most_splits]; for (int16 i = most_splits; i--; ) score[i] = new cost[most_splits]; - for (int16 ai = 0; ai != a.n_splits; ++ai) { - for (int16 bi = 0; bi != b.n_splits; ++bi) { + for (int32 ai = 0; ai != a.n_splits; ++ai) { + for (int32 bi = 0; bi != b.n_splits; ++bi) { score[ai][bi] = 0; - for (int16 bin = 0; bin != a.n_bins; ++bin) { + for (int32 bin = 0; bin != a.n_bins; ++bin) { score[ai][bi] += count_bits(a.state[ai][bin] ^ b.state[bi][bin]); } if (score[ai][bi] > half_tips) score[ai][bi] = n_tips - score[ai][bi]; } - for (int16 bi = b.n_splits; bi < most_splits; ++bi) { + for (int32 bi = b.n_splits; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } - for (int16 ai = a.n_splits; ai < most_splits; ++ai) { - for (int16 bi = 0; bi != most_splits; ++bi) { + for (int32 ai = a.n_splits; ai < most_splits; ++ai) { + for (int32 bi = 0; bi != most_splits; ++bi) { score[ai][bi] = max_score; } } @@ -217,18 +217,18 @@ List cpp_jaccard_similarity (const RawMatrix x, const RawMatrix y, cost** score = new cost*[most_splits]; for (int16 i = most_splits; i--; ) score[i] = new cost[most_splits]; - for (int16 ai = 0; ai != a.n_splits; ++ai) { + for (int32 ai = 0; ai != a.n_splits; ++ai) { const int16 na = a.in_split[ai], nA = n_tips - na ; - for (int16 bi = 0; bi != b.n_splits; ++bi) { + for (int32 bi = 0; bi != b.n_splits; ++bi) { // x divides tips into a|A; y divides tips into b|B int16 a_and_b = 0; - for (int16 bin = 0; bin != a.n_bins; ++bin) { + for (int32 bin = 0; bin != a.n_bins; ++bin) { a_and_b += count_bits(a.state[ai][bin] & b.state[bi][bin]); } @@ -282,12 +282,12 @@ List cpp_jaccard_similarity (const RawMatrix x, const RawMatrix y, } } } - for (int16 bi = b.n_splits; bi < most_splits; ++bi) { + for (int32 bi = b.n_splits; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } - for 
(int16 ai = a.n_splits; ai < most_splits; ++ai) { - for (int16 bi = 0; bi != most_splits; ++bi) { + for (int32 ai = a.n_splits; ai < most_splits; ++ai) { + for (int32 bi = 0; bi != most_splits; ++bi) { score[ai][bi] = max_score; } } @@ -332,14 +332,14 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, splitbit different[SL_MAX_BINS]; - for (int16 ai = 0; ai != a.n_splits; ++ai) { - for (int16 bi = 0; bi != b.n_splits; ++bi) { + for (int32 ai = 0; ai != a.n_splits; ++ai) { + for (int32 bi = 0; bi != b.n_splits; ++bi) { int16 n_different = 0, n_a_only = 0, n_a_and_b = 0 ; - for (int16 bin = 0; bin != a.n_bins; ++bin) { + for (int32 bin = 0; bin != a.n_bins; ++bin) { different[bin] = a.state[ai][bin] ^ b.state[bi][bin]; n_different += count_bits(different[bin]); n_a_only += count_bits(a.state[ai][bin] & different[bin]); @@ -351,12 +351,12 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, ((max_score / max_possible) * mmsi_score(n_same, n_a_and_b, n_different, n_a_only)); } - for (int16 bi = b.n_splits; bi < most_splits; ++bi) { + for (int32 bi = b.n_splits; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } - for (int16 ai = a.n_splits; ai < most_splits; ++ai) { - for (int16 bi = 0; bi < most_splits; ++bi) { + for (int32 ai = a.n_splits; ai < most_splits; ++ai) { + for (int32 bi = 0; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } @@ -388,10 +388,12 @@ List cpp_msi_distance (const RawMatrix x, const RawMatrix y, // [[Rcpp::export]] List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, const IntegerVector nTip) { + Rcpp::Rcout << "\n\n\n\nASOUIGHAUI SFH ASHF KLASJHF KAJSF \n\n\n\n"; if (x.cols() != y.cols()) { Rcpp::stop("Input splits must address same number of tips."); } const SplitList a(x), b(y); + Rcpp::Rcout << "\n\n\n\nSPLITS LISTED \n\n\n\n"; const bool a_has_more_splits = (a.n_splits > b.n_splits); const int16 most_splits = a_has_more_splits ? 
a.n_splits : b.n_splits, @@ -414,18 +416,18 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, NumericVector a_match(a.n_splits); std::unique_ptr b_match = std::make_unique(b.n_splits); - for (int16 ai = 0; ai != a.n_splits; ++ai) { + for (int32 ai = 0; ai != a.n_splits; ++ai) { if (a_match[ai]) continue; const int16 na = a.in_split[ai], nA = n_tips - na ; - for (int16 bi = 0; bi != b.n_splits; ++bi) { + for (int32 bi = 0; bi != b.n_splits; ++bi) { // x divides tips into a|A; y divides tips into b|B int16 a_and_b = 0; - for (int16 bin = 0; bin != a.n_bins; ++bin) { + for (int32 bin = 0; bin != a.n_bins; ++bin) { a_and_b += count_bits(a.state[ai][bin] & b.state[bi][bin]); } @@ -461,7 +463,7 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, ); } } - for (int16 bi = b.n_splits; bi < most_splits; ++bi) { + for (int32 bi = b.n_splits; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } @@ -482,21 +484,21 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, if (exact_matches) { int16 a_pos = 0; - for (int16 ai = 0; ai != a.n_splits; ++ai) { + for (int32 ai = 0; ai != a.n_splits; ++ai) { if (a_match[ai]) continue; int16 b_pos = 0; - for (int16 bi = 0; bi != b.n_splits; ++bi) { + for (int32 bi = 0; bi != b.n_splits; ++bi) { if (b_match[bi]) continue; score[a_pos][b_pos] = score[ai][bi]; b_pos++; } - for (int16 bi = lap_dim - a_extra_splits; bi < lap_dim; ++bi) { + for (int32 bi = lap_dim - a_extra_splits; bi < lap_dim; ++bi) { score[a_pos][bi] = max_score; } a_pos++; } - for (int16 ai = lap_dim - b_extra_splits; ai < lap_dim; ++ai) { - for (int16 bi = 0; bi != lap_dim; ++bi) { + for (int32 ai = lap_dim - b_extra_splits; ai < lap_dim; ++ai) { + for (int32 bi = 0; bi != lap_dim; ++bi) { score[ai][bi] = max_score; } } @@ -512,7 +514,7 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, std::unique_ptr lap_decode = std::make_unique(lap_dim); int16 fuzzy_match = 0; - for (int16 bi = 0; bi != b.n_splits; 
++bi) { + for (int32 bi = 0; bi != b.n_splits; ++bi) { if (!b_match[bi]) { assert(fuzzy_match < lap_dim); lap_decode[fuzzy_match++] = bi + 1; @@ -547,8 +549,8 @@ List cpp_mutual_clustering (const RawMatrix x, const RawMatrix y, return List::create(Named("score") = final_score, _["matching"] = final_matching); } else { - for (int16 ai = a.n_splits; ai < most_splits; ++ai) { - for (int16 bi = 0; bi != most_splits; ++bi) { + for (int32 ai = a.n_splits; ai < most_splits; ++ai) { + for (int32 bi = 0; bi != most_splits; ++bi) { score[ai][bi] = max_score; } } @@ -597,8 +599,8 @@ List cpp_shared_phylo (const RawMatrix x, const RawMatrix y, cost** score = new cost*[most_splits]; for (int16 i = most_splits; i--; ) score[i] = new cost[most_splits]; - for (int16 ai = a.n_splits; ai--; ) { - for (int16 bi = b.n_splits; bi--; ) { + for (int32 ai = a.n_splits; ai--; ) { + for (int32 bi = b.n_splits; bi--; ) { const double spi_over = spi_overlap(a.state[ai], b.state[bi], n_tips, a.in_split[ai], b.in_split[bi], a.n_bins); @@ -608,12 +610,12 @@ List cpp_shared_phylo (const RawMatrix x, const RawMatrix y, max_score; } - for (int16 bi = b.n_splits; bi < most_splits; ++bi) { + for (int32 bi = b.n_splits; bi < most_splits; ++bi) { score[ai][bi] = max_score; } } - for (int16 ai = a.n_splits; ai < most_splits; ++ai) { - for (int16 bi = 0; bi != most_splits; ++bi) { + for (int32 ai = a.n_splits; ai < most_splits; ++ai) { + for (int32 bi = 0; bi != most_splits; ++bi) { score[ai][bi] = max_score; } }