From 3b255656577119d1c325afc340beac9eeda927a1 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 06:51:51 +0000 Subject: [PATCH 1/5] SplitInformation.Splits --- DESCRIPTION | 2 +- NAMESPACE | 3 +++ NEWS.md | 6 ++++++ R/Information.R | 20 ++++++++++++++++++++ man/SplitInformation.Rd | 9 +++++++++ tests/testthat/test-information.R | 8 ++++++++ 6 files changed, 47 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index af0703721..c05e9ec88 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TreeTools Title: Create, Modify and Analyse Phylogenetic Trees -Version: 2.1.0 +Version: 2.1.0.9000 Authors@R: c( person("Martin R.", 'Smith', role = c("aut", "cre", "cph"), email = "martin.smith@durham.ac.uk", diff --git a/NAMESPACE b/NAMESPACE index 2a9745831..400006615 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -151,6 +151,9 @@ S3method(SortTree,phylo) S3method(SplitImbalance,"NULL") S3method(SplitImbalance,Splits) S3method(SplitImbalance,phylo) +S3method(SplitInformation,Splits) +S3method(SplitInformation,numeric) +S3method(SplitInformation,phylo) S3method(SplitsInBinaryTree,"NULL") S3method(SplitsInBinaryTree,Splits) S3method(SplitsInBinaryTree,default) diff --git a/NEWS.md b/NEWS.md index 1828eb745..dd667930e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# TreeTools 2.1.0.9000 (2026-02-16) # + +- Add Splits and phylo methods for `SplitInformation()` + + # TreeTools 2.1.0 (2026-02-10) # - Add method `RenumberTips.Splits()`. @@ -12,6 +17,7 @@ - `MatrixToPhyDat()` gains `tipLabels` parameter. - Document return value for `J1Index()`. + # TreeTools 2.0.0 (2025-09-23) # ## New functionality diff --git a/R/Information.R b/R/Information.R index 30cc4696a..c5813263f 100644 --- a/R/Information.R +++ b/R/Information.R @@ -170,9 +170,29 @@ CharacterInformation <- function(tokens) { #' @template MRS #' @export SplitInformation <- function(A, B = A[1]) { + UseMethod("SplitInformation") +} + +#' @rdname SplitInformation +#' @export +SplitInformation.numeric <- function(A, B = A[1]) { -(Log2TreesMatchingSplit(A, B) - Log2Unrooted.int(A + B)) } +#' @rdname SplitInformation +#' @export +SplitInformation.Splits <- function(A, B) { + nTip <- NTip(A) + tis <- TipsInSplits(A) + Log2Unrooted.int(nTip) - Log2Rooted.int(tis) - Log2Rooted.int(nTip - tis) +} + +#' @rdname SplitInformation +#' @export +SplitInformation.phylo <- function(A, B) { + SplitInformation(as.Splits(A)) +} + #' @rdname SplitInformation #' @param partitionSizes Integer vector specifying the number of taxa in each #' partition of a multi-partition split. diff --git a/man/SplitInformation.Rd b/man/SplitInformation.Rd index db27e2894..ddaf1c83c 100644 --- a/man/SplitInformation.Rd +++ b/man/SplitInformation.Rd @@ -2,11 +2,20 @@ % Please edit documentation in R/Information.R \name{SplitInformation} \alias{SplitInformation} +\alias{SplitInformation.numeric} +\alias{SplitInformation.Splits} +\alias{SplitInformation.phylo} \alias{MultiSplitInformation} \title{Phylogenetic information content of splitting leaves into two partitions} \usage{ SplitInformation(A, B = A[1]) +\method{SplitInformation}{numeric}(A, B = A[1]) + +\method{SplitInformation}{Splits}(A, B) + +\method{SplitInformation}{phylo}(A, B) + MultiSplitInformation(partitionSizes) } \arguments{ diff --git a/tests/testthat/test-information.R b/tests/testthat/test-information.R index d65b78f73..f50f8fad5 100644 --- a/tests/testthat/test-information.R +++ b/tests/testthat/test-information.R @@ -12,6 +12,14 @@ test_that("Trees matching splits calculated correctly", { expect_equal(log(315/10395)/-log(2), SplitInformation(3, 5)) }) +test_that("SplitInformation() handles Splits", { + t6 <- BalancedTree(6) + expect_equal(SplitInformation(t6), SplitInformation(as.Splits(t6))) + expect_equal(SplitInformation(t6), c(`8` = SplitInformation(3, 3), + `9` = SplitInformation(2, 4), + `11` = SplitInformation(4, 2))) +}) + test_that("UnrootedTreesMatchingSplit() correct", { expect_equal(NRooted(3) * NRooted(5), UnrootedTreesMatchingSplit(c(3, 5))) expect_equal(LnRooted(30) + LnRooted(50), LnUnrootedTreesMatchingSplit(30, 50)) From 2364d564f93ccad028399846ae0b82ee7f1ab089 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 08:51:10 +0000 Subject: [PATCH 2/5] SplitFreq --- NEWS.md | 3 +- R/Support.R | 54 ++++++++++++++++++++++++----------- tests/testthat/test-Support.R | 16 +++++++++-- 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/NEWS.md b/NEWS.md index dd667930e..28642bf73 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # TreeTools 2.1.0.9000 (2026-02-16) # -- Add Splits and phylo methods for `SplitInformation()` +- Add Splits and phylo methods for `SplitInformation()`. +- `SplitFrequency(reference = NULL)` returns frequency of all splits. # TreeTools 2.1.0 (2026-02-10) # diff --git a/R/Support.R b/R/Support.R index 328bc7746..12af8c0b7 100644 --- a/R/Support.R +++ b/R/Support.R @@ -8,8 +8,9 @@ #' If multiple calculations are required, some time can be saved by using the #' constituent functions (see examples). #' -#' @param reference A tree of class `phylo`, a `Splits` object. -#' @param forest a list of trees of class `phylo`, or a `multiPhylo` object; or a +#' @param reference A tree of class `phylo`, a `Splits` object. If `NULL`, +#' the frequencies of all splits in `forest` will be returned. +#' @param forest A list of trees of class `phylo`, or a `multiPhylo` object; or a #' `Splits` object. See #' [vignette](https://ms609.github.io/TreeTools/articles/load-trees.html) for #' possible methods of loading trees into R. @@ -27,23 +28,42 @@ #' @family Splits operations #' @export SplitFrequency <- function(reference, forest) { - referenceSplits <- as.Splits(reference) - refLabels <- attr(referenceSplits, "tip.label") - forest <- lapply(forest, KeepTip, refLabels) - forestSplits <- as.Splits(forest, tipLabels = refLabels) - - logicals <- vapply(forestSplits, - function(cf) referenceSplits %in% cf, - logical(length(referenceSplits))) - ret <- if (is.null(dim(logicals))) { - sum(logicals) + if (is.null(reference)) { + if (length(unique(lapply(lapply(forest, TipLabels), sort))) > 1) { + stop("All trees must bear identical labels") + } + forestSplits <- do.call(c, as.Splits(forest, tipLabels = TipLabels(forest[[1]]))) + dup <- duplicated(forestSplits) + ret <- forestSplits[[!dup]] + logicals <- vapply(seq_along(forestSplits), + function(cf) ret %in% forestSplits[[cf]], + logical(sum(!dup))) + count <- if (is.null(dim(logicals))) { + sum(logicals) + } else { + rowSums(logicals) + } + attr(ret, "count") <- unname(count) + ret } else { - rowSums(logicals) + referenceSplits <- as.Splits(reference) + refLabels <- attr(referenceSplits, "tip.label") + forest <- lapply(forest, KeepTip, refLabels) + forestSplits <- as.Splits(forest, tipLabels = refLabels) + + logicals <- vapply(forestSplits, + function(cf) referenceSplits %in% cf, + logical(length(referenceSplits))) + ret <- if (is.null(dim(logicals))) { + sum(logicals) + } else { + rowSums(logicals) + } + names(ret) <- rownames(referenceSplits) + + # Return: + ret } - names(ret) <- rownames(referenceSplits) - - # Return: - ret } #' Label splits diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index da72bf683..4d0bf9c03 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -6,9 +6,19 @@ test_that("Node supports calculated correctly", { swapBC = ape::read.tree(text = "((((((A,C),B),D),E),F),out);"), DbyA = ape::read.tree(text = "((((((A,D),C),B),E),F,G),out);") ) - expect_equal(c("10" = 4, "11" = 4, "12" = 4, "13" = 3), - SplitFrequency(treeSample$correct, treeSample)) - + expect_equal( + SplitFrequency(treeSample$correct, treeSample), + c("10" = 4, "11" = 4, "12" = 4, "13" = 3) + ) + + expect_error(SplitFrequency(NULL, treeSample), "must bear identical") + sameTips <- KeepTip(treeSample, TipLabels(treeSample$correct)) + sameSplits <- do.call(c, as.Splits(sameTips)) + expect_equal(SplitFrequency(NULL, sameTips), + structure(sameSplits[[!duplicated(sameSplits)]], + count = c(4, 4, 4, 3, 1, 1, 1, 1, 1)) + ) + # Internal nodes on each side of root balanced <- ape::read.tree(text="((D, (E, (F, out))), (C, (A, B)));") freq <- SplitFrequency(balanced, treeSample) From a54f8f67158be524972a4830dedde53af761438f Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 08:53:07 +0000 Subject: [PATCH 3/5] document --- R/Support.R | 4 ++++ man/SplitFrequency.Rd | 9 +++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/R/Support.R b/R/Support.R index 12af8c0b7..23d147e76 100644 --- a/R/Support.R +++ b/R/Support.R @@ -22,6 +22,10 @@ #' Note that the three nodes at the root of the tree correspond to a single #' split; see the example for how these might be plotted on a tree. #' +#' If `reference` is `NULL`, then `SplitFrequency()` returns a list of splits +#' (in the order encountered in forest) with attribute `"count"` stating the +#' number of times each split occurs in `forest`. +#' #' @template exampleNodeSupport #' #' @template MRS diff --git a/man/SplitFrequency.Rd b/man/SplitFrequency.Rd index 9e41c9e1d..136c957fd 100644 --- a/man/SplitFrequency.Rd +++ b/man/SplitFrequency.Rd @@ -7,9 +7,10 @@ SplitFrequency(reference, forest) } \arguments{ -\item{reference}{A tree of class \code{phylo}, a \code{Splits} object.} +\item{reference}{A tree of class \code{phylo}, a \code{Splits} object. If \code{NULL}, +the frequencies of all splits in \code{forest} will be returned.} -\item{forest}{a list of trees of class \code{phylo}, or a \code{multiPhylo} object; or a +\item{forest}{A list of trees of class \code{phylo}, or a \code{multiPhylo} object; or a \code{Splits} object. See \href{https://ms609.github.io/TreeTools/articles/load-trees.html}{vignette} for possible methods of loading trees into R.} @@ -21,6 +22,10 @@ If \code{reference} is a tree of class \code{phylo}, then the sequence will corr to the order of nodes (use \code{ape::nodelabels()} to view). Note that the three nodes at the root of the tree correspond to a single split; see the example for how these might be plotted on a tree. + +If \code{reference} is \code{NULL}, then \code{SplitFrequency()} returns a list of splits +(in the order encountered in forest) with attribute \code{"count"} stating the +number of times each split occurs in \code{forest}. } \description{ \code{SplitFrequency()} provides a simple way to count the number of times that From e033e6ee46334e579876c1bc558766a2018a9603 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 08:59:00 +0000 Subject: [PATCH 4/5] null forest --- R/Support.R | 5 +++-- tests/testthat/test-Support.R | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/Support.R b/R/Support.R index 23d147e76..65177f2a1 100644 --- a/R/Support.R +++ b/R/Support.R @@ -31,8 +31,9 @@ #' @template MRS #' @family Splits operations #' @export -SplitFrequency <- function(reference, forest) { - if (is.null(reference)) { +SplitFrequency <- function(reference, forest = NULL) { + if (is.null(reference) || is.null(forest)) { + if (is.null(forest)) forest <- reference if (length(unique(lapply(lapply(forest, TipLabels), sort))) > 1) { stop("All trees must bear identical labels") } diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index 4d0bf9c03..ce08d3a4c 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -14,7 +14,7 @@ test_that("Node supports calculated correctly", { expect_error(SplitFrequency(NULL, treeSample), "must bear identical") sameTips <- KeepTip(treeSample, TipLabels(treeSample$correct)) sameSplits <- do.call(c, as.Splits(sameTips)) - expect_equal(SplitFrequency(NULL, sameTips), + expect_equal(SplitFrequency(sameTips), structure(sameSplits[[!duplicated(sameSplits)]], count = c(4, 4, 4, 3, 1, 1, 1, 1, 1)) ) From 8e857c698faa7dd17b20bb08d2ca99517d698ce3 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 12:30:42 +0000 Subject: [PATCH 5/5] coverage --- R/Support.R | 4 ++-- man/SplitFrequency.Rd | 6 +++--- tests/testthat/test-Support.R | 4 ++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R/Support.R b/R/Support.R index 65177f2a1..6bcf22f79 100644 --- a/R/Support.R +++ b/R/Support.R @@ -10,8 +10,8 @@ #' #' @param reference A tree of class `phylo`, a `Splits` object. If `NULL`, #' the frequencies of all splits in `forest` will be returned. -#' @param forest A list of trees of class `phylo`, or a `multiPhylo` object; or a -#' `Splits` object. See +#' @param forest A list of trees of class `phylo`, or a `multiPhylo` object; +#' or a `Splits` object. See #' [vignette](https://ms609.github.io/TreeTools/articles/load-trees.html) for #' possible methods of loading trees into R. #' diff --git a/man/SplitFrequency.Rd b/man/SplitFrequency.Rd index 136c957fd..f3041386b 100644 --- a/man/SplitFrequency.Rd +++ b/man/SplitFrequency.Rd @@ -4,14 +4,14 @@ \alias{SplitFrequency} \title{Frequency of splits} \usage{ -SplitFrequency(reference, forest) +SplitFrequency(reference, forest = NULL) } \arguments{ \item{reference}{A tree of class \code{phylo}, a \code{Splits} object. If \code{NULL}, the frequencies of all splits in \code{forest} will be returned.} -\item{forest}{A list of trees of class \code{phylo}, or a \code{multiPhylo} object; or a -\code{Splits} object. See +\item{forest}{A list of trees of class \code{phylo}, or a \code{multiPhylo} object; +or a \code{Splits} object. See \href{https://ms609.github.io/TreeTools/articles/load-trees.html}{vignette} for possible methods of loading trees into R.} } diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index ce08d3a4c..8ad2cbb25 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -19,6 +19,10 @@ test_that("Node supports calculated correctly", { count = c(4, 4, 4, 3, 1, 1, 1, 1, 1)) ) + monoSplit <- ape::read.tree(text = "((a, b, c, d), (e, f, g));") + expect_equal(SplitFrequency(list(monoSplit)), + structure(as.Splits(monoSplit), count = 1)) + # Internal nodes on each side of root balanced <- ape::read.tree(text="((D, (E, (F, out))), (C, (A, B)));") freq <- SplitFrequency(balanced, treeSample)