diff --git a/NEWS.md b/NEWS.md index dd667930..28642bf7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # TreeTools 2.1.0.9000 (2026-02-16) # -- Add Splits and phylo methods for `SplitInformation()` +- Add Splits and phylo methods for `SplitInformation()`. +- `SplitFrequency(reference = NULL)` returns frequency of all splits. # TreeTools 2.1.0 (2026-02-10) # diff --git a/R/Support.R b/R/Support.R index 328bc774..6bcf22f7 100644 --- a/R/Support.R +++ b/R/Support.R @@ -8,9 +8,10 @@ #' If multiple calculations are required, some time can be saved by using the #' constituent functions (see examples). #' -#' @param reference A tree of class `phylo`, a `Splits` object. -#' @param forest a list of trees of class `phylo`, or a `multiPhylo` object; or a -#' `Splits` object. See +#' @param reference A tree of class `phylo`, or a `Splits` object. If `NULL`, +#' the frequencies of all splits in `forest` will be returned. +#' @param forest A list of trees of class `phylo`, or a `multiPhylo` object; +#' or a `Splits` object. See #' [vignette](https://ms609.github.io/TreeTools/articles/load-trees.html) for #' possible methods of loading trees into R. #' @@ -21,29 +22,53 @@ #' Note that the three nodes at the root of the tree correspond to a single #' split; see the example for how these might be plotted on a tree. #' +#' If `reference` is `NULL`, then `SplitFrequency()` returns a `Splits` object +#' (in the order encountered in `forest`) with attribute `"count"` stating the +#' number of times each split occurs in `forest`. 
+#' #' @template exampleNodeSupport #' #' @template MRS #' @family Splits operations #' @export -SplitFrequency <- function(reference, forest) { - referenceSplits <- as.Splits(reference) - refLabels <- attr(referenceSplits, "tip.label") - forest <- lapply(forest, KeepTip, refLabels) - forestSplits <- as.Splits(forest, tipLabels = refLabels) - - logicals <- vapply(forestSplits, - function(cf) referenceSplits %in% cf, - logical(length(referenceSplits))) - ret <- if (is.null(dim(logicals))) { - sum(logicals) +SplitFrequency <- function(reference, forest = NULL) { + if (is.null(reference) || is.null(forest)) { + if (is.null(forest)) forest <- reference + if (length(unique(lapply(lapply(forest, TipLabels), sort))) > 1) { + stop("All trees must bear identical labels") + } + forestSplits <- do.call(c, as.Splits(forest, tipLabels = TipLabels(forest[[1]]))) + dup <- duplicated(forestSplits) + ret <- forestSplits[[!dup]] + logicals <- vapply(seq_along(forestSplits), + function(cf) ret %in% forestSplits[[cf]], + logical(sum(!dup))) + count <- if (is.null(dim(logicals))) { + sum(logicals) + } else { + rowSums(logicals) + } + attr(ret, "count") <- unname(count) + ret } else { - rowSums(logicals) + referenceSplits <- as.Splits(reference) + refLabels <- attr(referenceSplits, "tip.label") + forest <- lapply(forest, KeepTip, refLabels) + forestSplits <- as.Splits(forest, tipLabels = refLabels) + + logicals <- vapply(forestSplits, + function(cf) referenceSplits %in% cf, + logical(length(referenceSplits))) + ret <- if (is.null(dim(logicals))) { + sum(logicals) + } else { + rowSums(logicals) + } + names(ret) <- rownames(referenceSplits) + + # Return: + ret } - names(ret) <- rownames(referenceSplits) - - # Return: - ret } #' Label splits diff --git a/man/SplitFrequency.Rd b/man/SplitFrequency.Rd index 9e41c9e1..f3041386 100644 --- a/man/SplitFrequency.Rd +++ b/man/SplitFrequency.Rd @@ -4,13 +4,14 @@ \alias{SplitFrequency} \title{Frequency of splits} \usage{ -SplitFrequency(reference, 
forest) +SplitFrequency(reference, forest = NULL) } \arguments{ -\item{reference}{A tree of class \code{phylo}, a \code{Splits} object.} +\item{reference}{A tree of class \code{phylo}, or a \code{Splits} object. If \code{NULL}, +the frequencies of all splits in \code{forest} will be returned.} -\item{forest}{a list of trees of class \code{phylo}, or a \code{multiPhylo} object; or a -\code{Splits} object. See +\item{forest}{A list of trees of class \code{phylo}, or a \code{multiPhylo} object; +or a \code{Splits} object. See \href{https://ms609.github.io/TreeTools/articles/load-trees.html}{vignette} for possible methods of loading trees into R.} } @@ -21,6 +22,10 @@ If \code{reference} is a tree of class \code{phylo}, then the sequence will corr to the order of nodes (use \code{ape::nodelabels()} to view). Note that the three nodes at the root of the tree correspond to a single split; see the example for how these might be plotted on a tree. + +If \code{reference} is \code{NULL}, then \code{SplitFrequency()} returns a \code{Splits} object +(in the order encountered in \code{forest}) with attribute \code{"count"} stating the +number of times each split occurs in \code{forest}. 
} \description{ \code{SplitFrequency()} provides a simple way to count the number of times that diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index da72bf68..8ad2cbb2 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -6,9 +6,23 @@ test_that("Node supports calculated correctly", { swapBC = ape::read.tree(text = "((((((A,C),B),D),E),F),out);"), DbyA = ape::read.tree(text = "((((((A,D),C),B),E),F,G),out);") ) - expect_equal(c("10" = 4, "11" = 4, "12" = 4, "13" = 3), - SplitFrequency(treeSample$correct, treeSample)) - + expect_equal( + SplitFrequency(treeSample$correct, treeSample), + c("10" = 4, "11" = 4, "12" = 4, "13" = 3) + ) + + expect_error(SplitFrequency(NULL, treeSample), "must bear identical") + sameTips <- KeepTip(treeSample, TipLabels(treeSample$correct)) + sameSplits <- do.call(c, as.Splits(sameTips)) + expect_equal(SplitFrequency(sameTips), + structure(sameSplits[[!duplicated(sameSplits)]], + count = c(4, 4, 4, 3, 1, 1, 1, 1, 1)) + ) + + monoSplit <- ape::read.tree(text = "((a, b, c, d), (e, f, g));") + expect_equal(SplitFrequency(list(monoSplit)), + structure(as.Splits(monoSplit), count = 1)) + # Internal nodes on each side of root balanced <- ape::read.tree(text="((D, (E, (F, out))), (C, (A, B)));") freq <- SplitFrequency(balanced, treeSample)