Skip to content

Commit e9b4ce6

Browse files
authored
Draft implementation of HMI (#161)
1 parent 46b4550 commit e9b4ce6

39 files changed

Lines changed: 1756 additions & 33 deletions

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
CONTRIBUTING
1010
README.md
1111
^\.github
12+
^\.lintr
1213
cran-comments.md
1314
man-roxygen
1415
data-raw

.github/workflows/benchmark.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,28 @@ name: Benchmark
33
on:
44
workflow_dispatch:
55
pull_request:
6+
paths:
7+
- "src/**"
8+
- "R/**"
9+
- "**.R"
10+
- "**.cpp"
11+
- "**.c"
12+
- "**.h"
13+
- "**.hpp"
14+
- "configure*"
15+
- "Makevars*"
616
paths-ignore:
17+
- "DESCRIPTION"
18+
- ".**"
719
- "Meta**"
820
- "memcheck**"
921
- "docs**"
22+
- "inst**"
23+
- "man**"
24+
- "man-roxygen**"
25+
- "memcheck**"
26+
- "tests**"
27+
- "vignettes**"
1028
- "**.git"
1129
- "**.json"
1230
- "**.md"

.github/workflows/memcheck.yml

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,8 @@ on:
3434
name: mem-check
3535

3636
jobs:
37-
mem-check:
38-
runs-on: ubuntu-20.04
39-
# stringi requires libicui18n - apt get libicu-dev too recent,
40-
# libicu66 deprecated in ubuntu 22.04
41-
# Reinstalling stringi seems not to help
42-
37+
mem-check-templated:
38+
runs-on: ubuntu-24.04
4339
name: valgrind ${{ matrix.config.test }}, ubuntu, R release
4440

4541
strategy:
@@ -53,10 +49,32 @@ jobs:
5349
env:
5450
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
5551
_R_CHECK_FORCE_SUGGESTS_: false
56-
RSPM: https://packagemanager.rstudio.com/cran/__linux__/focal/latest
52+
RSPM: https://packagemanager.rstudio.com/cran/__linux__/noble/latest
5753
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
5854

5955
steps:
56+
- uses: ms609/actions/memcheck@main
57+
with:
58+
test: ${{ matrix.config.test}}
59+
60+
mem-check-legacy:
61+
runs-on: ubuntu-24.04
62+
name: valgrind ${{ matrix.config.test }}, ubuntu, R release
63+
64+
strategy:
65+
fail-fast: false
66+
matrix:
67+
config:
68+
- {test: 'tests'}
69+
- {test: 'examples'}
70+
- {test: 'vignettes'}
71+
72+
env:
73+
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
74+
_R_CHECK_FORCE_SUGGESTS_: false
75+
RSPM: https://packagemanager.rstudio.com/cran/__linux__/noble/latest
76+
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
77+
6078
- uses: actions/checkout@v5
6179

6280
- uses: r-lib/actions/setup-r@v2

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ src/*.dll
1818
*.utf8.md
1919
/* *.Rd */
2020
revdep/
21+
inst/__pycache__*
2122
vignettes/*.html
2223
vignettes/*.pdf
2324
vignettes/*_files

DESCRIPTION

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: TreeDist
22
Type: Package
33
Title: Calculate and Map Distances Between Phylogenetic Trees
4-
Version: 2.10.1.9001
4+
Version: 2.10.1.9002
55
Authors@R: c(person("Martin R.", "Smith",
66
email = "martin.smith@durham.ac.uk",
77
role = c("aut", "cre", "cph", "prg"),
@@ -23,6 +23,8 @@ Description: Implements measures of tree similarity, including
2323
including the Nye et al. (2006) metric <doi:10.1093/bioinformatics/bti720>;
2424
the Matching Split Distance (Bogdanowicz & Giaro 2012)
2525
<doi:10.1109/TCBB.2011.48>;
26+
the Hierarchical Mutual Information (Perotti et al. 2015)
27+
<doi:10.1103/PhysRevE.92.062825>;
2628
Maximum Agreement Subtree distances;
2729
the Kendall-Colijn (2016) distance <doi:10.1093/molbev/msw124>, and the
2830
Nearest Neighbour Interchange (NNI) distance, approximated per Li et al.

NAMESPACE

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ S3method(NNIDiameter,list)
2727
S3method(NNIDiameter,multiPhylo)
2828
S3method(NNIDiameter,numeric)
2929
S3method(NNIDiameter,phylo)
30+
S3method(NTip,HPart)
31+
S3method(RenumberTips,HPart)
3032
S3method(SPRDist,list)
3133
S3method(SPRDist,multiPhylo)
3234
S3method(SPRDist,phylo)
@@ -35,8 +37,17 @@ S3method(SplitwiseInfo,Splits)
3537
S3method(SplitwiseInfo,list)
3638
S3method(SplitwiseInfo,multiPhylo)
3739
S3method(SplitwiseInfo,phylo)
40+
S3method(as.HPart,HPart)
41+
S3method(as.HPart,default)
42+
S3method(as.HPart,list)
43+
S3method(as.HPart,phylo)
44+
S3method(as.phylo,HPart)
45+
S3method(clone,HPart)
3846
S3method(median,multiPhylo)
47+
S3method(plot,HPart)
48+
S3method(print,HPart)
3949
export(.TreeDistance)
50+
export(AHMI)
4051
export(AllSplitPairings)
4152
export(CalculateTreeDistance)
4253
export(ClusteringEntropy)
@@ -49,10 +60,15 @@ export(DifferentPhylogeneticInfo)
4960
export(DisplayMatching)
5061
export(DistFromMed)
5162
export(DistanceFromMedian)
63+
export(EHMI)
5264
export(Entropy)
5365
export(ExpectedVariation)
5466
export(GeneralizedRF)
5567
export(GetParallel)
68+
export(HH)
69+
export(HMI)
70+
export(HierarchicalMutualInfo)
71+
export(HierarchicalMutualInformation)
5672
export(InfoRobinsonFoulds)
5773
export(InfoRobinsonFouldsSplits)
5874
export(Islands)
@@ -105,6 +121,7 @@ export(RobinsonFouldsInfo)
105121
export(RobinsonFouldsMatching)
106122
export(RobinsonFouldsSplits)
107123
export(SPRDist)
124+
export(SelfHMI)
108125
export(SetParallel)
109126
export(SharedPhylogeneticInfo)
110127
export(SharedPhylogeneticInfoSplits)
@@ -127,7 +144,10 @@ export(TreeDistance)
127144
export(TreesConsistentWithTwoSplits)
128145
export(VisualiseMatching)
129146
export(VisualizeMatching)
147+
export(as.HPart)
148+
export(clone)
130149
export(entropy_int)
150+
export(is.HPart)
131151
importFrom(Rdpack,reprompt)
132152
importFrom(TreeTools,AllAncestors)
133153
importFrom(TreeTools,DropTip)
@@ -141,6 +161,7 @@ importFrom(TreeTools,Log2Unrooted)
141161
importFrom(TreeTools,Log2Unrooted.int)
142162
importFrom(TreeTools,MSTEdges)
143163
importFrom(TreeTools,MSTLength)
164+
importFrom(TreeTools,MatchStrings)
144165
importFrom(TreeTools,NRooted)
145166
importFrom(TreeTools,NSplits)
146167
importFrom(TreeTools,NTip)
@@ -162,6 +183,7 @@ importFrom(TreeTools,as.ClusterTable)
162183
importFrom(TreeTools,as.Splits)
163184
importFrom(TreeTools,edge_to_splits)
164185
importFrom(ape,Nnode.phylo)
186+
importFrom(ape,as.phylo)
165187
importFrom(ape,drop.tip)
166188
importFrom(ape,edgelabels)
167189
importFrom(ape,nodelabels)

NEWS.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
# TreeDist 2.10.1.9001 (development)
1+
# TreeDist 2.10.1.9002 (development)
22

3+
- `HierarchicalMutualInformation()` calculates the information shared between
4+
pairs of hierarchical partition structures <doi:10.1103/PhysRevE.92.062825>.
35
- Fix crash in `robinson_foulds_all_pairs()` and `RobinsonFoulds(list)`.
46
- Fix bug in calculation of `MutualClusteringInfo()`: greedy optimization
57
was not guaranteed to find globally optimal matching, causing distances to be

R/HPart.R

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#' Hierarchical partition structure
2+
#'
3+
#' A structure of class `HPart` comprises a pointer to a C++ representation of
4+
#' hierarchical partitions, with the attribute `tip.label` recording the
5+
#' character labels of its leaves. `HPart` objects with identical tip labels
6+
#' can be compared using [`HierarchicalMutualInfo()`].
7+
#'
8+
#'
9+
#' An `HPart` object may be created from various representations of hierarchical
10+
#' structures:
11+
#'
12+
#' - a tree (possibly phylogenetic) of class `phylo`
13+
#' - A hierarchical list of lists, in which elements are represented by integers
14+
#' 1\dots{}n
15+
#' - A vector, which will be interpreted as a flat structure
16+
#' in which all elements bearing the same label are assigned to the same cluster
17+
#'
18+
#' @param tree An object to convert to an HPart structure, in a supported format
19+
#' (see details).
20+
#' @returns `HPart()` returns a structure containing a pointer to a C++
21+
#' representation of a hierarchical partition structure.
22+
#' @name HPart
23+
#' @export
24+
as.HPart <- function(tree, tipLabels) {
25+
UseMethod("as.HPart")
26+
}
27+
28+
#' @export
29+
#' @rdname HPart
30+
as.HPart.HPart <- function(tree, tipLabels = NULL) {
31+
if (is.null(tipLabels) || identical(tipLabels, TipLabels(tree))) {
32+
tree
33+
} else {
34+
RenumberTips(tree, tipLabels)
35+
}
36+
}
37+
38+
#' @rdname HPart
39+
#' @export
40+
as.HPart.default <- function(tree, tipLabels = NULL) {
41+
if (is.null(dim(tree))) {
42+
structure(build_hpart_from_list(
43+
lapply(unique(tree), function(x) as.list(which(tree == x))),
44+
length(tree)),
45+
tip.label = seq_along(tree),
46+
class = "HPart"
47+
)
48+
} else {
49+
stop("no applicable method for 'as.HPart' applied to an object of class \"",
50+
paste(class(tree), collapse = "\", \""), "\"")
51+
}
52+
}
53+
54+
55+
#' @rdname HPart
56+
#' @export
57+
as.HPart.list <- function(tree, tipLabels = NULL) {
58+
# Flatten to verify leaves
59+
leaves <- unlist(tree, recursive = TRUE)
60+
if (!all(is.numeric(leaves)) || any(leaves != as.integer(leaves))) {
61+
stop("All leaves must be integers.")
62+
}
63+
tree <- rapply(tree, as.integer, how = "replace")
64+
if (0 %in% leaves) {
65+
tree <- rapply(tree, function(x) x + 1L, how = "replace")
66+
leaves <- leaves + 1
67+
}
68+
n_tip <- length(unique(leaves))
69+
expected <- seq_len(n_tip)
70+
if (!isTRUE(all.equal(sort(leaves), expected))) {
71+
stop("Leaves must contain each integer 1..n exactly once")
72+
}
73+
74+
hpart_ptr <- build_hpart_from_list(tree, n_tip)
75+
ret <- structure(hpart_ptr, tip.label = as.character(expected), class = "HPart")
76+
if (!is.null(tipLabels) && !is.list(tipLabels)) {
77+
RenumberTips(ret, tipLabels)
78+
}
79+
ret
80+
}
81+
82+
#' @export
83+
#' @inheritParams TreeTools::as.ClusterTable
84+
#' @rdname HPart
85+
as.HPart.phylo <- function(tree, tipLabels = TipLabels(tree)) {
86+
if (!identical(TipLabels(tree), tipLabels)) {
87+
tree <- RenumberTips(tree, tipLabels)
88+
}
89+
structure(build_hpart_from_phylo(tree), tip.label = tipLabels,
90+
class = "HPart")
91+
}
92+
93+
#' @rdname HPart
94+
#' @export
95+
is.HPart <- function(x) {
96+
inherits(x, "HPart")
97+
}
98+
99+
#' @export
100+
NTip.HPart <- function(phy) {
101+
length(TipLabels(phy))
102+
}
103+
104+
#' @rdname HPart
105+
#' @export
106+
print.HPart <- function(x, ...) {
107+
nTip <- NTip(x)
108+
tips <- TipLabels(x)
109+
cat("Hierarchical partition on", nTip, "leaves: ")
110+
if (nTip > 5) {
111+
cat(paste0(c(tips[1:2], "...", tips[length(tips) - 1:0]), collapse = ", "))
112+
} else {
113+
cat(paste0(tips, collapse = ", "))
114+
}
115+
}
116+
117+
#' @rdname HPart
118+
#' @importFrom ape as.phylo
119+
#' @export
120+
as.phylo.HPart <- function(x, ...) {
121+
edge <- hpart_to_edge(x)
122+
labels <- TipLabels(x)
123+
nNode <- dim(edge)[[1]] - length(labels) + 1
124+
structure(list(edge = edge, Nnode = nNode, tip.label = labels),
125+
class = "phylo",
126+
order = "cladewise")
127+
}
128+
129+
#' @rdname HPart
130+
#' @param x `HPart` object to plot.
131+
#' @param \dots Additional arguments to \code{\link[ape:plot.phylo]{plot.phylo}}.
132+
#' @export
133+
plot.HPart <- function(x, ...) {
134+
plot(as.phylo(x), ...)
135+
}
136+
137+
#' Clone / duplicate an object
138+
#' `clone()` physically duplicates objects
139+
#' @param x the object to be cloned
140+
#' @param \dots additional parameters for methods
141+
#' @return `clone()` typically returns an object of the same class and "value"
142+
#' as the input `x`.
143+
#' @export
144+
clone <- function(x, ...) UseMethod("clone")
145+
146+
#' @template MRS
147+
#' @rdname clone
148+
#' @inheritParams TreeTools::as.ClusterTable
149+
#' @export
150+
clone.HPart <- function(x, tipLabels = attr(x, "tip.label"), ...) {
151+
structure(clone_hpart(x), tip.label = tipLabels,
152+
class = "HPart")
153+
}
154+
155+
#' @importFrom TreeTools MatchStrings
156+
#' @inheritParams TreeTools::RenumberTips
157+
#' @export
158+
RenumberTips.HPart <- function(tree, tipOrder) {
159+
startOrder <- TipLabels(tree)
160+
newOrder <- MatchStrings(TipLabels(tipOrder, single = TRUE), startOrder)
161+
162+
if (!identical(newOrder, startOrder)) {
163+
if (length(newOrder) != length(startOrder)) {
164+
stop("Tree labels ", paste0(setdiff(startOrder, tipOrder), collapse = ", "),
165+
" missing from `tipOrder`")
166+
}
167+
newIndices <- match(newOrder, startOrder)
168+
tree <- clone(tree, newOrder)
169+
relabel_hpart(tree, newIndices - 1L)
170+
# Return:
171+
tree
172+
} else {
173+
clone(tree)
174+
}
175+
}

0 commit comments

Comments
 (0)