Skip to content
Open
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,10 @@ cpp_mci_impl_score <- function(x, y, n_tips) {
.Call(`_TreeDist_cpp_mci_impl_score`, x, y, n_tips)
}

cpp_max_tips <- function() {
.Call(`_TreeDist_cpp_max_tips`)
}

cpp_robinson_foulds_distance <- function(x, y, nTip) {
.Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip)
}
Expand Down
4 changes: 2 additions & 2 deletions R/transfer_consensus.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ TransferConsensus <- function(trees,
if (nTip < 4L) {
return(StarTree(tipLabels))
}
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

# Convert each tree to a raw split matrix (TreeTools C++ internally).
# as.Splits() will error if a tree's tips don't match tipLabels.
Expand Down Expand Up @@ -115,7 +115,7 @@ tc_profile <- function(trees, scale = TRUE, greedy = "best",
tipLabels <- TipLabels(trees[[1]])
nTip <- length(tipLabels)
if (nTip < 4L) stop("Need at least 4 tips for profiling.")
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

splitsList <- lapply(trees, function(tr) unclass(as.Splits(tr, tipLabels)))

Expand Down
4 changes: 2 additions & 2 deletions R/tree_distance.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer,
}
nTip <- length(tipLabels)
if (nTip < 4) return(NULL) # nocov
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

splits_list <- as.Splits(tree1, tipLabels = tipLabels)
n_threads <- as.integer(getOption("mc.cores", 1L))
Expand Down Expand Up @@ -203,7 +203,7 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer,

nTip <- length(tipLabels1)
if (nTip < 4) return(NULL)
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

splits1 <- as.Splits(tree1, tipLabels = tipLabels1)
splits2 <- as.Splits(tree2, tipLabels = tipLabels1) # Use tipLabels1 to ensure order consistency
Expand Down
5 changes: 3 additions & 2 deletions R/tree_distance_mast.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,9 @@ MASTSize <- function(tree1, tree2 = tree1, rooted = TRUE) {
if (nrow(edge1) != nrow(edge2)) {
stop("Both trees must contain the same number of edges.")
}
if (nTip > 4096L) {
stop("Tree too large; please contact maintainer for advice.")
maxTips <- min(4096L, cpp_max_tips())
if (nTip > maxTips) {
stop("Trees with > ", maxTips, " tips are not yet supported for MAST.")
}
cpp_mast(edge1 - 1L, Postorder(edge2) - 1L, nTip)
}
Expand Down
4 changes: 1 addition & 3 deletions R/tree_distance_nni.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ NNIDist <- function(tree1, tree2 = tree1) {
#' @importFrom TreeTools Postorder RenumberTips
#' @importFrom ape Nnode.phylo
.NNIDistSingle <- function(tree1, tree2, nTip, ...) {
if (nTip > 32768L) {
stop("Cannot calculate NNI distance for trees with so many tips.")
}
.CheckMaxTips(nTip, "NNI")
if (nrow(tree1[["edge"]]) != nrow(tree2[["edge"]])) {
stop("Both trees must have the same number of edges. ",
"Is one rooted and the other unrooted?")
Expand Down
4 changes: 2 additions & 2 deletions R/tree_distance_transfer.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ TransferDistSplits <- function(splits1, splits2,
if (is.null(tipLabels)) return(NULL)
nTip <- length(tipLabels)
if (nTip < 4L) return(NULL)
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

# Check all trees share same tip set
allLabels <- TipLabels(tree1)
Expand Down Expand Up @@ -211,7 +211,7 @@ TransferDistSplits <- function(splits1, splits2,
if (is.null(tipLabels)) return(NULL)
nTip <- length(tipLabels)
if (nTip < 4L) return(NULL)
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

# Check all trees share same tip set
allLabels1 <- TipLabels(trees1)
Expand Down
46 changes: 33 additions & 13 deletions R/tree_distance_utilities.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
.CompiledTipLimit <- local({
cached <- NA_integer_
function() {
if (is.na(cached) || cached <= 0L) {
cached <<- cpp_max_tips()
}
cached
}
})

.CheckMaxTips <- function(nTip, context = "") {
if (is.na(nTip)) {
return(invisible(NULL))
}

# Compiled limit from C++ integer types (not TreeTools stack thresholds).
maxTips <- .CompiledTipLimit()
if (nTip > maxTips) {
suffix <- if (!nzchar(context)) "." else paste0(" for ", context, ".")
stop("Trees with > ", maxTips, " tips are not yet supported", suffix)
}

# NNI uses fixed-size lookup tables in C++.
if (identical(context, "NNI") && nTip > 32768L) {
stop("Trees with > 32768 tips are not yet supported for NNI.")
}
invisible(NULL)
}

#' Wrapper for tree distance calculations
#'
#' Calls tree distance functions from trees or lists of trees
Expand All @@ -11,15 +40,6 @@
#' @importFrom TreeTools as.Splits TipLabels
#' @importFrom utils combn
#' @export
# Keep in sync with C++ guard: min(SL_MAX_TIPS, int16_t::max()).
.MaxSupportedTips <- 32767L

.AssertNtipSupported <- function(nTip) {
if (!is.na(nTip) && nTip > .MaxSupportedTips) {
stop("This many tips are not (yet) supported.")
}
}

CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
reportMatching = FALSE, ...) {
supportedClasses <- c("phylo", "Splits")
Expand Down Expand Up @@ -141,7 +161,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
# Fast paths: use OpenMP batch functions when all trees share the same tip
# set and no R-level cluster has been configured. Each branch mirrors the
# generic path exactly but avoids per-pair R overhead.
.AssertNtipSupported(nTip)
.CheckMaxTips(nTip)
if (!is.na(nTip) && is.null(cluster)) {
.n_threads <- as.integer(getOption("mc.cores", 1L))
.batch_result <- if (identical(Func, MutualClusteringInfoSplits)) {
Expand Down Expand Up @@ -242,7 +262,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
#' @importFrom stats setNames
.SplitDistanceManyMany <- function(Func, splits1, splits2,
tipLabels, nTip = length(tipLabels), ...) {
.AssertNtipSupported(nTip)
.CheckMaxTips(nTip)
if (is.na(nTip)) {
tipLabels <- union(unlist(tipLabels, use.names = FALSE),
unlist(TipLabels(splits2), use.names = FALSE))
Expand Down Expand Up @@ -331,7 +351,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
#' @param checks Logical specifying whether to perform basic sanity checks to
#' avoid crashes in C++.
#' @keywords internal
#' @seealso [`CalculateTreeDistance`]
#' @seealso [`CalculateTreeDistance()`]
#' @export
.TreeDistance <- function(Func, tree1, tree2, checks = TRUE, ...) {
single1 <- inherits(tree1, "phylo")
Expand Down Expand Up @@ -413,7 +433,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
if (ncol(x) != ncol(y)) {
stop("Input splits must address same number of tips.")
}
.AssertNtipSupported(nTip)
.CheckMaxTips(nTip)
}

.CheckLabelsSame <- function(labelList) {
Expand Down
7 changes: 4 additions & 3 deletions R/tree_information.R
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,10 @@ consensus_info <- function(trees, phylo, p) {
stop("p must be >= 0.5 in consensus_info()")
}
nTip <- NTip(trees[[1]])
# CT_MAX_LEAVES = 16383 in information.h (lookup table size limit)
if (nTip > 16383L) {
stop("This many leaves are not yet supported")
# CT_MAX_LEAVES = 16383 in information.h (lookup-table size limit).
maxTips <- min(16383L, cpp_max_tips())
if (nTip > maxTips) {
stop("Trees with > ", maxTips, " tips are not yet supported for consensus info.")
}
.Call(`_TreeDist_consensus_info`, trees, phylo, p)
}
Expand Down
55 changes: 46 additions & 9 deletions inst/include/TreeDist/mutual_clustering.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,18 @@

namespace TreeDist {

constexpr double LOG2_E = 1.4426950408889634;

// ---- Lookup tables (populated by init_lg2_tables) ----
//
// lg2[i] = log2(i) for 0 <= i <= SL_MAX_TIPS
// lg2_double_factorial[i] = log2(i!!) for 0 <= i < 2*SL_MAX_TIPS - 2
// lg2_unrooted[i] = log2((2i-5)!!) for i >= 3
// lg2_rooted = &lg2_unrooted[0] + 1 (so lg2_rooted[i] = lg2_unrooted[i+1])
//
// These are defined in mutual_clustering_impl.h.
// These are fast-path caches sized to TreeTools' stack threshold.
// For larger trees we fall back to on-the-fly computation.
// Definitions are in mutual_clustering_impl.h.

extern double lg2[SL_MAX_TIPS + 1];
extern double lg2_double_factorial[SL_MAX_TIPS + SL_MAX_TIPS - 2];
Expand All @@ -33,15 +37,48 @@ namespace TreeDist {
// computation. max_tips should be >= the largest tree size used.
void init_lg2_tables(int max_tips);

[[nodiscard]] inline double lg2_lookup(split_int x) noexcept {
if (x <= static_cast<split_int>(SL_MAX_TIPS)) {
return lg2[x];
}
return std::log2(static_cast<double>(x));
}

[[nodiscard]] inline double lg2_unrooted_lookup(split_int n_tips) noexcept {
if (n_tips <= static_cast<split_int>(SL_MAX_TIPS + 1)) {
return lg2_unrooted[n_tips];
}
if (n_tips < 3) { // LCOV_EXCL_START
return 0.0; // LCOV_EXCL_STOP
}
const double n = static_cast<double>(n_tips);
// log2((2n - 5)!!) = log2((2n - 4)!) - (n - 2) - log2((n - 2)!)
return (std::lgamma((2.0 * n) - 3.0) - std::lgamma(n - 1.0)) * LOG2_E
- (n - 2.0);
}

[[nodiscard]] inline double lg2_rooted_lookup(split_int n_tips) noexcept {
if (n_tips <= static_cast<split_int>(SL_MAX_TIPS + 1)) {
return lg2_rooted[n_tips];
}
if (n_tips < 2) { // LCOV_EXCL_START
return 0.0; // LCOV_EXCL_STOP
}
const double n = static_cast<double>(n_tips);
// log2((2n - 3)!!) = log2((2n - 2)!) - (n - 1) - log2((n - 1)!)
return (std::lgamma((2.0 * n) - 1.0) - std::lgamma(n)) * LOG2_E
- (n - 1.0);
}

// ---- Inline helpers ----

// Information content of a perfectly-matching split pair.
// ic_matching(a, b, n) = (a + b) * lg2[n] - a * lg2[a] - b * lg2[b]
[[nodiscard]] inline double ic_matching(int16 a, int16 b,
int16 n) noexcept {
const double lg2a = lg2[a];
const double lg2b = lg2[b];
const double lg2n = lg2[n];
[[nodiscard]] inline double ic_matching(split_int a, split_int b,
split_int n) noexcept {
const double lg2a = lg2_lookup(a);
const double lg2b = lg2_lookup(b);
const double lg2n = lg2_lookup(n);
return (a + b) * lg2n - a * lg2a - b * lg2b;
}

Expand Down Expand Up @@ -77,9 +114,9 @@ namespace TreeDist {
// Implementation in mutual_clustering_impl.h.

double mutual_clustering_score(
const splitbit* const* a_state, const int16* a_in, int16 a_n_splits,
const splitbit* const* b_state, const int16* b_in, int16 b_n_splits,
int16 n_bins, int32 n_tips,
const splitbit* const* a_state, const split_int* a_in, split_int a_n_splits,
const splitbit* const* b_state, const split_int* b_in, split_int b_n_splits,
split_int n_bins, int32 n_tips,
LapScratch& scratch);

} // namespace TreeDist
Expand Down
Loading
Loading