diff --git a/.github/workflows/memcheck.yml b/.github/workflows/memcheck.yml index 2b9a3c8f..dbf58e22 100644 --- a/.github/workflows/memcheck.yml +++ b/.github/workflows/memcheck.yml @@ -6,6 +6,7 @@ on: branches: - main - master + - '**valgrind**' paths: - '.github/workflows/memcheck.yml' - 'src/**' @@ -30,7 +31,8 @@ name: mem-check jobs: mem-check: runs-on: ubuntu-24.04 - name: valgrind ${{ matrix.config.test }}, ubuntu, R release + + name: valgrind ${{ matrix.config.test }} strategy: fail-fast: false @@ -39,12 +41,13 @@ jobs: - {test: 'tests'} - {test: 'examples'} - {test: 'vignettes'} - + env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true _R_CHECK_FORCE_SUGGESTS_: false RSPM: https://packagemanager.rstudio.com/cran/__linux__/noble/latest GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + ASAN_OPTIONS: verify_asan_link_order=0 steps: - uses: ms609/actions/memcheck@main diff --git a/DESCRIPTION b/DESCRIPTION index bb031a33..504ffdb4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TreeDist Title: Calculate and Map Distances Between Phylogenetic Trees -Version: 2.13.0.9002 +Version: 2.13.0.9003 Authors@R: c(person("Martin R.", "Smith", email = "martin.smith@durham.ac.uk", role = c("aut", "cre", "cph", "prg"), diff --git a/NEWS.md b/NEWS.md index b3829196..a051898a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# TreeDist 2.13.0.9002 +# TreeDist 2.13.0.9003 ## New features @@ -17,6 +17,12 @@ Information) C++ implementations are now exposed via `inst/include/TreeDist/` headers, allowing downstream packages to use `LinkingTo: TreeDist`. +## Internals + +- Stack-allocated split buffers replaced with dynamically-sized vectors, + removing a hard dependency on the compile-time `SL_MAX_SPLITS` constant. + TreeDist now supports trees of any size permitted by TreeTools. + ## Performance - `RobinsonFoulds()` now uses a fast C++ batch path for cross-distance diff --git a/R/RcppExports.R b/R/RcppExports.R index 7cd2fcd0..8dd5b1a2 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -275,6 +275,10 @@ cpp_mci_impl_score <- function(x, y, n_tips) { .Call(`_TreeDist_cpp_mci_impl_score`, x, y, n_tips) } +cpp_sl_max_tips <- function() { + .Call(`_TreeDist_cpp_sl_max_tips`) +} + cpp_robinson_foulds_distance <- function(x, y, nTip) { .Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip) } diff --git a/R/transfer_consensus.R b/R/transfer_consensus.R index 3410cf09..6cef4490 100644 --- a/R/transfer_consensus.R +++ b/R/transfer_consensus.R @@ -66,7 +66,7 @@ TransferConsensus <- function(trees, if (nTip < 4L) { return(StarTree(tipLabels)) } - if (nTip > 32767L) stop("This many tips are not (yet) supported.") + .CheckMaxTips(nTip) # Convert each tree to a raw split matrix (TreeTools C++ internally). # as.Splits() will error if a tree's tips don't match tipLabels. @@ -115,7 +115,7 @@ tc_profile <- function(trees, scale = TRUE, greedy = "best", tipLabels <- TipLabels(trees[[1]]) nTip <- length(tipLabels) if (nTip < 4L) stop("Need at least 4 tips for profiling.") - if (nTip > 32767L) stop("This many tips are not (yet) supported.") + .CheckMaxTips(nTip) splitsList <- lapply(trees, function(tr) unclass(as.Splits(tr, tipLabels))) diff --git a/R/tree_distance.R b/R/tree_distance.R index 11bf9e91..5ea3a9cb 100644 --- a/R/tree_distance.R +++ b/R/tree_distance.R @@ -149,7 +149,7 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer, } nTip <- length(tipLabels) if (nTip < 4) return(NULL) # nocov - if (nTip > 32767L) stop("This many tips are not (yet) supported.") + .CheckMaxTips(nTip) splits_list <- as.Splits(tree1, tipLabels = tipLabels) n_threads <- as.integer(getOption("mc.cores", 1L)) @@ -203,7 +203,7 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer, nTip <- length(tipLabels1) if (nTip < 4) return(NULL) - if (nTip > 32767L) stop("This many tips are not (yet) supported.") + .CheckMaxTips(nTip) splits1 <- as.Splits(tree1, tipLabels = tipLabels1) splits2 <- as.Splits(tree2, tipLabels = tipLabels1) # Use tipLabels1 to ensure order consistency diff --git a/R/tree_distance_transfer.R b/R/tree_distance_transfer.R index cd0942fd..55dce2e2 100644 --- a/R/tree_distance_transfer.R +++ b/R/tree_distance_transfer.R @@ -168,7 +168,7 @@ TransferDistSplits <- function(splits1, splits2, if (is.null(tipLabels)) return(NULL) nTip <- length(tipLabels) if (nTip < 4L) return(NULL) - if (nTip > 32767L) stop("This many tips are not (yet) supported.") + .CheckMaxTips(nTip) # Check all trees share same tip set allLabels <- TipLabels(tree1) @@ -211,7 +211,7 @@ TransferDistSplits <- function(splits1, splits2, if (is.null(tipLabels)) return(NULL) nTip <- length(tipLabels) if (nTip < 4L) return(NULL) - if (nTip > 32767L) stop("This many tips are not (yet) supported.") + .CheckMaxTips(nTip) # Check all trees share same tip set allLabels1 <- TipLabels(trees1) diff --git a/R/tree_distance_utilities.R b/R/tree_distance_utilities.R index 01b0ddca..b85fab32 100644 --- a/R/tree_distance_utilities.R +++ b/R/tree_distance_utilities.R @@ -1,3 +1,19 @@ +# Validate that nTip does not exceed the compiled SL_MAX_TIPS limit. +# Called from every distance entry point before any C++ work. +.CheckMaxTips <- function(nTip) { + if (!is.na(nTip) && nTip > .SL_MAX_TIPS) { + if (.SL_MAX_TIPS < 32704L) { + stop( + "Trees with ", nTip, " tips exceed the compiled limit of ", + .SL_MAX_TIPS, " tips.", + "\nUpdate TreeTools and reinstall TreeDist to support more tips." + ) + } + stop("Trees with ", nTip, " tips are not yet supported (maximum ", + .SL_MAX_TIPS, ")") + } +} + #' Wrapper for tree distance calculations #' #' Calls tree distance functions from trees or lists of trees @@ -12,14 +28,6 @@ #' @importFrom utils combn #' @export # Keep in sync with C++ guard: min(SL_MAX_TIPS, int16_t::max()). -.MaxSupportedTips <- 32767L - -.AssertNtipSupported <- function(nTip) { - if (!is.na(nTip) && nTip > .MaxSupportedTips) { - stop("This many tips are not (yet) supported.") - } -} - CalculateTreeDistance <- function(Func, tree1, tree2 = NULL, reportMatching = FALSE, ...) { supportedClasses <- c("phylo", "Splits") @@ -141,7 +149,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL, # Fast paths: use OpenMP batch functions when all trees share the same tip # set and no R-level cluster has been configured. Each branch mirrors the # generic path exactly but avoids per-pair R overhead. - .AssertNtipSupported(nTip) + .CheckMaxTips(nTip) if (!is.na(nTip) && is.null(cluster)) { .n_threads <- as.integer(getOption("mc.cores", 1L)) .batch_result <- if (identical(Func, MutualClusteringInfoSplits)) { @@ -242,7 +250,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL, #' @importFrom stats setNames .SplitDistanceManyMany <- function(Func, splits1, splits2, tipLabels, nTip = length(tipLabels), ...) { - .AssertNtipSupported(nTip) + .CheckMaxTips(nTip) if (is.na(nTip)) { tipLabels <- union(unlist(tipLabels, use.names = FALSE), unlist(TipLabels(splits2), use.names = FALSE)) @@ -331,7 +339,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL, #' @param checks Logical specifying whether to perform basic sanity checks to #' avoid crashes in C++. #' @keywords internal -#' @seealso [`CalculateTreeDistance`] +#' @seealso [`CalculateTreeDistance()`] #' @export .TreeDistance <- function(Func, tree1, tree2, checks = TRUE, ...) { single1 <- inherits(tree1, "phylo") @@ -413,7 +421,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL, if (ncol(x) != ncol(y)) { stop("Input splits must address same number of tips.") } - .AssertNtipSupported(nTip) + .CheckMaxTips(nTip) } .CheckLabelsSame <- function(labelList) { diff --git a/R/zzz.R b/R/zzz.R index 826528ea..614ebbda 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,9 @@ +.SL_MAX_TIPS <- NULL # populated in .onLoad + +.onLoad <- function(libname, pkgname) { + .SL_MAX_TIPS <<- cpp_sl_max_tips() +} + .onUnload <- function(libpath) { StopParallel(quietly = TRUE) library.dynam.unload("TreeDist", libpath) diff --git a/man/dot-TreeDistance.Rd b/man/dot-TreeDistance.Rd index a704ea42..5df43547 100644 --- a/man/dot-TreeDistance.Rd +++ b/man/dot-TreeDistance.Rd @@ -14,7 +14,7 @@ avoid crashes in C++.} Calculate distance between trees, or lists of trees } \seealso{ -\code{\link{CalculateTreeDistance}} +\code{\link[=CalculateTreeDistance]{CalculateTreeDistance()}} } \author{ \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith} diff --git a/memcheck/all.R b/memcheck/all.R deleted file mode 100644 index 622f618f..00000000 --- a/memcheck/all.R +++ /dev/null @@ -1,4 +0,0 @@ -devtools::load_all() -devtools::run_examples() -devtools::build_vignettes() -devtools::test() \ No newline at end of file diff --git a/memcheck/tests.R b/memcheck/tests.R index 4456a9d4..e85bebd2 100644 --- a/memcheck/tests.R +++ b/memcheck/tests.R @@ -1,5 +1,4 @@ -# Code to be run with -# R -d "valgrind --tool=memcheck --leak-check=full" --vanilla < tests/thisfile.R -# First build and install the package. +# Code to be run with +# R -d "valgrind --tool=memcheck --leak-check=full --error-exitcode=1" --vanilla < memcheck/thisfile.R library("TreeDist") devtools::test() diff --git a/memcheck/vignettes.R b/memcheck/vignettes.R index 74430cd1..0c02f716 100644 --- a/memcheck/vignettes.R +++ b/memcheck/vignettes.R @@ -1,5 +1,3 @@ -# Code to be run with -# R -d "valgrind --tool=memcheck --leak-check=full" --vanilla < tests/thisfile.R -# First build and install the package. -library("TreeDist") -devtools::build_vignettes() +# Code to be run with +# R -d "valgrind --tool=memcheck --leak-check=full --error-exitcode=1" --vanilla < memcheck/thisfile.R +devtools::build_vignettes(install = FALSE) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index be5b8963..c78c3efc 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -636,6 +636,16 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// cpp_sl_max_tips +int cpp_sl_max_tips(); +RcppExport SEXP _TreeDist_cpp_sl_max_tips() { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_result_gen = Rcpp::wrap(cpp_sl_max_tips()); + return rcpp_result_gen; +END_RCPP +} // cpp_robinson_foulds_distance List cpp_robinson_foulds_distance(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip); RcppExport SEXP _TreeDist_cpp_robinson_foulds_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { @@ -780,6 +790,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeDist_cpp_transfer_dist_all_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_all_pairs, 4}, {"_TreeDist_cpp_transfer_dist_cross_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_cross_pairs, 5}, {"_TreeDist_cpp_mci_impl_score", (DL_FUNC) &_TreeDist_cpp_mci_impl_score, 3}, + {"_TreeDist_cpp_sl_max_tips", (DL_FUNC) &_TreeDist_cpp_sl_max_tips, 0}, {"_TreeDist_cpp_robinson_foulds_distance", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_distance, 3}, {"_TreeDist_cpp_robinson_foulds_info", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_info, 3}, {"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3}, diff --git a/src/pairwise_distances.cpp b/src/pairwise_distances.cpp index c66b770f..67db48ab 100644 --- a/src/pairwise_distances.cpp +++ b/src/pairwise_distances.cpp @@ -305,6 +305,7 @@ NumericVector cpp_mutual_clustering_all_pairs( const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int N = splits_list.size(); if (N < 2) return NumericVector(0); @@ -397,6 +398,7 @@ NumericVector cpp_rf_info_all_pairs( const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int N = splits_list.size(); if (N < 2) return NumericVector(0); const int n_pairs = N * (N - 1) / 2; @@ -516,6 +518,7 @@ NumericVector cpp_msd_all_pairs( const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int N = splits_list.size(); if (N < 2) return NumericVector(0); const int n_pairs = N * (N - 1) / 2; @@ -618,6 +621,7 @@ NumericVector cpp_msi_all_pairs( const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int N = splits_list.size(); if (N < 2) return NumericVector(0); const int n_pairs = N * (N - 1) / 2; @@ -710,6 +714,7 @@ NumericVector cpp_shared_phylo_all_pairs( const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int N = splits_list.size(); if (N < 2) return NumericVector(0); const int n_pairs = N * (N - 1) / 2; @@ -875,6 +880,7 @@ NumericVector cpp_jaccard_all_pairs( const bool allow_conflict = true, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int N = splits_list.size(); if (N < 2) return NumericVector(0); const int n_pairs = N * (N - 1) / 2; @@ -944,6 +950,7 @@ NumericMatrix cpp_mutual_clustering_cross_pairs( const List& splits_a, const List& splits_b, const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int nA = splits_a.size(); const int nB = splits_b.size(); if (nA == 0 || nB == 0) return NumericMatrix(nA, nB); @@ -987,6 +994,7 @@ NumericMatrix cpp_rf_info_cross_pairs( const List& splits_a, const List& splits_b, const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int nA = splits_a.size(); const int nB = splits_b.size(); if (nA == 0 || nB == 0) return NumericMatrix(nA, nB); @@ -1027,6 +1035,7 @@ NumericMatrix cpp_msd_cross_pairs( const List& splits_a, const List& splits_b, const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int nA = splits_a.size(); const int nB = splits_b.size(); if (nA == 0 || nB == 0) return NumericMatrix(nA, nB); @@ -1070,6 +1079,7 @@ NumericMatrix cpp_msi_cross_pairs( const List& splits_a, const List& splits_b, const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int nA = splits_a.size(); const int nB = splits_b.size(); if (nA == 0 || nB == 0) return NumericMatrix(nA, nB); @@ -1110,6 +1120,7 @@ NumericMatrix cpp_shared_phylo_cross_pairs( const List& splits_a, const List& splits_b, const int n_tip, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int nA = splits_a.size(); const int nB = splits_b.size(); if (nA == 0 || nB == 0) return NumericMatrix(nA, nB); @@ -1153,6 +1164,7 @@ NumericMatrix cpp_jaccard_cross_pairs( const bool allow_conflict = true, const int n_threads = 1 ) { + TreeDist::check_ntip(n_tip); const int nA = splits_a.size(); const int nB = splits_b.size(); if (nA == 0 || nB == 0) return NumericMatrix(nA, nB); @@ -1206,6 +1218,7 @@ NumericVector cpp_clustering_entropy_batch( const List& splits_list, const int n_tip ) { + TreeDist::check_ntip(n_tip); const int N = splits_list.size(); NumericVector result(N); if (N == 0 || n_tip <= 0) return result; @@ -1239,6 +1252,7 @@ NumericVector cpp_splitwise_info_batch( const List& splits_list, const int n_tip ) { + TreeDist::check_ntip(n_tip); const int N = splits_list.size(); NumericVector result(N); if (N == 0 || n_tip < 4) return result; diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index f79b48da..0da45e65 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -29,22 +29,29 @@ namespace TreeDist { } } - void check_ntip(const double n) { - // SplitList dimensions are bounded by SL_MAX_TIPS, and current scoring - // paths use int16-sized counts internally. - static_assert(SL_MAX_TIPS <= std::numeric_limits::max(), - "SL_MAX_TIPS must fit in int32"); - constexpr int32 max_supported_tips = std::min( - int32(SL_MAX_TIPS), int32(std::numeric_limits::max()) - ); - if (n > max_supported_tips) { - Rcpp::stop("This many tips are not (yet) supported."); + void check_ntip(const int32 n) { + if (n > SL_MAX_TIPS) { + if (SL_MAX_TIPS <= 2048) { + Rcpp::stop("Trees with %d tips exceed the compiled limit of %d. " + "Update TreeTools to support more tips, then reinstall " + "TreeDist.", static_cast(n), + static_cast(SL_MAX_TIPS)); + } else { + Rcpp::stop("Trees with %d tips are not yet supported (maximum %d). ", + static_cast(n), + static_cast(SL_MAX_TIPS)); + } } } } +// [[Rcpp::export]] +int cpp_sl_max_tips() { + return static_cast(SL_MAX_TIPS); +} + using TreeDist::resize_uninitialized; inline List robinson_foulds_distance(const RawMatrix &x, const RawMatrix &y, @@ -59,30 +66,33 @@ inline List robinson_foulds_distance(const RawMatrix &x, const RawMatrix &y, grf_match matching(a.n_splits, NA_INTEGER); - // Heap-backed scratch avoids large fixed-size stack allocation. - std::vector b_complement(size_t(b.n_splits) * size_t(a.n_bins)); + const int32 n_bins = a.n_bins; + std::vector b_complement(b.n_splits * n_bins); for (int32 i = b.n_splits; i--; ) { + splitbit* bc_i = &b_complement[i * n_bins]; for (int32 bin = last_bin; bin--; ) { - b_complement[size_t(i) * a.n_bins + bin] = ~b.state[i][bin]; + bc_i[bin] = ~b.state[i][bin]; } - b_complement[size_t(i) * a.n_bins + last_bin] = b.state[i][last_bin] ^ unset_mask; + bc_i[last_bin] = b.state[i][last_bin] ^ unset_mask; } for (int32 ai = a.n_splits; ai--; ) { + if ((ai & 1023) == 0) Rcpp::checkUserInterrupt(); for (int32 bi = b.n_splits; bi--; ) { bool all_match = true; bool all_complement = true; + const splitbit* bc_bi = &b_complement[bi * n_bins]; - for (int32 bin = 0; bin < a.n_bins; ++bin) { + for (int32 bin = 0; bin < n_bins; ++bin) { if ((a.state[ai][bin] != b.state[bi][bin])) { all_match = false; break; } } if (!all_match) { - for (int32 bin = 0; bin < a.n_bins; ++bin) { - if (a.state[ai][bin] != b_complement[size_t(bi) * a.n_bins + bin]) { + for (int32 bin = 0; bin < n_bins; ++bin) { + if (a.state[ai][bin] != bc_bi[bin]) { all_complement = false; break; } @@ -114,29 +124,32 @@ inline List robinson_foulds_info(const RawMatrix &x, const RawMatrix &y, grf_match matching(a.n_splits, NA_INTEGER); - // Heap-backed scratch avoids large fixed-size stack allocation. - std::vector b_complement(size_t(b.n_splits) * size_t(a.n_bins)); + const int16 n_bins = a.n_bins; + std::vector b_complement(b.n_splits * n_bins); for (int16 i = 0; i < b.n_splits; i++) { + splitbit* bc_i = &b_complement[i * n_bins]; for (int16 bin = 0; bin < last_bin; ++bin) { - b_complement[size_t(i) * a.n_bins + bin] = ~b.state[i][bin]; + bc_i[bin] = ~b.state[i][bin]; } - b_complement[size_t(i) * a.n_bins + last_bin] = b.state[i][last_bin] ^ unset_mask; + bc_i[last_bin] = b.state[i][last_bin] ^ unset_mask; } for (int16 ai = 0; ai < a.n_splits; ++ai) { + if ((ai & 1023) == 0) Rcpp::checkUserInterrupt(); for (int16 bi = 0; bi < b.n_splits; ++bi) { bool all_match = true, all_complement = true; + const splitbit* bc_bi = &b_complement[bi * n_bins]; - for (int16 bin = 0; bin < a.n_bins; ++bin) { + for (int16 bin = 0; bin < n_bins; ++bin) { if ((a.state[ai][bin] != b.state[bi][bin])) { all_match = false; break; } } if (!all_match) { - for (int16 bin = 0; bin < a.n_bins; ++bin) { - if ((a.state[ai][bin] != b_complement[size_t(bi) * a.n_bins + bin])) { + for (int16 bin = 0; bin < n_bins; ++bin) { + if ((a.state[ai][bin] != bc_bi[bin])) { all_complement = false; break; } @@ -177,6 +190,7 @@ inline List matching_split_distance(const RawMatrix &x, const RawMatrix &y, cost_matrix score(most_splits); for (int16 ai = 0; ai < a.n_splits; ++ai) { + if ((ai & 1023) == 0) Rcpp::checkUserInterrupt(); for (int16 bi = 0; bi < b.n_splits; ++bi) { splitbit total = 0; for (int16 bin = 0; bin < a.n_bins; ++bin) { @@ -236,6 +250,7 @@ inline List jaccard_similarity(const RawMatrix &x, const RawMatrix &y, cost_matrix score(most_splits); for (int16 ai = 0; ai < a.n_splits; ++ai) { + if ((ai & 1023) == 0) Rcpp::checkUserInterrupt(); const int16 na = a.in_split[ai]; const int16 nA = n_tips - na; @@ -342,6 +357,7 @@ List msi_distance(const RawMatrix &x, const RawMatrix &y, const int32 n_tips) { splitbit different[SL_MAX_BINS]; for (int16 ai = 0; ai < a.n_splits; ++ai) { + if ((ai & 1023) == 0) Rcpp::checkUserInterrupt(); for (int16 bi = 0; bi < b.n_splits; ++bi) { int16 n_different = 0, @@ -419,6 +435,7 @@ List mutual_clustering(const RawMatrix &x, const RawMatrix &y, std::unique_ptr b_match = std::make_unique(b.n_splits); for (int16 ai = 0; ai < a.n_splits; ++ai) { + if ((ai & 1023) == 0) Rcpp::checkUserInterrupt(); if (a_match[ai]) continue; const int16 na = a.in_split[ai]; @@ -575,6 +592,7 @@ inline List shared_phylo (const RawMatrix &x, const RawMatrix &y, cost_matrix score(most_splits); for (int16 ai = a.n_splits; ai--; ) { + if ((ai & 1023) == 0) Rcpp::checkUserInterrupt(); for (int16 bi = b.n_splits; bi--; ) { const double spi_over = TreeDist::spi_overlap( a.state[ai], b.state[bi], n_tips, a.in_split[ai], b.in_split[bi], diff --git a/src/tree_distances.h b/src/tree_distances.h index 8d4f7b4f..1b24c2b3 100644 --- a/src/tree_distances.h +++ b/src/tree_distances.h @@ -18,6 +18,10 @@ constexpr splitbit ALL_ONES = (std::numeric_limits::max)(); namespace TreeDist { + // Validate that n_tips does not exceed the compiled SL_MAX_TIPS limit. + // Defined in tree_distances.cpp; calls Rcpp::stop() on failure. + void check_ntip(int32 n); + // Re-exported from mutual_clustering.h: // ic_matching(int16 a, int16 b, int16 n) @@ -39,7 +43,8 @@ namespace TreeDist { // Returns lg2_unrooted[x] - lg2_trees_matching_split(y, x - y) [[nodiscard]] inline double mmsi_pair_score(const int16 x, const int16 y) noexcept { - assert(SL_MAX_TIPS + 2 <= std::numeric_limits::max()); // verify int16 ok + static_assert(SL_MAX_TIPS + 2 <= std::numeric_limits::max(), + "int16 too narrow for SL_MAX_TIPS"); return lg2_unrooted[x] - (lg2_rooted[y] + lg2_rooted[x - y]); } @@ -60,7 +65,8 @@ namespace TreeDist { [[nodiscard]] inline double one_overlap(const int16 a, const int16 b, const int16 n) noexcept { - assert(SL_MAX_TIPS + 2 <= std::numeric_limits::max()); // verify int16 ok + static_assert(SL_MAX_TIPS + 2 <= std::numeric_limits::max(), + "int16 too narrow for SL_MAX_TIPS"); if (a == b) { return lg2_rooted[a] + lg2_rooted[n - a]; } @@ -71,7 +77,8 @@ namespace TreeDist { } [[nodiscard]] inline double one_overlap_notb(const int16 a, const int16 n_minus_b, const int16 n) noexcept { - assert(SL_MAX_TIPS + 2 <= std::numeric_limits::max()); // verify int16 ok + static_assert(SL_MAX_TIPS + 2 <= std::numeric_limits::max(), + "int16 too narrow for SL_MAX_TIPS"); const int16 b = n - n_minus_b; if (a == b) { return lg2_rooted[b] + lg2_rooted[n_minus_b]; @@ -90,7 +97,8 @@ namespace TreeDist { const int16 n_tips, const int16 in_a, const int16 in_b, const int16 n_bins) noexcept { - assert(SL_MAX_BINS <= INT16_MAX); + static_assert(SL_MAX_BINS <= INT16_MAX, + "int16 too narrow for SL_MAX_BINS"); int16 n_ab = 0; for (int16 bin = 0; bin < n_bins; ++bin) { diff --git a/tests/testthat/test-ntip-limit.R b/tests/testthat/test-ntip-limit.R new file mode 100644 index 00000000..6bee634e --- /dev/null +++ b/tests/testthat/test-ntip-limit.R @@ -0,0 +1,24 @@ +test_that(".SL_MAX_TIPS is populated", { + expect_true(is.integer(TreeDist:::.SL_MAX_TIPS)) + expect_true(TreeDist:::.SL_MAX_TIPS >= 2048L) +}) + +test_that("Trees exceeding SL_MAX_TIPS are rejected", { + limit <- TreeDist:::.SL_MAX_TIPS + # Build a mock Splits raw matrix with limit + 1 tips. + # The matrix needs: nrow = limit - 2 splits, ncol = ceil((limit + 1) / 8) + bad_nTip <- limit + 1L + n_splits <- bad_nTip - 3L + n_cols <- ceiling(bad_nTip / 8) + mock <- matrix(as.raw(0), nrow = n_splits, ncol = n_cols) + class(mock) <- c("Splits", class(mock)) + attr(mock, "nTip") <- bad_nTip + attr(mock, "tip.label") <- paste0("t", seq_len(bad_nTip)) + + expect_error( + TreeDist:::GeneralizedRF(mock, mock, bad_nTip, + TreeDist:::cpp_robinson_foulds_distance, + maximize = FALSE, reportMatching = FALSE), + "exceed" + ) +}) diff --git a/tests/testthat/test-tree_distance_utilities.R b/tests/testthat/test-tree_distance_utilities.R index 61f513d8..c453d5b7 100644 --- a/tests/testthat/test-tree_distance_utilities.R +++ b/tests/testthat/test-tree_distance_utilities.R @@ -32,23 +32,29 @@ test_that("CalculateTreeDistance() errs appropriately", { }) test_that("Tip-count guard is applied consistently", { - expect_no_error(.AssertNtipSupported(1000L)) - expect_no_error(.AssertNtipSupported(32766L)) - expect_no_error(.AssertNtipSupported(32767L)) - expect_error(.AssertNtipSupported(32768L), - "This many tips are not \\(yet\\) supported\\.") - + expect_no_error(.CheckMaxTips(1000L)) + + limit32704 <- packageVersion("TreeTools") >= "2.2.0.9002" + if (limit32704) expect_no_error(.CheckMaxTips(32704L)) + + errMsg <- if (limit32704) { + "Trees with 327.. tips are not yet supported \\(maximum 32704\\)" + } else { + "Trees with 327.. tips exceed the compiled limit of 2048" + } + expect_error(.CheckMaxTips(32705L), errMsg) + splits8 <- unclass(as.Splits(BalancedTree(8))) expect_error(cpp_robinson_foulds_distance(splits8, splits8, 32768L), - "This many tips are not \\(yet\\) supported\\.") + errMsg) expect_error(cpp_robinson_foulds_info(splits8, splits8, 32768L), - "This many tips are not \\(yet\\) supported\\.") + errMsg) trees <- list(BalancedTree(8), PectinateTree(8)) class(trees) <- "multiPhylo" expect_error( .SplitDistanceAllPairs(RobinsonFouldsSplits, trees, letters[1:8], 32768L), - "This many tips are not \\(yet\\) supported\\." + errMsg ) })