From 822d8c41ba46b2556c7d0a275e9eff48d29061cb Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Wed, 18 Mar 2026 14:04:01 +0000 Subject: [PATCH 1/3] Batch renumberTips --- .positai/settings.json | 3 +- NEWS.md | 2 ++ R/RcppExports.R | 4 +++ R/tree_numbering.R | 51 ++++++++++++++++++++++++++-- src/RcppExports.cpp | 14 ++++++++ src/renumber_tips.cpp | 35 +++++++++++++++++++ tests/testthat/test-tree_numbering.R | 23 +++++++++++++ 7 files changed, 128 insertions(+), 4 deletions(-) create mode 100644 src/renumber_tips.cpp diff --git a/.positai/settings.json b/.positai/settings.json index 500757de..645911e2 100644 --- a/.positai/settings.json +++ b/.positai/settings.json @@ -14,6 +14,7 @@ }, "model": { "id": "claude-opus-4-6", - "provider": "positai" + "provider": "positai", + "thinkingEffort": "high" } } \ No newline at end of file diff --git a/NEWS.md b/NEWS.md index 5d1315bb..541b04d3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,8 @@ replacing iterative R while-loop. - `duplicated.Splits()` uses hash-based O(n) de-duplication, replacing O(n²) pairwise comparison. +- `RenumberTips.multiPhylo()` applies tip permutation in a single C++ call, + avoiding per-tree overhead. ## Fixes diff --git a/R/RcppExports.R b/R/RcppExports.R index 27743d54..a2654bde 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -93,6 +93,10 @@ path_lengths <- function(edge, weight, init_nas) { .Call(`_TreeTools_path_lengths`, edge, weight, init_nas) } +renumber_tips_batch <- function(trees, perm, n_tip) { + .Call(`_TreeTools_renumber_tips_batch`, trees, perm, n_tip) +} + cpp_edge_to_splits <- function(edge, order, nTip) { .Call(`_TreeTools_cpp_edge_to_splits`, edge, order, nTip) } diff --git a/R/tree_numbering.R b/R/tree_numbering.R index 49264c46..15829680 100644 --- a/R/tree_numbering.R +++ b/R/tree_numbering.R @@ -665,10 +665,55 @@ RenumberTips.Splits <- function(tree, tipOrder) { RenumberTips.multiPhylo <- function(tree, tipOrder) { at <- attributes(tree) labelled <- !is.null(at[["TipLabel"]]) - tree <- lapply(tree, RenumberTips.phylo, tipOrder) - if (labelled) { - at[["TipLabel"]] <- TipLabels(tipOrder) + + startOrder <- if (labelled) at[["TipLabel"]] else tree[[1L]][["tip.label"]] + newOrder <- if (is.numeric(tipOrder)) { + startOrder[tipOrder] + } else { + TipLabels(tipOrder, single = TRUE) + } + + if (identical(startOrder, newOrder)) return(tree) + + if (any(duplicated(newOrder))) { + stop("Tree labels ", + paste0(newOrder[duplicated(newOrder)], collapse = ", "), + " repeated in `tipOrder`") + } + + if (length(startOrder) != length(newOrder)) { + startOnly <- setdiff(startOrder, newOrder) + newOnly <- setdiff(newOrder, startOrder) + if (length(startOnly)) { + stop("Tree labels and tipOrder must match.", + if (length(newOnly)) "\n Missing in `tree`: ", + paste0(newOnly, collapse = ", "), + if (length(startOnly)) "\n Missing in `tipOrder`: ", + paste0(startOnly, collapse = ", ") + ) + } + newOrder <- intersect(newOrder, startOrder) } + + nTip <- length(startOrder) + matchOrder <- match(startOrder, newOrder) + if (any(is.na(matchOrder))) { + stop("Tree labels ", + paste0(startOrder[is.na(matchOrder)], collapse = ", "), + " missing from `tipOrder`") + } + + new_edges <- .Call(`_TreeTools_renumber_tips_batch`, tree, matchOrder, nTip) + + for (i in seq_along(tree)) { + tree[[i]][["edge"]] <- new_edges[[i]] + if (!labelled) tree[[i]][["tip.label"]] <- newOrder + if (identical(attr(tree[[i]], "order"), "preorder")) { + attr(tree[[i]], "order") <- "cladewise" + } + } + + if (labelled) at[["TipLabel"]] <- newOrder attributes(tree) <- at tree } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 4cf5bd0f..abb10e80 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -298,6 +298,19 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// renumber_tips_batch +Rcpp::List renumber_tips_batch(Rcpp::List trees, const Rcpp::IntegerVector perm, int n_tip); +RcppExport SEXP _TreeTools_renumber_tips_batch(SEXP treesSEXP, SEXP permSEXP, SEXP n_tipSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type trees(treesSEXP); + Rcpp::traits::input_parameter< const Rcpp::IntegerVector >::type perm(permSEXP); + Rcpp::traits::input_parameter< int >::type n_tip(n_tipSEXP); + rcpp_result_gen = Rcpp::wrap(renumber_tips_batch(trees, perm, n_tip)); + return rcpp_result_gen; +END_RCPP +} // cpp_edge_to_splits Rcpp::RawMatrix cpp_edge_to_splits(const Rcpp::IntegerMatrix& edge, const Rcpp::IntegerVector& order, const Rcpp::IntegerVector& nTip); RcppExport SEXP _TreeTools_cpp_edge_to_splits(SEXP edgeSEXP, SEXP orderSEXP, SEXP nTipSEXP) { @@ -532,6 +545,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeTools_n_cherries_wrapper", (DL_FUNC) &_TreeTools_n_cherries_wrapper, 3}, {"_TreeTools_node_depth_unrooted", (DL_FUNC) &_TreeTools_node_depth_unrooted, 4}, {"_TreeTools_path_lengths", (DL_FUNC) &_TreeTools_path_lengths, 3}, + {"_TreeTools_renumber_tips_batch", (DL_FUNC) &_TreeTools_renumber_tips_batch, 3}, {"_TreeTools_cpp_edge_to_splits", (DL_FUNC) &_TreeTools_cpp_edge_to_splits, 3}, {"_TreeTools_duplicated_splits", (DL_FUNC) &_TreeTools_duplicated_splits, 2}, {"_TreeTools_mask_splits", (DL_FUNC) &_TreeTools_mask_splits, 1}, diff --git a/src/renumber_tips.cpp b/src/renumber_tips.cpp new file mode 100644 index 00000000..3e12e0d7 --- /dev/null +++ b/src/renumber_tips.cpp @@ -0,0 +1,35 @@ +#include +using namespace Rcpp; + +// Apply a precomputed tip permutation to every tree's edge matrix in batch. +// perm: 1-indexed permutation vector (matchOrder from R's match()) +// n_tip: number of tips +// Returns a list of new (cloned) edge matrices with remapped tip indices. +// [[Rcpp::export]] +Rcpp::List renumber_tips_batch( + Rcpp::List trees, + const Rcpp::IntegerVector perm, + int n_tip +) { + const int n_trees = trees.size(); + Rcpp::List result(n_trees); + + for (int i = 0; i < n_trees; ++i) { + Rcpp::List tree_i = Rcpp::as(trees[i]); + Rcpp::IntegerMatrix edge = Rcpp::clone( + Rcpp::as(tree_i["edge"]) + ); + const int n_edge = edge.nrow(); + + for (int j = 0; j < n_edge; ++j) { + int& child = edge(j, 1); + if (child <= n_tip) { + child = perm[child - 1]; + } + } + + result[i] = edge; + } + + return result; +} diff --git a/tests/testthat/test-tree_numbering.R b/tests/testthat/test-tree_numbering.R index 6aff8e2b..85f2d61a 100644 --- a/tests/testthat/test-tree_numbering.R +++ b/tests/testthat/test-tree_numbering.R @@ -167,6 +167,29 @@ test_that("RenumberTips() works correctly", { }) +test_that("RenumberTips.multiPhylo() batch matches per-tree", { + set.seed(7429) + trees <- c( + replicate(50, RandomTree(12, root = TRUE), simplify = FALSE), + replicate(50, PectinateTree(12), simplify = FALSE) + ) + class(trees) <- "multiPhylo" + target <- sort(TipLabels(trees[[1]])) + + batch <- RenumberTips(trees, target) + per_tree <- structure( + lapply(trees, RenumberTips.phylo, target), + class = "multiPhylo" + ) + + for (i in seq_along(batch)) { + expect_equal(batch[[i]][["edge"]], per_tree[[i]][["edge"]], + info = paste("tree", i)) + expect_equal(batch[[i]][["tip.label"]], per_tree[[i]][["tip.label"]], + info = paste("tree", i)) + } +}) + test_that("postorder_order() works", { edg7 <- BalancedTree(7)$edge expect_postorder(edg7[postorder_order(edg7), ]) From bc054637f87b91e043e5ad4d212ddfbbfcc3e062 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Wed, 18 Mar 2026 14:14:27 +0000 Subject: [PATCH 2/3] Label in C++ --- R/RcppExports.R | 4 ++-- R/tree_numbering.R | 11 ++--------- src/RcppExports.cpp | 9 +++++---- src/renumber_tips.cpp | 32 ++++++++++++++++++++++++-------- 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index a2654bde..ee96e8d0 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -93,8 +93,8 @@ path_lengths <- function(edge, weight, init_nas) { .Call(`_TreeTools_path_lengths`, edge, weight, init_nas) } -renumber_tips_batch <- function(trees, perm, n_tip) { - .Call(`_TreeTools_renumber_tips_batch`, trees, perm, n_tip) +renumber_tips_batch <- function(trees, perm, n_tip, new_labels) { + .Call(`_TreeTools_renumber_tips_batch`, trees, perm, n_tip, new_labels) } cpp_edge_to_splits <- function(edge, order, nTip) { diff --git a/R/tree_numbering.R b/R/tree_numbering.R index 15829680..1e95b8c3 100644 --- a/R/tree_numbering.R +++ b/R/tree_numbering.R @@ -703,15 +703,8 @@ RenumberTips.multiPhylo <- function(tree, tipOrder) { " missing from `tipOrder`") } - new_edges <- .Call(`_TreeTools_renumber_tips_batch`, tree, matchOrder, nTip) - - for (i in seq_along(tree)) { - tree[[i]][["edge"]] <- new_edges[[i]] - if (!labelled) tree[[i]][["tip.label"]] <- newOrder - if (identical(attr(tree[[i]], "order"), "preorder")) { - attr(tree[[i]], "order") <- "cladewise" - } - } + tree <- .Call(`_TreeTools_renumber_tips_batch`, tree, matchOrder, nTip, + newOrder) if (labelled) at[["TipLabel"]] <- newOrder attributes(tree) <- at diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index abb10e80..123c171a 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -299,15 +299,16 @@ BEGIN_RCPP END_RCPP } // renumber_tips_batch -Rcpp::List renumber_tips_batch(Rcpp::List trees, const Rcpp::IntegerVector perm, int n_tip); -RcppExport SEXP _TreeTools_renumber_tips_batch(SEXP treesSEXP, SEXP permSEXP, SEXP n_tipSEXP) { +Rcpp::List renumber_tips_batch(Rcpp::List trees, const Rcpp::IntegerVector perm, int n_tip, const Rcpp::CharacterVector new_labels); +RcppExport SEXP _TreeTools_renumber_tips_batch(SEXP treesSEXP, SEXP permSEXP, SEXP n_tipSEXP, SEXP new_labelsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::List >::type trees(treesSEXP); Rcpp::traits::input_parameter< const Rcpp::IntegerVector >::type perm(permSEXP); Rcpp::traits::input_parameter< int >::type n_tip(n_tipSEXP); - rcpp_result_gen = Rcpp::wrap(renumber_tips_batch(trees, perm, n_tip)); + Rcpp::traits::input_parameter< const Rcpp::CharacterVector >::type new_labels(new_labelsSEXP); + rcpp_result_gen = Rcpp::wrap(renumber_tips_batch(trees, perm, n_tip, new_labels)); return rcpp_result_gen; END_RCPP } @@ -545,7 +546,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeTools_n_cherries_wrapper", (DL_FUNC) &_TreeTools_n_cherries_wrapper, 3}, {"_TreeTools_node_depth_unrooted", (DL_FUNC) &_TreeTools_node_depth_unrooted, 4}, {"_TreeTools_path_lengths", (DL_FUNC) &_TreeTools_path_lengths, 3}, - {"_TreeTools_renumber_tips_batch", (DL_FUNC) &_TreeTools_renumber_tips_batch, 3}, + {"_TreeTools_renumber_tips_batch", (DL_FUNC) &_TreeTools_renumber_tips_batch, 4}, {"_TreeTools_cpp_edge_to_splits", (DL_FUNC) &_TreeTools_cpp_edge_to_splits, 3}, {"_TreeTools_duplicated_splits", (DL_FUNC) &_TreeTools_duplicated_splits, 2}, {"_TreeTools_mask_splits", (DL_FUNC) &_TreeTools_mask_splits, 1}, diff --git a/src/renumber_tips.cpp b/src/renumber_tips.cpp index 3e12e0d7..11285483 100644 --- a/src/renumber_tips.cpp +++ b/src/renumber_tips.cpp @@ -1,34 +1,50 @@ #include using namespace Rcpp; -// Apply a precomputed tip permutation to every tree's edge matrix in batch. -// perm: 1-indexed permutation vector (matchOrder from R's match()) -// n_tip: number of tips -// Returns a list of new (cloned) edge matrices with remapped tip indices. +// Apply a precomputed tip permutation to every tree in batch. +// Returns a plain list of modified phylo objects (shallow-cloned, with new +// edge matrices, updated tip.label, and "preorder" downgraded to "cladewise"). // [[Rcpp::export]] Rcpp::List renumber_tips_batch( Rcpp::List trees, const Rcpp::IntegerVector perm, - int n_tip + int n_tip, + const Rcpp::CharacterVector new_labels ) { const int n_trees = trees.size(); Rcpp::List result(n_trees); for (int i = 0; i < n_trees; ++i) { - Rcpp::List tree_i = Rcpp::as(trees[i]); + // Shallow-clone the phylo list so other components are shared + Rcpp::List tree_i = Rcpp::clone( + Rcpp::as(trees[i]) + ); + + // Clone and permute the edge matrix Rcpp::IntegerMatrix edge = Rcpp::clone( Rcpp::as(tree_i["edge"]) ); const int n_edge = edge.nrow(); - for (int j = 0; j < n_edge; ++j) { int& child = edge(j, 1); if (child <= n_tip) { child = perm[child - 1]; } } + tree_i["edge"] = edge; + + // Replace tip labels (shared across all output trees) + tree_i["tip.label"] = new_labels; + + // Downgrade "preorder" to "cladewise" + if (tree_i.hasAttribute("order")) { + Rcpp::CharacterVector ord = tree_i.attr("order"); + if (ord[0] == "preorder") { + tree_i.attr("order") = Rcpp::CharacterVector::create("cladewise"); + } + } - result[i] = edge; + result[i] = tree_i; } return result; From d1fb6a31dc168a04c1b1f75d8e46af7377fb1950 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Wed, 18 Mar 2026 14:29:02 +0000 Subject: [PATCH 3/3] Cover additions --- AGENTS.md | 4 +++- tests/testthat/test-tree_numbering.R | 32 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index 4b42e822..f01ca11f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -34,7 +34,9 @@ binary 0/1 in an underlying `raw` object. before moving on to the next task. - Increment the `.900X` dev version suffix in `DESCRIPTION` with each `NEWS.md` update. -- Check that existing tests cover all new code. (The GHA test suite uses codecov.) +- All new and changed code must have test coverage. The GHA test suite uses + codecov; uncovered lines will block the PR. Cover happy paths, error + branches, and edge cases (e.g. early returns). ## Optimization notes diff --git a/tests/testthat/test-tree_numbering.R b/tests/testthat/test-tree_numbering.R index 85f2d61a..9e66bd75 100644 --- a/tests/testthat/test-tree_numbering.R +++ b/tests/testthat/test-tree_numbering.R @@ -167,6 +167,38 @@ test_that("RenumberTips() works correctly", { }) +test_that("RenumberTips.multiPhylo() covers edge cases", { + mp8 <- structure( + list(BalancedTree(8), PectinateTree(8)), + class = "multiPhylo" + ) + + # Numeric tipOrder + result <- RenumberTips(mp8, 8:1) + expect_equal(TipLabels(result[[1]]), paste0("t", 8:1)) + + # Early return when order matches + expect_identical(RenumberTips(mp8, TipLabels(mp8[[1]])), mp8) + + # Duplicate error + expect_error(RenumberTips(mp8, rep("t1", 8)), "repeated") + + # Length mismatch error + expect_error(RenumberTips(mp8, paste0("t", 0:5)), + "Missing in `tree`.*Missing in `tipOrder`") + + # Missing label error + mp_shared <- structure( + list(BalancedTree(8), PectinateTree(8)), + TipLabel = paste0("t", 1:8), + class = "multiPhylo" + ) + expect_error( + RenumberTips(mp_shared, c(paste0("t", 1:7), "t_unknown")), + "missing from `tipOrder`" + ) +}) + test_that("RenumberTips.multiPhylo() batch matches per-tree", { set.seed(7429) trees <- c(