From 6b5bc14491d1a72de45f30875bc7644e440372b2 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Sat, 13 Jun 2026 10:58:24 +0200 Subject: [PATCH] [treeplayer] Fix TTreeFormula vector index into vector-of-vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a vector-type branch was used as the index of a vector-of-vectors branch, TTreeFormula computed the wrong number of instances and emitted out-of-bounds errors. For example, with v2 a vector used as an index: vv1[v2][0] looped over the summed size of all sub-vectors of vv1 instead of the length of v2, printing a spurious extra instance and "index out of bound" errors. vv1[0][v2] silently dropped its last instance. Two issues in the multi-variable-dimension machinery: 1. DefineDimensions registered the inner jagged dimension as a variable dimension even when the outer dimension is selected through a variable "gather" index (-2) and the inner dimension is pinned to a constant. In that configuration the formula does not iterate over the jagged sub-dimension, so the manager must not sum the physical sub-sizes; the number of instances is simply the length of the index variable. 2. GetRealInstance performed a premature out-of-bounds check against the physical sub-size for a variable inner index (-2), using the raw instance number (the index of the index variable) rather than the evaluated index. The real bounds are already checked after evaluating the index variable. Adds regression tests to treeplayer_regressions. Closes #19290 (originally JIRA ROOT-8726). 🤖 Done with the help of [Claude Code](https://claude.com/claude-code) (Claude Opus 4.8) --- tree/treeplayer/src/TTreeFormula.cxx | 21 ++++++- tree/treeplayer/test/regressions.cxx | 91 ++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 3 deletions(-) diff --git a/tree/treeplayer/src/TTreeFormula.cxx b/tree/treeplayer/src/TTreeFormula.cxx index ccac1d5fe458c..b46d0ebc3a3b2 100644 --- a/tree/treeplayer/src/TTreeFormula.cxx +++ b/tree/treeplayer/src/TTreeFormula.cxx @@ -375,10 +375,22 @@ void TTreeFormula::DefineDimensions(Int_t code, Int_t size, Int_t& virt_dim) { if (info) { fManager->EnableMultiVarDims(); - //if (fIndexes[code][info->fDim]<0) { // removed because the index might be out of bounds! + // When the primary (first) dimension is selected through a variable index + // (a "gather", fIndexes[code][0]==-2) and this inner variable-size dimension + // is itself pinned to a constant index, the formula does not iterate over the + // jagged sub-dimension: each gathered row yields a single value, so the number + // of instances is simply the length of the index variable. Registering the + // dimension as a variable one would make the manager sum the physical + // sub-sizes (e.g. vv1[v2][0] would loop over the total size of vv1 instead of + // the length of v2). So only register it when it actually varies during the + // iteration. See https://github.com/root-project/root/issues/19290 + bool pinnedInnerDuringGather = (fIndexes[code][0] == -2) && (fIndexes[code][fNdimensions[code]] >= 0); + if (!pinnedInnerDuringGather) { + // NOTE: a dimension with a (constant) index is still registered here, because the + // index might be out of bounds and the dimension must still be tracked. info->fVirtDim = virt_dim; fManager->AddVarDims(virt_dim); // if (!fVarDims[virt_dim]) fVarDims[virt_dim] = new TArrayI; - //} + } } Int_t vsize = 0; @@ -3631,8 +3643,11 @@ Int_t TTreeFormula::GetRealInstance(Int_t instance, Int_t codeindex) { } else { local_index = instance; } - if (info && local_index>=fCumulSizes[codeindex][max_dim]) { + if (info && fIndexes[codeindex][max_dim] != -2 && local_index >= fCumulSizes[codeindex][max_dim]) { // We are out of bounds! [Multiple var dims, See same message a few line above] + // For a variable index (-2) local_index is the instance number of the + // index variable, not the physical position; the actual bounds are + // checked below after evaluating the index variable. return fNdata[0]+1; } if (fIndexes[codeindex][max_dim]==-2) { diff --git a/tree/treeplayer/test/regressions.cxx b/tree/treeplayer/test/regressions.cxx index 06a0c626eed25..62bec41974ec7 100644 --- a/tree/treeplayer/test/regressions.cxx +++ b/tree/treeplayer/test/regressions.cxx @@ -661,3 +661,94 @@ TEST(TTreeScan, ULong64Precision) EXPECT_EQ(scanToString("colsize=21 col=lld:lld:lld"), expectedScanOut); EXPECT_EQ(scanToString("col=21lld:21lld:21lld"), expectedScanOut); } + +// https://github.com/root-project/root/issues/19290 (originally JIRA ROOT-8726) +// TTreeFormula gave wrong results when a vector-type branch was used as the index +// of a vector-of-vectors branch. For vv1[v2][0] (v2 a vector used as the index of +// the outer dimension of the vector-of-vectors vv1) the number of instances was +// computed as the total/summed size of all the sub-vectors of vv1 instead of the +// length of v2, and out-of-bounds errors were emitted. The companion case +// vv1[0][v2] (constant outer index, vector inner index) silently dropped its last +// instance. +namespace { + +// Evaluate a TTreeFormula on the (single) current entry of the tree and return +// all of its instances. +std::vector GH19290EvalAll(TTree &tree, const char *expr) +{ + TTreeFormula formula("formula", expr, &tree); + EXPECT_FALSE(formula.GetTree() == nullptr) << "could not compile formula " << expr; + + std::vector result; + const Int_t ndata = formula.GetNdata(); + result.reserve(ndata); + for (Int_t i = 0; i < ndata; ++i) + result.push_back(formula.EvalInstance(i)); + return result; +} + +} // namespace + +struct GH19290 : public ::testing::Test { + TTree fTree{"t", "t"}; + + int fN = 1; + // The branch addresses must outlive SetUp(), so keep the objects and the + // pointers used by TTree::Branch as data members. + std::vector fV1{5, 7}; + std::vector> fVV1{{2, 1}, {3, 4}}; + std::vector fV2{0, 1, 0}; + std::vector *fV1Ptr = &fV1; + std::vector> *fVV1Ptr = &fVV1; + std::vector *fV2Ptr = &fV2; + + void SetUp() override + { + gInterpreter->GenerateDictionary("vector>"); + + fTree.Branch("n", &fN); + fTree.Branch("v1", &fV1Ptr); + fTree.Branch("vv1", &fVV1Ptr); + fTree.Branch("v2", &fV2Ptr); + fTree.Fill(); + fTree.GetEntry(0); + } +}; + +// Sanity: indexing a plain vector with a vector index already worked. +TEST_F(GH19290, VectorIndexIntoVector) +{ + // v1[v2] loops over v2 == {0,1,0} -> {v1[0], v1[1], v1[0]} + EXPECT_EQ(GH19290EvalAll(fTree, "v1[v2]"), (std::vector{5, 7, 5})); +} + +// Constant outer index, vector inner index. Used to drop the last instance. +TEST_F(GH19290, ConstantOuterVectorInner) +{ + // vv1[0][v2] loops over v2 -> {vv1[0][0], vv1[0][1], vv1[0][0]} + EXPECT_EQ(GH19290EvalAll(fTree, "vv1[0][v2]"), (std::vector{2, 1, 2})); +} + +// Vector outer index, constant inner index. Used to over-count (length was the +// summed size of all sub-vectors) and emit out-of-bounds errors. +TEST_F(GH19290, VectorOuterConstantInner) +{ + // vv1[v2][0] loops over v2 -> {vv1[0][0], vv1[1][0], vv1[0][0]} + EXPECT_EQ(GH19290EvalAll(fTree, "vv1[v2][0]"), (std::vector{2, 3, 2})); +} + +// Scalar indices into a vector of vectors (these always worked). +TEST_F(GH19290, ScalarIndices) +{ + EXPECT_EQ(GH19290EvalAll(fTree, "vv1[n][0]"), (std::vector{3})); // vv1[1][0] + EXPECT_EQ(GH19290EvalAll(fTree, "vv1[0][n]"), (std::vector{1})); // vv1[0][1] +} + +// A few related expressions that must keep working (no regressions). +TEST_F(GH19290, NoRegressions) +{ + EXPECT_EQ(GH19290EvalAll(fTree, "vv1[0]"), (std::vector{2, 1})); + EXPECT_EQ(GH19290EvalAll(fTree, "vv1[1]"), (std::vector{3, 4})); + EXPECT_EQ(GH19290EvalAll(fTree, "vv1"), (std::vector{2, 1, 3, 4})); + EXPECT_EQ(GH19290EvalAll(fTree, "v1[n]"), (std::vector{7})); +}