From 7867da8a3441ecdb621ba79468a05fa8b4070fa2 Mon Sep 17 00:00:00 2001 From: Georgi Haralanov Date: Sun, 26 Apr 2026 00:12:31 +0300 Subject: [PATCH] [df] Enable IMT with GlobalEntryRanges for a single file Increases speed when only processing one file with IMT enabled --- tree/dataframe/src/RNTupleDS.cxx | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tree/dataframe/src/RNTupleDS.cxx b/tree/dataframe/src/RNTupleDS.cxx index 82c72ee81102e..5ef923daafd6e 100644 --- a/tree/dataframe/src/RNTupleDS.cxx +++ b/tree/dataframe/src/RNTupleDS.cxx @@ -655,7 +655,7 @@ void ROOT::RDF::RNTupleDS::PrepareNextRanges() // Easy work scheduling: one file per slot. We skip empty files (files without entries). - if ((nRemainingFiles >= fNSlots) || (fGlobalEntryRange.has_value())) { + if ((nRemainingFiles >= fNSlots) || (fGlobalEntryRange.has_value() && nRemainingFiles != 1)) { while ((fNextRanges.size() < fNSlots) && (fNextFileIndex < nFiles)) { REntryRangeDS range; @@ -708,13 +708,26 @@ void ROOT::RDF::RNTupleDS::PrepareNextRanges() const auto descGuard = source->GetSharedDescriptorGuard(); return ROOT::Internal::GetClusterBoundaries(descGuard.GetRef()); }(); + unsigned int iFirstRange = 0; + unsigned int iLastRange = rangesByCluster.size() - 1; + if (fGlobalEntryRange.has_value() && fGlobalEntryRange->first != fGlobalEntryRange->second) { + for (size_t j = 0; j < rangesByCluster.size(); j++) { + if (rangesByCluster[j].fFirstEntry + fSeenEntriesNoGlobalRange <= fGlobalEntryRange->first) { + iFirstRange = j; + } + if (rangesByCluster[j].fLastEntryPlusOne + fSeenEntriesNoGlobalRange >= fGlobalEntryRange->second) { + iLastRange = j; + break; + } + } + } - const unsigned int nRangesByCluster = rangesByCluster.size(); + const unsigned int nRangesByCluster = iLastRange - iFirstRange + 1; // Distribute slots equidistantly over the entry range, aligned on cluster boundaries const auto nClustersPerSlot = nRangesByCluster / nSlotsPerFile; const auto remainder = nRangesByCluster % nSlotsPerFile; - std::size_t iRange = 0; + std::size_t iRange = iFirstRange; unsigned int iSlot = 0; const unsigned int N = std::min(nSlotsPerFile, nRangesByCluster); for (; iSlot < N; ++iSlot) {