From 9b5950d748757453d68dca5f947d23e21da35631 Mon Sep 17 00:00:00 2001 From: Antal van den Bosch Date: Fri, 19 Jun 2026 17:26:42 +0200 Subject: [PATCH] DistanceTester: take a direct path for Overlap features The inner distance loop dispatched every feature through a virtual metricTestFunction::test() -> Feature::fvDistance() (which re-checks the storable/numeric branches on every call) -> a virtual metric->distance(), plus a permutation[] indirection -- all to compute, for a plain Overlap feature, just (F == G ? 0 : weight). Precompute once, in permuted order, a flat metricTestFunction array (removing the permutation indirection) and an Overlap flag per feature, and let Overlap features take the direct (F == G ? 0 : weight) path. Other metrics (MVDM, numeric, ...) keep the existing path. Measured (TRIBL2, 20k test instances, reused saved base; deterministic instruction counts, min of 3): 161.30 B -> 155.19 B, i.e. about -8% of the test phase. Only IB1/TRIBL2 use DistanceTester (IGTree computes no distances). Output is byte-identical, including with +v db (which depends on the exact distances and neighbour ordering). Co-Authored-By: Claude Opus 4.8 --- include/timbl/Testers.h | 7 +++++++ src/Testers.cxx | 23 ++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/include/timbl/Testers.h b/include/timbl/Testers.h index 00f89ca..3c89118 100644 --- a/include/timbl/Testers.h +++ b/include/timbl/Testers.h @@ -92,6 +92,13 @@ namespace Timbl{ double ) override; private: std::vector metricTest; + // Per-feature test info precomputed once, in permuted order, so the inner + // test loop avoids the permutation indirection and, for plain Overlap + // features (the common case), the virtual metricTestFunction / fvDistance + // / metric->distance() chain -- which for Overlap only computes + // (F==G ? 0 : weight). + std::vector permTest; // metricTest in permuted order + std::vector isOverlap; // 1 if feature uses Overlap }; class SimilarityTester: public TesterClass { diff --git a/src/Testers.cxx b/src/Testers.cxx index 093c717..b7b7742 100644 --- a/src/Testers.cxx +++ b/src/Testers.cxx @@ -152,6 +152,17 @@ namespace Timbl{ metricTest[i] = new overlapTestFunction(); } } + // Precompute, in permuted order, the metric test function and an Overlap + // flag for each feature, so test() can use a flat index and take a direct + // path for Overlap features. + permTest.resize(_size,0); + isOverlap.resize(_size,0); + for ( size_t j=0; j < _size; ++j ){ + Feature *feat = permFeatures[j]; + permTest[j] = metricTest[permutation[j]]; + isOverlap[j] = ( feat && !feat->Ignore() + && feat->getMetricType() == Overlap ) ? 1 : 0; + } } size_t DistanceTester::test( const vector& G, @@ -164,9 +175,15 @@ namespace Timbl{ cerr << "feature " << TrueF << " (perm=" << permutation[TrueF] << ")" << endl; #endif - double result = metricTest[permutation[TrueF]]->test( (*FV)[TrueF], - G[i], - permFeatures[TrueF] ); + double result; + if ( isOverlap[TrueF] ){ + // plain Overlap: distance is 0 for an exact value match, otherwise + // the feature weight -- no virtual metric dispatch needed. + result = ( (*FV)[TrueF] == G[i] ) ? 0.0 : permFeatures[TrueF]->Weight(); + } + else { + result = permTest[TrueF]->test( (*FV)[TrueF], G[i], permFeatures[TrueF] ); + } distances[i+1] = distances[i] + result; if ( distances[i+1] > Threshold ){ #ifdef DBGTEST