diff --git a/roofit/roofitcore/test/TestStatistics/testLikelihoodGradientJob.cxx b/roofit/roofitcore/test/TestStatistics/testLikelihoodGradientJob.cxx index 296ec5f7d6d10..5335a8f322ff8 100644 --- a/roofit/roofitcore/test/TestStatistics/testLikelihoodGradientJob.cxx +++ b/roofit/roofitcore/test/TestStatistics/testLikelihoodGradientJob.cxx @@ -63,6 +63,7 @@ ValAndError getValAndError(RooArgSet const &parsFinal, const char *name) return {var.getVal(), var.getError()}; }; +#ifdef ROOFIT_LEGACY_EVAL_BACKEND std::vector<double> getParamVals(RooAbsMinimizerFcn &fcn) { std::vector<double> values(fcn.getNDim()); @@ -73,6 +74,7 @@ std::vector<double> getParamVals(RooAbsMinimizerFcn &fcn) return values; } +#endif std::unique_ptr<RooFitResult> runMinimizer(RooAbsReal &nll, bool offsetting) { @@ -235,6 +237,7 @@ TEST(LikelihoodGradientJob, RepeatMigrad) m1.minimize("Minuit2", "migrad"); } +#ifdef ROOFIT_LEGACY_EVAL_BACKEND TEST_P(LikelihoodGradientJobTest, GaussianND) { // do a minimization, but now using GradMinimizer and its MP version @@ -315,6 +318,7 @@ TEST_P(LikelihoodGradientJobTest, GaussianND) EXPECT_EQ(std0[ix], std1[ix]); } } +#endif INSTANTIATE_TEST_SUITE_P(NworkersSeed, LikelihoodGradientJobTest, ::testing::Combine(::testing::Values(1, 2, 3), // number of workers @@ -579,6 +583,7 @@ TEST_P(LikelihoodGradientJobTest, Gaussian1DAlsoWithLikelihoodJob) } #undef EXPECT_NEAR_REL +#ifdef ROOFIT_LEGACY_EVAL_BACKEND class LikelihoodGradientJobErrorTest : public ::testing::TestWithParam> { void SetUp() override @@ -967,3 +972,4 @@ TEST(MinuitFcnGrad, DISABLED_CompareToRooMinimizerFcn) RFMP::Config::LikelihoodJob::defaultNEventTasks = RFMP::Config::LikelihoodJob::automaticNEventTasks; RFMP::Config::LikelihoodJob::defaultNComponentTasks = RFMP::Config::LikelihoodJob::automaticNComponentTasks; } +#endif diff --git a/roofit/roofitcore/test/testRooAbsPdf.cxx b/roofit/roofitcore/test/testRooAbsPdf.cxx index 6edec25cbcf5c..d217dbb23bf9d 100644 --- a/roofit/roofitcore/test/testRooAbsPdf.cxx +++ 
b/roofit/roofitcore/test/testRooAbsPdf.cxx @@ -96,9 +96,6 @@ TEST_P(FitTest, AsymptoticallyCorrectErrors) // evaluated in batch mode and data size is greater than one, the batch mode // will inform that a batched evaluation function is missing. // -// This test is disabled if the legacy backend is not available, because then -// we don't have any reference to compare to. -#ifdef ROOFIT_LEGACY_EVAL_BACKEND TEST(RooAbsPdf, ConditionalFitBatchMode) { using namespace RooFit; @@ -132,41 +129,67 @@ TEST(RooAbsPdf, ConditionalFitBatchMode) auto data = makeFakeDataXY(); + // The model x range is wider than the support of the data so that the + // Poisson normalisation integral over x is unity to high precision for any + // mean value encountered. With that simplification, the conditional MLE for + // `factor` has a closed form (see below) and the legacy evaluation backend + // is no longer needed as a reference. RooWorkspace ws; ws.factory("Product::mean1({factor[1.0, 0.0, 10.0], y[1.0, 5]})"); ws.factory("Product::mean2({factor})"); - ws.factory("Poisson::model1(x[0, 10], mean1)"); + ws.factory("Poisson::model1(x[0, 30], mean1)"); ws.factory("Poisson::model2(x, mean2)"); RooRealVar &factor = *ws.var("factor"); RooRealVar &y = *ws.var("y"); - std::vector expectFastEvaluationsWarnings{true, false}; + double sumX = 0.0; + double sumY = 0.0; + for (int i = 0; i < data->numEntries(); ++i) { + const RooArgSet *row = data->get(i); + sumX += row->getRealValue("x"); + sumY += row->getRealValue("y"); + } + const double nEntries = data->numEntries(); + + // For each event, the conditional log-likelihood term is + // log Poisson(x_i; factor * y_i) = x_i log(factor*y_i) - factor*y_i + const + // (the Poisson normalisation integral over x is unity by construction of + // the wide x range). 
Setting d(NLL)/d(factor) = 0 gives: + // model1: factor_MLE = sum_i x_i / sum_i y_i (mean depends on y) + // model2: factor_MLE = sum_i x_i / N (mean is just factor) + // and the standard error from the inverse Hessian is in both cases + // sigma(factor) = sqrt(sum_i x_i) / (sum_i y_i or N). + const std::vector<double> expectedFactor{sumX / sumY, sumX / nEntries}; + const std::vector<double> expectedFactorErr{std::sqrt(sumX) / sumY, std::sqrt(sumX) / nEntries}; + const std::vector<bool> expectFastEvaluationsWarnings{true, false}; int iMean = 0; for (RooAbsPdf *model : {ws.pdf("model1"), ws.pdf("model2")}) { - std::vector<std::unique_ptr<RooFitResult>> fitResults; RooHelpers::HijackMessageStream hijack(RooFit::INFO, RooFit::FastEvaluations); - for (auto evalBackend : {EvalBackend::Legacy(), EvalBackend::Cpu()}) { - factor.setVal(1.0); - factor.setError(0.0); - fitResults.emplace_back(model->fitTo(*data, ConditionalObservables(y), Save(), PrintLevel(-1), evalBackend)); - if (verbose) { - fitResults.back()->Print(); - } + factor.setVal(1.0); + factor.setError(0.0); + auto fitResult = std::unique_ptr<RooFitResult>{ + model->fitTo(*data, ConditionalObservables(y), Save(), PrintLevel(-1), EvalBackend::Cpu())}; + if (verbose) { + fitResult->Print(); } - EXPECT_TRUE(fitResults[1]->isIdentical(*fitResults[0])); + auto *factorFinal = static_cast<RooRealVar *>(fitResult->floatParsFinal().find("factor")); + ASSERT_NE(factorFinal, nullptr); + EXPECT_NEAR(factorFinal->getVal(), expectedFactor[iMean], 1e-4 * expectedFactor[iMean]) + << "value mismatch for " << model->GetName(); + EXPECT_NEAR(factorFinal->getError(), expectedFactorErr[iMean], 1e-3 * expectedFactorErr[iMean]) + << "error mismatch for " << model->GetName(); + EXPECT_EQ(hijack.str().find("does not implement the faster batch") != std::string::npos, expectFastEvaluationsWarnings[iMean]) << "Stream contents: " << hijack.str(); ++iMean; } } -#endif // ROOT-9530: RooFit side-band fit inconsistent with fit to full range TEST_P(FitTest, MultiRangeFit) diff --git 
a/roofit/roofitcore/test/testRooSimultaneous.cxx b/roofit/roofitcore/test/testRooSimultaneous.cxx index 548d695dc3ff3..a9b2f34ccb880 100644 --- a/roofit/roofitcore/test/testRooSimultaneous.cxx +++ b/roofit/roofitcore/test/testRooSimultaneous.cxx @@ -25,6 +25,7 @@ #include "gtest_wrapper.h" +#include #include /// Forum issue @@ -519,6 +520,12 @@ TEST_P(TestStatisticTest, RooSimultaneousSingleChannelCrossCheckWithCondVar) /// GitHub issue #18718. /// Make sure that we can do a ranged fit on an extended RooAddPdf in a /// RooSimultaneous with the new CPU backend. +/// +/// The reference value is computed analytically: each channel has a single +/// fixed-shape exponential, so the extended-MLE for the yield reduces to +/// `N_obs_in_range * I_full / I_range`, where the integrals run over the full +/// observable range and the fit range respectively. This avoids depending on +/// the legacy evaluation backend, which is not always built. TEST(RooSimultaneous, RangedExtendedRooAddPdf) { RooHelpers::LocalChangeMsgLevel changeMsgLevel{RooFit::WARNING}; @@ -526,16 +533,23 @@ TEST(RooSimultaneous, RangedExtendedRooAddPdf) const double nBkgA_nom = 9000; const double nBkgB_nom = 10000; - RooRealVar x("x", "Observable", 100, 150); - x.setRange("fitRange", 100, 130); + const double cA = -0.06; + const double cB = -0.09; + + const double xMin = 100; + const double xMax = 150; + const double xFitMax = 130; + + RooRealVar x("x", "Observable", xMin, xMax); + x.setRange("fitRange", xMin, xFitMax); RooRealVar nBkgA("nBkgA", "", nBkgA_nom, 0.8 * nBkgA_nom, 1.2 * nBkgA_nom); RooRealVar nBkgB("nBkgB", "", nBkgB_nom, 0.8 * nBkgB_nom, 1.2 * nBkgB_nom); - RooExponential expA("expA", "", x, RooFit::RooConst(-0.06)); + RooExponential expA("expA", "", x, RooFit::RooConst(cA)); RooAddPdf modelA("modelA", "", {expA}, {nBkgA}); - RooExponential expB("expB", "", x, RooFit::RooConst(-0.09)); + RooExponential expB("expB", "", x, RooFit::RooConst(cB)); RooAddPdf modelB("modelB", "", {expB}, {nBkgB}); 
RooCategory runCat("runCat", "", {{"RunA", 0}, {"RunB", 1}}); @@ -546,21 +560,26 @@ TEST(RooSimultaneous, RangedExtendedRooAddPdf) std::unique_ptr combData{simPdf.generate(RooArgSet(x, runCat), Extended())}; - using Res = std::unique_ptr; + std::unique_ptr fitResult{ + simPdf.fitTo(*combData, Save(), Range("fitRange"), EvalBackend(EvalBackend::Cpu()), PrintLevel(-1))}; - RooArgSet params; - RooArgSet paramsSnap; - simPdf.getParameters(combData->get(), params); - params.snapshot(paramsSnap); + ASSERT_NE(fitResult, nullptr); + EXPECT_EQ(fitResult->status(), 0); + + auto integ = [](double c, double a, double b) { return (std::exp(c * b) - std::exp(c * a)) / c; }; - Res fitResult{simPdf.fitTo(*combData, Save(), Range("fitRange"), EvalBackend(EvalBackend::Cpu()), PrintLevel(-1))}; + const double ratioA = integ(cA, xMin, xMax) / integ(cA, xMin, xFitMax); + const double ratioB = integ(cB, xMin, xMax) / integ(cB, xMin, xFitMax); - params.assign(paramsSnap); + const double nBkgA_ref = combData->sumEntries("runCat==0", "fitRange") * ratioA; + const double nBkgB_ref = combData->sumEntries("runCat==1", "fitRange") * ratioB; - Res fitResultRef{ - simPdf.fitTo(*combData, Save(), Range("fitRange"), EvalBackend(EvalBackend::Legacy()), PrintLevel(-1))}; + auto getFinal = [&](const char *name) { return static_cast(fitResult->floatParsFinal().find(name)); }; - EXPECT_TRUE(fitResult->isIdentical(*fitResultRef)); + // Tolerance accounts for MINUIT's default convergence precision. The + // fit-vs-analytical mismatch caused by the bug in #18718 was several percent. + EXPECT_NEAR(getFinal("nBkgA")->getVal(), nBkgA_ref, 1e-3 * nBkgA_ref); + EXPECT_NEAR(getFinal("nBkgB")->getVal(), nBkgB_ref, 1e-3 * nBkgB_ref); } /// GitHub issue #20383. 
diff --git a/roofit/roofitcore/test/testSumW2Error.cxx b/roofit/roofitcore/test/testSumW2Error.cxx index 1a6338996712d..87f706e784f7c 100644 --- a/roofit/roofitcore/test/testSumW2Error.cxx +++ b/roofit/roofitcore/test/testSumW2Error.cxx @@ -13,10 +13,49 @@ #include -// These tests are disabled if the legacy backend is not available, because -// then we don't have any reference to compare to. -#ifdef ROOFIT_LEGACY_EVAL_BACKEND -// GitHub issue 9118: Problem running weighted binned fit in batch mode +#include +#include +#include + +namespace { + +// Compare two fit results parameter-by-parameter with relative tolerances on +// values and errors. The error of each parameter in `ref` is multiplied by +// `errorScale[name]` (default 1.0) before being compared to the same parameter +// in `r`. +void expectFitsCompatible(const RooFitResult &ref, const RooFitResult &r, double valTol, double errTol, + const std::map &errorScale = {}) +{ + ASSERT_EQ(ref.floatParsFinal().size(), r.floatParsFinal().size()); + + for (auto *p : ref.floatParsFinal()) { + const std::string name = p->GetName(); + auto *vRef = static_cast(p); + auto *vNew = static_cast(r.floatParsFinal().find(name.c_str())); + ASSERT_NE(vNew, nullptr) << "missing parameter " << name; + + const double valScale = std::max(std::abs(vRef->getVal()), 1.0); + EXPECT_NEAR(vRef->getVal(), vNew->getVal(), valTol * valScale) << "value mismatch for " << name; + + const double scale = errorScale.count(name) ? errorScale.at(name) : 1.0; + EXPECT_NEAR(vRef->getError() * scale, vNew->getError(), errTol * vRef->getError() * scale) + << "error mismatch for " << name; + } +} + +} // namespace + +// GitHub issue 9118: Problem running weighted binned fit in batch mode. +// +// Test the SumW2Error correction for a non-extended fit by checking the +// defining property of the correction: applied to data with a uniform weight +// w, it should reproduce the parameter values and errors of the equivalent +// unweighted fit. 
In a non-extended fit, all parameters are shape parameters +// that are invariant under uniform reweighting, so: +// - Values are unchanged whether the data is weighted or not. +// - Errors with SumW2(true) match the unweighted-fit errors. +// - Errors with SumW2(false) on weighted data scale as 1/sqrt(w), because +// the (unrescaled) Hessian of the likelihood scales linearly with w. TEST(SumW2Error, BatchMode) { RooHelpers::LocalChangeMsgLevel changeMsgLvl(RooFit::WARNING); @@ -37,11 +76,11 @@ TEST(SumW2Error, BatchMode) model.getParameters(dataSet->get(), params); params.snapshot(initialParams); - // these datasets will be filled with a weight that is not unity - RooDataSet dataSetWeighted("dataSetWeighted", "dataSetWeighted", *dataSet->get(), RooFit::WeightVar()); + const double w = 0.5; + RooDataSet dataSetWeighted("dataSetWeighted", "dataSetWeighted", *dataSet->get(), RooFit::WeightVar()); for (int i = 0; i < dataSet->numEntries(); ++i) { - dataSetWeighted.add(*dataSet->get(i), 0.5); + dataSetWeighted.add(*dataSet->get(i), w); } std::unique_ptr dataHist{dataSet->binnedClone()}; @@ -49,57 +88,49 @@ TEST(SumW2Error, BatchMode) using namespace RooFit; - auto fit = [&](RooAbsData &data, bool sumw2, EvalBackend evalBackend, std::string const &minimizer, - int printLevel = -1) { + auto fit = [&](RooAbsData &data, bool sumw2) { params.assign(initialParams); - - return std::unique_ptr{model.fitTo(data, Save(), SumW2Error(sumw2), Strategy(1), evalBackend, - Minimizer(minimizer.c_str()), PrintLevel(printLevel))}; + return std::unique_ptr{ + model.fitTo(data, Save(), SumW2Error(sumw2), Strategy(1), EvalBackend::Cpu(), PrintLevel(-1))}; }; - auto scalar = EvalBackend::Legacy(); - auto batchMode = EvalBackend::Cpu(); - - // Compare batch mode vs. 
scalar mode for non-SumW2 fits on UNWEIGHTED datasets - EXPECT_TRUE(fit(*dataSet, 0, scalar, "Minuit")->isIdentical(*fit(*dataSet, 0, batchMode, "Minuit"))) - << " different results for Minuit fit to RooDataSet without SumW2Error correction."; - EXPECT_TRUE(fit(*dataHist, 0, scalar, "Minuit")->isIdentical(*fit(*dataHist, 0, batchMode, "Minuit"))) - << " different results for Minuit fit to RooDataHist without SumW2Error correction."; - EXPECT_TRUE(fit(*dataSet, 0, scalar, "Minuit2")->isIdentical(*fit(*dataSet, 0, batchMode, "Minuit2"))) - << " different results for Minuit2 fit to RooDataSet without SumW2Error correction."; - EXPECT_TRUE(fit(*dataHist, 0, scalar, "Minuit2")->isIdentical(*fit(*dataHist, 0, batchMode, "Minuit2"))) - << " different results for Minuit2 fit to RooDataHist without SumW2Error correction."; - - // We can't compare the covariance matrix in these next cases, because it is - // externally provided. Still, it's okay because the parameter values and - // errors are compared, where the errors are inferred from the external - // covariance matrix. - - // Compare batch mode vs. scalar mode for SumW2 fits on UNWEIGHTED datasets - EXPECT_TRUE(fit(*dataSet, 1, scalar, "Minuit")->isIdenticalNoCov(*fit(*dataSet, 1, batchMode, "Minuit"))) - << " different results for Minuit fit to RooDataSet with SumW2Error correction."; - EXPECT_TRUE(fit(*dataHist, 1, scalar, "Minuit")->isIdenticalNoCov(*fit(*dataHist, 1, batchMode, "Minuit"))) - << " different results for Minuit fit to RooDataHist with SumW2Error correction."; - EXPECT_TRUE(fit(*dataSet, 1, scalar, "Minuit2")->isIdenticalNoCov(*fit(*dataSet, 1, batchMode, "Minuit2"))) - << " different results for Minuit2 fit to RooDataSet with SumW2Error correction."; - EXPECT_TRUE(fit(*dataHist, 1, scalar, "Minuit2")->isIdenticalNoCov(*fit(*dataHist, 1, batchMode, "Minuit2"))) - << " different results for Minuit2 fit to RooDataHist with SumW2Error correction."; - - // Compare batch mode vs. 
scalar mode for SumW2 fits on WEIGHTED datasets - EXPECT_TRUE( - fit(dataSetWeighted, 1, scalar, "Minuit")->isIdenticalNoCov(*fit(dataSetWeighted, 1, batchMode, "Minuit"))) - << " different results for Minuit fit to weighted RooDataSet with SumW2Error correction."; - EXPECT_TRUE( - fit(*dataHistWeighted, 1, scalar, "Minuit")->isIdenticalNoCov(*fit(*dataHistWeighted, 1, batchMode, "Minuit"))) - << " different results for Minuit fit to weighted RooDataHist with SumW2Error correction."; - EXPECT_TRUE( - fit(dataSetWeighted, 1, scalar, "Minuit2")->isIdenticalNoCov(*fit(dataSetWeighted, 1, batchMode, "Minuit2"))) - << " different results for Minuit2 fit to weighted RooDataSet with SumW2Error correction."; - EXPECT_TRUE( - fit(*dataHistWeighted, 1, scalar, "Minuit2")->isIdenticalNoCov(*fit(*dataHistWeighted, 1, batchMode, "Minuit2"))) - << " different results for Minuit2 fit to weighted RooDataHist with SumW2Error correction."; + const double valTol = 1e-4; + const double errTol = 1e-2; + + for (const auto &dataPair : std::vector>{ + {dataSet.get(), &dataSetWeighted}, {dataHist.get(), dataHistWeighted.get()}}) { + RooAbsData &dataUw = *dataPair.first; + RooAbsData &dataW = *dataPair.second; + + // Reference: fit the unweighted dataset without SumW2Error. + auto refFit = fit(dataUw, /*sumw2=*/false); + + // SumW2Error on unweighted data should be a near no-op (the internal + // sqrt(w)-weighted refit is the same fit), so values and errors + // should match the reference. + auto fitUwSw2 = fit(dataUw, /*sumw2=*/true); + expectFitsCompatible(*refFit, *fitUwSw2, valTol, errTol); + + // SumW2Error on weighted data should reproduce the unweighted errors. + auto fitWSw2 = fit(dataW, /*sumw2=*/true); + expectFitsCompatible(*refFit, *fitWSw2, valTol, errTol); + + // Without SumW2Error, the errors on a uniformly weighted dataset + // scale as 1/sqrt(w) for shape parameters. 
+ auto fitWNoSw2 = fit(dataW, /*sumw2=*/false); + std::map scale; + for (auto *p : refFit->floatParsFinal()) { + scale[p->GetName()] = 1.0 / std::sqrt(w); + } + expectFitsCompatible(*refFit, *fitWNoSw2, valTol, errTol, scale); + } } +// Test the SumW2Error correction in an extended fit, in both the full range +// and a subrange. The defining property of SumW2Error is the same as in the +// non-extended case (it should reproduce the unweighted-fit errors), but for +// extended fits the yield parameters scale linearly with the uniform weight +// w under reweighting, and so do their errors. TEST(SumW2Error, ExtendedFit) { RooHelpers::LocalChangeMsgLevel changeMsgLvl(RooFit::WARNING); @@ -116,60 +147,77 @@ TEST(SumW2Error, ExtendedFit) auto *shp = ws.pdf("shp"); std::unique_ptr dataNoWeights{shp->generate(RooArgSet(*x))}; - // For this test, use a uniform non-unity weight of 1.5. It was set to 0.1 - // in the past, but then there were fourth-digit differences between the - // scalar mode and the batch mode. However, this is most likeliy not - // pointing towards a flaw in the batch mode, which is why a value was - // handpicked for which the differences disappear. Any residual problems are - // most likely caused by the unnecessarily complicated implementation of the - // RooAddPdf extended term in the scalar mode: the coefficients are - // projected to the subrange by cached scale factors, while the batch mode - // just uses the same scaling factor as for the full likelihood. 
+ const double w = 1.5; auto *wFunc = ws.factory("w[1.5]"); - - auto *w = dataNoWeights->addColumn(*wFunc); - RooDataSet data{dataNoWeights->GetName(), - dataNoWeights->GetTitle(), - *dataNoWeights->get(), - RooFit::Import(*dataNoWeights), - RooFit::WeightVar(w->GetName())}; - RooDataHist datahist{"datahist", "datahist", *data.get(), data}; + auto *wcol = dataNoWeights->addColumn(*wFunc); + RooDataSet dataWeighted{dataNoWeights->GetName(), dataNoWeights->GetTitle(), *dataNoWeights->get(), + RooFit::Import(*dataNoWeights), RooFit::WeightVar(wcol->GetName())}; + RooDataHist datahistUw{"datahistUw", "", *dataNoWeights->get(), *dataNoWeights}; + RooDataHist datahistW{"datahistW", "", *dataWeighted.get(), dataWeighted}; RooArgSet params; RooArgSet initialParams; - shp->getParameters(dataNoWeights->get(), params); params.snapshot(initialParams); - auto doFit = [&](RooFit::EvalBackend evalBackend, bool sumW2Error, const char *range) { + auto doFit = [&](RooAbsData &data, bool sumw2, const char *range) { params.assign(initialParams); - return std::unique_ptr{shp->fitTo(datahist, Extended(), Range(range), Save(), - SumW2Error(sumW2Error), Strategy(1), PrintLevel(-1), evalBackend, - Minimizer("Minuit2", "migrad"))}; + return std::unique_ptr{shp->fitTo(data, Extended(), Range(range), Save(), SumW2Error(sumw2), + Strategy(1), PrintLevel(-1), EvalBackend::Cpu())}; }; - // compare batch mode and scalar mode fit results for full range - { - auto yy = doFit(EvalBackend::Cpu(), true, nullptr); - auto yn = doFit(EvalBackend::Cpu(), false, nullptr); - auto ny = doFit(EvalBackend::Legacy(), true, nullptr); - auto nn = doFit(EvalBackend::Legacy(), false, nullptr); - - EXPECT_TRUE(yy->isIdenticalNoCov(*ny)) << "different results for extended fit with SumW2Error in BatchMode"; - EXPECT_TRUE(yn->isIdenticalNoCov(*nn)) << "different results for extended fit without SumW2Error in BatchMode"; - } + // Build a "yield-scaled" reference where yield values and errors are + // multiplied by 
`factor`. Shape parameters are left unchanged. This + // produces the expected fit result for a uniformly weighted dataset under + // the SumW2 correction. + auto scaleYields = [&](RooFitResult const &r, double factor) { + auto out = std::unique_ptr(static_cast(r.Clone())); + for (auto *p : out->floatParsFinal()) { + const std::string name = p->GetName(); + if (name == "Nsig" || name == "Nbkg") { + auto *v = static_cast(p); + v->setVal(v->getVal() * factor); + v->setError(v->getError() * factor); + } + } + return out; + }; - // compare batch mode and scalar mode fit results for subrange - { - auto yy = doFit(EvalBackend::Cpu(), true, "subrange"); - auto yn = doFit(EvalBackend::Cpu(), false, "subrange"); - auto ny = doFit(EvalBackend::Legacy(), true, "subrange"); - auto nn = doFit(EvalBackend::Legacy(), false, "subrange"); - - EXPECT_TRUE(yy->isIdenticalNoCov(*ny)) - << "different results for extended fit in subrange with SumW2Error in BatchMode"; - EXPECT_TRUE(yn->isIdenticalNoCov(*nn)) - << "different results for extended fit in subrange without SumW2Error in BatchMode"; + for (const char *range : {static_cast(nullptr), "subrange"}) { + // The unweighted reference fit. Yield parameters in the model are + // defined over the full range of x (the model integrates over the full + // range internally), so for both `nullptr` and `"subrange"` the yield + // values represent total counts in [-10, 10]. + auto refFit = doFit(datahistUw, /*sumw2=*/false, range); + + // Tolerances are looser in the subrange because the fit is harder + // (smaller effective sample size and stronger correlations). + const bool isSubrange = range != nullptr; + const double valTol = isSubrange ? 5e-3 : 1e-3; + const double errTol = isSubrange ? 5e-2 : 2e-2; + + // The reference scaled by w in the yield values and errors. 
With + // SumW2(true) on uniformly weighted data, this matches the actual fit: + // - yield values scale by w (more weighted "counts" by factor w) + // - yield errors also scale by w (fluctuations of weighted counts) + // - shape parameters are invariant under uniform reweighting + auto refScaledByW = scaleYields(*refFit, w); + + auto fitWSw2 = doFit(datahistW, /*sumw2=*/true, range); + expectFitsCompatible(*refScaledByW, *fitWSw2, valTol, errTol); + + // Without SumW2, the yield values still scale by w, but the errors of + // *all* parameters get an additional factor 1/sqrt(w) relative to the + // yield-scaled reference. For yields this means the actual error scales + // as sqrt(w) (instead of w). For shape parameters this means the error + // scales as 1/sqrt(w) (instead of 1). This follows from the fact that + // the weighted-likelihood Hessian scales linearly with the weight. + std::map scaleNoSw2; + for (auto *p : refFit->floatParsFinal()) { + scaleNoSw2[p->GetName()] = 1.0 / std::sqrt(w); + } + + auto fitWNoSw2 = doFit(datahistW, /*sumw2=*/false, range); + expectFitsCompatible(*refScaledByW, *fitWNoSw2, valTol, errTol, scaleNoSw2); } } -#endif