From 3aa961d583609821f11bbb228592252b27fb6da6 Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Mon, 2 Sep 2024 20:02:51 +0200 Subject: [PATCH 1/4] Use CB + Argus for Lc jets --- .../database_ml_parameters_LcJet_pp.yml | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml index aa54125cee..0c842cf154 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml @@ -405,37 +405,44 @@ LcJet_pp: wide: fn: 'Gaussian::wide(m, mean, expr("n*sigma_g1", n[1.,5.], sigma_g1))' model: - fn: 'SUM::sig(f_peak[0.,1.]*peak, wide)' + # fn: 'SUM::sig(f_peak[0.,1.]*peak, wide)' + fn: 'CrystalBall::sig(m[1., 5.], mean[2.27, 2.29], sigma[.005, .035], alpha_l[0., 5.], n_l[0., 5.], alpha_r[0., 5.], n_r[0., 5.])' - ptrange: [1., 5.] range: [2.13, 2.44] - fix_params: ['n', 'f_peak'] + # fix_params: ['n', 'f_peak'] + fix_params: ['alpha_l', 'alpha_r', 'n_l', 'n_r'] # per_ptjet: true components: # sig: # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.005,.015])' bkg: - fn: 'Polynomial::bkg(m, {a0[0.2, -3, 3], a1[0.2 , -3, 3], a2[0.2, -3, 3]})' + # fn: 'Polynomial::bkg(m, {a0[0.2, -3, 3], a1[0.2 , -3, 3], a2[0.2, -3, 3]})' + fn: 'ArgusBG::bkg(m, m0[1., 10.], c[0., 10.], p[1., .5, 2.])' model: fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' - ptrange: [5., 8.] range: [2.1, 2.48] - fix_params: ['n', 'f_peak'] + # fix_params: ['n', 'f_peak'] + fix_params: ['alpha_l', 'alpha_r', 'n_l', 'n_r'] # per_ptjet: true components: # sig: # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])' bkg: - fn: 'Polynomial::bkg(m, {a0[0.2, -3, 3], a1[0.2 , -3, 3], a2[0.2, -3, 3]})' + # fn: 'Polynomial::bkg(m, {a0[0.2, -3, 3], a1[0.2 , -3, 3], a2[0.2, -3, 3]})' + fn: 'ArgusBG::bkg(m, m0[1., 10.], c[0., 10.], p[1., .5, 2.])' model: fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' - range: [2.05, 2.5] - fix_params: ['n', 'f_peak'] + # fix_params: ['n', 'f_peak'] + fix_params: ['alpha_l', 'alpha_r', 'n_l', 'n_r'] # per_ptjet: true components: # sig: # fn: 'Gaussian::sig(m, mean[2.28,2.29], sigma_g1[.005,.03])' bkg: - fn: 'Polynomial::bkg(m, {a0[0.2, -3, 3], a1[0.2 , -3, 3], a2[0.2, -3, 3]})' + # fn: 'Polynomial::bkg(m, {a0[0.2, -3, 3], a1[0.2 , -3, 3], a2[0.2, -3, 3]})' + fn: 'ArgusBG::bkg(m, m0[1., 10.], c[0., 10.], p[1., .5, 2.])' model: fn: 'SUM::sum(f_sig[0.,1.]*sig, bkg)' From 808a4708656f54b6dc3b442ac2366b3b615071d6 Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Mon, 2 Sep 2024 20:53:06 +0200 Subject: [PATCH 2/4] Restrict fits to above 1.75 for D0 jets --- .../database_ml_parameters_D0Jet_pp.yml | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml index 64bf1eb168..54c2ebe846 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml @@ -564,7 +564,7 @@ D0Jet_pp: fn: 'SUM::sigrefl(frac_refl[0.,1.]*refl, sig)' - ptrange: [1., 2.] per_ptjet: true - range: [1.72, 2.02] + range: [1.75, 2.02] fix_params: ['frac_refl'] free_params: ['sigma_g1'] fix_params_ptjet: ['mean', 'sigma_g1'] @@ -572,64 +572,64 @@ D0Jet_pp: bkg: fn: 'Exponential::bkg(m, alpha[-100,0])' model: - fn: 'SUM::sum(frac[0.,1.]*sigrefl, bkg)' + fn: 'SUM::sum(frac[0.,.9]*sigrefl, bkg)' - ptrange: [2., 3.] per_ptjet: true fix_params: ['frac_refl'] free_params: ['sigma_g1'] fix_params_ptjet: ['mean', 'sigma_g1'] - range: [1.72, 2.04] + range: [1.75, 2.04] components: bkg: fn: 'Exponential::bkg(m, alpha[-100,0])' model: - fn: 'SUM::sum(frac[0.,1.]*sigrefl, bkg)' + fn: 'SUM::sum(frac[0.,.9]*sigrefl, bkg)' - ptrange: [3., 4.] per_ptjet: true fix_params: ['frac_refl'] free_params: ['sigma_g1'] fix_params_ptjet: ['mean', 'sigma_g1'] - range: [1.72, 2.06] + range: [1.75, 2.06] components: bkg: fn: 'Exponential::bkg(m, alpha[-100,0])' model: - fn: 'SUM::sum(frac[0.,1.]*sigrefl, bkg)' + fn: 'SUM::sum(frac[0.,.9]*sigrefl, bkg)' - ptrange: [4., 5.] per_ptjet: true fix_params: ['frac_refl'] free_params: ['sigma_g1'] fix_params_ptjet: ['mean', 'sigma_g1'] # fix_params: ['mean', 'sigma_g1', 'frac_refl'] - range: [1.72, 2.08] + range: [1.75, 2.08] components: bkg: fn: 'Exponential::bkg(m, alpha[-100,0])' model: - fn: 'SUM::sum(frac[0.,1.]*sigrefl, bkg)' + fn: 'SUM::sum(frac[0.,.9]*sigrefl, bkg)' - ptrange: [5., 6.] per_ptjet: true fix_params: ['frac_refl'] free_params: ['sigma_g1'] fix_params_ptjet: ['mean', 'sigma_g1'] - range: [1.72, 2.10] + range: [1.75, 2.10] components: bkg: fn: 'Exponential::bkg(m, alpha[-100,0])' model: - fn: 'SUM::sum(frac[0.,1.]*sigrefl, bkg)' + fn: 'SUM::sum(frac[0.,.9]*sigrefl, bkg)' - ptrange: [6., 8.] per_ptjet: true fix_params: ['frac_refl'] free_params: ['sigma_g1'] fix_params_ptjet: ['mean', 'sigma_g1'] - range: [1.72, 2.14] + range: [1.75, 2.14] components: bkg: fn: 'Exponential::bkg(m, alpha[-100,0])' model: - fn: 'SUM::sum(frac[0.,1.]*sigrefl, bkg)' - - range: [1.72, 2.20] + fn: 'SUM::sum(frac[0.,.9]*sigrefl, bkg)' + - range: [1.75, 2.20] per_ptjet: true fix_params: ['frac_refl'] free_params: ['sigma_g1'] @@ -638,7 +638,7 @@ D0Jet_pp: bkg: fn: 'Exponential::bkg(m, alpha[-100,0])' model: - fn: 'SUM::sum(frac[0.,1.]*sigrefl, bkg)' + fn: 'SUM::sum(frac[0.,.9]*sigrefl, bkg)' sidesub_per_ptjet: true sidesub: From 9c3382e2f3a428bb5a7d671cc737105746ab3559 Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Wed, 4 Sep 2024 18:45:00 +0200 Subject: [PATCH 3/4] Allow different ptjet binning for gen and det --- machine_learning_hep/processer_jet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py index 2a998ef26e..9cddd488ad 100644 --- a/machine_learning_hep/processer_jet.py +++ b/machine_learning_hep/processer_jet.py @@ -85,7 +85,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab self.binarrays_ptjet[level][v] = self.binarray_ptjet self.binarrays_obs['gen']['fPt'] = self.binarray_pthf self.binarrays_obs['det']['fPt'] = self.binarray_pthf - self.binarrays_ptjet['gen']['fPt'] = np.asarray(self.cfg('bins_ptjet_eff'), 'd') + self.binarrays_ptjet['gen']['fPt'] = np.asarray(self.cfg('bins_ptjet_eff_gen', self.cfg('bins_ptjet_eff')), 'd') self.binarrays_ptjet['det']['fPt'] = np.asarray(self.cfg('bins_ptjet_eff'), 'd') @@ -233,7 +233,7 @@ def process_histomass_single(self, index): h = create_hist( f'h_mass-ptjet-pthf-{obs}', f';M (GeV/#it{{c}}^{{2}});p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{obs}', - self.binarray_mass, self.binarray_ptjet, self.binarray_pthf, + self.binarray_mass, self.binarrays_ptjet['det'][obs], self.binarray_pthf, *[self.binarrays_obs['det'][v] for v in var]) for i, v in enumerate(var): get_axis(h, 3+i).SetTitle(self.cfg(f'observables.{v}.label', v)) From 10407ce5a38f1f73a321c164de376448661a4763 Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Wed, 4 Sep 2024 19:40:28 +0200 Subject: [PATCH 4/4] Draft for prior --- machine_learning_hep/analysis/analyzer_jets.py | 15 +++++++++++---- .../data_run3/database_ml_parameters_LcJet_pp.yml | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py index dbb4fb0cdf..3691296820 100644 --- a/machine_learning_hep/analysis/analyzer_jets.py +++ b/machine_learning_hep/analysis/analyzer_jets.py @@ -1051,9 +1051,11 @@ def _build_effkine(self, h_nocuts, h_cuts): h_cuts.Divide(h_nocuts) return h_cuts - def _build_response_matrix(self, h_response, h_eff = None): + + def _build_response_matrix(self, h_response, h_eff = None, frac_flat = 0.): rm = ROOT.RooUnfoldResponse( project_hist(h_response, [0, 1], {}), project_hist(h_response, [2, 3], {})) + h_gen = project_hist(h_response, [2, 3], {}) for hbin in itertools.product( enumerate(get_axis(h_response, 0).GetXbins(), 1), enumerate(get_axis(h_response, 1).GetXbins(), 1), @@ -1066,8 +1068,12 @@ def _build_response_matrix(self, h_response, h_eff = None): if np.isclose(eff, 0.): self.logger.error('efficiency 0 for %s', hbin[4]) continue - for _ in range(int(n)): - rm.Fill(hbin[0][1], hbin[1][1], hbin[2][1], hbin[3][1], 1./eff) + if (cnt_gen := h_gen.GetBinContent(hbin[2][0], hbin[3][0])) > 0.: + fac = 1. + if frac_flat > 0.: + fac += frac_flat * (1. / cnt_gen - 1.) + for _ in range(int(n)): + rm.Fill(hbin[0][1], hbin[1][1], hbin[2][1], hbin[3][1], 1./eff * fac) # rm.Mresponse().Print() return rm @@ -1096,7 +1102,8 @@ def _unfold(self, hist, var, mcordata): self.logger.error('Response matrix for %s not available, cannot unfold', var + suffix) return [] response_matrix_pr = self._build_response_matrix( - h_response, self.hcandeff['pr'] if mcordata == 'data' else None) + h_response, self.hcandeff['pr'] if mcordata == 'data' else None, + self.cfg('unfolding_frac_flat', 0.)) self._save_hist(response_matrix_pr.Hresponse(), f'uf/h_ptjet-{var}-responsematrix_pr_lin_{mcordata}.png', 'colz') diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml index 0c842cf154..24d5746e04 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml @@ -470,6 +470,7 @@ LcJet_pp: unfolding_iterations: 8 # used, maximum iteration unfolding_iterations_sel: 5 # used, selected iteration # systematics + unfolding_frac_flat: .1 fd_folding_method: 3d fd_root: '/data2/vkucera/powheg/trees_powheg_fd_F05_R05.root'