Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RooFit-based fitter and update DBs #897

Merged
merged 5 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions machine_learning_hep/analysis/analyzer_jets.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from ROOT import TF1, TCanvas, TFile, gStyle

from machine_learning_hep.analysis.analyzer import Analyzer
from machine_learning_hep.fitting.roofitter import RooFitter
from machine_learning_hep.utilities import folding
from machine_learning_hep.utils.hist import (bin_array, create_hist,
fill_hist_fast, get_axis, get_dim,
Expand Down Expand Up @@ -76,11 +77,13 @@ def __init__(self, datap, case, typean, period):
self.n_colls = {}

self.path_fig = Path(f'fig/{self.case}/{self.typean}')
for folder in ['qa', 'fit', 'sideband', 'signalextr', 'fd', 'uf']:
for folder in ['qa', 'fit', 'roofit', 'sideband', 'signalextr', 'fd', 'uf']:
(self.path_fig / folder).mkdir(parents=True, exist_ok=True)

self.rfigfile = TFile(str(self.path_fig / 'output.root'), 'recreate')

self.fitter = RooFitter()

#region helpers
def _save_canvas(self, canvas, filename):
# folder = self.d_resultsallpmc if mcordata == 'mc' else self.d_resultsallpdata
Expand Down Expand Up @@ -139,8 +142,10 @@ def calculate_efficiencies(self):
cats = {'pr', 'np'}
rfilename = self.n_fileeff
with TFile(rfilename) as rfile:
h_gen = {cat: project_hist(rfile.Get(f'h_ptjet-pthf_{cat}_gen'), [1], {}) for cat in cats}
h_det = {cat: project_hist(rfile.Get(f'h_ptjet-pthf_{cat}_det'), [1], {}).Clone(f'h_eff_{cat}')
bins_ptjet = (1, 2)
# TODO: fix projection range
h_gen = {cat: project_hist(rfile.Get(f'h_ptjet-pthf_{cat}_gen'), [1], {0: bins_ptjet}) for cat in cats}
h_det = {cat: project_hist(rfile.Get(f'h_ptjet-pthf_{cat}_det'), [1], {0: bins_ptjet}).Clone(f'h_eff_{cat}')
for cat in cats}

for cat in cats:
Expand Down Expand Up @@ -173,6 +178,13 @@ def _correct_efficiency(self, hist, ipt):


#region fitting
def _roofit_mass(self, hist, filename = None):
_ws, frame = self.fitter.fit_mass(hist, self.cfg('mass_roofit', {}), True)
c = TCanvas()
frame.Draw()
self._save_canvas(c, filename)


def _fit_mass(self, hist, filename = None):
if hist.GetEntries() == 0:
raise UserWarning('Cannot fit histogram with no entries')
Expand Down Expand Up @@ -231,10 +243,11 @@ def fit(self):
with TFile(rfilename) as rfile:
h = rfile.Get('h_mass-ptjet-pthf')
for ipt in range(get_nbins(h, 2)):
h_invmass = project_hist(h, [0], {2: (ipt+1, ipt+1)})
if h_invmass.GetEntries() < 100:
h_invmass = project_hist(h, [0], {2: (ipt+1, ipt+1)}) # TODO: under-/overflow for jets
if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
self.logger.error('Not enough entries to fit for %s bin %d', mcordata, ipt)
continue
self._roofit_mass(h_invmass, f'roofit/h_mass_fitted_{ipt}_{mcordata}.png')
fit_res, _, func_bkg = self._fit_mass( h_invmass, f'fit/h_mass_fitted_{ipt}_{mcordata}.png')
if fit_res and fit_res.Get() and fit_res.IsValid():
self.fit_sigma[mcordata][ipt] = fit_res.Parameter(2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
## along with this program. if not, see <https://www.gnu.org/licenses/>. ##
#############################################################################

D0jet_pp:

Check warning on line 15 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

15:1 [document-start] missing document start "---"
doml: true
mass: 1.864
sel_reco_unp: "fPt > 1."
Expand All @@ -20,8 +20,8 @@
sel_gen_unp: "fPt > 1. and fPt < 10."
sel_cen_unp: null
sel_good_evt_unp: null
sel_reco_skim: [null,null,null,null,null,null,null] # sel_skim_binmin bins

Check failure on line 23 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

23:24 [commas] too few spaces after comma

Check failure on line 23 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

23:29 [commas] too few spaces after comma

Check failure on line 23 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

23:34 [commas] too few spaces after comma

Check failure on line 23 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

23:39 [commas] too few spaces after comma

Check failure on line 23 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

23:44 [commas] too few spaces after comma

Check failure on line 23 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

23:49 [commas] too few spaces after comma
sel_gen_skim: [null,null,null,null,null,null,null] # sel_skim_binmin bins

Check failure on line 24 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

24:23 [commas] too few spaces after comma

Check failure on line 24 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

24:28 [commas] too few spaces after comma

Check failure on line 24 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

24:33 [commas] too few spaces after comma

Check failure on line 24 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

24:38 [commas] too few spaces after comma
sel_skim_binmin: [1,2,4,6,8,12,24] # skimming pt bins (sel_skim_binmin bins)
sel_skim_binmax: [2,4,6,8,12,24,48] # skimming pt bins (sel_skim_binmin bins)
var_binning: fPt
Expand All @@ -48,7 +48,7 @@
ismcbkg: [[],[1]]
ismcrefl: [[1],[1]] # probably missing from tree creator

#region dfs

Check warning on line 51 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

51:4 [comments] missing starting space in comment
dfs:
read:
evtorig:
Expand All @@ -71,9 +71,9 @@
O2hfd0pbase: [fPt, fEta, fPhi, fFlagMcMatchGen, fOriginMcGen]
O2d0cmcpjeto: [fIndexD0CMCPJetCOs, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
O2d0cmcpjetmo: [fIndexArrayD0CMCDJetOs_hf, fIndexArrayD0CMCDJetOs_geo, fIndexArrayD0CMCDJetOs_pt]
O2d0cmcpjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta] # fNSub2DR, fNSub1, fNSub2
O2d0cmcpjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
extra:
fY: log((sqrt(1.864**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(1.864**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated

Check warning on line 76 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

76:107 [comments] missing starting space in comment
tags:
isstd: {var: fFlagMcMatchGen, req: [[1],[]]}
ismcsignal: {var: fFlagMcMatchGen, req: [[0],[]], abs: true}
Expand Down Expand Up @@ -105,14 +105,14 @@
O2hfd0sel: [fCandidateSelFlag]
O2d0cmcdjeto: [fIndexD0CMCDJetCOs, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
O2d0cmcdjetmo: [fIndexArrayD0CMCPJetOs_hf, fIndexArrayD0CMCPJetOs_geo, fIndexArrayD0CMCPJetOs_pt]
O2d0cmcdjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta] # fNSub2DR, fNSub1, fNSub2
O2d0cmcdjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
extra:
fY: log((sqrt(1.864**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(1.864**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated

Check warning on line 110 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

110:107 [comments] missing starting space in comment
isd0: fFlagMcMatchRec == 1
isd0bar: fFlagMcMatchRec == -1
tags:
ismcsignal: {var: fFlagMcMatchRec, req: [[1], []], abs: True}

Check warning on line 114 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

114:67 [truthy] truthy value should be one of [false, true]
ismcbkg: {var: fFlagMcMatchRec, req: [[], [1]], abs: True}

Check warning on line 115 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

115:64 [truthy] truthy value should be one of [false, true]
seld0: {var: fCandidateSelFlag, req: [[0], []]}
seld0bar: {var: fCandidateSelFlag, req: [[1], []]}
ismcprompt: {var: fOriginMcRec, req: [[0],[]]}
Expand Down Expand Up @@ -143,7 +143,7 @@
O2d0cjeto: [fIndexD0CJetCOs, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
O2d0cjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
extra:
fY: log((sqrt(1.864**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(1.864**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated

Check warning on line 146 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

146:107 [comments] missing starting space in comment
filter: "fPt >= 1. and abs(fY) <= 0.8"

merge:
Expand Down Expand Up @@ -175,7 +175,7 @@
variables:
var_all: [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1]
var_training: [[fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1]]
#TODO: add new variables for dca, max_norm_d0d0exp

Check warning on line 178 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

178:8 [comments] missing starting space in comment

Check warning on line 178 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

178:7 [comments-indentation] comment not indented like content
# sel_skim_binmin bins
var_boundaries: [fCosThetaStar, fPtProng]
var_correlation:
Expand Down Expand Up @@ -227,7 +227,7 @@
respfilename: "resphisto.root"
crossfilename: "cross_section_tot.root"

#region multi

Check warning on line 230 in machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml

View workflow job for this annotation

GitHub Actions / MegaLinter

230:4 [comments] missing starting space in comment
multi:
data:
nprocessesparallel: 20
Expand All @@ -238,7 +238,7 @@
seedmerge: [12] #list of periods
period: [LHC22o] #list of periods
select_period: [1]
prefix_dir: /data2/MLhep/real/train_207280/
prefix_dir: /data2/MLhep/real/train_224749/
unmerged_tree_dir: [/alice/] #list of periods
pkl: ['${USER}/d0jet/pkl'] #list of periods
pkl_skimmed: ['${USER}/d0jet/pklsk'] #list of periods
Expand All @@ -253,10 +253,10 @@
chunksizeskim: [1000] #list of periods
fracmerge: [1.0] #list of periods
seedmerge: [12] #list of periods
period: [LHC22b1b] #list of periods
period: [LHC24d3b] #list of periods
select_period: [1]
prefix_dir: /data2/MLhep/sim/train_184858/
unmerged_tree_dir: [alice/cern.ch/user/a/alihyperloop/outputs/0018/184858/24138]
prefix_dir: /data2/MLhep/sim/train_222234/
unmerged_tree_dir: [alice/]
pkl: ['${USER}/d0jet/pkl'] #list of periods
pkl_skimmed: ['${USER}/d0jet/pklsk'] #list of periods
pkl_skimmed_merge_for_ml: ['${USER}/d0jet/pklskml'] #list of periods
Expand Down Expand Up @@ -377,6 +377,16 @@
lntheta-lnkt:
arraycols: [3, 4]

mass_roofit:
range: [1.675, 2.06]
components:
sig:
fn: 'Gaussian::sig(m[1.675,2.06], mean[1.85,1.87], sigma_g1[.01,.08])'
bkg:
fn: 'Exponential::bkg(m[1.675,2.06], alpha[-100,0])'
sum:
fn: 'SUM::sum(frac[0.,1.]*sig, bkg)'

mass_fit:
func_sig: 'gaus'
func_bkg: 'expo'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ LcJet_pp:
O2hf3pcollbase: [fNumContrib]
extra:
fIsEventReject: 0
collcnt:
trees:
O2lccollcount: [fReadCounts, fWrittenCounts]

# collgen:
# level: gen
Expand All @@ -68,7 +71,7 @@ LcJet_pp:
O2hf3ppbase: [fPt, fEta, fPhi, fFlagMcMatchGen, fOriginMcGen]
O2lccmcpjeto: [fIndexLcCMCPJetCOs, fIndexHf3PPBases_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
O2lccmcpjetmo: [fIndexArrayLcCMCDJetOs_hf, fIndexArrayLcCMCDJetOs_geo, fIndexArrayLcCMCDJetOs_pt]
O2lccmcpjetsso: [fIndexLcCMCPJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta]
O2lccmcpjetsso: [fIndexLcCMCPJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
extra:
fY: log((sqrt(2.286**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.286**2 + fPt**2))
tags:
Expand Down Expand Up @@ -100,7 +103,7 @@ LcJet_pp:
O2hf3psel: [fCandidateSelFlag]
O2lccmcdjeto: [fIndexLcCMCDJetCOs, fIndexHf3PBases_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
O2lccmcdjetmo: [fIndexArrayLcCMCPJetOs_hf, fIndexArrayLcCMCPJetOs_geo, fIndexArrayLcCMCPJetOs_pt]
O2lccmcdjetsso: [fIndexLcCMCDJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta]
O2lccmcdjetsso: [fIndexLcCMCDJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
extra:
fY: log((sqrt(2.286**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.286**2 + fPt**2))
tags:
Expand Down Expand Up @@ -132,7 +135,7 @@ LcJet_pp:
O2hf3psel: [fCandidateSelFlag]
O2lccjeto: [fIndexLcCJetCOs, fIndexHf3PBases_0,
fJetPt, fJetPhi, fJetEta, fJetNConstituents]
O2lccjetsso: [fIndexLcCJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta]
O2lccjetsso: [fIndexLcCJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
extra:
fY: log((sqrt(2.286**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.286**2 + fPt**2))
filter: "fPt > 1."
Expand Down Expand Up @@ -160,6 +163,9 @@ LcJet_pp:
source: evtorig
file: AnalysisResultsEvt.parquet
filter: "fIsEventReject == 0"
collcnt:
level: data
file: AnalysisResultsCollCnt.parquet

variables:
var_all: [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1]
Expand Down Expand Up @@ -203,6 +209,7 @@ LcJet_pp:
namefile_unmerged_tree: AO2D.root
namefile_reco: AnalysisResultsReco.parquet
namefile_evt: AnalysisResultsEvt.parquet
namefile_collcnt: AnalysisResultsCollCnt.parquet
namefile_evtvalroot: AnalysisResultsROOTEvtVal.root
namefile_evtorig: AnalysisResultsEvtOrig.parquet
namefile_gen: AnalysisResultsGen.parquet
Expand All @@ -226,8 +233,8 @@ LcJet_pp:
seedmerge: [12] #list of periods
period: [LHC22o] #list of periods
select_period: [1]
prefix_dir: /data2/MLhep/real/train_191951/
unmerged_tree_dir: [alice/cern.ch/user/a/alihyperloop/jobs/0037/hy_372811] #list of periods
prefix_dir: /data2/MLhep/real/train_224750/
unmerged_tree_dir: [alice/] #list of periods
pkl: ['${USER}/lcjet/pkl'] #list of periods
pkl_skimmed: ['${USER}/lcjet/pklsk'] #list of periods
pkl_skimmed_merge_for_ml: ['${USER}/lcjet/pklskml'] #list of periods
Expand All @@ -241,10 +248,10 @@ LcJet_pp:
chunksizeskim: [1000] #list of periods
fracmerge: [1.0] #list of periods
seedmerge: [12] #list of periods
period: [mctest_nima] #list of periods
period: [LHC24d3b] #list of periods
select_period: [1]
prefix_dir: /data2/MLhep/sim/train_191952/
unmerged_tree_dir: [merged]
prefix_dir: /data2/MLhep/sim/train_223835/
unmerged_tree_dir: [alice/]
pkl: ['${USER}/lcjet/pkl'] #list of periods
pkl_skimmed: ['${USER}/lcjet/pklsk'] #list of periods
pkl_skimmed_merge_for_ml: ['${USER}/lcjet/pklskml'] #list of periods
Expand Down Expand Up @@ -364,13 +371,23 @@ LcJet_pp:
lntheta-lnkt:
arraycols: [3, 4]

mass_roofit:
range: [2.0, 2.5]
components:
sig:
fn: 'Gaussian::sig(m[2.1,2.48], mean[2.28,2.29], sigma_g1[.005,.03])'
bkg:
fn: 'Exponential::bkg(m[2.1, 2.48], alpha[-100,0])'
sum:
fn: 'SUM::sum(frac[0.,1.]*sig, bkg)'

mass_fit:
func_sig: 'gaus'
func_bkg: 'expo'
# par_start:
# par_fix: {1: 2.286}
par_constrain: {1: [2.28, 2.29], 2: [.005, .03]}
range: [2., 2.5]
range: [2.08, 2.48]
mass_fit_lim: [1.9, 2.6] # histogram range of the invariant mass distribution [GeV/c^2]
bin_width: 0.005 # bin width of the invariant mass histogram
efficiency:
Expand Down
38 changes: 38 additions & 0 deletions machine_learning_hep/fitting/roofitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#############################################################################
## © Copyright CERN 2024. All rights not expressly granted are reserved. ##
## ##
## This program is free software: you can redistribute it and/or modify it ##
## under the terms of the GNU General Public License as published by the ##
## Free Software Foundation, either version 3 of the License, or (at your ##
## option) any later version. This program is distributed in the hope that ##
## it will be useful, but WITHOUT ANY WARRANTY; without even the implied ##
## warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ##
## See the GNU General Public License for more details. ##
## You should have received a copy of the GNU General Public License ##
## along with this program. if not, see <https://www.gnu.org/licenses/>. ##
#############################################################################

import ROOT

# pylint: disable=too-few-public-methods
# (temporary until we add more functionality)
class RooFitter:
def fit_mass(self, hist, fit_spec, plot = False):
if hist.GetEntries() == 0:
raise UserWarning('Cannot fit histogram with no entries')
ws = ROOT.RooWorkspace("ws")
for comp, spec in fit_spec.get('components', {}).items():
ws.factory(spec['fn'])
m = ws.var('m')
dh = ROOT.RooDataHist("dh", "dh", [m], Import=hist)
model = ws.pdf('sum')
model.fitTo(dh, PrintLevel=-1)
frame = m.frame() if plot else None
if plot:
dh.plotOn(frame)
model.plotOn(frame)
for comp in fit_spec.get('components', {}):
if comp != 'sum':
model.plotOn(frame, ROOT.RooFit.Components(comp),
ROOT.RooFit.LineStyle(ROOT.ELineStyle.kDashed))
return (ws, frame)
4 changes: 3 additions & 1 deletion machine_learning_hep/processer_jet.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def process_histomass_single(self, index):
histonorm.SetBinContent(1, len(dfquery(dfevtorig, self.s_evtsel)))
if self.mcordata == 'data':
dfcollcnt = read_df(self.l_collcnt[index])
collcnt = functools.reduce(lambda x,y: float(x)+float(y), (ar[1] for ar in dfcollcnt['fReadCounts']))
collcnt = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in dfcollcnt['fReadCounts']))
self.logger.info('sampled %g collisions', collcnt)
histonorm.SetBinContent(2, collcnt)
get_axis(histonorm, 0).SetBinLabel(1, 'N_{evt}')
Expand Down Expand Up @@ -286,6 +286,8 @@ def process_efficiency_single(self, index):
(df[f'{var}_gen'] >= var_min) & (df[f'{var}_gen'] < var_max)]
fill_hist(h_effkine[(cat, 'det', 'cut', var)], df[['fJetPt', var]])

# TODO: probably need to fill response matrix weighted by efficiency
# or defer to analyser by storing (5-dim histogram)
fill_response(response_matrix[(cat, var)], df[['fJetPt', f'{var}', 'fJetPt_gen', f'{var}_gen']])

df = dfmatch[cat]
Expand Down