alisw · qgp · Jun 19, 2024 · May 22, 2024 · May 22, 2024 · Jun 18, 2024
diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py
@@ -22,6 +22,7 @@
 from ROOT import TF1, TCanvas, TFile, gStyle
 
 from machine_learning_hep.analysis.analyzer import Analyzer
+from machine_learning_hep.fitting.roofitter import RooFitter
 from machine_learning_hep.utilities import folding
 from machine_learning_hep.utils.hist import (bin_array, create_hist,
                                              fill_hist_fast, get_axis, get_dim,
@@ -76,11 +77,13 @@ def __init__(self, datap, case, typean, period):
         self.n_colls = {}
 
         self.path_fig = Path(f'fig/{self.case}/{self.typean}')
-        for folder in ['qa', 'fit', 'sideband', 'signalextr', 'fd', 'uf']:
+        for folder in ['qa', 'fit', 'roofit', 'sideband', 'signalextr', 'fd', 'uf']:
             (self.path_fig / folder).mkdir(parents=True, exist_ok=True)
 
         self.rfigfile = TFile(str(self.path_fig / 'output.root'), 'recreate')
 
+        self.fitter = RooFitter()
+
     #region helpers
     def _save_canvas(self, canvas, filename):
         # folder = self.d_resultsallpmc if mcordata == 'mc' else self.d_resultsallpdata
@@ -139,8 +142,10 @@ def calculate_efficiencies(self):
         cats = {'pr', 'np'}
         rfilename = self.n_fileeff
         with TFile(rfilename) as rfile:
-            h_gen = {cat: project_hist(rfile.Get(f'h_ptjet-pthf_{cat}_gen'), [1], {}) for cat in cats}
-            h_det = {cat: project_hist(rfile.Get(f'h_ptjet-pthf_{cat}_det'), [1], {}).Clone(f'h_eff_{cat}')
+            bins_ptjet = (1, 2)
+            # TODO: fix projection range
+            h_gen = {cat: project_hist(rfile.Get(f'h_ptjet-pthf_{cat}_gen'), [1], {0: bins_ptjet}) for cat in cats}
+            h_det = {cat: project_hist(rfile.Get(f'h_ptjet-pthf_{cat}_det'), [1], {0: bins_ptjet}).Clone(f'h_eff_{cat}')
                      for cat in cats}
 
             for cat in cats:
@@ -173,6 +178,13 @@ def _correct_efficiency(self, hist, ipt):
 
 
     #region fitting
+    def _roofit_mass(self, hist, filename = None):
+        _ws, frame = self.fitter.fit_mass(hist, self.cfg('mass_roofit', {}), True)
+        c = TCanvas()
+        frame.Draw()
+        self._save_canvas(c, filename)
+
+
     def _fit_mass(self, hist, filename = None):
         if hist.GetEntries() == 0:
             raise UserWarning('Cannot fit histogram with no entries')
@@ -231,10 +243,11 @@ def fit(self):
             with TFile(rfilename) as rfile:
                 h = rfile.Get('h_mass-ptjet-pthf')
                 for ipt in range(get_nbins(h, 2)):
-                    h_invmass = project_hist(h, [0], {2: (ipt+1, ipt+1)})
-                    if h_invmass.GetEntries() < 100:
+                    h_invmass = project_hist(h, [0], {2: (ipt+1, ipt+1)}) # TODO: under-/overflow for jets
+                    if h_invmass.GetEntries() < 100: # TODO: reconsider criterion
                         self.logger.error('Not enough entries to fit for %s bin %d', mcordata, ipt)
                         continue
+                    self._roofit_mass(h_invmass, f'roofit/h_mass_fitted_{ipt}_{mcordata}.png')
                     fit_res, _, func_bkg = self._fit_mass( h_invmass, f'fit/h_mass_fitted_{ipt}_{mcordata}.png')
                     if fit_res and fit_res.Get() and fit_res.IsValid():
                         self.fit_sigma[mcordata][ipt] = fit_res.Parameter(2)

diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml
@@ -12,7 +12,7 @@
 ##   along with this program. if not, see <https://www.gnu.org/licenses/>. ##
 #############################################################################

 D0jet_pp:
  doml: true
  mass: 1.864
  sel_reco_unp: "fPt > 1."
@@ -20,8 +20,8 @@
  sel_gen_unp: "fPt > 1. and fPt < 10."
  sel_cen_unp: null
  sel_good_evt_unp: null
  sel_reco_skim: [null,null,null,null,null,null,null] # sel_skim_binmin bins
  sel_gen_skim: [null,null,null,null,null,null,null] # sel_skim_binmin bins
  sel_skim_binmin: [1,2,4,6,8,12,24] # skimming pt bins (sel_skim_binmin bins)
  sel_skim_binmax:   [2,4,6,8,12,24,48] # skimming pt bins (sel_skim_binmin bins)
  var_binning: fPt
@@ -48,7 +48,7 @@
    ismcbkg: [[],[1]]
    ismcrefl: [[1],[1]] # probably missing from tree creator

  #region dfs
  dfs:
    read:
      evtorig:
@@ -71,9 +71,9 @@
           O2hfd0pbase: [fPt, fEta, fPhi, fFlagMcMatchGen, fOriginMcGen]
           O2d0cmcpjeto: [fIndexD0CMCPJetCOs, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
           O2d0cmcpjetmo: [fIndexArrayD0CMCDJetOs_hf, fIndexArrayD0CMCDJetOs_geo, fIndexArrayD0CMCDJetOs_pt]
-          O2d0cmcpjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta] # fNSub2DR, fNSub1, fNSub2
+          O2d0cmcpjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
         extra:
           fY: log((sqrt(1.864**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(1.864**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated
         tags:
          isstd: {var: fFlagMcMatchGen, req: [[1],[]]}
          ismcsignal: {var: fFlagMcMatchGen, req: [[0],[]], abs: true}
@@ -105,14 +105,14 @@
           O2hfd0sel: [fCandidateSelFlag]
           O2d0cmcdjeto: [fIndexD0CMCDJetCOs, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
           O2d0cmcdjetmo: [fIndexArrayD0CMCPJetOs_hf, fIndexArrayD0CMCPJetOs_geo, fIndexArrayD0CMCPJetOs_pt]
-          O2d0cmcdjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta] # fNSub2DR, fNSub1, fNSub2
+          O2d0cmcdjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
         extra:
           fY: log((sqrt(1.864**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(1.864**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated
           isd0: fFlagMcMatchRec == 1
          isd0bar: fFlagMcMatchRec == -1
        tags:
          ismcsignal: {var: fFlagMcMatchRec, req: [[1], []], abs: True}
          ismcbkg: {var: fFlagMcMatchRec, req: [[], [1]], abs: True}
          seld0: {var: fCandidateSelFlag, req: [[0], []]}
          seld0bar: {var: fCandidateSelFlag, req: [[1], []]}
          ismcprompt: {var: fOriginMcRec, req: [[0],[]]}
@@ -143,7 +143,7 @@
          O2d0cjeto: [fIndexD0CJetCOs, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
          O2d0cjetsso: [fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
        extra:
          fY: log((sqrt(1.864**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(1.864**2 + fPt**2)) #TODO : change mass or make sure Lc mass is updated
        filter: "fPt >= 1. and abs(fY) <= 0.8"

    merge:
@@ -175,7 +175,7 @@
  variables:
    var_all: [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1]
    var_training: [[fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1], [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1]]
      #TODO: add new variables for dca, max_norm_d0d0exp
      # sel_skim_binmin bins
    var_boundaries: [fCosThetaStar, fPtProng]
    var_correlation:
@@ -227,7 +227,7 @@
    respfilename: "resphisto.root"
    crossfilename: "cross_section_tot.root"

  #region multi
  multi:
    data:
      nprocessesparallel: 20
@@ -238,7 +238,7 @@
       seedmerge: [12] #list of periods
       period: [LHC22o] #list of periods
       select_period: [1]
-      prefix_dir: /data2/MLhep/real/train_207280/
+      prefix_dir: /data2/MLhep/real/train_224749/
       unmerged_tree_dir: [/alice/] #list of periods
       pkl: ['${USER}/d0jet/pkl'] #list of periods
       pkl_skimmed: ['${USER}/d0jet/pklsk'] #list of periods
@@ -253,10 +253,10 @@
       chunksizeskim: [1000] #list of periods
       fracmerge: [1.0] #list of periods
       seedmerge: [12] #list of periods
-      period: [LHC22b1b] #list of periods
+      period: [LHC24d3b] #list of periods
       select_period: [1]
-      prefix_dir: /data2/MLhep/sim/train_184858/
-      unmerged_tree_dir: [alice/cern.ch/user/a/alihyperloop/outputs/0018/184858/24138]
+      prefix_dir: /data2/MLhep/sim/train_222234/
+      unmerged_tree_dir: [alice/]
       pkl: ['${USER}/d0jet/pkl'] #list of periods
       pkl_skimmed: ['${USER}/d0jet/pklsk'] #list of periods
       pkl_skimmed_merge_for_ml: ['${USER}/d0jet/pklskml'] #list of periods
@@ -377,6 +377,16 @@
         lntheta-lnkt:
           arraycols: [3, 4]
 
+      mass_roofit:
+        range: [1.675, 2.06]
+        components:
+          sig:
+            fn: 'Gaussian::sig(m[1.675,2.06], mean[1.85,1.87], sigma_g1[.01,.08])'
+          bkg:
+            fn: 'Exponential::bkg(m[1.675,2.06], alpha[-100,0])'
+          sum:
+            fn: 'SUM::sum(frac[0.,1.]*sig, bkg)'
+
       mass_fit:
         func_sig: 'gaus'
         func_bkg: 'expo'

diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcJet_pp.yml
@@ -55,6 +55,9 @@ LcJet_pp:
           O2hf3pcollbase: [fNumContrib]
         extra:
           fIsEventReject: 0
+      collcnt:
+        trees:
+          O2lccollcount: [fReadCounts, fWrittenCounts]
 
       # collgen:
       #   level: gen
@@ -68,7 +71,7 @@ LcJet_pp:
           O2hf3ppbase: [fPt, fEta, fPhi, fFlagMcMatchGen, fOriginMcGen]
           O2lccmcpjeto: [fIndexLcCMCPJetCOs, fIndexHf3PPBases_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
           O2lccmcpjetmo: [fIndexArrayLcCMCDJetOs_hf, fIndexArrayLcCMCDJetOs_geo, fIndexArrayLcCMCDJetOs_pt]
-          O2lccmcpjetsso: [fIndexLcCMCPJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta]
+          O2lccmcpjetsso: [fIndexLcCMCPJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
         extra:
           fY: log((sqrt(2.286**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.286**2 + fPt**2))
         tags:
@@ -100,7 +103,7 @@ LcJet_pp:
           O2hf3psel: [fCandidateSelFlag]
           O2lccmcdjeto: [fIndexLcCMCDJetCOs, fIndexHf3PBases_0, fJetPt, fJetPhi, fJetEta, fJetNConstituents]
           O2lccmcdjetmo: [fIndexArrayLcCMCPJetOs_hf, fIndexArrayLcCMCPJetOs_geo, fIndexArrayLcCMCPJetOs_pt]
-          O2lccmcdjetsso: [fIndexLcCMCDJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta]
+          O2lccmcdjetsso: [fIndexLcCMCDJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
         extra:
           fY: log((sqrt(2.286**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.286**2 + fPt**2))
         tags:
@@ -132,7 +135,7 @@ LcJet_pp:
           O2hf3psel: [fCandidateSelFlag]
           O2lccjeto: [fIndexLcCJetCOs, fIndexHf3PBases_0,
                     fJetPt, fJetPhi, fJetEta, fJetNConstituents]
-          O2lccjetsso: [fIndexLcCJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta]
+          O2lccjetsso: [fIndexLcCJetOs, fEnergyMother, fPtLeading, fPtSubLeading, fTheta, fNSub2DR, fNSub1, fNSub2]
         extra:
           fY: log((sqrt(2.286**2 + (fPt * cosh(fEta))**2) + fPt * sinh(fEta)) / sqrt(2.286**2 + fPt**2))
         filter: "fPt > 1."
@@ -160,6 +163,9 @@ LcJet_pp:
         source: evtorig
         file: AnalysisResultsEvt.parquet
         filter: "fIsEventReject == 0"
+      collcnt:
+        level: data
+        file: AnalysisResultsCollCnt.parquet
 
   variables:
     var_all: [fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, fCpa, fCpaXY, fImpactParameter0, fImpactParameter1, fErrorImpactParameter0, fErrorImpactParameter1, fNSigTpcPi0, fNSigTpcKa0, fNSigTofPi0, fNSigTofKa0, fNSigTpcPi1, fNSigTpcKa1, fNSigTofPi1, fNSigTofKa1]
@@ -203,6 +209,7 @@ LcJet_pp:
     namefile_unmerged_tree: AO2D.root
     namefile_reco: AnalysisResultsReco.parquet
     namefile_evt: AnalysisResultsEvt.parquet
+    namefile_collcnt: AnalysisResultsCollCnt.parquet
     namefile_evtvalroot: AnalysisResultsROOTEvtVal.root
     namefile_evtorig: AnalysisResultsEvtOrig.parquet
     namefile_gen: AnalysisResultsGen.parquet
@@ -226,8 +233,8 @@ LcJet_pp:
       seedmerge: [12] #list of periods
       period: [LHC22o] #list of periods
       select_period: [1]
-      prefix_dir: /data2/MLhep/real/train_191951/
-      unmerged_tree_dir: [alice/cern.ch/user/a/alihyperloop/jobs/0037/hy_372811] #list of periods
+      prefix_dir: /data2/MLhep/real/train_224750/
+      unmerged_tree_dir: [alice/] #list of periods
       pkl: ['${USER}/lcjet/pkl'] #list of periods
       pkl_skimmed: ['${USER}/lcjet/pklsk'] #list of periods
       pkl_skimmed_merge_for_ml: ['${USER}/lcjet/pklskml'] #list of periods
@@ -241,10 +248,10 @@ LcJet_pp:
       chunksizeskim: [1000] #list of periods
       fracmerge: [1.0] #list of periods
       seedmerge: [12] #list of periods
-      period: [mctest_nima] #list of periods
+      period: [LHC24d3b] #list of periods
       select_period: [1]
-      prefix_dir: /data2/MLhep/sim/train_191952/
-      unmerged_tree_dir: [merged]
+      prefix_dir: /data2/MLhep/sim/train_223835/
+      unmerged_tree_dir: [alice/]
       pkl: ['${USER}/lcjet/pkl'] #list of periods
       pkl_skimmed: ['${USER}/lcjet/pklsk'] #list of periods
       pkl_skimmed_merge_for_ml: ['${USER}/lcjet/pklskml'] #list of periods
@@ -364,13 +371,23 @@ LcJet_pp:
         lntheta-lnkt:
           arraycols: [3, 4]
 
+      mass_roofit:
+        range: [2.0, 2.5]
+        components:
+          sig:
+            fn: 'Gaussian::sig(m[2.1,2.48], mean[2.28,2.29], sigma_g1[.005,.03])'
+          bkg:
+            fn: 'Exponential::bkg(m[2.1, 2.48], alpha[-100,0])'
+          sum:
+            fn: 'SUM::sum(frac[0.,1.]*sig, bkg)'
+
       mass_fit:
         func_sig: 'gaus'
         func_bkg: 'expo'
         # par_start:
         # par_fix: {1: 2.286}
         par_constrain: {1: [2.28, 2.29], 2: [.005, .03]}
-        range: [2., 2.5]
+        range: [2.08, 2.48]
       mass_fit_lim: [1.9, 2.6] # histogram range of the invariant mass distribution [GeV/c^2]
       bin_width: 0.005 # bin width of the invariant mass histogram
       efficiency:

diff --git a/machine_learning_hep/fitting/roofitter.py b/machine_learning_hep/fitting/roofitter.py
@@ -0,0 +1,38 @@
+#############################################################################
+##  © Copyright CERN 2024. All rights not expressly granted are reserved.  ##
+##                                                                         ##
+## This program is free software: you can redistribute it and/or modify it ##
+##  under the terms of the GNU General Public License as published by the  ##
+## Free Software Foundation, either version 3 of the License, or (at your  ##
+## option) any later version. This program is distributed in the hope that ##
+##  it will be useful, but WITHOUT ANY WARRANTY; without even the implied  ##
+##     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    ##
+##           See the GNU General Public License for more details.          ##
+##    You should have received a copy of the GNU General Public License    ##
+##   along with this program. if not, see <https://www.gnu.org/licenses/>. ##
+#############################################################################
+
+import ROOT
+
+# pylint: disable=too-few-public-methods
+# (temporary until we add more functionality)
+class RooFitter:
+    def fit_mass(self, hist, fit_spec, plot = False):
+        if hist.GetEntries() == 0:
+            raise UserWarning('Cannot fit histogram with no entries')
+        ws = ROOT.RooWorkspace("ws")
+        for comp, spec in fit_spec.get('components', {}).items():
+            ws.factory(spec['fn'])
+        m = ws.var('m')
+        dh = ROOT.RooDataHist("dh", "dh", [m], Import=hist)
+        model = ws.pdf('sum')
+        model.fitTo(dh, PrintLevel=-1)
+        frame = m.frame() if plot else None
+        if plot:
+            dh.plotOn(frame)
+            model.plotOn(frame)
+            for comp in fit_spec.get('components', {}):
+                if comp != 'sum':
+                    model.plotOn(frame, ROOT.RooFit.Components(comp),
+                                 ROOT.RooFit.LineStyle(ROOT.ELineStyle.kDashed))
+        return (ws, frame)
diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py
@@ -148,7 +148,7 @@ def process_histomass_single(self, index):
             histonorm.SetBinContent(1, len(dfquery(dfevtorig, self.s_evtsel)))
             if self.mcordata == 'data':
                 dfcollcnt = read_df(self.l_collcnt[index])
-                collcnt = functools.reduce(lambda x,y: float(x)+float(y), (ar[1] for ar in dfcollcnt['fReadCounts']))
+                collcnt = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in dfcollcnt['fReadCounts']))
                 self.logger.info('sampled %g collisions', collcnt)
                 histonorm.SetBinContent(2, collcnt)
             get_axis(histonorm, 0).SetBinLabel(1, 'N_{evt}')
@@ -286,6 +286,8 @@ def process_efficiency_single(self, index):
                                 (df[f'{var}_gen'] >= var_min) & (df[f'{var}_gen'] < var_max)]
                     fill_hist(h_effkine[(cat, 'det', 'cut', var)], df[['fJetPt', var]])
 
+                    # TODO: probably need to fill response matrix weighted by efficiency
+                    # or defer to analyser by storing (5-dim histogram)
                     fill_response(response_matrix[(cat, var)], df[['fJetPt', f'{var}', 'fJetPt_gen', f'{var}_gen']])
 
                     df = dfmatch[cat]