alisw · qgp · Nov 21, 2023 · Nov 20, 2023 · Nov 20, 2023 · Nov 20, 2023
diff --git a/machine_learning_hep/analysis/analyzer.py b/machine_learning_hep/analysis/analyzer.py
@@ -1,5 +1,5 @@
 #############################################################################
-##  © Copyright CERN 2018. All rights not expressly granted are reserved.  ##
+##  © Copyright CERN 2023. All rights not expressly granted are reserved.  ##
 ##                 Author: [email protected]                  ##
 ## This program is free software: you can redistribute it and/or modify it ##
 ##  under the terms of the GNU General Public License as published by the  ##
@@ -25,10 +25,11 @@ def __init__(self, datap, case, typean, period):
 
         # The only thing here is to dump the database in the data analysis directory
         for mcordata in ("mc", "data"):
-            prefix_dir_res = datap["mlapplication"][mcordata].get("prefix_dir_res", "")
-            results_dir = prefix_dir_res + datap["analysis"][typean][mcordata]["results"][period] \
+            dp = datap["analysis"][typean][mcordata]
+            prefix_dir_res = dp.get("prefix_dir_res", "")
+            results_dir = prefix_dir_res + dp["results"][period] \
                     if period is not None \
-                    else prefix_dir_res + datap["analysis"][typean][mcordata]["resultsallp"]
+                    else prefix_dir_res + dp["resultsallp"]
             if not exists(results_dir):
                 # create otput directories in case they do not exist
                 makedirs(results_dir)

diff --git a/machine_learning_hep/analysis/analyzerdhadrons.py b/machine_learning_hep/analysis/analyzerdhadrons.py
@@ -1,5 +1,5 @@
 #############################################################################
-##  © Copyright CERN 2018. All rights not expressly granted are reserved.  ##
+##  © Copyright CERN 2023. All rights not expressly granted are reserved.  ##
 ##                 Author: [email protected]                  ##
 ## This program is free software: you can redistribute it and/or modify it ##
 ##  under the terms of the GNU General Public License as published by the  ##
@@ -20,7 +20,7 @@
 # pylint: disable=unused-wildcard-import, wildcard-import
 #from array import array
 #import itertools
-# pylint: disable=import-error, no-name-in-module, unused-import
+# pylint: disable=import-error, no-name-in-module, unused-import, consider-using-f-string
 # from root_numpy import hist2array, array2hist
 from ROOT import TFile, TH1F, TH2F, TCanvas, TPad, TF1, TH1D
 from ROOT import gStyle, TLegend, TLine, TText, TPaveText, TArrow
@@ -54,10 +54,15 @@ def __init__(self, datap, case, typean, period):
         self.lpt_probcutfin = datap["mlapplication"]["probcutoptimal"]
         self.triggerbit = datap["analysis"][self.typean].get("triggerbit", "")
 
-        self.d_resultsallpmc = datap["analysis"][typean]["mc"]["results"][period] \
-            if period is not None else datap["analysis"][typean]["mc"]["resultsallp"]
-        self.d_resultsallpdata = datap["analysis"][typean]["data"]["results"][period] \
-            if period is not None else datap["analysis"][typean]["data"]["resultsallp"]
+        # Result directories sit under an optional per-sample prefix; default to ""
+        # so that configurations without "prefix_dir_res" still concatenate cleanly.
+        dp = datap["analysis"][self.typean]
+        self.d_prefix_mc = dp["mc"].get("prefix_dir_res", "")
+        self.d_prefix_data = dp["data"].get("prefix_dir_res", "")
+        self.d_resultsallpmc = self.d_prefix_mc + \
+            (dp["mc"]["results"][period] if period is not None else dp["mc"]["resultsallp"])
+        self.d_resultsallpdata = self.d_prefix_data + \
+            (dp["data"]["results"][period] if period is not None else dp["data"]["resultsallp"])
 
         n_filemass_name = datap["files_names"]["histofilename"]
         self.n_filemass = os.path.join(self.d_resultsallpdata, n_filemass_name)

diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi.yml
@@ -257,11 +257,11 @@ LcpKpi:
 
   mlapplication:
     data:
-      prefix_dir_res: /data2/MLhep/
+      prefix_dir_app: /data2/MLhep/
       pkl_skimmed_dec: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdata] #list of periods
       pkl_skimmed_decmerged: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdatamerged] #list of periods
     mc:
-      prefix_dir_res: /data2/MLhep/
+      prefix_dir_app: /data2/MLhep/
       pkl_skimmed_dec: [LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmc] #list of periods
       pkl_skimmed_decmerged: [LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmcmerged] #list of periods
     modelname: xgboost
@@ -272,9 +272,9 @@ LcpKpi:
                      xgboost_classifierLcpKpi_dfselection_fPt_8.0_12.0.sav,
                      xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav]
     probcutpresel:
-      data: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1] #list of nbins
-      mc: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1] #list of nbins
-    probcutoptimal: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3] #list of nbins
+      data: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3] #list of nbins
+      mc: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3] #list of nbins
+    probcutoptimal: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] #list of nbins
 
   analysis:
     indexhptspectrum: -1 #kD0Kpi=0, kDplusKpipi=1, kDstarD0pi=2, kDsKKpi=3, kLctopKpi=4, kLcK0Sp=5
@@ -319,22 +319,24 @@ LcpKpi:
 
       data:
         runselection: [null, null] #FIXME
+        prefix_dir_res: /data2/MLhep/
         results: [LHC22pp/Results/prod_LHC22o/resultsdata] #list of periods
         resultsallp: LHC22pp/Results/resultsdatatot
       mc:
         runselection: [null] #FIXME
+        prefix_dir_res: /data2/MLhep/
         results: [LHC22pp_mc/Results/prod_LHC22b1b/resultsmc] #list of periods
         resultsallp: LHC22pp_mc/Results/prod_LHC22b1b/resultsmctot
 
       mass_fit_lim: [2.14, 2.436] # region for the fit of the invariant mass distribution [GeV/c^2]
       bin_width: 0.001 # bin width of the invariant mass histogram
       init_fits_from: [mc,mc,mc,mc,mc,mc] # data or mc
       sgnfunc: [kGaus,kGaus,kGaus,kGaus,kGaus,kGaus]
-      bkgfunc: [Pol2.Pol2,Pol2,Pol2,Pol2,Pol2]
+      bkgfunc: [Pol2,Pol2,Pol2,Pol2,Pol2,Pol2]
       masspeak: 2.286
       massmin: [2.14,2.14,2.14,2.14,2.14,2.14]
       massmax: [2.436,2.436,2.436,2.436,2.436,2.436]
-      rebin: [4,4,4,4,4,4]
+      rebin: [6,6,6,6,6,6]
       fix_mean: [false,false,false,false,false,false]
       fix_sigma: [false,false,false,false,false,false]
       masssecpeak: 0.
@@ -360,7 +362,7 @@ LcpKpi:
       latexbin2var: "n_{trkl}"
       nevents: null
       dodoublecross: false
-      dobkgfromsideband: true
+      dobkgfromsideband: false
 
   systematics:
     probvariation:

diff --git a/machine_learning_hep/fitting/fitters.py b/machine_learning_hep/fitting/fitters.py
@@ -1,5 +1,5 @@
 #############################################################################
-##  © Copyright CERN 2018. All rights not expressly granted are reserved.  ##
+##  © Copyright CERN 2023. All rights not expressly granted are reserved.  ##
 ##                 Author: [email protected]                  ##
 ## This program is free software: you can redistribute it and/or modify it ##
 ##  under the terms of the GNU General Public License as published by the  ##
@@ -24,7 +24,7 @@
 from math import sqrt
 from ctypes import c_double
 
-# pylint: disable=import-error, no-name-in-module, unused-import
+# pylint: disable=import-error, no-name-in-module, unused-import, f-string-without-interpolation
 from ROOT import AliHFInvMassFitter, AliVertexingHFUtils, AliHFInvMassMultiTrialFit
 from ROOT import TFile, TH1F, TH1D, TF1, TPaveText, TLine, TLegend, TLatex
 from ROOT import kBlue, kRed, kGreen, kMagenta, kOrange, kPink, kCyan, kYellow, kBlack
@@ -497,6 +497,10 @@ def draw_kernel(self, root_pad, root_objects=[], **draw_args): # pylint: disable
         signif = c_double()
         signif_err = c_double()
         self.kernel.Significance(n_sigma_signal, signif, signif_err)
+        bkg = bkg.value
+        bkg_err = bkg_err.value
+        signif = signif.value
+        signif_err = signif_err.value
         sig_o_bkg = sig / bkg if bkg > 0. else -1.
 
         root_objects.append(self.add_pave_helper_(0.15, 0.7, 0.48, 0.89, "NDC"))
@@ -1399,7 +1403,7 @@ def fill_pad(pad, ylims, histos, ref_line=None):
             pad.cd()
             pad.SetLeftMargin(0.13)
             pad.SetRightMargin(0.06)
-            lim_delta = (ylims[1] - ylims[0])
+            lim_delta = ylims[1] - ylims[0]
             lim_min = ylims[0] - 0.1 * lim_delta
             lim_max = ylims[1] + 0.1 * lim_delta
             for h in  histos:

diff --git a/machine_learning_hep/fitting/helpers.py b/machine_learning_hep/fitting/helpers.py
@@ -1,5 +1,5 @@
 #############################################################################
-##  © Copyright CERN 2018. All rights not expressly granted are reserved.  ##
+##  © Copyright CERN 2023. All rights not expressly granted are reserved.  ##
 ##                 Author: [email protected]                  ##
 ## This program is free software: you can redistribute it and/or modify it ##
 ##  under the terms of the GNU General Public License as published by the  ##
@@ -20,7 +20,7 @@
 from array import array
 from ctypes import c_double
 
-#pylint: disable=too-many-lines, too-few-public-methods
+#pylint: disable=too-many-lines, too-few-public-methods, consider-using-f-string, too-many-statements
 from ROOT import TFile, TH1F, TF1, TCanvas, gStyle #pylint: disable=import-error, no-name-in-module
 
 from machine_learning_hep.logger import get_logger
@@ -29,7 +29,7 @@
 from machine_learning_hep.fitting.utils import save_fit, load_fit
 from machine_learning_hep.fitting.fitters import FitAliHF, FitROOTGauss, FitSystAliHF
 
-class MLFitParsFactory: # pylint: disable=too-many-instance-attributes, too-many-statements
+class MLFitParsFactory: # pylint: disable=too-many-instance-attributes
     """
     Managing MLHEP specific fit parameters and is used to collect and retrieve all information
     required to initialise a (systematic) fit
@@ -670,17 +670,20 @@ def __call__(self, x_var, par):
                     return 0
 
             if fbkg[ibin1] == "kLin":
-                fit_func = TF1("fit_func", FitBkg(), fitlim[0], fitlim[1], 2)
+                bkgFunc = FitBkg()
+                fit_func = TF1("fit_func", bkgFunc, fitlim[0], fitlim[1], 2)
                 hmass.Fit(fit_func, '', '', fitlim[0], fitlim[1])
                 pars = fit_func.GetParameters()
                 bkg_func = TF1("fbkg", "pol1", fitlim[0], fitlim[1])
             elif fbkg[ibin1] == "Pol2":
-                fit_func = TF1("fit_func", FitBkg(), fitlim[0], fitlim[1], 3)
-                hmass.Fit(fit_func, '', '', fitlim[0], fitlim[1])
+                bkgFunc = FitBkg()
+                fit_func = TF1("fit_func", bkgFunc, fitlim[0], fitlim[1], 3)
+                hmass.Fit("fit_func", '', '', fitlim[0], fitlim[1])
                 pars = fit_func.GetParameters()
                 bkg_func = TF1("fbkg", "pol2", fitlim[0], fitlim[1])
             elif fbkg[ibin1] == "kExpo":
-                fit_func = TF1("fit_func", FitBkg(), fitlim[0], fitlim[1], 2)
+                bkgFunc = FitBkg()
+                fit_func = TF1("fit_func", bkgFunc, fitlim[0], fitlim[1], 2)
                 hmass.Fit(fit_func, '', '', fitlim[0], fitlim[1])
                 pars = fit_func.GetParameters()
                 bkg_func = TF1("fbkg", "expo", fitlim[0], fitlim[1])

diff --git a/machine_learning_hep/multiprocesser.py b/machine_learning_hep/multiprocesser.py
@@ -1,5 +1,5 @@
 #############################################################################
-##  © Copyright CERN 2018. All rights not expressly granted are reserved.  ##
+##  © Copyright CERN 2023. All rights not expressly granted are reserved.  ##
 ##                 Author: [email protected]                  ##
 ## This program is free software: you can redistribute it and/or modify it ##
 ##  under the terms of the GNU General Public License as published by the  ##
@@ -51,20 +51,16 @@ def __init__(self, case, proc_class, datap, typean, run_param, mcordata):
         self.dlper_pklsk = []
         self.dlper_pklml = []
         self.d_prefix = datap["multi"][self.mcordata].get("prefix_dir", "")
-        self.d_prefix_res = datap["mlapplication"][self.mcordata].get("prefix_dir_res", "")
-        for s in datap["multi"][self.mcordata]["unmerged_tree_dir"]:
-            self.dlper_root.append(self.d_prefix + s)
-        for s in datap["multi"][self.mcordata]["pkl"]:
-            self.dlper_pkl.append(self.d_prefix + s)
-        for s in datap["multi"][self.mcordata]["pkl_skimmed"]:
-            self.dlper_pklsk.append(self.d_prefix + s)
-        for s in datap["multi"][self.mcordata]["pkl_skimmed_merge_for_ml"]:
-            self.dlper_pklml.append(self.d_prefix + s)
-        self.d_pklml_mergedallp = self.d_prefix + \
-            datap["multi"][self.mcordata]["pkl_skimmed_merge_for_ml_all"]
-        self.d_pklevt_mergedallp = self.d_prefix + \
-            datap["multi"][self.mcordata]["pkl_evtcounter_all"]
-
+        self.d_prefix_app = datap["mlapplication"][self.mcordata].get("prefix_dir_app", "")
+        self.d_prefix_res = datap["analysis"][self.typean][self.mcordata].get("prefix_dir_res", "")
+
+        dp = datap["multi"][self.mcordata]
+        self.dlper_root = [self.d_prefix + p for p in dp["unmerged_tree_dir"]]
+        self.dlper_pkl = [self.d_prefix + p for p in dp["pkl"]]
+        self.dlper_pklsk = [self.d_prefix + p for p in dp["pkl_skimmed"]]
+        self.dlper_pklml = [self.d_prefix + p for p in dp["pkl_skimmed_merge_for_ml"]]
+        self.d_pklml_mergedallp = self.d_prefix + dp["pkl_skimmed_merge_for_ml_all"]
+        self.d_pklevt_mergedallp = self.d_prefix + dp["pkl_evtcounter_all"]
         self.dlper_mcreweights = datap["multi"][self.mcordata]["mcreweights"]
 
         #namefiles pkl
@@ -101,18 +97,14 @@ def __init__(self, case, proc_class, datap, typean, run_param, mcordata):
         self.lper_evtorig = \
                 [os.path.join(direc, self.n_evtorig) for direc in self.dlper_pkl]
 
-        self.dlper_reco_modapp = []
-        self.dlper_reco_modappmerged = []
-        self.d_results = []
-
-        for s in datap["mlapplication"][self.mcordata]["pkl_skimmed_dec"]:
-            self.dlper_reco_modapp.append(self.d_prefix_res + s)
-        for s in datap["mlapplication"][self.mcordata]["pkl_skimmed_decmerged"]:
-            self.dlper_reco_modappmerged.append(self.d_prefix_res + s)
-        for s in datap["analysis"][self.typean][self.mcordata]["results"]:
-            self.d_results.append(self.d_prefix_res + s)
-        self.d_resultsallp = \
-                 self.d_prefix_res + datap["analysis"][self.typean][self.mcordata]["resultsallp"]
+        dp = datap["mlapplication"][self.mcordata]
+        self.dlper_reco_modapp = [self.d_prefix_app + p for p in dp["pkl_skimmed_dec"]]
+        self.dlper_reco_modappmerged = [self.d_prefix_app + p for p in dp["pkl_skimmed_decmerged"]]
+
+        dp = datap["analysis"][self.typean][self.mcordata]
+        self.d_results = [self.d_prefix_res + p for p in dp["results"]]
+        self.d_resultsallp = self.d_prefix_res + dp["resultsallp"]
+
         self.lpt_probcutpre = datap["mlapplication"]["probcutpresel"]
         self.lpt_probcut = datap["mlapplication"]["probcutoptimal"]
         self.f_evt_mergedallp = os.path.join(self.d_pklevt_mergedallp, self.n_evt)

diff --git a/machine_learning_hep/processerdhadrons.py b/machine_learning_hep/processerdhadrons.py
@@ -1,5 +1,5 @@
 #############################################################################
-##  © Copyright CERN 2018. All rights not expressly granted are reserved.  ##
+##  © Copyright CERN 2023. All rights not expressly granted are reserved.  ##
 ##                 Author: [email protected]                  ##
 ## This program is free software: you can redistribute it and/or modify it ##
 ##  under the terms of the GNU General Public License as published by the  ##
@@ -133,7 +133,7 @@ def process_histomass_single(self, index):
             h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                              self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
 
-            fill_hist(h_invmass, df[self.v_invmass].to_numpy())
+            fill_hist(h_invmass, df[self.v_invmass])
             myfile.cd()
             h_invmass.Write()
 
@@ -149,9 +149,9 @@ def process_histomass_single(self, index):
                                      self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                 h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                       self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
-                fill_hist(h_invmass_sig, df_sig[self.v_invmass].to_numpy())
-                fill_hist(h_invmass_bkg, df_bkg[self.v_invmass].to_numpy())
-                fill_hist(h_invmass_refl, df_refl[self.v_invmass].to_numpy())
+                fill_hist(h_invmass_sig, df_sig[self.v_invmass])
+                fill_hist(h_invmass_bkg, df_bkg[self.v_invmass])
+                fill_hist(h_invmass_refl, df_refl[self.v_invmass])
                 myfile.cd()
                 h_invmass_sig.Write()
                 h_invmass_bkg.Write()

diff --git a/machine_learning_hep/steer_analysis.py b/machine_learning_hep/steer_analysis.py
@@ -140,20 +140,22 @@ def do_entire_analysis(data_config: dict, data_param: dict, data_param_overwrite
     dirpklmltotdata = dirprefixdata + dp["pkl_skimmed_merge_for_ml_all"]
 
     dp = data_param[case]["mlapplication"]["mc"]
-    dirprefixmcres = dp.get("prefix_dir_res", "")
-    dirpklskdecmc = [dirprefixmcres + p for p in dp["pkl_skimmed_dec"]]
-    dirpklskdec_mergedmc = [dirprefixmcres + p for p in dp["pkl_skimmed_decmerged"]]
+    dirprefixmcapp = dp.get("prefix_dir_app", "")
+    dirpklskdecmc = [dirprefixmcapp + p for p in dp["pkl_skimmed_dec"]]
+    dirpklskdec_mergedmc = [dirprefixmcapp + p for p in dp["pkl_skimmed_decmerged"]]
 
     dp = data_param[case]["mlapplication"]["data"]
-    dirprefixdatares = dp.get("prefix_dir_res", "")
-    dirpklskdecdata = [dirprefixdatares + p for p in dp["pkl_skimmed_dec"]]
-    dirpklskdec_mergeddata = [dirprefixdatares + p for p in dp["pkl_skimmed_decmerged"]]
+    dirprefixdataapp = dp.get("prefix_dir_app", "")
+    dirpklskdecdata = [dirprefixdataapp + p for p in dp["pkl_skimmed_dec"]]
+    dirpklskdec_mergeddata = [dirprefixdataapp + p for p in dp["pkl_skimmed_decmerged"]]
 
     dp = data_param[case]["analysis"][typean]["data"]
+    dirprefixdatares = dp.get("prefix_dir_res", "")
     dirresultsdata = [dirprefixdatares + p for p in dp["results"]]
     dirresultsdatatot = dirprefixdatares + dp["resultsallp"]
 
     dp = data_param[case]["analysis"][typean]["mc"]
+    dirprefixmcres = dp.get("prefix_dir_res", "")
     dirresultsmc = [dirprefixmcres + p for p in dp["results"]]
     dirresultsmctot = dirprefixmcres + dp["resultsallp"]