Skip to content

Commit

Permalink
Change BDT cuts from skimming pT binning to analysis pT binning
Browse files Browse the repository at this point in the history
  • Loading branch information
vkucera committed Aug 26, 2024
1 parent 5d01ff2 commit 090a965
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ D0jet_pp:
# obsolete, to be removed
# var_cand: fCandidateSelFlag
# # var_swap: fIsCandidateSwapped
# bitmap_sel:
# var_name: fFlagMcMatchRec
bitmap_sel:
var_name: fFlagMcMatchRec
# var_name_gen: fFlagMcMatchGen
# var_name_origgen: fOriginMcGen
# var_name_origrec: fOriginMcRec
Expand Down Expand Up @@ -785,7 +785,7 @@ D0jet_pp:

# Additional cuts applied before mass histogram is filled
use_cuts: True # systematics
cuts: ["mlBkgScore < 0.02", "mlBkgScore < 0.04", "mlBkgScore < 0.06", "mlBkgScore < .08", "mlBkgScore < 0.14", "mlBkgScore < 0.26", "mlBkgScore < 0.46", "mlBkgScore < 0.52"] # (sel_skim_binmin bins) systematics FIXME: Update for new model.
cuts: ["mlBkgScore < 0.02", "mlBkgScore < 0.02", "mlBkgScore < 0.04", "mlBkgScore < 0.06", "mlBkgScore < .08", "mlBkgScore < 0.14", "mlBkgScore < 0.14", "mlBkgScore < 0.26", "mlBkgScore < 0.26", "mlBkgScore < 0.46", "mlBkgScore < 0.52", "mlBkgScore < 0.52"] # (sel_an_binmin bins) systematics FIXME: Update for new model.

systematics: # used in machine_learning_hep/analysis/systematics.py
probvariation:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ LcJet_pp:

# Additional cuts applied before mass histogram is filled
use_cuts: True
cuts: ["mlPromptScore > 0.96", "mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.6", null]
cuts: ["mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.8", "mlPromptScore > 0.6", "mlPromptScore > 0.6", null] # (sel_an_binmin bins) systematics FIXME: Update for new model.

systematics: # used in machine_learning_hep/analysis/systematics.py
probvariation:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -919,10 +919,10 @@ categories:
jet_obs:
use_cuts: [True, True, True, True, True, True, True]
cuts:
- ["mlBkgScore < 0.02", "mlBkgScore < 0.04", "mlBkgScore < 0.06", "mlBkgScore < .08", "mlBkgScore < 0.14", "mlBkgScore < 0.26", "mlBkgScore < 0.46", "mlBkgScore < 0.52"] # default
- [null,null,null,null,null,null,null,null]
- ["mlBkgScore < 0.12", "mlBkgScore < 0.16", "mlBkgScore < 0.2", "mlBkgScore < .25", "mlBkgScore < 0.4", "mlBkgScore < 0.6", "mlBkgScore < 0.8", "mlBkgScore < 0.8"] # loosest
- ["mlBkgScore < 0.06", "mlBkgScore < 0.08", "mlBkgScore < 0.1", "mlBkgScore < .20", "mlBkgScore < 0.3", "mlBkgScore < 0.6", "mlBkgScore < 0.8", "mlBkgScore < 0.8"] # loose
- ["mlBkgScore < 0.01", "mlBkgScore < 0.02", "mlBkgScore < 0.03", "mlBkgScore < .04", "mlBkgScore < 0.07", "mlBkgScore < 0.13", "mlBkgScore < 0.23", "mlBkgScore < 0.26"] # tighter 2
- ["mlBkgScore < 0.005", "mlBkgScore < 0.01", "mlBkgScore < 0.015", "mlBkgScore < .02", "mlBkgScore < 0.035", "mlBkgScore < 0.065", "mlBkgScore < 0.125", "mlBkgScore < 0.13"] # tighter 4
- ["mlBkgScore < 0.015", "mlBkgScore < 0.03", "mlBkgScore < 0.045", "mlBkgScore < .06", "mlBkgScore < 0.105", "mlBkgScore < 0.13", "mlBkgScore < 0.23", "mlBkgScore < 0.26"] # tight
- ["mlBkgScore < 0.02", "mlBkgScore < 0.02", "mlBkgScore < 0.04", "mlBkgScore < 0.06", "mlBkgScore < .08", "mlBkgScore < 0.14", "mlBkgScore < 0.14", "mlBkgScore < 0.26", "mlBkgScore < 0.26", "mlBkgScore < 0.46", "mlBkgScore < 0.52", "mlBkgScore < 0.52"] # default
- [null,null,null,null,null,null,null,null,null,null,null,null]
- ["mlBkgScore < 0.12", "mlBkgScore < 0.12", "mlBkgScore < 0.16", "mlBkgScore < 0.2", "mlBkgScore < .25", "mlBkgScore < 0.4", "mlBkgScore < 0.4", "mlBkgScore < 0.6", "mlBkgScore < 0.6", "mlBkgScore < 0.8", "mlBkgScore < 0.8", "mlBkgScore < 0.8"] # loosest
- ["mlBkgScore < 0.06", "mlBkgScore < 0.06", "mlBkgScore < 0.08", "mlBkgScore < 0.1", "mlBkgScore < .20", "mlBkgScore < 0.3", "mlBkgScore < 0.3", "mlBkgScore < 0.6", "mlBkgScore < 0.6", "mlBkgScore < 0.8", "mlBkgScore < 0.8", "mlBkgScore < 0.8"] # loose
- ["mlBkgScore < 0.01", "mlBkgScore < 0.01", "mlBkgScore < 0.02", "mlBkgScore < 0.03", "mlBkgScore < .04", "mlBkgScore < 0.07", "mlBkgScore < 0.07", "mlBkgScore < 0.13", "mlBkgScore < 0.13", "mlBkgScore < 0.23", "mlBkgScore < 0.26", "mlBkgScore < 0.26"] # tighter 2
- ["mlBkgScore < 0.005", "mlBkgScore < 0.005", "mlBkgScore < 0.01", "mlBkgScore < 0.015", "mlBkgScore < .02", "mlBkgScore < 0.035", "mlBkgScore < 0.035", "mlBkgScore < 0.065", "mlBkgScore < 0.065", "mlBkgScore < 0.125", "mlBkgScore < 0.13", "mlBkgScore < 0.13"] # tighter 4
- ["mlBkgScore < 0.015", "mlBkgScore < 0.015", "mlBkgScore < 0.03", "mlBkgScore < 0.045", "mlBkgScore < .06", "mlBkgScore < 0.105", "mlBkgScore < 0.105", "mlBkgScore < 0.13", "mlBkgScore < 0.13", "mlBkgScore < 0.23", "mlBkgScore < 0.26", "mlBkgScore < 0.26"] # tight
Original file line number Diff line number Diff line change
Expand Up @@ -451,10 +451,10 @@ categories:
jet_obs:
use_cuts: [True, True, True, True, True, True, True]
cuts:
- ["mlPromptScore > 0.96", "mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.6", null] # default
- [null,null,null,null,null,null,null]
- ["mlPromptScore > 0.85", "mlPromptScore > 0.85", "mlPromptScore > 0.6", "mlPromptScore > 0.4", "mlPromptScore > 0.4", "mlPromptScore > 0.15", null] # loosest
- ["mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.7", "mlPromptScore > 0.6", "mlPromptScore > 0.6", "mlPromptScore > 0.3", null] # loose
- ["mlPromptScore > 0.98", "mlPromptScore > 0.98", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.6", null] # tight 2
- ["mlPromptScore > 0.96", "mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.6", null] # tight 4
- ["mlPromptScore > 0.98", "mlPromptScore > 0.98", "mlPromptScore > 0.95", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.7", null] # tight
- ["mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.8", "mlPromptScore > 0.6", "mlPromptScore > 0.6", null] # default
- [null,null,null,null,null,null,null,null,null,null]
- ["mlPromptScore > 0.85", "mlPromptScore > 0.6", "mlPromptScore > 0.6", "mlPromptScore > 0.4", "mlPromptScore > 0.4", "mlPromptScore > 0.4", "mlPromptScore > 0.4", "mlPromptScore > 0.15", "mlPromptScore > 0.15", null] # loosest
- ["mlPromptScore > 0.9", "mlPromptScore > 0.7", "mlPromptScore > 0.7", "mlPromptScore > 0.6", "mlPromptScore > 0.6", "mlPromptScore > 0.6", "mlPromptScore > 0.6", "mlPromptScore > 0.3", "mlPromptScore > 0.3", null] # loose
- ["mlPromptScore > 0.98", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.8", "mlPromptScore > 0.6", "mlPromptScore > 0.6", null] # tight 2
- ["mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.8", "mlPromptScore > 0.6", "mlPromptScore > 0.6", null] # tight 4
- ["mlPromptScore > 0.98", "mlPromptScore > 0.95", "mlPromptScore > 0.95", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.7", "mlPromptScore > 0.7", null] # tight
24 changes: 12 additions & 12 deletions machine_learning_hep/processer.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab

#variables name
self.v_train = datap["variables"]["var_training"]
self.v_bitvar = datap["bitmap_sel"]["var_name"]
self.v_bitvar = datap["bitmap_sel"]["var_name"] # used in hadrons
# self.v_bitvar_gen = datap["bitmap_sel"]["var_name_gen"]
# self.v_bitvar_origgen = datap["bitmap_sel"]["var_name_origgen"]
# self.v_bitvar_origrec = datap["bitmap_sel"]["var_name_origrec"]
Expand Down Expand Up @@ -564,36 +564,36 @@ def process_mergedec(self):


def load_cuts(self):
"""Load cuts from database
"""Load custom analysis cuts from the database.
"""
raw_cuts = self.datap["analysis"][self.typean].get("cuts", None)
if not raw_cuts:
print("No custom cuts given, hence not cutting...")
self.analysis_cuts = [None] * self.p_nptbins
self.analysis_cuts = [None] * self.p_nptfinbins
return
if len(raw_cuts) != self.p_nptbins:
print(f"You have {self.p_nptbins} but you passed {len(raw_cuts)} cuts. Exit...")
if len(raw_cuts) != self.p_nptfinbins:
print(f"You have {self.p_nptfinbins} but you passed {len(raw_cuts)} cuts. Exit...")
sys.exit(1)
self.analysis_cuts = deepcopy(raw_cuts)


def apply_cuts_ptbin(self, df_, ipt):
"""Cut dataframe with cuts for a given skimming pT bin"""
def apply_cuts_ptbin(self, df_ipt, ipt):
"""Cut dataframe with cuts for a given analysis pT bin"""
if not self.analysis_cuts[ipt]:
return df_
return df_.query(self.analysis_cuts[ipt])
return df_ipt
return df_ipt.query(self.analysis_cuts[ipt])


def apply_cuts_all_ptbins(self, df_):
"""Apply cuts for all analysis pT bins."""
if not self.do_custom_analysis_cuts or not any(self.analysis_cuts):
return df_

def apply_cut_for_ipt(dff_, ipt: int):
df_ipt = seldf_singlevar(dff_, self.v_var_binning, self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
def apply_cut_for_ipt(df_full, ipt: int):
df_ipt = seldf_singlevar(df_full, self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
return df_ipt.query(self.analysis_cuts[ipt]) if self.analysis_cuts[ipt] else df_ipt

return pd.concat(apply_cut_for_ipt(df_, ipt) for ipt in range(self.p_nptbins))
return pd.concat(apply_cut_for_ipt(df_, ipt) for ipt in range(self.p_nptfinbins))


def process_histomass(self):
Expand Down

0 comments on commit 090a965

Please sign in to comment.