Vary BDT cuts
vkucera committed Sep 9, 2024
1 parent b21f1be commit cfd9dd1
Showing 2 changed files with 100 additions and 13 deletions.
@@ -529,24 +529,27 @@ categories:
     rms_both_sides: true
   variations:
     ml:
-      activate: [no, no, no, yes, no, no, yes]
+      activate: [no, no, no, yes, yes, yes, yes, yes, yes, yes, yes, yes, yes]
       label:
-        - "default" # working point (for tests, should be same as the default result)
-        - "null" # no cuts (for tests, whatever was applied on Hyperloop)
-        - "loosest" # same cuts as Hyperloop (for tests, should be same as null)
-        - "loose" # to increase efficiency by 20 %
-        - "tight 2"
-        - "tight 4"
-        - "tight" # to decrease efficiency by 20 %
+        # default: working point (for tests, should be same as the default result)
+        # null: no cuts (for tests, whatever was applied on Hyperloop)
+        # loosest: same cuts as Hyperloop (for tests, should be same as null)
+        ["default", "null", "loosest", "loose 5", "loose 4", "loose 3", "loose 2", "loose 1", "tight 1", "tight 2", "tight 3", "tight 4", "tight 5"]
       diffs:
         analysis:
           jet_obs:
-            use_cuts: [True, True, True, True, True, True, True]
+            use_cuts: [True, True, True, True, True, True, True, True, True, True, True, True, True]
             cuts:
             - ["mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.8", "mlPromptScore > 0.6", "mlPromptScore > 0.6"] # default
             - [null,null,null,null,null,null,null,null,null,null]
             - ["mlPromptScore > 0.85", "mlPromptScore > 0.6", "mlPromptScore > 0.6", "mlPromptScore > 0.4", "mlPromptScore > 0.4", "mlPromptScore > 0.4", "mlPromptScore > 0.4", "mlPromptScore > 0.15", "mlPromptScore > 0.15"] # loosest
-            - ["mlPromptScore > 0.961", "mlPromptScore > 0.83", "mlPromptScore > 0.84", "mlPromptScore > 0.74", "mlPromptScore > 0.74", "mlPromptScore > 0.62", "mlPromptScore > 0.63", "mlPromptScore > 0.15", "mlPromptScore > 0.15"] # loose
-            - ["mlPromptScore > 0.98", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.8", "mlPromptScore > 0.6", "mlPromptScore > 0.6"] # tight 2
-            - ["mlPromptScore > 0.97", "mlPromptScore > 0.9", "mlPromptScore > 0.9", "mlPromptScore > 0.85", "mlPromptScore > 0.85", "mlPromptScore > 0.8", "mlPromptScore > 0.8", "mlPromptScore > 0.6", "mlPromptScore > 0.6"] # tight 4
-            - ["mlPromptScore > 0.978", "mlPromptScore > 0.94", "mlPromptScore > 0.937", "mlPromptScore > 0.915", "mlPromptScore > 0.91", "mlPromptScore > 0.89", "mlPromptScore > 0.88", "mlPromptScore > 0.85", "mlPromptScore > 0.85"] # tight
+            - ["mlPromptScore > 0.961", "mlPromptScore > 0.83", "mlPromptScore > 0.84", "mlPromptScore > 0.74", "mlPromptScore > 0.74", "mlPromptScore > 0.62", "mlPromptScore > 0.63", "mlPromptScore > 0.15", "mlPromptScore > 0.15"] # loose 5
+            - ["mlPromptScore > 0.9628", "mlPromptScore > 0.844", "mlPromptScore > 0.852", "mlPromptScore > 0.762", "mlPromptScore > 0.762", "mlPromptScore > 0.656", "mlPromptScore > 0.664", "mlPromptScore > 0.24", "mlPromptScore > 0.24"] # loose 4
+            - ["mlPromptScore > 0.9646", "mlPromptScore > 0.858", "mlPromptScore > 0.864", "mlPromptScore > 0.784", "mlPromptScore > 0.784", "mlPromptScore > 0.692", "mlPromptScore > 0.698", "mlPromptScore > 0.33", "mlPromptScore > 0.33"] # loose 3
+            - ["mlPromptScore > 0.9664", "mlPromptScore > 0.872", "mlPromptScore > 0.876", "mlPromptScore > 0.806", "mlPromptScore > 0.806", "mlPromptScore > 0.728", "mlPromptScore > 0.732", "mlPromptScore > 0.42", "mlPromptScore > 0.42"] # loose 2
+            - ["mlPromptScore > 0.9682", "mlPromptScore > 0.886", "mlPromptScore > 0.888", "mlPromptScore > 0.828", "mlPromptScore > 0.828", "mlPromptScore > 0.764", "mlPromptScore > 0.766", "mlPromptScore > 0.51", "mlPromptScore > 0.51"] # loose 1
+            - ["mlPromptScore > 0.9716", "mlPromptScore > 0.908", "mlPromptScore > 0.9074", "mlPromptScore > 0.863", "mlPromptScore > 0.862", "mlPromptScore > 0.818", "mlPromptScore > 0.816", "mlPromptScore > 0.65", "mlPromptScore > 0.65"] # tight 1
+            - ["mlPromptScore > 0.9732", "mlPromptScore > 0.916", "mlPromptScore > 0.9148", "mlPromptScore > 0.876", "mlPromptScore > 0.874", "mlPromptScore > 0.836", "mlPromptScore > 0.832", "mlPromptScore > 0.7", "mlPromptScore > 0.7"] # tight 2
+            - ["mlPromptScore > 0.9748", "mlPromptScore > 0.924", "mlPromptScore > 0.9222", "mlPromptScore > 0.889", "mlPromptScore > 0.886", "mlPromptScore > 0.854", "mlPromptScore > 0.848", "mlPromptScore > 0.75", "mlPromptScore > 0.75"] # tight 3
+            - ["mlPromptScore > 0.9764", "mlPromptScore > 0.932", "mlPromptScore > 0.9296", "mlPromptScore > 0.902", "mlPromptScore > 0.898", "mlPromptScore > 0.872", "mlPromptScore > 0.864", "mlPromptScore > 0.8", "mlPromptScore > 0.8"] # tight 4
+            - ["mlPromptScore > 0.978", "mlPromptScore > 0.94", "mlPromptScore > 0.937", "mlPromptScore > 0.915", "mlPromptScore > 0.91", "mlPromptScore > 0.89", "mlPromptScore > 0.88", "mlPromptScore > 0.85", "mlPromptScore > 0.85"] # tight 5
machine_learning_hep/vary_bdt.py (new file, 84 additions)
@@ -0,0 +1,84 @@
#!/usr/bin/env python3

"""
Generate BDT cut variations.

For each hadron species, interpolate linearly in N_STEPS steps between the
default working point and the loosest/tightest cuts, and print the resulting
lists in a YAML-friendly form.
"""

N_STEPS = 5
PRINT_DEFAULT = False

# Per-species configuration: the selection format string, the default working
# point per pT bin, and the two extreme bounds of the variation range.
dic_cuts = {
    "d0": {
        "string": "mlBkgScore < %g",
        "cuts_default": [0.02, 0.02, 0.02, 0.05, 0.06, 0.08, 0.08, 0.10, 0.10, 0.20, 0.25, 0.30],  # default
        "cuts_min": [0.008, 0.008, 0.0087, 0.017, 0.024, 0.031, 0.028, 0.042, 0.038, 0.052, 0.067, 0.060],  # tight
        "cuts_max": [0.045, 0.053, 0.054, 0.19, 0.22, 0.33, 0.46, 0.38, 0.50, 0.50, 0.50, 0.50]  # loose
    },
    "lc": {
        "string": "mlPromptScore > %g",
        "cuts_default": [0.97, 0.9, 0.9, 0.85, 0.85, 0.8, 0.8, 0.6, 0.6],  # default
        "cuts_min": [0.961, 0.83, 0.84, 0.74, 0.74, 0.62, 0.63, 0.15, 0.15],  # loose
        "cuts_max": [0.978, 0.94, 0.937, 0.915, 0.91, 0.89, 0.88, 0.85, 0.85]  # tight
    }
}


def format_list(fmt_: str, k_: list):
    """Format each cut value with the selection string."""
    return [fmt_ % val for val in k_]


def format_comment(comment: str):
    """Return a YAML end-of-line comment."""
    return f" # {comment}"


for hf, cuts in dic_cuts.items():
    cuts_default = cuts["cuts_default"]
    fmt = cuts["string"]
    greater_than = ">" in fmt  # for a ">" cut, a larger threshold is tighter

    # Calculate steps
    step_down = [(minimum - default) / N_STEPS for minimum, default in zip(cuts["cuts_min"], cuts_default)]
    step_up = [(maximum - default) / N_STEPS for maximum, default in zip(cuts["cuts_max"], cuts_default)]
    list_down = []
    list_up = []

    # Calculate variations
    for i in range(N_STEPS):
        list_down.append([round(default + (i + 1) * step, 6) for default, step in zip(cuts_default, step_down)])
        list_up.append([round(default + (i + 1) * step, 6) for default, step in zip(cuts_default, step_up)])

    # Label the "down" variations "loose" for a ">" cut, "tight" for a "<" cut
    labels_down = [("loose" if greater_than else "tight") + f" {i + 1}" for i in range(N_STEPS)]
    labels_up = [("tight" if greater_than else "loose") + f" {i + 1}" for i in range(N_STEPS)]

    labels = list(reversed(labels_down))
    if PRINT_DEFAULT:
        labels += ["default"]
    labels += labels_up

    # Print flags and labels
    n_items = 2 * N_STEPS + int(PRINT_DEFAULT)
    prefix_item = " - "

    print(f"{hf}:")
    act = ", ".join(["yes"] * n_items)
    print(f" activate: [{act}]")
    print(" label:", labels)
    print(" use_cuts:", n_items * [True])

    # Print numeric variations
    print(" cuts_num:")
    for var, label in zip(reversed(list_down), reversed(labels_down)):
        print(prefix_item, var, format_comment(label))
    if PRINT_DEFAULT:
        print(prefix_item, cuts_default, format_comment("default"))
    for var, label in zip(list_up, labels_up):
        print(prefix_item, var, format_comment(label))

    # Print formatted variations
    print(" cuts:")
    for var, label in zip(reversed(list_down), reversed(labels_down)):
        print(prefix_item, format_list(fmt, var), format_comment(label))
    if PRINT_DEFAULT:
        print(prefix_item, format_list(fmt, cuts_default), format_comment("default"))
    for var, label in zip(list_up, labels_up):
        print(prefix_item, format_list(fmt, var), format_comment(label))
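
A note on the label direction, with a standalone sketch (not part of the commit): for the Lc prompt-score cut (">"), stepping toward cuts_min loosens the selection, while for the D0 background-score cut ("<"), the same direction tightens it; the greater_than flag swaps the labels accordingly.

# Sketch (not in the commit): for a "<" cut, the "down" variations toward
# cuts_min are the tight ones, so the labels swap direction.
fmt = "mlBkgScore < %g"
greater_than = ">" in fmt  # False for a "<" cut
labels_down = [("loose" if greater_than else "tight") + f" {i + 1}" for i in range(5)]
print(labels_down)  # ['tight 1', 'tight 2', 'tight 3', 'tight 4', 'tight 5']
# First D0 pT bin: default 0.02, tightest bound 0.008
tight_1 = round(0.02 + (0.008 - 0.02) / 5, 6)
print(fmt % tight_1)  # mlBkgScore < 0.0176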
