Skip to content

Commit

Permalink
pnet mass test
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Apr 17, 2024
1 parent 0aaed7d commit c02728e
Show file tree
Hide file tree
Showing 23 changed files with 204 additions and 60 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ docker_stderror
/*.out
/*.log
/condor
src/HH4b/postprocessing/*.png
src/HH4b/postprocessing/*.pdf

running_jobs.txt

Expand Down
5 changes: 3 additions & 2 deletions src/HH4b/combine/run_blinded_hh4b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
#
# Specify seed with --seed (default 42) and number of toys with --numtoys (default 100)
#
# Usage ./run_blinded_hh4b.sh [-wblsdgt] [--numtoys 100] [--seed 42]
# Usage ./run_blinded_hh4b.sh [-wblsdgti] [--numtoys 100] [--seed 42] [--passbin 1]
# --passbin X runs the fit only for bin X; if X = 0 (default), it runs the fit for all bins
#
# Author: Raghav Kansal
####################################################################################################
Expand All @@ -40,7 +41,7 @@ impactsc=0
seed=42
numtoys=100
bias=-1
passbin=1
passbin=0

options=$(getopt -o "wblsdgti" --long "workspace,bfit,limits,significance,dfit,gofdata,goftoys,impactsi,impactsf:,impactsc:,bias:,seed:,numtoys:,passbin:" -- "$@")
eval set -- "$options"
Expand Down
6 changes: 3 additions & 3 deletions src/HH4b/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ def ratioHistPlot(
linewidth=2,
label=bg_labels,
color=bg_colours,
flow="none",
# flow="none",
)

# signal samples
Expand All @@ -589,7 +589,7 @@ def ratioHistPlot(
linewidth=2,
label=list(sig_labels.values()),
color=sig_colours,
flow="none",
# flow="none",
)

# plot signal errors
Expand Down Expand Up @@ -619,7 +619,7 @@ def ratioHistPlot(
label=label_by_sample[data_key],
markersize=20,
color="black",
flow="none",
# flow="none",
)

if log:
Expand Down
122 changes: 82 additions & 40 deletions src/HH4b/postprocessing/PlotFits.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,18 @@
"metadata": {},
"outputs": [],
"source": [
"import argparse\n",
"import os\n",
"from pathlib import Path\n",
"from collections import OrderedDict\n",
"\n",
"import uproot\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import hist\n",
"from hist import Hist\n",
"\n",
"import plotting\n",
"from hh_vars import data_key\n",
"from postprocessing import fit_shape_var, selection_regions\n",
"import numpy as np\n",
"import uproot\n",
"\n",
"import os"
"from HH4b import plotting\n",
"from HH4b.utils import ShapeVar\n",
"from HH4b.hh_vars import data_key"
]
},
{
Expand All @@ -38,10 +36,18 @@
"metadata": {},
"outputs": [],
"source": [
"MAIN_DIR = \"../../../\"\n",
"MAIN_DIR = Path(\"../../../\")\n",
"nTF = 1\n",
"\n",
"vbf_only = False\n",
"ggf_only = False\n",
"k2v0sig = True\n",
"mreg = True\n",
"\n",
"plot_dir = f\"{MAIN_DIR}/plots/PostFit/23Nov6\"\n",
"_ = os.system(f\"mkdir -p {plot_dir}\")"
"plot_dir = MAIN_DIR / \"plots/PostFit/24Apr17pT300PNetMass\"\n",
"plot_dir.mkdir(exist_ok=True, parents=True)\n",
"\n",
"regions = \"all\""
]
},
{
Expand All @@ -50,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"cards_dir = \"23Nov6\"\n",
"cards_dir = \"24Apr17pT300CutPNetMass\"\n",
"file = uproot.open(\n",
" f\"/uscms/home/rkansal/hhcombine/hh4b/cards/{cards_dir}/FitShapes.root\"\n",
" # f\"/uscms/home/rkansal/eos/bbVV/cards/{cards_dir}/FitShapes.root\"\n",
Expand All @@ -67,20 +73,33 @@
"hist_label_map_inverse = OrderedDict(\n",
" [\n",
" (\"qcd\", \"CMS_bbbb_hadronic_qcd_datadriven\"),\n",
" (\"vjets\", \"vjets\"),\n",
" (\"diboson\", \"diboson\"),\n",
" (\"ttbar\", \"ttbar\"),\n",
" # (\"ST\", \"singletop\"),\n",
" (\"vjets\", \"vjets\"),\n",
" (\"gghtobb\", \"gghtobb\"),\n",
" (\"vhtobb\", \"vhtobb\"),\n",
" # (\"X[3000]->H(bb)Y[190](VV)\", \"xhy_mx3000_my190\"),\n",
" (\"hh4b\", \"hh4b\"),\n",
" (data_key, \"data_obs\"),\n",
" (\"data\", \"data_obs\"),\n",
" ]\n",
")\n",
"\n",
"hist_label_map = {val: key for key, val in hist_label_map_inverse.items()}\n",
"samples = list(hist_label_map.values())"
"samples = list(hist_label_map.values())\n",
"\n",
"fit_shape_var_msd = ShapeVar(\n",
" \"H2Msd\",\n",
" r\"$m^{j2}_\\mathrm{SD}$ (GeV)\",\n",
" [16, 60, 220],\n",
" reg=True,\n",
" blind_window=[110, 140],\n",
")\n",
"\n",
"fit_shape_var_mreg = ShapeVar(\n",
" \"H2PNetMass\",\n",
" r\"$m^{j2}_\\mathrm{reg}$ (GeV)\",\n",
" [16, 60, 220],\n",
" reg=True,\n",
" blind_window=[110, 140],\n",
")\n",
"shape_vars = [fit_shape_var_msd] if not mreg else [fit_shape_var_mreg]"
]
},
{
Expand All @@ -91,15 +110,21 @@
"source": [
"shapes = {\n",
" \"prefit\": \"Pre-Fit\",\n",
" # \"shapes_fit_s\": \"S+B Post-Fit\",\n",
" # \"postfit\": \"S+B Post-Fit\",\n",
" \"postfit\": \"B-only Post-Fit\",\n",
"}\n",
"\n",
"shape_vars = [fit_shape_var]\n",
"\n",
"selection_regions = {\n",
" \"pass\": \"Pass\",\n",
"selection_regions_labels = {\n",
" \"passbin1\": \"Pass Bin1\",\n",
" \"passbin2\": \"Pass Bin2\",\n",
" \"passbin3\": \"Pass Bin3\",\n",
" \"fail\": \"Fail\",\n",
"}\n",
"ylims = {\n",
" \"passbin1\": 20,\n",
" \"passbin2\": 50,\n",
" \"passbin3\": 400,\n",
" \"fail\": 45000,\n",
"}"
]
},
Expand All @@ -109,12 +134,25 @@
"metadata": {},
"outputs": [],
"source": [
"hists = {}\n",
"if regions == \"all\":\n",
" signal_regions = [\"passbin1\", \"passbin2\", \"passbin3\"]\n",
"else:\n",
" signal_regions = [regions]\n",
"\n",
"bins = [*signal_regions, \"fail\"]\n",
"selection_regions = {key: selection_regions_labels[key] for key in bins}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hists = {}\n",
"for shape in shapes:\n",
" print(shape)\n",
" hists[shape] = {\n",
" region: Hist(\n",
" region: hist.Hist(\n",
" hist.axis.StrCategory(samples, name=\"Sample\"),\n",
" *[shape_var.axis for shape_var in shape_vars],\n",
" storage=\"double\",\n",
Expand All @@ -125,6 +163,7 @@
" for region in selection_regions:\n",
" h = hists[shape][region]\n",
" templates = file[f\"{region}_{shape}\"]\n",
" # print(templates)\n",
" for key, file_key in hist_label_map_inverse.items():\n",
" if key != data_key:\n",
" if file_key not in templates:\n",
Expand All @@ -146,29 +185,32 @@
"metadata": {},
"outputs": [],
"source": [
"# pass_ylim = 5\n",
"# fail_ylim = 4000\n",
"year = \"2022EE\"\n",
"pass_ylim = 12\n",
"fail_ylim = 35000\n",
"pass_ratio_ylims = [0, 4]\n",
"year = \"2022-2023\"\n",
"pass_ratio_ylims = [0, 2]\n",
"fail_ratio_ylims = [0, 2]\n",
"signal_scale = 5.0\n",
"\n",
"for shape, shape_label in shapes.items():\n",
" for region, region_label in selection_regions.items():\n",
" pass_region = region.startswith(\"pass\")\n",
" for i, shape_var in enumerate(shape_vars):\n",
" for shape_var in shape_vars:\n",
" # print(hists[shape][region])\n",
" plot_params = {\n",
" \"hists\": hists[shape][region],\n",
" \"sig_keys\": [\"hh4b\"],\n",
" \"sig_scale_dict\": {\"hh4b\": 10},\n",
" \"bg_keys\": [\"qcd\", \"vjets\", \"diboson\", \"ttbar\", \"gghtobb\", \"vhtobb\"],\n",
" \"sig_scale_dict\": {\"hh4b\": signal_scale if pass_region else 1.0},\n",
" \"bg_keys\": [\"qcd\", \"ttbar\", \"vhtobb\", \"vjets\", \"diboson\"],\n",
" \"show\": True,\n",
" \"year\": year,\n",
" \"ylim\": pass_ylim if pass_region else fail_ylim,\n",
" \"ylim\": ylims[region],\n",
" \"xlim\": 220,\n",
" # \"xlim_low\": 50,\n",
" \"xlim_low\": 60,\n",
" \"ratio_ylims\": pass_ratio_ylims if pass_region else fail_ratio_ylims,\n",
" \"title\": f\"{shape_label} {region_label} Region\",\n",
" \"name\": f\"{plot_dir}/{shape}_{region}_{shape_var.var}.pdf\",\n",
" \"bg_order\": [\"diboson\", \"vjets\", \"vhtobb\", \"ttbar\", \"qcd\"],\n",
" \"energy\": 13.6,\n",
" }\n",
"\n",
" plotting.ratioHistPlot(**plot_params)"
Expand Down Expand Up @@ -198,7 +240,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
49 changes: 34 additions & 15 deletions src/HH4b/postprocessing/PostProcess.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@
sys.path.append("../boosted/bdt_trainings_run3/")


XBB_CUT_BIN1 = 0.92 # 0.9
BDT_CUT_BIN1 = 0.94 # 0.97
XBB_CUT_BIN2 = 0.8
BDT_CUT_BIN2 = 0.68 # 0.7
BDT_CUT_FAIL = 0.03


def load_run3_samples(args, year):
# modify as needed
input_dir = f"/eos/uscms/store/user/cmantill/bbbb/skimmer/{args.tag}"
Expand Down Expand Up @@ -238,22 +231,24 @@ def load_run3_samples(args, year):

# define category
bdt_events["Category"] = 5 # all events
mask_bin1 = (bdt_events["H2Xbb"] > XBB_CUT_BIN1) & (bdt_events["bdt_score"] > BDT_CUT_BIN1)
mask_bin1 = (bdt_events["H2Xbb"] > args.txbb_wps[0]) & (
bdt_events["bdt_score"] > args.bdt_wps[0]
)
bdt_events.loc[mask_bin1, "Category"] = 1
mask_corner = (bdt_events["H2Xbb"] < XBB_CUT_BIN1) & (
bdt_events["bdt_score"] < BDT_CUT_BIN1
mask_corner = (bdt_events["H2Xbb"] < args.txbb_wps[0]) & (
bdt_events["bdt_score"] < args.bdt_wps[0]
)
mask_bin2 = (
(bdt_events["H2Xbb"] > XBB_CUT_BIN2)
& (bdt_events["bdt_score"] > BDT_CUT_BIN2)
(bdt_events["H2Xbb"] > args.txbb_wps[1])
& (bdt_events["bdt_score"] > args.bdt_wps[1])
& ~(mask_bin1)
& ~(mask_corner)
)
bdt_events.loc[mask_bin2, "Category"] = 2
mask_bin3 = ~(mask_bin1) & ~(mask_bin2) & (bdt_events["bdt_score"] > BDT_CUT_FAIL)
mask_bin3 = ~(mask_bin1) & ~(mask_bin2) & (bdt_events["bdt_score"] > args.bdt_wps[2])
bdt_events.loc[mask_bin3, "Category"] = 3
bdt_events.loc[
(bdt_events["H2Xbb"] < XBB_CUT_BIN2) & (bdt_events["bdt_score"] > BDT_CUT_FAIL),
(bdt_events["H2Xbb"] < args.txbb_wps[1]) & (bdt_events["bdt_score"] > args.bdt_wps[2]),
"Category",
] = 4

Expand Down Expand Up @@ -514,9 +509,15 @@ def postprocess_run3(args):
if args.fom_scan:
scan_fom(events_combined, mass=args.mass)
scan_fom_bin2(
events_combined, xbb_cut_bin1=XBB_CUT_BIN1, bdt_cut_bin1=BDT_CUT_BIN1, mass=args.mass
events_combined,
xbb_cut_bin1=args.txbb_wps[0],
bdt_cut_bin1=args.bdt_wps[0],
mass=args.mass,
)

if not args.templates:
return

templ_dir = Path("templates") / args.templates_tag
year = "2022-2023"
(templ_dir / "cutflows" / year).mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -576,7 +577,25 @@ def postprocess_run3(args):
choices=["H2Msd", "H2PNetMass"],
help="mass variable to make template",
)

parser.add_argument(
"--txbb-wps",
type=float,
nargs=2,
default=[0.92, 0.8],
help="TXbb Bin 1, Bin 2 WPs",
)

parser.add_argument(
"--bdt-wps",
type=float,
nargs=3,
default=[0.94, 0.68, 0.03],
help="BDT Bin 1, Bin 2, Fail WPs",
)

run_utils.add_bool_arg(parser, "fom-scan", default=True, help="run figure of merit scan")
run_utils.add_bool_arg(parser, "templates", default=True, help="make templates")
args = parser.parse_args()

postprocess_run3(args)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
,4 ≤ Category < 5
data,169167.0
qcd,157890.57
hh4b,3.06
ttbar,14378.5
ttlep,4064.15
vhtobb,203.45
vjets,4824.44
diboson,73.21
novhhtobb,436.22
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
,1 ≤ Category < 2
data,42.0
qcd,33.82
hh4b,1.26
ttbar,14.44
ttlep,5.2
vhtobb,2.96
vjets,1.69
diboson,0.0
novhhtobb,0.28
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
,2 ≤ Category < 3
data,159.0
qcd,97.13
hh4b,1.08
ttbar,59.49
ttlep,21.27
vhtobb,3.93
vjets,7.59
diboson,0.81
novhhtobb,2.58
Loading

0 comments on commit c02728e

Please sign in to comment.