From 4c4773fea4fdd5cf3c70ba221f4668bb70b8a0bf Mon Sep 17 00:00:00 2001 From: Javier Duarte Date: Thu, 23 May 2024 06:55:32 -0700 Subject: [PATCH] subtracted plots and mc closure --- src/HH4b/plotting.py | 241 +++++++++++++++++ src/HH4b/postprocessing/CreateDatacard.py | 7 +- src/HH4b/postprocessing/PlotFits.ipynb | 299 ---------------------- 3 files changed, 247 insertions(+), 300 deletions(-) delete mode 100644 src/HH4b/postprocessing/PlotFits.ipynb diff --git a/src/HH4b/plotting.py b/src/HH4b/plotting.py index 9d1059c8..958a673c 100644 --- a/src/HH4b/plotting.py +++ b/src/HH4b/plotting.py @@ -403,8 +403,10 @@ def ratioHistPlot( name (str): name of file to save plot sig_scale_dict (Dict[str, float]): if scaling signals in the plot, dictionary of factors by which to scale each signal + xlim (optional): x-limit on plot xlim_low (optional): x-limit low on plot ylim (optional): y-limit on plot + ylim_low (optional): y-limit low on plot show (bool): show plots or not syst (Tuple): Tuple of (wshift: name of systematic e.g. pileup, wsamples: list of samples which are affected by this), to plot variations of this systematic. @@ -871,6 +873,245 @@ def get_variances(bg_hist): plt.close() +def subtractedHistPlot( + hists: Hist, + hists_fail: Hist, + year: str, + bg_keys: list[str], + bg_err: ArrayLike = None, + sortyield: bool = False, + title: str | None = None, + name: str = "", + xlim: int | None = None, + xlim_low: int | None = None, + ylim: int | None = None, + ylim_low: int | None = None, + show: bool = True, + bg_err_type: str = "shaded", + plot_data: bool = True, + bg_order=None, + log: bool = False, + logx: bool = False, + ratio_ylims: list[float] | None = None, + energy: str = "13.6", +): + """ + Makes and saves subtracted histogram plot, to show QCD transfer factor + with a data/mc ratio plot below + + Args: + hists (Hist): input histograms per sample in pass region to plot + hists_fail (Hist): input histograms per sample in fail region to plot + year (str): datataking year + bg_keys (List[str]): background keys + title (str, optional): plot title. Defaults to None. + name (str): name of file to save plot + sig_scale_dict (Dict[str, float]): if scaling signals in the plot, dictionary of factors + by which to scale each signal + xlim_low (optional): x-limit low on plot + ylim (optional): y-limit on plot + show (bool): show plots or not + bg_order (List[str]): order in which to plot backgrounds + ratio_ylims (List[float]): y limits on the ratio plots + plot_significance (bool): plot Asimov significance below ratio plot + """ + + + # copy hists and bg_keys so input objects are not changed + hists, bg_keys = deepcopy(hists), deepcopy(bg_keys) + + if bg_order is None: + bg_order = bg_order_default + + bg_keys, bg_colours, bg_labels, _, _, _ = _process_samples( + [], bg_keys, {}, None, None, bg_order + ) + + fig, (ax, rax) = plt.subplots( + 2, + 1, + figsize=(12, 12), + gridspec_kw={"height_ratios": [3.5, 1], "hspace": 0.18}, + sharex=True, + ) + + # only use integers + ax.yaxis.set_major_locator(MaxNLocator(integer=True)) + + plt.rcParams.update({"font.size": 30}) + + # plot histograms + ax.set_ylabel("Pass / Multijet in Fail") + + # background samples + hep.histplot( + hists["qcd", :] / hists_fail["qcd", :], + ax=ax, + histtype="fill", + sort="yield" if sortyield else None, + stack=True, + edgecolor="black", + linewidth=2, + label="Multijet", + color=bg_colours[-1], + ) + + if bg_err is not None: + bg_tot = hists["qcd", :] / hists_fail["qcd", :] + if len(np.array(bg_err).shape) == 1: + bg_errs = [bg_tot - bg_err / hists_fail["qcd", :], + bg_tot + bg_err / hists_fail["qcd", :]] + + if bg_err_type == "shaded": + ax.fill_between( + np.repeat(hists.axes[1].edges, 2)[1:-1], + np.repeat(bg_errs[0].values(), 2), + np.repeat(bg_errs[1].values(), 2), + color="black", + alpha=0.2, + hatch="//", + linewidth=0, + label="Multijet Unc.", + ) + else: + ax.stairs( + bg_tot.values(), + hists.axes[1].edges, + color="black", + linewidth=3, + label="Bkg. Total", + baseline=bg_tot.values(), + ) + + ax.stairs( + bg_errs[0], + hists.axes[1].edges, + color="red", + linewidth=3, + label="Bkg. Down", + baseline=bg_errs[0], + ) + + ax.stairs( + bg_err[1], + hists.axes[1].edges, + color="#7F2CCB", + linewidth=3, + label="Bkg. Up", + baseline=bg_err[1], + ) + + # plot data + if plot_data: + data_val = hists[data_key, :].values() + qcd_fail_val = hists_fail["qcd", :].values() + yerr = ratio_uncertainty(data_val, qcd_fail_val, "poisson") + all_mc = sum(hists[bg_key, :] for bg_key in bg_keys if bg_key != "qcd") + yvalue = (hists[data_key, :] - all_mc) / hists_fail["qcd", :] + hep.histplot( + yvalue, + ax=ax, + yerr=yerr, + histtype="errorbar", + label="Data - Other Bkg.", + markersize=20, + color="black", + ) + + if log: + ax.set_yscale("log") + if logx: + ax.set_xscale("log") + + handles, labels = ax.get_legend_handles_labels() + ax.legend(handles, labels, bbox_to_anchor=(1.03, 1), loc="upper left") + + if xlim_low is not None: + if xlim is not None: + ax.set_xlim(xlim_low, xlim) + else: + ax.set_xlim(xlim_low, None) + + y_lowlim = ylim_low if ylim_low is not None else 0 if not log else 0.001 + + if ylim is not None: + ax.set_ylim([y_lowlim, ylim]) + else: + ax.set_ylim(y_lowlim) + + ax.set_xlabel("") + + # plot ratio below + if plot_data and len(bg_keys) > 0: + qcd_val = hists["qcd", :].values() + hep.histplot( + yvalue / (qcd_val / qcd_fail_val), + yerr=ratio_uncertainty(data_val, qcd_val, "poisson"), + ax=rax, + histtype="errorbar", + markersize=20, + color="black", + capsize=0, + ) + rax.set_xlabel(hists.axes[1].label) + + # fill error band of background + if bg_err is not None: + # (bkg + err) / bkg + rax.fill_between( + np.repeat(hists.axes[1].edges, 2)[1:-1], + np.repeat((bg_errs[0].values()) / (qcd_val / qcd_fail_val), 2), + np.repeat((bg_errs[1].values()) / (qcd_val / qcd_fail_val), 2), + color="black", + alpha=0.1, + hatch="//", + linewidth=0, + ) + else: + rax.set_xlabel(hists.axes[1].label) + + rax.set_ylabel("Ratio") + rax.set_ylim(ratio_ylims) + minor_locator = mticker.AutoMinorLocator(2) + rax.yaxis.set_minor_locator(minor_locator) + rax.grid(axis="y", linestyle="-", linewidth=2, which="both") + + if title is not None: + ax.set_title(title, y=1.08) + + if year == "all": + hep.cms.label( + "Work in Progress", + data=True, + lumi=f"{np.sum(list(LUMI.values())) / 1e3:.0f}", + year=None, + ax=ax, + com=energy, + ) + else: + hep.cms.label( + "Work in Progress", + fontsize=24, + data=True, + lumi=f"{LUMI[year] / 1e3:.0f}", + year=year, + ax=ax, + com=energy, + ) + + if len(name): + if not name.endswith((".pdf", ".png")): + plt.savefig(f"{name}.pdf", bbox_inches="tight") + plt.savefig(f"{name}.png") + else: + plt.savefig(name, bbox_inches="tight") + + if show: + plt.show() + else: + plt.close() + + def mesh2d( xbins: ArrayLike, ybins: ArrayLike, diff --git a/src/HH4b/postprocessing/CreateDatacard.py b/src/HH4b/postprocessing/CreateDatacard.py index 27e30118..a5b44a05 100644 --- a/src/HH4b/postprocessing/CreateDatacard.py +++ b/src/HH4b/postprocessing/CreateDatacard.py @@ -101,6 +101,7 @@ add_bool_arg(parser, "vbf-region", "Add VBF region", default=False) add_bool_arg(parser, "unblinded", "unblinded so skip blinded parts", default=False) add_bool_arg(parser, "ttbar-rate-param", "Add freely floating ttbar rate param", default=False) +add_bool_arg(parser, "mc-closure", "Perform MC closure test (fill data_obs with sum of MC bkg.", default=False) args = parser.parse_args() @@ -540,7 +541,11 @@ def fill_regions( ) # data observed - ch.setObservation(region_templates[data_key, :]) + if args.mc_closure: + all_bg = sum([region_templates[bg_key, :] for bg_key in bg_keys + ["qcd"]) + ch.setObservation(all_bg) + else: + ch.setObservation(region_templates[data_key, :]) def alphabet_fit( diff --git a/src/HH4b/postprocessing/PlotFits.ipynb b/src/HH4b/postprocessing/PlotFits.ipynb deleted file mode 100644 index 81b5d39d..00000000 --- a/src/HH4b/postprocessing/PlotFits.ipynb +++ /dev/null @@ -1,299 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import argparse\n", - "import os\n", - "from pathlib import Path\n", - "from collections import OrderedDict\n", - "\n", - "import hist\n", - "import numpy as np\n", - "import uproot\n", - "\n", - "from HH4b import plotting\n", - "from HH4b.utils import ShapeVar\n", - "from HH4b.hh_vars import data_key" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "MAIN_DIR = Path(\"../../../\")\n", - "nTF = 1\n", - "\n", - "vbf = False\n", - "# k2v0sig = True\n", - "mreg = True\n", - "\n", - "plot_dir = MAIN_DIR / \"plots/PostFit/24Apr21_legacy_bdt_ggf_tighter\"\n", - "plot_dir.mkdir(exist_ok=True, parents=True)\n", - "\n", - "regions = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cards_dir = \"24Apr21_legacy_bdt_ggf_tighter\"\n", - "file = uproot.open(\n", - " f\"/uscms/home/rkansal/hhcombine/hh4b/cards/{cards_dir}/FitShapes.root\"\n", - " # f\"/uscms/home/rkansal/eos/bbVV/cards/{cards_dir}/FitShapes.root\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# (name in templates -> name in cards)\n", - "hist_label_map_inverse = OrderedDict(\n", - " [\n", - " (\"qcd\", \"CMS_bbbb_hadronic_qcd_datadriven\"),\n", - " (\"others\", \"others\"),\n", - " (\"ttbar\", \"ttbar\"),\n", - " (\"vhtobb\", \"VH_hbb\"),\n", - " (\"tthtobb\", \"ttH_hbb\"),\n", - " (\"data\", \"data_obs\"),\n", - " ]\n", - ")\n", - "\n", - "if vbf:\n", - " hist_label_map_inverse[\"vbfhh4b-k2v0\"] = \"vbfhh4b-k2v0\"\n", - "else:\n", - " hist_label_map_inverse[\"hh4b\"] = \"hh4b\"\n", - "\n", - "hist_label_map = {val: key for key, val in hist_label_map_inverse.items()}\n", - "samples = list(hist_label_map.values())\n", - "\n", - "fit_shape_var_msd = ShapeVar(\n", - " \"H2Msd\",\n", - " r\"$m^{j2}_\\mathrm{SD}$ (GeV)\",\n", - " [16, 60, 220],\n", - " reg=True,\n", - " blind_window=[110, 140],\n", - ")\n", - "\n", - "fit_shape_var_mreg = ShapeVar(\n", - " \"H2PNetMass\",\n", - " r\"$m^{j2}_\\mathrm{reg}$ (GeV)\",\n", - " [16, 60, 220],\n", - " reg=True,\n", - " blind_window=[110, 140],\n", - ")\n", - "shape_vars = [fit_shape_var_msd] if not mreg else [fit_shape_var_mreg]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "shapes = {\n", - " \"prefit\": \"Pre-Fit\",\n", - " # \"postfit\": \"S+B Post-Fit\",\n", - " \"postfit\": \"B-only Post-Fit\",\n", - "}\n", - "\n", - "selection_regions_labels = {\n", - " \"passbin1\": \"Pass Bin1\",\n", - " \"passbin2\": \"Pass Bin2\",\n", - " \"passbin3\": \"Pass Bin3\",\n", - " \"fail\": \"Fail\",\n", - "}\n", - "\n", - "if vbf:\n", - " selection_regions_labels[\"passvbf\"] = \"Pass VBF\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if regions == \"all\":\n", - " signal_regions = [\"passbin1\", \"passbin2\", \"passbin3\"]\n", - " if vbf:\n", - " signal_regions = [\"passvbf\"] + signal_regions\n", - "else:\n", - " signal_regions = [regions]\n", - "\n", - "bins = [*signal_regions, \"fail\"]\n", - "selection_regions = {key: selection_regions_labels[key] for key in bins}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hists = {}\n", - "for shape in shapes:\n", - " hists[shape] = {\n", - " region: hist.Hist(\n", - " hist.axis.StrCategory(samples, name=\"Sample\"),\n", - " *[shape_var.axis for shape_var in shape_vars],\n", - " storage=\"double\",\n", - " )\n", - " for region in selection_regions\n", - " }\n", - "\n", - " for region in selection_regions:\n", - " h = hists[shape][region]\n", - " templates = file[f\"{region}_{shape}\"]\n", - " # print(templates)\n", - " for key, file_key in hist_label_map_inverse.items():\n", - " if key != data_key:\n", - " if file_key not in templates:\n", - " print(f\"No {key} in {region}\")\n", - " continue\n", - "\n", - " data_key_index = np.where(np.array(list(h.axes[0])) == key)[0][0]\n", - " h.view(flow=False)[data_key_index, :] = templates[file_key].values()\n", - "\n", - " data_key_index = np.where(np.array(list(h.axes[0])) == data_key)[0][0]\n", - " h.view(flow=False)[data_key_index, :] = np.nan_to_num(\n", - " templates[hist_label_map_inverse[data_key]].values()\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Signal in mass window:\", np.sum(hists[\"postfit\"][\"passbin1\"][\"hh4b\", 5:8].values()))\n", - "\n", - "bg_tot = np.sum(\n", - " [\n", - " np.sum(hists[\"postfit\"][\"passbin1\"][key, 5:8].values())\n", - " for key in hist_label_map_inverse\n", - " if key not in [\"hh4b\", \"vbfhh4b-k2v0\", \"data\"]\n", - " ]\n", - ")\n", - "print(\"BG in mass window:\", bg_tot)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print([key for key in hist_label_map_inverse if key not in [\"hh4b\", \"vbfhh4b-k2v0\", \"data\"]])\n", - "{\n", - " key: np.sum(hists[\"postfit\"][\"passbin1\"][key, 5:8].values())\n", - " for key in hist_label_map_inverse\n", - " if key not in [\"hh4b\", \"vbfhh4b-k2v0\", \"data\"]\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "year = \"2022-2023\"\n", - "pass_ratio_ylims = [0, 2]\n", - "fail_ratio_ylims = [0, 2]\n", - "signal_scale = 5.0\n", - "\n", - "ylims = {\n", - " \"passvbf\": 15,\n", - " \"passbin1\": 10,\n", - " \"passbin2\": 50,\n", - " \"passbin3\": 800,\n", - " \"fail\": 100000,\n", - "}\n", - "\n", - "for shape, shape_label in shapes.items():\n", - " for region, region_label in selection_regions.items():\n", - " pass_region = region.startswith(\"pass\")\n", - " for shape_var in shape_vars:\n", - " # print(hists[shape][region])\n", - " plot_params = {\n", - " \"hists\": hists[shape][region],\n", - " \"sig_keys\": [\"hh4b\"] if not vbf else [\"vbfhh4b-k2v0\"],\n", - " \"sig_scale_dict\": (\n", - " {\"hh4b\": signal_scale if pass_region else 1.0} if not vbf else None\n", - " ),\n", - " \"bg_keys\": [\"qcd\", \"ttbar\", \"vhtobb\", \"tthtobb\", \"others\"],\n", - " \"show\": True,\n", - " \"year\": year,\n", - " \"ylim\": ylims[region],\n", - " \"xlim\": 220,\n", - " # \"xlim_low\": 50,\n", - " \"xlim_low\": 60,\n", - " \"ratio_ylims\": pass_ratio_ylims if pass_region else fail_ratio_ylims,\n", - " \"title\": f\"{shape_label} {region_label} Region\",\n", - " \"name\": f\"{plot_dir}/{shape}_{region}_{shape_var.var}.pdf\",\n", - " \"bg_order\": [\"diboson\", \"vjets\", \"vhtobb\", \"ttbar\", \"qcd\"],\n", - " \"energy\": 13.6,\n", - " }\n", - "\n", - " plotting.ratioHistPlot(**plot_params)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "bbVV", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.15" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "5b9eab485576227e6cf1b964bb8855c46cbdf15c3e77cecdb2bb309145d3e8d8" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}