From c9bfd54078d51b27e6f918f51f7a252752f4fcbf Mon Sep 17 00:00:00 2001 From: Javier Duarte Date: Mon, 1 Jul 2024 19:20:08 -0700 Subject: [PATCH] update --- src/HH4b/boosted/correlations.ipynb | 144 ++++++++++++++++++++----- src/HH4b/hh_vars.py | 2 +- src/HH4b/postprocessing/PostProcess.py | 2 +- 3 files changed, 121 insertions(+), 27 deletions(-) diff --git a/src/HH4b/boosted/correlations.ipynb b/src/HH4b/boosted/correlations.ipynb index d4c3db15..6034d5e3 100644 --- a/src/HH4b/boosted/correlations.ipynb +++ b/src/HH4b/boosted/correlations.ipynb @@ -13,22 +13,13 @@ "import importlib\n", "import hist\n", "import os\n", - "\n", "import mplhep as hep\n", - "import matplotlib.ticker as mticker\n", "import matplotlib.pyplot as plt\n", - "from matplotlib.lines import Line2D\n", - "\n", "from HH4b.postprocessing.PostProcess import add_bdt_scores\n", "import HH4b\n", - "\n", - "formatter = mticker.ScalarFormatter(useMathText=True)\n", - "formatter.set_powerlimits((-3, 3))\n", - "plt.rcParams.update({\"font.size\": 12})\n", - "plt.rcParams[\"lines.linewidth\"] = 2\n", - "plt.rcParams[\"grid.color\"] = \"#CCCCCC\"\n", - "plt.rcParams[\"grid.linewidth\"] = 0.5\n", - "plt.rcParams[\"figure.edgecolor\"] = \"none\"" + "plt.style.use(hep.style.ROOT)\n", + "from HH4b.hh_vars import txbbsfs_decorr_pt_bins, txbbsfs_decorr_txbb_wps\n", + "import json" ] }, { @@ -49,7 +40,11 @@ "outputs": [], "source": [ "package_path = os.path.dirname(HH4b.__file__)\n", - "\n", + "legacy_label = \"Legacy\"\n", + "mass_var = \"H2PNetMass\"\n", + "bdt_model_name = \"24May31_lr_0p02_md_8_AK4Away\"\n", + "bdt_config = \"24May31_lr_0p02_md_8_AK4Away\"\n", + "year = \"2023\"\n", "\n", "def get_dataframe(events_dict, year, bdt_model_name, bdt_config):\n", " bdt_model = xgb.XGBClassifier()\n", @@ -94,7 +89,7 @@ " mask_mass = (bdt_events[\"H2PNetMass\"] > 50) & (bdt_events[\"H2PNetMass\"] < 250)\n", " bdt_events = bdt_events[(mask_mass) & (mask_hlt) & (mask_presel)]\n", "\n", - " columns = [\"bdt_score\", \"H1TXbb\", \"H2TXbb\", \"H1PNetMass\", \"H2PNetMass\", \"weight\"]\n", + " columns = [\"bdt_score\", \"H1TXbb\", \"H2TXbb\", \"H1PNetMass\", \"H2PNetMass\", \"weight\", \"H1Pt\", \"H2Pt\"]\n", " bdt_events_dict[key] = bdt_events[columns]\n", " return bdt_events_dict" ] @@ -132,11 +127,7 @@ " },\n", "}\n", "\n", - "mass_var = \"H2PNetMass\"\n", - "bdt_model_name = \"24May31_lr_0p02_md_8_AK4Away\"\n", - "bdt_config = \"24May31_lr_0p02_md_8_AK4Away\"\n", "bdt_events_dict_year = {}\n", - "legacy_label = \"Legacy\"\n", "for year in samples_run3:\n", " events = HH4b.postprocessing.load_run3_samples(\n", " input_dir=input_dir,\n", @@ -284,10 +275,6 @@ "metadata": {}, "outputs": [], "source": [ - "plt.style.use(hep.style.ROOT)\n", - "import json\n", - "\n", - "year = \"2022EE\"\n", "with open(f\"{package_path}/corrections/data/txbb_sfs/{year}/sf_txbbv11_Jun14.json\") as f:\n", " txbb_sf_old = json.load(f)\n", "with open(f\"{package_path}/corrections/data/txbb_sfs/{year}/sf_txbbv11_Jun26_freezeSFs.json\") as f:\n", @@ -300,12 +287,17 @@ " f\"{package_path}/corrections/data/txbb_sfs/{year}/sf_txbbv11_Jun29_freezeSFs_zoomedInWPs.json\"\n", ") as f:\n", " txbb_sf_zoom = json.load(f)\n", + "# with open(\n", + "# f\"{package_path}/corrections/data/txbb_sfs/{year}/sf_txbbv11_Jul1_freezeSFs_zoomedInWPs998.json\"\n", + "# ) as f:\n", + "# txbb_sf_zoom998 = json.load(f)\n", "\n", - "ptbins = np.array([200, 250, 300, 400, 500, 100000])\n", + "ptbins = np.array([200] + txbbsfs_decorr_pt_bins)\n", "y_old, yerr_low_old, yerr_high_old = [], [], []\n", "y, yerr_low, yerr_high = [], [], []\n", "y_fine, yerr_low_fine, yerr_high_fine = [], [], []\n", "y_zoom, yerr_low_zoom, yerr_high_zoom = [], [], []\n", + "# y_zoom998, yerr_low_zoom998, yerr_high_zoom998 = [], [], []\n", "for i in range(len(ptbins) - 1):\n", " y_old.append(txbb_sf_old[f\"WP1_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"central\"])\n", " yerr_low_old.append(txbb_sf_old[f\"WP1_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"low\"])\n", @@ -323,6 +315,10 @@ " yerr_low_zoom.append(txbb_sf_zoom[f\"WP1_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"low\"])\n", " yerr_high_zoom.append(txbb_sf_zoom[f\"WP1_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"high\"])\n", "\n", + " # y_zoom998.append(txbb_sf_zoom998[f\"WP1_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"central\"])\n", + " # yerr_low_zoom998.append(txbb_sf_zoom998[f\"WP1_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"low\"])\n", + " # yerr_high_zoom998.append(txbb_sf_zoom998[f\"WP1_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"high\"])\n", + "\n", "\n", "plt.figure()\n", "# horizaontal line at 1\n", @@ -346,6 +342,13 @@ " fmt=\"o\",\n", " label=\"WP [0.995, 1]\",\n", ")\n", + "# plt.errorbar(\n", + "# y=y_zoom998,\n", + "# x=ptbins[:-1] + 40,\n", + "# yerr=[yerr_low_zoom998, yerr_high_zoom998],\n", + "# fmt=\"o\",\n", + "# label=\"WP [0.998, 1]\",\n", + "# )\n", "plt.xlabel(\"$p_T (j)$ [GeV]\")\n", "plt.ylabel(\"SF (flvB)\")\n", "plt.ylim([0, 2])\n", @@ -375,13 +378,12 @@ " txbb_sf_new[f\"WP5_pt{ptbins[i]}to{ptbins[i+1]}\"] = txbb_sf_fine[\n", " f\"WP4_pt{ptbins[i]}to{ptbins[i+1]}\"\n", " ]\n", - "print(txbb_sf_new)\n", "\n", "with open(\n", " f\"{package_path}/corrections/data/txbb_sfs/{year}/sf_txbbv11_Jul1_freezeSFs_combinedWPs.json\",\n", " \"w\",\n", ") as f:\n", - " json.dump(txbb_sf_new, f, indent=4)" + " json.dump(txbb_sf_new, f, indent=4)" ] }, { @@ -431,6 +433,98 @@ "plt.legend(title=year)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from HH4b.postprocessing.corrections import _load_txbb_sfs\n", + "\n", + "txbb_sf_func_old = _load_txbb_sfs(year, \"sf_txbbv11_Jun14\", txbbsfs_decorr_txbb_wps, txbbsfs_decorr_pt_bins)\n", + "txbb_sf_func_new = _load_txbb_sfs(year, \"sf_txbbv11_Jul1_freezeSFs_combinedWPs\", wps, txbbsfs_decorr_pt_bins)\n", + "\n", + "weight_new = txbb_sf_func_new[\"nominal\"](events[\"H1TXbb\"].to_numpy(), events[\"H1Pt\"].to_numpy()) * txbb_sf_func_new[\"nominal\"](events[\"H2TXbb\"].to_numpy(), events[\"H2Pt\"].to_numpy())\n", + "weight_old = txbb_sf_func_old[\"nominal\"](events[\"H1TXbb\"].to_numpy(), events[\"H1Pt\"].to_numpy()) * txbb_sf_func_old[\"nominal\"](events[\"H2TXbb\"].to_numpy(), events[\"H2Pt\"].to_numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "weight_new[mask_bin1].sum()/ weight_old[mask_bin1].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ratio = {}\n", + "ratio[\"2022\"] = 0.5375997663881179\n", + "ratio[\"2022EE\"] = 0.7203077415037246\n", + "ratio[\"2023\"] = 1.0558906086460764\n", + "ratio[\"2023BPix\"] = 1.033687258950527" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from HH4b.hh_vars import LUMI\n", + "sum([ratio[year] * LUMI[year] for year in HH4b.hh_vars.years]) / sum([LUMI[year] for year in HH4b.hh_vars.years])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_old, yerr_low_old, yerr_high_old = [], [], []\n", + "wps = {\n", + " \"WP1\": [0.975, 1],\n", + " \"WP2\": [0.95, 0.975],\n", + " \"WP3\": [0.92, 0.95],\n", + "}\n", + "for wp in range(0, 3):\n", + " y_old.append([])\n", + " yerr_low_old.append([])\n", + " yerr_high_old.append([])\n", + " for i in range(len(ptbins) - 1):\n", + " y_old[wp].append(txbb_sf[f\"WP{wp+1}_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"central\"])\n", + " yerr_low_old[wp].append(\n", + " txbb_sf[f\"WP{wp+1}_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"low\"]\n", + " )\n", + " yerr_high_old[wp].append(\n", + " txbb_sf[f\"WP{wp+1}_pt{ptbins[i]}to{ptbins[i+1]}\"][\"final\"][\"high\"]\n", + " )\n", + "\n", + "plt.figure()\n", + "# horizaontal line at 1\n", + "plt.axhline(1, color=\"gray\", linestyle=\"--\", alpha=0.5)\n", + "# vertical line at each ptbin\n", + "for ptbin in ptbins[:-1]:\n", + " plt.axvline(ptbin, color=\"gray\", linestyle=\"-\", alpha=0.5)\n", + "for wp in range(0, 3):\n", + " plt.errorbar(\n", + " y=y_old[wp],\n", + " x=ptbins[:-1] + (wp + 3) * 5,\n", + " yerr=[yerr_low_old[wp], yerr_high_old[wp]],\n", + " fmt=\"o\",\n", + " label=f\"WP{wp+1} {wps[f'WP{wp+1}']}\",\n", + " )\n", + "plt.xlabel(\"$p_T (j)$ [GeV]\")\n", + "plt.ylabel(\"SF (flvB)\")\n", + "plt.ylim([0, 2])\n", + "plt.legend(title=year)" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/src/HH4b/hh_vars.py b/src/HH4b/hh_vars.py index 3f11215e..6fb736c6 100644 --- a/src/HH4b/hh_vars.py +++ b/src/HH4b/hh_vars.py @@ -397,6 +397,6 @@ ttbarsfs_decorr_bdt_bins = [0.03, 0.3, 0.5, 0.7, 0.93, 1.0] txbbsfs_decorr_txbb_wps = OrderedDict( - [("WP3", [0.92, 0.95]), ("WP2", [0.95, 0.975]), ("WP1", [0.975, 1])] + [("WP5", [0.92, 0.95]), ("WP4", [0.95, 0.975]), ("WP3", [0.975, 0.99]), ("WP2", [0.99, 0.995]), ("WP1", [0.995, 1])] ) txbbsfs_decorr_pt_bins = [250, 300, 400, 500, 100000] diff --git a/src/HH4b/postprocessing/PostProcess.py b/src/HH4b/postprocessing/PostProcess.py index b2ff3863..b8310fac 100644 --- a/src/HH4b/postprocessing/PostProcess.py +++ b/src/HH4b/postprocessing/PostProcess.py @@ -342,7 +342,7 @@ def load_process_run3_samples(args, year, bdt_training_keys, control_plots, plot # load TXbb SFs txbb_sf = corrections._load_txbb_sfs( - year, "sf_txbbv11_Jun14", txbbsfs_decorr_txbb_wps, txbbsfs_decorr_pt_bins + year, "sf_txbbv11_Jul1_freezeSFs_combinedWPs", txbbsfs_decorr_txbb_wps, txbbsfs_decorr_pt_bins ) events_dict_postprocess = {}