diff --git a/fast_carpenter/summary/binned_dataframe.py b/fast_carpenter/summary/binned_dataframe.py index cd1839f..a3df678 100644 --- a/fast_carpenter/summary/binned_dataframe.py +++ b/fast_carpenter/summary/binned_dataframe.py @@ -150,6 +150,11 @@ def __init__(self, name, out_dir, binning, weights=None, dataset_col=False): excluded from the stored dataframe. Leaving this ``False`` can save some disk-space and improve processing time, particularly if the bins are only very sparsely filled. + observed (bool): If ``False`` bins in the dataframe will only be filled + if there are datapoints contained within them. Otherwise, depending on + the binning specification for each dimension, all bins for that + dimension will be present. Use `pad_missing: true` to force all bins + to be present. Other Parameters: name (str): The name of this stage (handled automatically by fast-flow) @@ -161,7 +166,8 @@ def __init__(self, name, out_dir, binning, weights=None, dataset_col=False): """ - def __init__(self, name, out_dir, binning, weights=None, dataset_col=True, pad_missing=False, file_format=None, observed=False): + def __init__(self, name, out_dir, binning, weights=None, dataset_col=True, + pad_missing=False, file_format=None, observed=False): self.name = name self.out_dir = out_dir ins, outs, binnings = cfg.create_binning_list(self.name, binning) diff --git a/tests/summary/test_binned_dataframe.py b/tests/summary/test_binned_dataframe.py index 87e5df9..b49a821 100644 --- a/tests/summary/test_binned_dataframe.py +++ b/tests/summary/test_binned_dataframe.py @@ -115,7 +115,7 @@ def test_BinnedDataframe_run_twice(binned_df_1, tmpdir, infile): assert totals["EventWeight:sumw"] == pytest.approx(231.91339 * 2) -@pytest.fixture #(scope="function") +@pytest.fixture def run_twice_data_mc(config_1, infile, observed): chunk_mc = FakeBEEvent(infile, "mc") chunk_data = FakeBEEvent(infile, "data")