Confidence Interval Stats Function (#213)

* Confidence interval: basic implementation; fixed PR concerns; float fix * black, flake * add confidence interval module --------- Co-authored-by: Stephanie Brink <[email protected]>
LLNL · Oct 25, 2024 · 429b027 · 429b027
1 parent 5f2d9af
commit 429b027
Show file tree

Hide file tree

Showing 3 changed files with 144 additions and 0 deletions.
diff --git a/thicket/stats/__init__.py b/thicket/stats/__init__.py
@@ -23,6 +23,7 @@
 from .preference import preference
 from .distance import bhattacharyya_distance
 from .distance import hellinger_distance
+from .confidence_interval import confidence_interval
 
 
 try:

diff --git a/thicket/stats/confidence_interval.py b/thicket/stats/confidence_interval.py
@@ -0,0 +1,96 @@
+# Copyright 2022 Lawrence Livermore National Security, LLC and other
+# Thicket Project Developers. See the top-level LICENSE file for details.
+#
+# SPDX-License-Identifier: MIT
+
+import numpy as np
+import pandas as pd
+import scipy.stats
+
+import thicket as th
+from ..utils import verify_thicket_structures
+from .stats_utils import cache_stats_op
+
+
+@cache_stats_op
+def confidence_interval(thicket, columns=None, confidence_level=0.95):
+    r"""Calculate the confidence interval for each node in the performance data table.
+
+    Designed to take in a thicket, and append one or more columns to the aggregated
+    statistics table for the confidence interval calculation for each node.
+
+    A confidence interval is a range of values, derived from sample data, that is
+    likely to contain the true population parameter with a specified level of confidence.
+    It provides an estimate of uncertainty around a sample statistic, indicating how much
+    variability is expected if the sampling process were repeated multiple times.
+
+    Arguments:
+        thicket (thicket): Thicket object
+        columns (list): List of hardware/timing metrics to perform confidence interval
+            calculation on. Note, if using a columnar_joined thicket a list of tuples
+            must be passed in with the format (column index, column name).
+        confidence_level (float):  The confidence level (often 0.90, 0.95, or 0.99)
+            indicates the degree of confidence that the true parameter lies within the interval.
+
+    Returns:
+        (list): returns a list of output statsframe column names
+
+    Equation:
+        .. math::
+
+             \text{CI} = \bar{x} \pm z \left( \frac{\sigma}{\sqrt{n}} \right)
+    """
+    if columns is None or not isinstance(columns, list):
+        raise ValueError("Value passed to 'columns' must be of type list.")
+
+    if not isinstance(confidence_level, float):
+        raise ValueError(r"Value passed to 'confidence_level' must be of type float.")
+
+    if confidence_level >= 1 or confidence_level <= 0:
+        raise ValueError(
+            r"Value passed to 'confidence_level' must be in the range of (0, 1)."
+        )
+
+    verify_thicket_structures(thicket.dataframe, columns=columns)
+
+    output_column_names = []
+
+    sample_sizes = []
+
+    # Calculate mean and standard deviation
+    mean_cols = th.stats.mean(thicket, columns=columns)
+    std_cols = th.stats.std(thicket, columns=columns)
+
+    # Convert confidence level to Z score
+    z = scipy.stats.norm.ppf((1 + confidence_level) / 2)
+
+    # Get number of profiles per node
+    idx = pd.IndexSlice
+    for node in thicket.dataframe.index.get_level_values(0).unique().tolist():
+        node_df = thicket.dataframe.loc[idx[node, :]]
+        sample_sizes.append(len(node_df))
+
+    # Calculate confidence interval for every column
+    for i in range(0, len(columns)):
+        x = thicket.statsframe.dataframe[mean_cols[i]]
+        s = thicket.statsframe.dataframe[std_cols[i]]
+
+        c_p = x + (z * (s / np.sqrt(sample_sizes)))
+        c_m = x - (z * (s / np.sqrt(sample_sizes)))
+
+        out = pd.Series(list(zip(c_m, c_p)), index=thicket.statsframe.dataframe.index)
+
+        if thicket.dataframe.columns.nlevels == 1:
+            out_col = f"confidence_interval_{confidence_level}_{columns[i]}"
+        else:
+            out_col = (
+                columns[i][0],
+                f"confidence_interval_{confidence_level}_{columns[i][1]}",
+            )
+
+        output_column_names.append(out_col)
+        thicket.statsframe.dataframe[out_col] = out
+
+    thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)
+
+    return output_column_names
diff --git a/thicket/tests/test_stats.py b/thicket/tests/test_stats.py
@@ -6,6 +6,7 @@
 import math
 
 import numpy as np
+import pytest
 
 import thicket as th
 
@@ -1217,3 +1218,49 @@ def test_cache_decorator(rajaperf_seq_O3_1M_cali):
     assert (
         len(th_1.statsframe_ops_cache[list(th_1.statsframe_ops_cache.keys())[0]]) == 1
     )
+
+
+def test_confidence_interval(thicket_axis_columns):
+    thicket_list, thicket_list_cp, combined_th = thicket_axis_columns
+
+    idx = list(combined_th.dataframe.columns.levels[0][0:2])
+    columns = [(idx[0], "Min time/rank"), (idx[1], "Min time/rank")]
+
+    with pytest.raises(
+        ValueError, match="Value passed to 'columns' must be of type list."
+    ):
+        th.stats.confidence_interval(combined_th, columns="columns")
+
+    with pytest.raises(
+        ValueError,
+        match="Value passed to 'confidence_level' must be of type float.",
+    ):
+        th.stats.confidence_interval(
+            combined_th, columns=columns, confidence_level="0.95"
+        )
+
+    with pytest.raises(
+        ValueError,
+        match=r"Value passed to 'confidence_level' must be in the range of \(0, 1\).",
+    ):
+        th.stats.confidence_interval(combined_th, columns=columns, confidence_level=1.2)
+
+    # Hardcoded cases
+    columns = [("block_128", "Avg time/rank"), ("default", "Avg time/rank")]
+
+    th.stats.confidence_interval(combined_th, columns=columns)
+
+    correct_data = {
+        ("block_128", "confidence_interval_0.95_Avg time/rank"): [
+            (1.0128577358974717, 4.149184264102528),
+            (0.0049270306443246845, 0.012615969355675315),
+        ],
+        ("default", "confidence_interval_0.95_Avg time/rank"): [
+            (43.443961386963, 288.581029613037),
+            (1.858945913805485, 10.117062086194515),
+        ],
+    }
+
+    for col in correct_data.keys():
+        for idx, val in enumerate(correct_data[col]):
+            assert combined_th.statsframe.dataframe[col][idx] == val