diff --git a/thicket/stats/confidence_interval.py b/thicket/stats/confidence_interval.py
index 2c014644..07b166fc 100644
--- a/thicket/stats/confidence_interval.py
+++ b/thicket/stats/confidence_interval.py
@@ -5,46 +5,94 @@
 
 import numpy as np
 import pandas as pd
-import scipy.stats as stats
+import scipy.stats
 
 import thicket as th
 from ..utils import verify_thicket_structures
 from .stats_utils import cache_stats_op
-from thicket.stats import mean
 
 
 @cache_stats_op
-def confidence_interval(thicket, columns=None, confidence_value=0.95):
+def confidence_interval(thicket, columns=None, confidence_level=0.95):
+    r"""Calculate the confidence interval for each node in the performance data table.
+
+    Designed to take in a thicket, and append one or more columns to the aggregated
+    statistics table for the confidence interval calculation for each node.
+
+    A confidence interval is a range of values, derived from sample data, that is
+    likely to contain the true population parameter with a specified level of confidence.
+    It provides an estimate of uncertainty around a sample statistic, indicating how much
+    variability is expected if the sampling process were repeated multiple times.
+
+    Arguments:
+        thicket (thicket): Thicket object
+        columns (list): List of hardware/timing metrics to perform confidence interval
+            calculation on. Note, if using a columnar_joined thicket a list of tuples
+            must be passed in with the format (column index, column name).
+        confidence_level (int,float):  The confidence level (often 0.90, 0.95, or 0.99)
+            indicates the degree of confidence that the true parameter lies within the interval.
+
+    Returns:
+        (list): returns a list of output statsframe column names
+
+    Equation:
+        .. math::
+
+             \text{CI} = \bar{x} \pm z \left( \frac{\sigma}{\sqrt{n}} \right)
+    """
+    if columns is None or not isinstance(columns, list):
+        raise ValueError("Value passed to 'columns' must be of type list.")
+
+    if not isinstance(confidence_level, (int, float)):
+        raise ValueError(
+            r"Value passed to 'confidence_level' must be of type float or int."
+        )
+
+    if confidence_level >= 1 or confidence_level <= 0:
+        raise ValueError(
+            r"Value passed to 'confidence_level' must be in the range of (0, 1)."
+        )
+
+    verify_thicket_structures(thicket.dataframe, columns=columns)
+
     output_column_names = []
-    
+
+    sample_sizes = []
+
+    # Calculate mean and standard deviation
     mean_cols = th.stats.mean(thicket, columns=columns)
     std_cols = th.stats.std(thicket, columns=columns)
-    sample_sizes = []
-    z = stats.norm.ppf((1 + confidence_value) / 2)
 
+    # Convert confidence level to Z score
+    z = scipy.stats.norm.ppf((1 + confidence_level) / 2)
+
+    # Get number of profiles per node
     idx = pd.IndexSlice
-    for node in thicket.graph.traverse():
+    for node in thicket.dataframe.index.get_level_values(0).unique().tolist():
         node_df = thicket.dataframe.loc[idx[node, :]]
         sample_sizes.append(len(node_df))
 
+    # Calculate confidence interval for every column
     for i in range(0, len(columns)):
         x = thicket.statsframe.dataframe[mean_cols[i]]
         s = thicket.statsframe.dataframe[std_cols[i]]
-        n = sample_sizes
 
-        c_p = x + (z * (s / np.sqrt(n)))
-        c_m = x - (z * (s / np.sqrt(n)))
-        
-        out = list(zip(c_m, c_p))
-        out = pd.Series(out, index=thicket.statsframe.dataframe.index)
+        c_p = x + (z * (s / np.sqrt(sample_sizes)))
+        c_m = x - (z * (s / np.sqrt(sample_sizes)))
+
+        out = pd.Series(list(zip(c_m, c_p)), index=thicket.statsframe.dataframe.index)
+
+        if thicket.dataframe.columns.nlevels == 1:
+            out_col = f"confidence_interval_{confidence_level}_{columns[i]}"
+        else:
+            out_col = (
+                columns[i][0],
+                f"confidence_interval_{confidence_level}_{columns[i][1]}",
+            )
 
-        # If multi index, place below first level
-        out_col = f"confidence_interval_{confidence_value}_{columns[i]}"
         output_column_names.append(out_col)
         thicket.statsframe.dataframe[out_col] = out
-        break
 
     thicket.statsframe.dataframe = thicket.statsframe.dataframe.sort_index(axis=1)
-    return output_column_names
-
 
+    return output_column_names
diff --git a/thicket/tests/test_stats.py b/thicket/tests/test_stats.py
index 868cf6ef..60257b62 100644
--- a/thicket/tests/test_stats.py
+++ b/thicket/tests/test_stats.py
@@ -6,6 +6,7 @@
 import math
 
 import numpy as np
+import pytest
 
 import thicket as th
 
@@ -1217,3 +1218,49 @@ def test_cache_decorator(rajaperf_seq_O3_1M_cali):
     assert (
         len(th_1.statsframe_ops_cache[list(th_1.statsframe_ops_cache.keys())[0]]) == 1
     )
+
+
+def test_confidence_interval(thicket_axis_columns):
+    thicket_list, thicket_list_cp, combined_th = thicket_axis_columns
+
+    idx = list(combined_th.dataframe.columns.levels[0][0:2])
+    columns = [(idx[0], "Min time/rank"), (idx[1], "Min time/rank")]
+
+    with pytest.raises(
+        ValueError, match="Value passed to 'columns' must be of type list."
+    ):
+        th.stats.confidence_interval(combined_th, columns="columns")
+
+    with pytest.raises(
+        ValueError,
+        match="Value passed to 'confidence_level' must be of type float or int.",
+    ):
+        th.stats.confidence_interval(
+            combined_th, columns=columns, confidence_level="0.95"
+        )
+
+    with pytest.raises(
+        ValueError,
+        match=r"Value passed to 'confidence_level' must be in the range of \(0, 1\).",
+    ):
+        th.stats.confidence_interval(combined_th, columns=columns, confidence_level=95)
+
+    # Hardcoded cases
+    columns = [("block_128", "Avg time/rank"), ("default", "Avg time/rank")]
+
+    th.stats.confidence_interval(combined_th, columns=columns)
+
+    correct_data = {
+        ("block_128", "confidence_interval_0.95_Avg time/rank"): [
+            (1.0128577358974717, 4.149184264102528),
+            (0.0049270306443246845, 0.012615969355675315),
+        ],
+        ("default", "confidence_interval_0.95_Avg time/rank"): [
+            (43.443961386963, 288.581029613037),
+            (1.858945913805485, 10.117062086194515),
+        ],
+    }
+
+    for col in correct_data.keys():
+        for idx, val in enumerate(correct_data[col]):
+            assert combined_th.statsframe.dataframe[col][idx] == val