Removing the bootstrap matrix solver from this branch in favor of it living in the ELEX-3830 branch
washingtonpost · Jan 31, 2024 · fe69ce4
1 parent 67dd426
commit fe69ce4
Show file tree

Hide file tree

Showing 3 changed files with 2 additions and 329 deletions.
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 
 from setuptools import find_packages, setup
 
-INSTALL_REQUIRES = ["cvxpy~=1.4", "numpy~=1.26", "pymc~=5.10", "scipy~=1.12", "tqdm~=4.66"]
+INSTALL_REQUIRES = ["cvxpy~=1.4", "numpy~=1.26", "pymc~=5.10", "scipy~=1.12"]
 
 THIS_FILE_DIR = os.path.dirname(__file__)
 

diff --git a/src/elexsolver/TransitionMatrixSolver.py b/src/elexsolver/TransitionMatrixSolver.py
@@ -3,7 +3,6 @@
 
 import cvxpy as cp
 import numpy as np
-from tqdm import tqdm
 
 from elexsolver.logging import initialize_logging
 from elexsolver.TransitionSolver import TransitionSolver
@@ -107,93 +106,3 @@ def fit_predict(self, X: np.ndarray, Y: np.ndarray, weights: np.ndarray | None =
         percentages = self.__solve(X, Y, weights)
         self._transitions = np.diag(X_expected_totals) @ percentages
         return percentages
-
-
-class BootstrapTransitionMatrixSolver(TransitionSolver):
-    """
-    Bootstrap version of the matrix regression transition solver.
-    """
-
-    def __init__(self, B: int = 1000, strict: bool = True, verbose: bool = True, lam: int | None = None):
-        """
-        Parameters
-        ----------
-        `B` : int, default 1000
-            Number of bootstrap samples to draw and matrix solver models to fit/predict.
-        `strict` : bool, default True
-            If `True`, solution will be constrainted so that all coefficients are >= 0,
-            <= 1, and the sum of each row equals 1.
-        `verbose` : bool, default True
-            If `False`, this will reduce the amount of logging produced for each of the `B` bootstrap samples.
-        `lam` : float, optional
-            `lam` != 0 will enable L2 regularization (Ridge).
-        """
-        super().__init__()
-        self._strict = strict
-        self._B = B
-        self._verbose = verbose
-        self._lambda = lam
-
-        # class members that are instantiated during model-fit
-        self._predicted_percentages = None
-        self._X_expected_totals = None
-
-    def fit_predict(self, X: np.ndarray, Y: np.ndarray, weights: np.ndarray | None = None) -> np.ndarray:
-        self._predicted_percentages = []
-
-        # assuming pandas.DataFrame
-        if not isinstance(X, np.ndarray):
-            X = X.to_numpy()
-        if not isinstance(Y, np.ndarray):
-            Y = Y.to_numpy()
-
-        self._X_expected_totals = X.sum(axis=0) / X.sum(axis=0).sum()
-
-        tm = TransitionMatrixSolver(strict=self._strict, lam=self._lambda)
-        self._predicted_percentages.append(tm.fit_predict(X, Y, weights=weights))
-
-        for b in tqdm(range(0, self._B - 1), desc="Bootstrapping", disable=not self._verbose):
-            rng = np.random.default_rng(seed=b)
-            X_resampled = rng.choice(
-                X, len(X), replace=True, axis=0, p=(weights / weights.sum() if weights is not None else None)
-            )
-            indices = [np.where((X == x).all(axis=1))[0][0] for x in X_resampled]
-            Y_resampled = Y[indices]
-            self._predicted_percentages.append(tm.fit_predict(X_resampled, Y_resampled, weights=None))
-
-        percentages = np.mean(self._predicted_percentages, axis=0)
-        self._transitions = np.diag(self._X_expected_totals) @ percentages
-        return percentages
-
-    def get_confidence_interval(self, alpha: float, transitions: bool = False) -> (np.ndarray, np.ndarray):
-        """
-        Parameters
-        ----------
-        `alpha` : float
-            Value between [0, 1).  If greater than 1, will be divided by 100.
-        `transitions` : bool, default False
-            If True, the returned matrices will represent transitions, not percentages.
-
-        Returns
-        -------
-        A tuple of two np.ndarray matrices of float.  Element 0 has the lower bound and 1 has the upper bound.
-        """
-        if alpha > 1:
-            alpha = alpha / 100
-        if alpha < 0 or alpha >= 1:
-            raise ValueError(f"Invalid confidence interval {alpha}.")
-
-        p_lower = ((1.0 - alpha) / 2.0) * 100
-        p_upper = ((1.0 + alpha) / 2.0) * 100
-
-        percentages = (
-            np.percentile(self._predicted_percentages, p_lower, axis=0),
-            np.percentile(self._predicted_percentages, p_upper, axis=0),
-        )
-
-        if transitions:
-            return (
-                np.diag(self._X_expected_totals) @ percentages[0],
-                np.diag(self._X_expected_totals) @ percentages[1],
-            )
-        return percentages
diff --git a/tests/test_transition_matrix_solver.py b/tests/test_transition_matrix_solver.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from elexsolver.TransitionMatrixSolver import BootstrapTransitionMatrixSolver, TransitionMatrixSolver
+from elexsolver.TransitionMatrixSolver import TransitionMatrixSolver
 
 RTOL = 1e-04
 ATOL = 1e-04
@@ -228,239 +228,3 @@ def test_matrix_fit_predict_pandas():
     except ImportError:
         # pass this test through since pandas isn't a requirement for elex-solver
         assert True
-
-
-def test_bootstrap_fit_predict():
-    X = np.array(
-        [
-            [1, 2],
-            [3, 4],
-            [5, 6],
-            [7, 8],
-            [9, 10],
-            [11, 12],
-        ]
-    )
-
-    Y = np.array(
-        [
-            [2, 3],
-            [4, 5],
-            [6, 7],
-            [8, 9],
-            [10, 11],
-            [12, 13],
-        ]
-    )
-
-    expected = np.array([[0.809393, 0.190607], [0.173843, 0.826157]])
-
-    btms = BootstrapTransitionMatrixSolver(B=10, verbose=False)
-    current = btms.fit_predict(X, Y)
-    np.testing.assert_allclose(expected, current, rtol=RTOL, atol=ATOL)
-
-
-def test_bootstrap_fit_predict_with_weights():
-    X = np.array(
-        [
-            [1, 2],
-            [3, 4],
-            [5, 6],
-            [7, 8],
-            [9, 10],
-            [11, 12],
-        ]
-    )
-
-    Y = np.array(
-        [
-            [2, 3],
-            [4, 5],
-            [6, 7],
-            [8, 9],
-            [10, 11],
-            [12, 13],
-        ]
-    )
-
-    weights = np.array([500, 250, 125, 62.5, 31.25, 15.625])
-
-    expected = np.array([[0.739798, 0.260202], [0.229358, 0.770642]])
-
-    btms = BootstrapTransitionMatrixSolver(B=10, verbose=False)
-    current = btms.fit_predict(X, Y, weights=weights)
-    np.testing.assert_allclose(expected, current, rtol=RTOL, atol=ATOL)
-
-
-def test_bootstrap_confidence_interval_percentages():
-    X = np.array(
-        [
-            [1, 2],
-            [3, 4],
-            [5, 6],
-            [7, 8],
-            [9, 10],
-            [11, 12],
-        ]
-    )
-
-    Y = np.array(
-        [
-            [2, 3],
-            [4, 5],
-            [6, 7],
-            [8, 9],
-            [10, 11],
-            [12, 13],
-        ]
-    )
-
-    expected_lower = np.array([[0.757573, 0.095978], [0.09128, 0.779471]])
-    expected_upper = np.array([[0.904022, 0.242427], [0.220529, 0.90872]])
-
-    btms = BootstrapTransitionMatrixSolver(B=10, verbose=False)
-    _ = btms.fit_predict(X, Y)
-    (current_lower, current_upper) = btms.get_confidence_interval(0.95, transitions=False)
-    np.testing.assert_allclose(expected_lower, current_lower, rtol=RTOL, atol=ATOL)
-    np.testing.assert_allclose(expected_upper, current_upper, rtol=RTOL, atol=ATOL)
-
-
-def test_bootstrap_confidence_interval_greater_than_1():
-    X = np.array(
-        [
-            [1, 2],
-            [3, 4],
-            [5, 6],
-            [7, 8],
-            [9, 10],
-            [11, 12],
-        ]
-    )
-
-    Y = np.array(
-        [
-            [2, 3],
-            [4, 5],
-            [6, 7],
-            [8, 9],
-            [10, 11],
-            [12, 13],
-        ]
-    )
-
-    expected_lower = np.array([[0.757573, 0.095978], [0.09128, 0.779471]])
-    expected_upper = np.array([[0.904022, 0.242427], [0.220529, 0.90872]])
-
-    btms = BootstrapTransitionMatrixSolver(B=10, verbose=False)
-    _ = btms.fit_predict(X, Y)
-    (current_lower, current_upper) = btms.get_confidence_interval(95, transitions=False)
-    np.testing.assert_allclose(expected_lower, current_lower, rtol=RTOL, atol=ATOL)
-    np.testing.assert_allclose(expected_upper, current_upper, rtol=RTOL, atol=ATOL)
-
-
-def test_bootstrap_confidence_interval_invalid():
-    X = np.array(
-        [
-            [1, 2],
-            [3, 4],
-            [5, 6],
-            [7, 8],
-            [9, 10],
-            [11, 12],
-        ]
-    )
-
-    Y = np.array(
-        [
-            [2, 3],
-            [4, 5],
-            [6, 7],
-            [8, 9],
-            [10, 11],
-            [12, 13],
-        ]
-    )
-
-    btms = BootstrapTransitionMatrixSolver(B=10, verbose=False)
-    _ = btms.fit_predict(X, Y)
-
-    with pytest.raises(ValueError):
-        btms.get_confidence_interval(-34)
-
-
-def test_bootstrap_confidence_interval_transitions():
-    X = np.array(
-        [
-            [1, 2],
-            [3, 4],
-            [5, 6],
-            [7, 8],
-            [9, 10],
-            [11, 12],
-        ]
-    )
-
-    Y = np.array(
-        [
-            [2, 3],
-            [4, 5],
-            [6, 7],
-            [8, 9],
-            [10, 11],
-            [12, 13],
-        ]
-    )
-
-    expected_lower = np.array([[0.349649, 0.044297], [0.049151, 0.419715]])
-    expected_upper = np.array([[0.417241, 0.111889], [0.118746, 0.489311]])
-
-    btms = BootstrapTransitionMatrixSolver(B=10, verbose=False)
-    _ = btms.fit_predict(X, Y)
-    (current_lower, current_upper) = btms.get_confidence_interval(0.95, transitions=True)
-    np.testing.assert_allclose(expected_lower, current_lower, rtol=RTOL, atol=ATOL)
-    np.testing.assert_allclose(expected_upper, current_upper, rtol=RTOL, atol=ATOL)
-
-
-def test_bootstrap_get_prediction_interval():
-    btms = BootstrapTransitionMatrixSolver(B=10, verbose=False)
-    with pytest.raises(NotImplementedError):
-        btms.get_prediction_interval(0)
-
-
-def test_bootstrap_fit_predict_pandas():
-    try:
-        import pandas  # pylint: disable=import-outside-toplevel
-
-        X = pandas.DataFrame(
-            [
-                [1, 2],
-                [3, 4],
-                [5, 6],
-                [7, 8],
-                [9, 10],
-                [11, 12],
-            ],
-            columns=["x1", "x2"],
-        )
-
-        Y = pandas.DataFrame(
-            [
-                [2, 3],
-                [4, 5],
-                [6, 7],
-                [8, 9],
-                [10, 11],
-                [12, 13],
-            ],
-            columns=["y1", "y2"],
-        )
-
-        expected = np.array([[0.809393, 0.190607], [0.173843, 0.826157]])
-
-        btms = BootstrapTransitionMatrixSolver(B=10, verbose=False)
-        current = btms.fit_predict(X, Y)
-        np.testing.assert_allclose(expected, current, rtol=RTOL, atol=ATOL)
-
-    except ImportError:
-        # pass this test through since pandas isn't a requirement for elex-solver
-        assert True