Skip to content

Commit

Permalink
Removing the bootstrap matrix solver from this branch in favor of it …
Browse files Browse the repository at this point in the history
…living in the ELEX-3830 branch
  • Loading branch information
dmnapolitano committed Jan 31, 2024
1 parent 67dd426 commit fe69ce4
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 329 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from setuptools import find_packages, setup

INSTALL_REQUIRES = ["cvxpy~=1.4", "numpy~=1.26", "pymc~=5.10", "scipy~=1.12", "tqdm~=4.66"]
INSTALL_REQUIRES = ["cvxpy~=1.4", "numpy~=1.26", "pymc~=5.10", "scipy~=1.12"]

THIS_FILE_DIR = os.path.dirname(__file__)

Expand Down
91 changes: 0 additions & 91 deletions src/elexsolver/TransitionMatrixSolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import cvxpy as cp
import numpy as np
from tqdm import tqdm

from elexsolver.logging import initialize_logging
from elexsolver.TransitionSolver import TransitionSolver
Expand Down Expand Up @@ -107,93 +106,3 @@ def fit_predict(self, X: np.ndarray, Y: np.ndarray, weights: np.ndarray | None =
percentages = self.__solve(X, Y, weights)
self._transitions = np.diag(X_expected_totals) @ percentages
return percentages


class BootstrapTransitionMatrixSolver(TransitionSolver):
    """
    Bootstrap version of the matrix regression transition solver.

    Fits one `TransitionMatrixSolver` on the data as given, then fits it again on
    `B` - 1 row-resampled copies of the data. The prediction is the element-wise mean
    of all fits; percentile-based confidence intervals are taken over the same fits.
    """

    def __init__(self, B: int = 1000, strict: bool = True, verbose: bool = True, lam: float | None = None):
        """
        Parameters
        ----------
        `B` : int, default 1000
            Number of bootstrap samples to draw and matrix solver models to fit/predict.
        `strict` : bool, default True
            If `True`, solution will be constrained so that all coefficients are >= 0,
            <= 1, and the sum of each row equals 1.
        `verbose` : bool, default True
            If `False`, this will reduce the amount of logging produced for each of the `B` bootstrap samples.
        `lam` : float, optional
            `lam` != 0 will enable L2 regularization (Ridge).
        """
        super().__init__()
        self._strict = strict
        self._B = B
        self._verbose = verbose
        self._lambda = lam

        # class members that are instantiated during model-fit
        self._predicted_percentages = None
        self._X_expected_totals = None

    def fit_predict(self, X: np.ndarray, Y: np.ndarray, weights: np.ndarray | None = None) -> np.ndarray:
        """
        Fit the matrix solver on `X`/`Y` and on bootstrap resamples of their rows.

        Parameters
        ----------
        `X` : np.ndarray
            Anything that is not an `np.ndarray` is converted with `.to_numpy()`.
        `Y` : np.ndarray
            Converted the same way as `X`. Row i of `Y` is paired with row i of `X`.
        `weights` : np.ndarray, optional
            Passed as-is to the fit on the original data. For the resampled fits the weights
            (normalized to sum to 1) are used as row-sampling probabilities instead, and those
            fits themselves are unweighted.

        Returns
        -------
        The element-wise mean of the percentage matrices from all fits. Also stores the
        corresponding transitions in `self._transitions`.
        """
        self._predicted_percentages = []

        # assuming pandas.DataFrame
        if not isinstance(X, np.ndarray):
            X = X.to_numpy()
        if not isinstance(Y, np.ndarray):
            Y = Y.to_numpy()

        column_totals = X.sum(axis=0)
        self._X_expected_totals = column_totals / column_totals.sum()

        # The first entry is always the fit on the data as given.
        tm = TransitionMatrixSolver(strict=self._strict, lam=self._lambda)
        self._predicted_percentages.append(tm.fit_predict(X, Y, weights=weights))

        n_rows = len(X)
        # Loop-invariant, so computed once rather than per bootstrap sample.
        sampling_probabilities = None if weights is None else weights / weights.sum()

        for b in tqdm(range(0, self._B - 1), desc="Bootstrapping", disable=not self._verbose):
            # Seeding with the sample number keeps every run reproducible.
            rng = np.random.default_rng(seed=b)
            # Sample row positions rather than row values, so that each resampled X row keeps
            # its own Y row even when X contains duplicate rows.
            indices = rng.choice(n_rows, n_rows, replace=True, p=sampling_probabilities)
            self._predicted_percentages.append(tm.fit_predict(X[indices], Y[indices], weights=None))

        percentages = np.mean(self._predicted_percentages, axis=0)
        self._transitions = np.diag(self._X_expected_totals) @ percentages
        return percentages

    def get_confidence_interval(self, alpha: float, transitions: bool = False) -> tuple[np.ndarray, np.ndarray]:
        """
        Parameters
        ----------
        `alpha` : float
            Value between [0, 1).  If greater than 1, will be divided by 100.
        `transitions` : bool, default False
            If True, the returned matrices will represent transitions, not percentages.

        Returns
        -------
        A tuple of two np.ndarray matrices of float.  Element 0 has the lower bound and 1 has the upper bound.

        Raises
        ------
        ValueError
            If `alpha` (after the optional division by 100) is not in [0, 1).
        RuntimeError
            If called before `fit_predict`.
        """
        if alpha > 1:
            alpha = alpha / 100
        if alpha < 0 or alpha >= 1:
            raise ValueError(f"Invalid confidence interval {alpha}.")

        if self._predicted_percentages is None:
            raise RuntimeError("fit_predict must be called before get_confidence_interval.")

        # Symmetric interval: e.g. alpha = 0.95 gives the 2.5th and 97.5th percentiles.
        p_lower = ((1.0 - alpha) / 2.0) * 100
        p_upper = ((1.0 + alpha) / 2.0) * 100

        percentages = (
            np.percentile(self._predicted_percentages, p_lower, axis=0),
            np.percentile(self._predicted_percentages, p_upper, axis=0),
        )

        if transitions:
            return (
                np.diag(self._X_expected_totals) @ percentages[0],
                np.diag(self._X_expected_totals) @ percentages[1],
            )
        return percentages
238 changes: 1 addition & 237 deletions tests/test_transition_matrix_solver.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from elexsolver.TransitionMatrixSolver import BootstrapTransitionMatrixSolver, TransitionMatrixSolver
from elexsolver.TransitionMatrixSolver import TransitionMatrixSolver

RTOL = 1e-04
ATOL = 1e-04
Expand Down Expand Up @@ -228,239 +228,3 @@ def test_matrix_fit_predict_pandas():
except ImportError:
# pass this test through since pandas isn't a requirement for elex-solver
assert True


def test_bootstrap_fit_predict():
    """The mean of the bootstrap fits (B=10) on unweighted data matches the pinned reference matrix."""
    # Rows are [1, 2], [3, 4], ..., [11, 12]; Y is the same matrix shifted up by one.
    features = np.arange(1, 13).reshape(6, 2)
    targets = features + 1

    reference = np.array([[0.809393, 0.190607], [0.173843, 0.826157]])

    solver = BootstrapTransitionMatrixSolver(B=10, verbose=False)
    predicted = solver.fit_predict(features, targets)
    np.testing.assert_allclose(reference, predicted, rtol=RTOL, atol=ATOL)


def test_bootstrap_fit_predict_with_weights():
    """The mean of the bootstrap fits (B=10) with per-row weights matches the pinned reference matrix."""
    # Rows are [1, 2], [3, 4], ..., [11, 12]; Y is the same matrix shifted up by one.
    features = np.arange(1, 13).reshape(6, 2)
    targets = features + 1

    # Each weight is half the previous one: 500, 250, 125, 62.5, 31.25, 15.625.
    row_weights = 500.0 / 2 ** np.arange(6)

    reference = np.array([[0.739798, 0.260202], [0.229358, 0.770642]])

    solver = BootstrapTransitionMatrixSolver(B=10, verbose=False)
    predicted = solver.fit_predict(features, targets, weights=row_weights)
    np.testing.assert_allclose(reference, predicted, rtol=RTOL, atol=ATOL)


def test_bootstrap_confidence_interval_percentages():
    """The 95% interval (alpha given as 0.95) over percentages matches the pinned bounds."""
    # Rows are [1, 2], [3, 4], ..., [11, 12]; Y is the same matrix shifted up by one.
    features = np.arange(1, 13).reshape(6, 2)
    targets = features + 1

    reference_lower = np.array([[0.757573, 0.095978], [0.09128, 0.779471]])
    reference_upper = np.array([[0.904022, 0.242427], [0.220529, 0.90872]])

    solver = BootstrapTransitionMatrixSolver(B=10, verbose=False)
    solver.fit_predict(features, targets)
    lower, upper = solver.get_confidence_interval(0.95, transitions=False)
    np.testing.assert_allclose(reference_lower, lower, rtol=RTOL, atol=ATOL)
    np.testing.assert_allclose(reference_upper, upper, rtol=RTOL, atol=ATOL)


def test_bootstrap_confidence_interval_greater_than_1():
    """An alpha given as 95 yields the same pinned bounds as the 0.95 case."""
    # Rows are [1, 2], [3, 4], ..., [11, 12]; Y is the same matrix shifted up by one.
    features = np.arange(1, 13).reshape(6, 2)
    targets = features + 1

    reference_lower = np.array([[0.757573, 0.095978], [0.09128, 0.779471]])
    reference_upper = np.array([[0.904022, 0.242427], [0.220529, 0.90872]])

    solver = BootstrapTransitionMatrixSolver(B=10, verbose=False)
    solver.fit_predict(features, targets)
    lower, upper = solver.get_confidence_interval(95, transitions=False)
    np.testing.assert_allclose(reference_lower, lower, rtol=RTOL, atol=ATOL)
    np.testing.assert_allclose(reference_upper, upper, rtol=RTOL, atol=ATOL)


def test_bootstrap_confidence_interval_invalid():
    """A negative alpha is rejected with ValueError, even on a fitted solver."""
    # Rows are [1, 2], [3, 4], ..., [11, 12]; Y is the same matrix shifted up by one.
    features = np.arange(1, 13).reshape(6, 2)
    targets = features + 1

    solver = BootstrapTransitionMatrixSolver(B=10, verbose=False)
    solver.fit_predict(features, targets)

    with pytest.raises(ValueError):
        solver.get_confidence_interval(-34)


def test_bootstrap_confidence_interval_transitions():
    """The 95% interval requested with transitions=True matches the pinned transition bounds."""
    # Rows are [1, 2], [3, 4], ..., [11, 12]; Y is the same matrix shifted up by one.
    features = np.arange(1, 13).reshape(6, 2)
    targets = features + 1

    reference_lower = np.array([[0.349649, 0.044297], [0.049151, 0.419715]])
    reference_upper = np.array([[0.417241, 0.111889], [0.118746, 0.489311]])

    solver = BootstrapTransitionMatrixSolver(B=10, verbose=False)
    solver.fit_predict(features, targets)
    lower, upper = solver.get_confidence_interval(0.95, transitions=True)
    np.testing.assert_allclose(reference_lower, lower, rtol=RTOL, atol=ATOL)
    np.testing.assert_allclose(reference_upper, upper, rtol=RTOL, atol=ATOL)


def test_bootstrap_get_prediction_interval():
    """Asking the bootstrap solver for a prediction interval raises NotImplementedError."""
    solver = BootstrapTransitionMatrixSolver(B=10, verbose=False)
    with pytest.raises(NotImplementedError):
        solver.get_prediction_interval(0)


def test_bootstrap_fit_predict_pandas():
    """Fitting on pandas DataFrames gives the same pinned result as fitting on numpy arrays."""
    # pandas isn't a requirement for elex-solver, so report this test as skipped when it is
    # missing. A try/except around the whole test would also swallow an ImportError raised
    # from inside the solver and report it as a pass.
    pandas = pytest.importorskip("pandas")

    X = pandas.DataFrame(
        [
            [1, 2],
            [3, 4],
            [5, 6],
            [7, 8],
            [9, 10],
            [11, 12],
        ],
        columns=["x1", "x2"],
    )

    Y = pandas.DataFrame(
        [
            [2, 3],
            [4, 5],
            [6, 7],
            [8, 9],
            [10, 11],
            [12, 13],
        ],
        columns=["y1", "y2"],
    )

    expected = np.array([[0.809393, 0.190607], [0.173843, 0.826157]])

    btms = BootstrapTransitionMatrixSolver(B=10, verbose=False)
    current = btms.fit_predict(X, Y)
    np.testing.assert_allclose(expected, current, rtol=RTOL, atol=ATOL)

0 comments on commit fe69ce4

Please sign in to comment.