refactor and change optimization #41

Merged: 1 commit, Aug 8, 2023
3 changes: 2 additions & 1 deletion .pylintrc
@@ -15,7 +15,7 @@ ignore-patterns=

# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
init-hook="import sys; import os; sys.path.append(os.path.abspath('.'));"

# Use multiple processes to speed up Pylint.
jobs=1
@@ -62,6 +62,7 @@ disable=missing-docstring,
too-many-branches,
too-many-statements,
too-few-public-methods,
import-error,



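For context: the new init-hook runs a small snippet before linting so that pylint can resolve the package from the repository root, while the added import-error disable presumably silences warnings about third-party dependencies that are not installed in the lint environment. A minimal sketch of what the hook executes, assuming pylint is invoked from the repo root:

```python
# Roughly what the new init-hook line executes before pylint starts:
import os
import sys

# append the current working directory (the repository root when pylint is
# invoked from there) so that `import kulprit` resolves without installation
sys.path.append(os.path.abspath("."))
```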
180 changes: 82 additions & 98 deletions docs/examples/quick-start.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions kulprit/__init__.py
@@ -4,11 +4,11 @@
Kullback-Leibler projections for Bayesian model selection.
"""

from kulprit.reference import ReferenceModel
from kulprit.reference import ProjectionPredictive
from kulprit.datasets import *


__version__ = "0.0.1"


__all__ = ["ReferenceModel"]
__all__ = ["ProjectionPredictive"]
2 changes: 1 addition & 1 deletion kulprit/projection/projector.py
@@ -38,7 +38,7 @@ def __init__(
The inference data object corresponding to the reference model.
path : dict
An optional search path dictionary, initialized to None and assigned by the
ReferenceModel parent object following a search for efficient submodel retrieval.
ProjectionPredictive parent object following a search for efficient submodel retrieval.
"""

# log reference model and reference inference data object
130 changes: 68 additions & 62 deletions kulprit/projection/solver.py
@@ -1,6 +1,7 @@
"""Optimisation module."""
"""optimization module."""

from typing import List, Optional
import warnings

import arviz as az
import bambi as bmb
@@ -55,43 +56,7 @@ def pps(self):
).values.T
return pps

def linear_predict(
self,
beta_x: np.ndarray,
X: Optional[np.ndarray] = None,
) -> np.ndarray:
"""Predict the latent predictor of the submodel.

Parameters:
----------
beta_x : np.ndarray
The model's projected posterior
X : np.ndarray
The model's common design matrix

Returns
-------
np.ndarray: Point estimate of the latent predictor using the single draw from the
posterior and the model's design matrix
"""

linear_predictor = np.zeros(shape=(X.shape[0],))

# Contribution due to common terms
if X is not None:

if len(beta_x.shape) > 1: # pragma: no cover
raise NotImplementedError("Currently this method only works for single samples.")

# 'contribution' is of shape:
# * (obs_n, ) for univariate
contribution = np.dot(X, beta_x.T).T
linear_predictor += contribution

# return the latent predictor
return linear_predictor

def _init_optimisation(self, term_names: List[str]) -> List[float]:
def _init_optimization(self, term_names: List[str]) -> List[float]:
"""Initialise the optimization with the reference posterior means."""

return np.hstack(
@@ -101,24 +66,26 @@ def _init_optimisation(self, term_names: List[str]) -> List[float]:
def _build_bounds(self, init: List[float]) -> list:
"""Build bounds for the parameters in the optimization.

This method is used to ensure that dispersion or other auxiliary
parameters present in certain families remain within their valid regions.
This method is used to ensure that dispersion or other auxiliary parameters present
in certain families remain within their valid regions.

Parameters:
----------
init : (List[float])
init : List[float]
The list of initial parameter values

Returns:
-------
List : [Tuple(float)]The upper and lower bounds for each initialized parameter in
the optimization
List : [Tuple(float)]
The upper and lower bounds for each initialized parameter in the optimization
"""
eps = np.finfo(np.float64).eps

# build bounds based on family
# build bounds based on family, we are assuming that the last parameter is the dispersion
# and that the other parameters are unbounded (like when using Gaussian priors)
if self.ref_family in ["gaussian"]:
# account for the dispersion parameter
bounds = [(None, None)] * (init.size - 1) + [(0, None)]
bounds = [(None, None)] * (init.size - 1) + [(eps, None)]
elif self.ref_family in ["binomial", "poisson"]:
# no dispersion parameter, so no bounds
bounds = [(None, None)] * (init.size)
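The switch from a lower bound of 0 to np.finfo(np.float64).eps keeps the Gaussian dispersion strictly positive, so the optimizer can never evaluate the likelihood at sigma = 0. A minimal, self-contained sketch of how bounds built this way feed into scipy.optimize.minimize; the toy objective below stands in for the solver's real one:

```python
import numpy as np
from scipy.optimize import minimize
from scipy.stats import norm

eps = np.finfo(np.float64).eps
obs = np.array([0.2, -0.5, 1.3, 0.7])  # toy observations

def neg_llk(params):
    # toy objective: negative Gaussian log-likelihood, dispersion as the last parameter
    mean, sigma = params
    return -norm.logpdf(obs, loc=mean, scale=sigma).sum()

init = np.array([0.0, 1.0])
# unbounded location, strictly positive dispersion, mirroring the (eps, None) bound above
bounds = [(None, None)] * (init.size - 1) + [(eps, None)]
opt = minimize(neg_llk, x0=init, bounds=bounds, method="SLSQP")
print(opt.x)  # roughly the sample mean and the (biased) sample standard deviation
```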
@@ -140,7 +107,7 @@ def objective(
Parameters:
----------
params : array_like
The optimisation parameters mean values
The optimization parameters mean values
obs : array_like
One sample from the posterior predictive distribution of the reference model
X : array_like
Expand All @@ -153,19 +120,19 @@ def objective(

# Gaussian observation likelihood
if self.ref_family == "gaussian":
linear_predictor = self.linear_predict(beta_x=params[:-1], X=X)
linear_predictor = _linear_predict(beta_x=params[:-1], X=X)
neg_llk = self.neg_log_likelihood(points=obs, mean=linear_predictor, sigma=params[-1])

# Binomial observation likelihood
elif self.ref_family == "binomial":
trials = self.ref_model.response.data[:, 1]
linear_predictor = self.linear_predict(beta_x=params, X=X)
linear_predictor = _linear_predict(beta_x=params, X=X)
probs = self.ref_model.family.link.linkinv(linear_predictor)
neg_llk = self.neg_log_likelihood(points=obs, probs=probs, trials=trials)

# Poisson observation likelihood
elif self.ref_family == "poisson":
linear_predictor = self.linear_predict(beta_x=params, X=X)
linear_predictor = _linear_predict(beta_x=params, X=X)
lam = self.ref_model.family.link.linkinv(linear_predictor)
neg_llk = self.neg_log_likelihood(points=obs, lam=lam)
return neg_llk
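In the binomial and Poisson branches the projected draw enters the likelihood through the inverse link rather than directly as a mean; a small illustration of the Poisson path with toy data, using scipy.stats in place of the solver's neg_log_likelihood helper:

```python
import numpy as np
from scipy.stats import poisson

obs = np.array([2, 0, 3, 1])                               # one toy posterior predictive draw (counts)
X = np.column_stack([np.ones(4), [-1.0, 0.0, 0.5, 1.0]])   # toy design matrix: intercept + one covariate
params = np.array([0.3, 0.8])                              # a single projected parameter draw

linear_predictor = X @ params          # what _linear_predict computes for one draw
lam = np.exp(linear_predictor)         # inverse log link for the Poisson family
neg_llk = -poisson.logpmf(obs, mu=lam).sum()
print(neg_llk)
```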
@@ -191,25 +158,29 @@ def solve(self, term_names: List[str], X: np.ndarray, slices: dict) -> SubModel:
SubModel: The projected submodel object
"""

# initialise the optimisation
init = self._init_optimisation(term_names=term_names)
# use reference model posterior as initial guess
init = self._init_optimization(term_names=term_names)

# build the optimisation parameter bounds
# build the optimization parameter bounds
bounds = self._build_bounds(init)

# perform mean-field variational projection predictive inference
res_posterior = []
objectives = []
for obs in self.pps:
opt = minimize(
self.objective,
args=(obs, X),
x0=init, # use reference model posterior as initial guess
bounds=bounds, # apply bounds
method="powell",
)
res_posterior.append(opt.x)
objectives.append(opt.fun)
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="Values in x were outside bounds")
for obs in self.pps:
opt = minimize(
self.objective,
args=(obs, X),
x0=init,
tol=0.0001,
bounds=bounds,
# This is the fastest method and the projected posterior looks Gaussian-like
method="SLSQP",
)
res_posterior.append(opt.x)
objectives.append(opt.fun)

# compile the projected posterior
res_samples = np.vstack(res_posterior)
@@ -243,3 +214,38 @@ def solve(self, term_names: List[str], X: np.ndarray, slices: dict) -> SubModel:
# compute the average loss
loss = np.mean(objectives)
return posterior, loss


def _linear_predict(
beta_x: np.ndarray,
X: Optional[np.ndarray] = None,
) -> np.ndarray:
"""Predict the latent predictor of the submodel.

Parameters:
----------
beta_x : np.ndarray
The model's projected posterior
X : np.ndarray
The model's common design matrix

Returns
-------
np.ndarray: Point estimate of the latent predictor using the single draw from the
posterior and the model's design matrix
"""

linear_predictor = np.zeros(shape=(X.shape[0],))

# Contribution due to common terms
if X is not None:

if len(beta_x.shape) > 1:
raise NotImplementedError("Currently this method only works for single samples.")

# 'contribution' is of shape * (obs_n, ) for univariate
contribution = np.dot(X, beta_x.T).T
linear_predictor += contribution

# return the latent predictor
return linear_predictor
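Moving the helper out of the class makes it a plain module-level function; a minimal usage sketch with toy inputs, assuming it is imported directly from kulprit.projection.solver for the sake of the demo:

```python
import numpy as np
from kulprit.projection.solver import _linear_predict  # private helper, imported only for the demo

X = np.array([[1.0, 0.2], [1.0, -0.4], [1.0, 1.1]])  # toy design matrix: 3 observations, 2 terms
beta = np.array([0.5, 2.0])                           # a single projected draw

eta = _linear_predict(beta_x=beta, X=X)               # equivalent to X @ beta for a 1-D draw
print(eta)  # [ 0.9 -0.3  2.7]

# passing a 2-D beta_x (several draws at once) raises NotImplementedError by design
```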