From dae5202906cf5a8a4722957ed9da1a815fd88eea Mon Sep 17 00:00:00 2001
From: Osvaldo A Martin
Date: Thu, 21 Mar 2024 16:46:34 -0300
Subject: [PATCH] Use preliz (#53)

* use preliz

* update req

* arviz req

* fix linter

* new version
---
 kulprit/projection/likelihood.py | 116 -------------------------------
 kulprit/projection/solver.py     |  42 ++++-------
 pyproject.toml                   |   2 +
 tests/test_likelihood.py         |  71 -------------------
 4 files changed, 16 insertions(+), 215 deletions(-)
 delete mode 100644 kulprit/projection/likelihood.py
 delete mode 100644 tests/test_likelihood.py

diff --git a/kulprit/projection/likelihood.py b/kulprit/projection/likelihood.py
deleted file mode 100644
index ef010f5..0000000
--- a/kulprit/projection/likelihood.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# pylint: disable=consider-using-generator
-import math
-import numpy as np
-
-import numba as nb
-
-
-LOOKUP_TABLE = np.array(
-    [
-        0.0,
-        0.0,
-        0.69314718,
-        1.79175947,
-        3.17805383,
-        4.78749174,
-        6.57925121,
-        8.52516136,
-        10.6046029,
-        12.80182748,
-        15.10441257,
-        17.50230785,
-        19.9872145,
-        22.55216385,
-        25.19122118,
-        27.89927138,
-        30.67186011,
-        33.50507345,
-        36.39544521,
-        39.33988419,
-        42.33561646,
-    ]
-)
-
-
-@nb.njit
-def log_factorial(n):
-    if n > 20:
-        return math.lgamma(n + 1)  # inexact but fast computation of the factorial
-    return LOOKUP_TABLE[n]
-
-
-@nb.njit
-def log_binom_coeff(n, k):
-    return log_factorial(n) - log_factorial(k) - log_factorial(n - k)
-
-
-@nb.njit
-def gaussian_log_pdf(y, mean, sigma):
-    return -np.log(sigma) - 0.5 * np.log(2 * np.pi) - 0.5 * ((y - mean) / sigma) ** 2
-
-
-@nb.njit
-def gaussian_neg_llk(points, mean, sigma):
-    llk = 0
-    for y, m in zip(points, mean):
-        llk -= gaussian_log_pdf(y, m, sigma)
-    return llk
-
-
-@nb.njit
-def binomial_log_pdf(y, prob, trials):
-    if prob == 0 or prob == 1 or y > trials:
-        return -np.inf
-    else:
-        return log_binom_coeff(trials, y) + y * np.log(prob) + (trials - y) * np.log(1 - prob)
-
-
-@nb.njit
-def binomial_neg_llk(points, probs, trials):
-    llk = 0
-    for y, p, t in zip(points, probs, trials):
-        llk -= binomial_log_pdf(y, p, t)
-    return llk
-
-
-@nb.njit
-def bernoulli_log_pdf(y, prob):
-    if prob in (0, 1):
-        return -np.inf
-    else:
-        return y * np.log(prob) + (1 - y) * np.log(1 - prob)
-
-
-@nb.njit
-def bernoulli_neg_llk(points, probs):
-    llk = 0
-    for (
-        y,
-        p,
-    ) in zip(points, probs):
-        llk -= bernoulli_log_pdf(y, p)
-    return llk
-
-
-@nb.njit
-def poisson_log_pdf(y, lam):
-    if lam == 0:
-        return -np.inf
-    else:
-        return y * np.log(lam) - lam - log_factorial(y)
-
-
-@nb.njit
-def poisson_neg_llk(points, lam):
-    llk = 0
-    for y, l in zip(points, lam):
-        llk -= poisson_log_pdf(y, l)
-    return llk
-
-
-LIKELIHOODS = {
-    "gaussian": gaussian_neg_llk,
-    "binomial": binomial_neg_llk,
-    "bernoulli": bernoulli_neg_llk,
-    "poisson": poisson_neg_llk,
-}
diff --git a/kulprit/projection/solver.py b/kulprit/projection/solver.py
index b30af3d..295a412 100644
--- a/kulprit/projection/solver.py
+++ b/kulprit/projection/solver.py
@@ -1,5 +1,6 @@
 """optimization module."""
 
+# pylint: disable=protected-access
 from typing import List, Optional
 import warnings
 
@@ -7,11 +8,11 @@
 import bambi as bmb
 import numpy as np
 import xarray as xr
+import preliz as pz
 
 from scipy.optimize import minimize
 
 from kulprit.data.submodel import SubModel
-from kulprit.projection.likelihood import LIKELIHOODS
 
 
 class Solver:
@@ -36,11 +37,8 @@ def __init__(
         self.num_chain = self.ref_idata.posterior.dims["chain"]
         self.num_samples = self.num_chain * 100
 
-        try:
-            # define the negative log likelihood function of the submodel
-            self.neg_log_likelihood = LIKELIHOODS[self.ref_family]
-        except KeyError:
-            raise NotImplementedError from None
+        if self.ref_family not in ["gaussian", "poisson", "bernoulli", "binomial"]:
+            raise NotImplementedError(f"Family {self.ref_family} not supported")
 
     @property
     def pps(self):
@@ -115,20 +113,20 @@ def objective(
         # Gaussian observation likelihood
         if self.ref_family == "gaussian":
            linear_predictor = _linear_predict(beta_x=params[:-1], X=X)
-            neg_llk = self.neg_log_likelihood(points=obs, mean=linear_predictor, sigma=params[-1])
+            neg_llk = pz.Normal(mu=linear_predictor, sigma=params[-1])._neg_logpdf(obs)
 
         # Binomial observation likelihood
         elif self.ref_family == "binomial":
             trials = self.ref_model.response.data[:, 1]
             linear_predictor = _linear_predict(beta_x=params, X=X)
             probs = self.ref_model.family.link["p"].linkinv(linear_predictor)
-            neg_llk = self.neg_log_likelihood(points=obs, probs=probs, trials=trials)
+            neg_llk = pz.Binomial(n=trials, p=probs)._neg_logpdf(obs)
 
         # Bernoulli observation likelihood
         elif self.ref_family == "bernoulli":
             linear_predictor = _linear_predict(beta_x=params, X=X)
             probs = self.ref_model.family.link["p"].linkinv(linear_predictor)
-            neg_llk = self.neg_log_likelihood(points=obs, probs=probs)
+            neg_llk = pz.Binomial(p=probs)._neg_logpdf(obs)
 
         # Poisson observation likelihood
         elif self.ref_family == "poisson":
@@ -136,7 +134,8 @@
                 warnings.filterwarnings("ignore", message="overflow encountered in exp")
                 linear_predictor = _linear_predict(beta_x=params, X=X)
                 lam = self.ref_model.family.link["mu"].linkinv(np.float64(linear_predictor))
-                neg_llk = self.neg_log_likelihood(points=obs, lam=lam)
+                neg_llk = pz.Poisson(mu=lam)._neg_logpdf(obs)
+
         return neg_llk
 
     def solve(self, term_names: List[str], X: np.ndarray, slices: dict) -> SubModel:
@@ -160,34 +159,21 @@
             SubModel: The projected submodel object
         """
         # build the optimization parameter bounds
-        init = np.hstack(
-            [self.ref_idata.posterior.mean(["chain", "draw"])[term].values for term in term_names]
-        )
-        bounds = self._build_bounds(init)
+        term_values = az.extract(self.ref_idata.posterior, num_samples=self.pps.shape[0])
+        init = np.stack([term_values[key].values.flatten() for key in term_names]).T
+        bounds = self._build_bounds(init[0])
 
-        # perform mean-field variational projection predictive inference
+        # perform projection predictive inference
         res_posterior = []
        objectives = []
 
-        chains = np.random.randint(0, self.ref_idata.posterior.dims["chain"], self.pps.shape[0])
-        draws = np.random.randint(0, self.ref_idata.posterior.dims["draw"], self.pps.shape[0])
-
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", message="Values in x were outside bounds")
             for idx, obs in enumerate(self.pps):
-                # use samples from reference model posterior as initial guess
-                init = np.hstack(
-                    [
-                        self.ref_idata.posterior.sel({"chain": chains[idx], "draw": draws[idx]})[
-                            term
-                        ].values
-                        for term in term_names
-                    ]
-                )
                 opt = minimize(
                     self.objective,
                     args=(obs, X),
-                    x0=init,
+                    x0=init[idx],
                     tol=0.001,
                     bounds=bounds,
                     method="SLSQP",
diff --git a/pyproject.toml b/pyproject.toml
index 3c2ca69..b2cadfe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,9 +24,11 @@ classifiers = [
 dynamic = ["version"]
 description = "Kullback-Leibler projections for Bayesian model selection."
 dependencies = [
+  "arviz>=0.17.1",
   "bambi>=0.12.0",
   "scikit-learn>=1.0.2",
   "numba>=0.56.0",
+  "preliz>=0.4.1"
 ]
 
 [tool.flit.module]
diff --git a/tests/test_likelihood.py b/tests/test_likelihood.py
deleted file mode 100644
index ad7aa35..0000000
--- a/tests/test_likelihood.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# pylint: disable=no-self-use
-import numpy as np
-
-from scipy import stats
-
-import pytest
-
-from kulprit.projection.likelihood import (
-    gaussian_neg_llk,
-    binomial_neg_llk,
-    bernoulli_neg_llk,
-    poisson_neg_llk,
-)
-
-
-class TestLikelihood:
-    """Test the likelihood methods implemented."""
-
-    def test_gaussian_likelihood(self):
-        # produce random samples
-        data = np.random.random(10)
-
-        # define the parameters of the Gaussian
-        mus = np.random.random(10)
-        sigma = 1.0
-
-        # compute the log likelihood using scipy
-        scipy_llk = stats.norm(mus, sigma).logpdf(data).sum()
-
-        # test that kulprit produces similar results
-        assert -gaussian_neg_llk(data, mus, sigma) == pytest.approx(scipy_llk)
-
-    def test_binomial_likelihood(self):
-        # produce random samples
-        data = np.random.randint(11, size=10)
-
-        # define the parameters of the Binomial
-        ns_ = np.random.randint(11, size=10)
-        probs = np.random.uniform(low=0, high=1, size=10)
-
-        # compute the log likelihood using scipy
-        scipy_llk = stats.binom(ns_, probs).logpmf(data).sum()
-
-        # test that kulprit produces similar results
-        assert -binomial_neg_llk(data, probs, ns_) == pytest.approx(scipy_llk)
-
-    def test_bernoulli_likelihood(self):
-        # produce random samples
-        data = np.random.randint(2, size=10)
-
-        # define the parameters of the Bernoulli
-        probs = np.random.uniform(low=0, high=1, size=10)
-
-        # compute the log likelihood using scipy
-        scipy_llk = stats.bernoulli(probs).logpmf(data).sum()
-
-        # test that kulprit produces similar results
-        assert -bernoulli_neg_llk(data, probs) == pytest.approx(scipy_llk)
-
-    def test_poisson_likelihood(self):
-        # produce random samples
-        data = np.random.randint(11, size=10)
-
-        # define the parameters of the Poisson
-        lambdas = np.random.randint(11, size=10)
-
-        # compute the log likelihood using scipy
-        scipy_llk = stats.poisson(lambdas).logpmf(data).sum()
-
-        # test that kulprit produces similar results
-        assert -poisson_neg_llk(data, lambdas) == pytest.approx(scipy_llk)