rolch 0.1.6 -> 0.1.7

Weight the residual sum of squares by the exponential-forgetting vector in
OnlineGamlss.fit_beta (new helper init_forget_vector), and compute the RSS
denominator in OnlineGamlss.update_beta via the new utils.online_mean_update
(asymptotic/effective-training-length scaled online mean).

(Reconstructed from a whitespace-mangled copy of the patch; hunk line counts
preserved. Spelling of the local variable n_asymptotic fixed in the added
online_mean_update helper.)

diff --git a/pyproject.toml b/pyproject.toml
index 6370e95..7678fcf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "rolch"
-version = "0.1.6"
+version = "0.1.7"
 authors = [
     {name="Simon Hirsch", email="simon.hirsch@stud.uni-due.de"},
     {name="Jonathan Berrisch", email="jonathan.berrisch@uni-due.de"},
diff --git a/src/rolch/online_gamlss.py b/src/rolch/online_gamlss.py
index 06ece7a..aac9638 100644
--- a/src/rolch/online_gamlss.py
+++ b/src/rolch/online_gamlss.py
@@ -8,10 +8,16 @@
     online_coordinate_descent,
     online_coordinate_descent_path,
 )
-from rolch.gram import init_gram, init_inverted_gram, init_y_gram, update_inverted_gram
+from rolch.gram import (
+    init_gram,
+    init_inverted_gram,
+    init_y_gram,
+    update_inverted_gram,
+    init_forget_vector,
+)
 from rolch.information_criteria import select_best_model_by_information_criterion
 from rolch.scaler import OnlineScaler
-from rolch.utils import calculate_effective_training_length
+from rolch.utils import calculate_effective_training_length, online_mean_update
 
 
 class OnlineGamlss:
@@ -135,6 +141,9 @@ def fit_beta(
         iteration_inner,
         param,
     ):
+
+        f = init_forget_vector(self.forget, self.n_obs)
+
         if self.method == "ols":
             lambda_max = None
             lambda_path = None
@@ -143,10 +152,7 @@
             beta = (x_gram @ y_gram).flatten()
             residuals = y - X @ beta.T
 
-            if self.method == "ols" or self.intercept_only[param]:
-                rss = np.sum(residuals**2 * w) / np.mean(w)
-            else:
-                rss = np.sum(residuals**2 * w[:, None], axis=0) / np.mean(w)
+            rss = np.sum(residuals**2 * w * f) / np.mean(w * f)
 
         elif (self.method == "lasso") & self.intercept_only[param]:
             lambda_max = None
@@ -167,10 +173,7 @@
             )[0]
             residuals = y - X @ beta.T
 
-            if self.method == "ols" or self.intercept_only[param]:
-                rss = np.sum(residuals**2 * w, axis=0) / np.mean(w)
-            elif self.method == "lasso":
-                rss = np.sum(residuals**2 * w[:, None], axis=0) / np.mean(w)
+            rss = np.sum(residuals**2 * w * f, axis=0) / np.mean(w * f)
 
         elif self.method == "lasso":
             intercept = (
@@ -197,10 +200,9 @@
             residuals = y[:, None] - X @ beta_path.T
 
-            if self.method == "ols" or self.intercept_only[param]:
-                rss = np.sum(residuals**2 * w, axis=0) / np.mean(w)
-            elif self.method == "lasso":
-                rss = np.sum(residuals**2 * w[:, None], axis=0) / np.mean(w)
+            rss = np.sum(residuals**2 * w[:, None] * f[:, None], axis=0) / np.mean(
+                w * f
+            )
 
             model_params_n = np.sum(~np.isclose(beta_path, 0), axis=1)
 
             best_ic = select_best_model_by_information_criterion(
@@ -228,6 +230,11 @@ def update_beta(
         iteration_inner,
         param,
     ):
+
+        denom = online_mean_update(
+            self.mean_of_weights[param], w, self.forget, self.n_obs
+        )
+
         if self.method == "ols":
             # Not relevant for OLS
             lambda_max = None
@@ -240,7 +247,7 @@
             rss = (
                 (residuals**2).flatten() * w
                 + (1 - self.forget) * (self.rss[param] * self.mean_of_weights[param])
-            ) / (self.mean_of_weights[param] * (1 - self.forget) + w)
+            ) / denom
 
         elif (self.method == "lasso") & self.intercept_only[param]:
             lambda_max = None
@@ -264,7 +271,7 @@
             rss = (
                 (residuals**2).flatten() * w
                 + (1 - self.forget) * (self.rss[param] * self.mean_of_weights[param])
-            ) / (self.mean_of_weights[param] * (1 - self.forget) + w)
+            ) / denom
 
         elif self.method == "lasso":
             intercept = (
@@ -293,7 +300,7 @@
             rss = (
                 (residuals**2).flatten() * w
                 + (1 - self.forget) * (self.rss[param] * self.mean_of_weights[param])
-            ) / (self.mean_of_weights[param] * (1 - self.forget) + w)
+            ) / denom
 
             model_params_n = np.sum(np.isclose(beta_path, 0), axis=1)
             best_ic = select_best_model_by_information_criterion(
diff --git a/src/rolch/utils.py b/src/rolch/utils.py
index b235b6a..6c92225 100644
--- a/src/rolch/utils.py
+++ b/src/rolch/utils.py
@@ -1,4 +1,5 @@
 import sys
+import numpy as np
 
 
 def calculate_asymptotic_training_length(forget: float):
@@ -14,3 +15,21 @@ def calculate_effective_training_length(forget: float, n_obs: int):
         return n_obs
     else:
         return (1 - (1 - forget) ** n_obs) / forget
+
+
+def online_mean_update(avg: float, value: float, forget: float, n_seen: int):
+
+    n_asymptotic = calculate_asymptotic_training_length(forget)
+    n_eff = calculate_effective_training_length(forget, n_seen)
+
+    forget_scaled = forget * np.maximum(n_asymptotic / n_eff, 1.0)
+
+    diff = value - avg
+    incr = forget_scaled * diff
+
+    if forget_scaled > 0:
+        new_avg = avg + incr
+    else:
+        new_avg = avg + diff / n_seen
+
+    return new_avg