Skip to content

Commit

Permalink
fix slow tests for metalearner
Browse files Browse the repository at this point in the history
Summary:
Certain kats tests are disabled, since they are timing out and failing. This diff improves the performance of the tests by removing some of the slow features being computed.

More precisely, this diff removes the `"firstmin_ac"` feature. It also propagates the parameters for computing only selected features, so that this functionality can be reused when creating models.

Analysis of slow features was performed in N5487518.

Differential Revision: D58581652

fbshipit-source-id: ad846aad9cdd5c8b0994339ac2a3308d111bcc50
  • Loading branch information
islijepcevic authored and facebook-github-bot committed Jun 14, 2024
1 parent 635fef8 commit 56d1d47
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 33 deletions.
7 changes: 5 additions & 2 deletions kats/models/metalearner/get_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,18 +298,21 @@ def tune_executor(self) -> Dict[str, Any]:

def get_meta_data(
self,
**kwargs: Any,
**tsfeatures_kwargs: Any,
) -> GetMetaDataVal:
"""Get meta data, as well as search method and type of error metric
Meta data includes time series features, best hyper-params for each candidate models, and best model.
Args:
tsfeatures_kwargs: keyword arguments for TsFeatures.
Returns:
A dictionary storing the best hyper-parameters and the errors for each candidate model, the features of the time series data, the hyper-parameter searching method,
the error metric used for model evaluation and the corresponding best model.
"""

features_dict = TsFeatures(**kwargs).transform(self.data)
features_dict = TsFeatures(**tsfeatures_kwargs).transform(self.data)

# feature contains nan, pass
# pyre-fixme[16]: `List` has no attribute `values`.
Expand Down
10 changes: 8 additions & 2 deletions kats/models/metalearner/metalearner_hpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,12 +634,18 @@ def train(
LOGGER.info(f"Early stopping! Stop at epoch {epoch + 1}.")
break

def pred(self, source_ts: TimeSeriesData, ts_scale: bool = True) -> pd.DataFrame:
def pred(
self,
source_ts: TimeSeriesData,
ts_scale: bool = True,
**tsfeatures_kwargs: Any,
) -> pd.DataFrame:
"""Predict hyper-parameters for a new time series data.
Args:
source_ts: :class:`kats.consts.TimeSeriesData` object representing the time series for which to generate hyper-parameters
ts_scale: A boolean to specify whether or not to rescale time series data (i.e., divide its value by its maximum value) before calculating its features. Default is True.
**tsfeatures_kwargs: keyword arguments for TsFeatures.
Returns:
A `pandas.DataFrame` object storing the recommended hyper-parameters.
Expand All @@ -659,7 +665,7 @@ def pred(self, source_ts: TimeSeriesData, ts_scale: bool = True) -> pd.DataFrame
LOGGER.info(msg)

self.model.eval()
new_feature = TsFeatures().transform(ts)
new_feature = TsFeatures(**tsfeatures_kwargs).transform(ts)
# pyre-fixme[16]: `List` has no attribute `values`.
new_feature_vector = np.asarray(list(new_feature.values()))

Expand Down
18 changes: 14 additions & 4 deletions kats/models/metalearner/metalearner_modelselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,14 +383,19 @@ def load_model(self, file_name: str) -> None:
raise ValueError(msg)

def pred(
self, source_ts: TimeSeriesData, ts_scale: bool = True, n_top: int = 1
self,
source_ts: TimeSeriesData,
ts_scale: bool = True,
n_top: int = 1,
**tsfeatures_kwargs: Any,
) -> Union[str, List[str]]:
"""Predict the best forecasting model for a new time series data.
Args:
source_ts: :class:`kats.consts.TimeSeriesData` object representing the new time series data.
ts_scale: Optional; A boolean to specify whether or not to rescale time series data (i.e., normalizing it with its maximum value) before calculating features. Default is True.
n_top: Optional; An integer for the number of top model names to return. Default is 1.
**tsfeatures_kwargs: keyword arguments for TsFeatures.
Returns:
A string or a list of strings of the names of forecasting models.
Expand All @@ -408,7 +413,7 @@ def pred(
msg = "Successful scaled! Each value of TS has been divided by the max value of TS."
logging.info(msg)

new_features = TsFeatures().transform(ts)
new_features = TsFeatures(**tsfeatures_kwargs).transform(ts)
# pyre-fixme[16]: `List` has no attribute `values`.
new_features_vector = np.asarray(list(new_features.values()))
if np.any(np.isnan(new_features_vector)):
Expand Down Expand Up @@ -467,7 +472,11 @@ def _bootstrap(self, data: np.ndarray, rep: int = 200) -> float:
return pvalue

def pred_fuzzy(
self, source_ts: TimeSeriesData, ts_scale: bool = True, sig_level: float = 0.2
self,
source_ts: TimeSeriesData,
ts_scale: bool = True,
sig_level: float = 0.2,
**tsfeatures_kwargs: Any,
) -> Dict[str, Any]:
"""Predict a forecasting model for a new time series data using fuzzy method.
Expand All @@ -479,6 +488,7 @@ def pred_fuzzy(
ts_scale: Optional; A boolean to specify whether or not to rescale time series data (i.e., normalizing it with its maximum value) before calculating features. Default is True.
sig_level: Optional; A float representing the significance level for bootstrap test. If pvalue>=sig_level, then we deem there is no difference between the best and the second best model.
Default is 0.2.
**tsfeatures_kwargs: keyword arguments for TsFeatures.
Returns:
A dictionary of prediction results, including forecasting models, their probability of being the best forecasting models and the pvalues of bootstrap tests.
Expand All @@ -489,7 +499,7 @@ def pred_fuzzy(
# scale time series to make ts features more stable
ts.value /= ts.value.max()
# pyre-fixme[16]: `List` has no attribute `values`.
test = np.asarray(list(TsFeatures().transform(ts).values()))
test = np.asarray(list(TsFeatures(**tsfeatures_kwargs).transform(ts).values()))
test[np.isnan(test)] = 0.0
if self.scale:
test = (test - self.x_mean) / self.x_std
Expand Down
10 changes: 8 additions & 2 deletions kats/models/metalearner/metalearner_predictability.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,12 +263,18 @@ def train(
self._clf_threshold = clf_threshold
return ans

def pred(self, source_ts: TimeSeriesData, ts_rescale: bool = True) -> bool:
def pred(
self,
source_ts: TimeSeriesData,
ts_rescale: bool = True,
**tsfeatures_kwargs: Any,
) -> bool:
"""Predict whether a time series is predictable or not.
Args:
source_ts: :class:`kats.consts.TimeSeriesData` object representing the new time series data.
ts_rescale: Optional; A boolean to specify whether or not to rescale time series data (i.e., normalizing it with its maximum value) before calculating features. Default is True.
**tsfeatures_kwargs: keyword arguments for TsFeatures.
Returns:
A boolean representing whether the time series is predictable or not.
Expand All @@ -283,7 +289,7 @@ def pred(self, source_ts: TimeSeriesData, ts_rescale: bool = True) -> bool:
ts.value /= ts.value.max()
msg = "Successful scaled! Each value of TS has been divided by the max value of TS."
logging.info(msg)
features = TsFeatures().transform(ts)
features = TsFeatures(**tsfeatures_kwargs).transform(ts)
# pyre-fixme[16]: `List` has no attribute `values`.
x = np.array(list(features.values()))
if np.sum(np.isnan(x)) > 0:
Expand Down
54 changes: 35 additions & 19 deletions kats/tests/models/test_metalearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import collections
import logging
import random
from typing import Dict, List
from unittest import TestCase
from unittest.mock import patch

Expand All @@ -32,6 +33,7 @@
METALEARNING_TEST_METADATA_STR,
METALEARNING_TEST_MULTI,
METALEARNING_TEST_T1,
METALEARNING_TEST_T1_FEATURES,
METALEARNING_TEST_T2,
)

Expand Down Expand Up @@ -83,8 +85,9 @@

t1 = TimeSeriesData(METALEARNING_TEST_T1)
t2 = TimeSeriesData(METALEARNING_TEST_T2)
# pyre-fixme[5]: Global expression must be annotated.
feature = np.array(METALEARNING_TEST_FEATURES)
feature: np.ndarray = np.array(METALEARNING_TEST_FEATURES)
num_features: int = feature.shape[1]
feature_names: List[str] = list(METALEARNING_TEST_T1_FEATURES.keys())


# pyre-fixme[3]: Return type must be annotated.
Expand All @@ -93,19 +96,19 @@ def generate_meta_data(n):
# generate meta data to initialize MetaLearnModelSelect
np.random.seed(560)
random.seed(560)
spaces = {m: base_models[m].get_parameter_search_space() for m in base_models}
spaces = {m: model.get_parameter_search_space() for m, model in base_models.items()}

m = len(base_models)
res = np.abs(np.random.uniform(0, 1.0, n * m)).reshape(n, -1)
features = np.random.randn(n * 40).reshape(n, -1)
features = np.random.randn(n * num_features).reshape(n, -1)
generators = {
m: Models.UNIFORM(
SearchSpace(
[InstantiationBase.parameter_from_json(item) for item in spaces[m]]
[InstantiationBase.parameter_from_json(item) for item in space]
),
deduplicate=False,
)
for m in spaces
for m, space in spaces.items()
}
models = list(base_models.keys())
ans = []
Expand All @@ -119,15 +122,15 @@ def generate_meta_data(n):
{
"hpt_res": hpt,
"best_model": np.random.choice(models),
"features": {str(k): features[i, k] for k in range(features.shape[1])},
"features": {str(k): features[i, k] for k in range(num_features)},
}
)
return ans


# pyre-fixme[3]: Return type must be annotated.
# pyre-fixme[2]: Parameter must be annotated.
def generate_meta_data_by_model(model, n, d=40):
def generate_meta_data_by_model(model, n, d=num_features):
random.seed(560)
np.random.seed(560)
model = model.lower()
Expand Down Expand Up @@ -179,6 +182,10 @@ def generate_meta_data_by_model(model, n, d=40):
"sarima": SARIMAParams,
}

tsfeatures_params: Dict[str, List[str]] = {
"selected_features": feature_names,
}


# pyre-fixme[3]: Return type must be annotated.
# pyre-fixme[2]: Parameter must be annotated.
Expand Down Expand Up @@ -210,7 +217,7 @@ def test_get_meta_data(self) -> None:
all_models=candidate_models,
all_params=candidate_params,
)
res = metadata.get_meta_data()
res = metadata.get_meta_data(**tsfeatures_params)

# test meta data output
self.assertEqual(
Expand Down Expand Up @@ -298,15 +305,19 @@ def test_model(self) -> None:
mlms.train(method="RandomForest")
# Test prediction consistency
t2_df = t2.to_dataframe().copy()
pred = mlms.pred(t2)
pred_fuzzy = mlms.pred_fuzzy(t2)
pred_all = mlms.pred(t2, n_top=2)
# pyre-ignore[6]: tsfeatures_params is not a positional arg
pred = mlms.pred(t2, **tsfeatures_params)
# pyre-ignore[6]: tsfeatures_params is not a positional arg
pred_fuzzy = mlms.pred_fuzzy(t2, **tsfeatures_params)
# pyre-ignore[6]: tsfeatures_params is not a positional arg
pred_all = mlms.pred(t2, n_top=2, **tsfeatures_params)
if pred != pred_fuzzy["label"][0] or pred != pred_all[0]:
msg = f"Prediction is not consistent! Results are: self.pred: {pred}, self.pred_fuzzy: {pred_fuzzy}, self.pred(, n_top=2): {pred_all}"
logging.error(msg)
raise ValueError(msg)
# Test case for time series with nan features
_ = mlms.pred(t1)
# pyre-ignore[6]: tsfeatures_params is not a positional arg
_ = mlms.pred(t1, **tsfeatures_params)
# Test pred_by_feature and its consistency

feature2 = feature.copy()
Expand Down Expand Up @@ -387,7 +398,8 @@ def test_model(self) -> None:
mlp.train()

# Test case for time series with nan features
ts_pred = mlp.pred(t1)
# pyre-ignore[6]: tsfeatures_params is not a positional arg
ts_pred = mlp.pred(t1, **tsfeatures_params)
self.assertTrue(
isinstance(ts_pred, bool),
f"The output of MetaLearnPredictability should be a boolean but receives {type(ts_pred)}.",
Expand All @@ -399,7 +411,8 @@ def test_model(self) -> None:
)

t2_df = t2.to_dataframe().copy()
mlp.pred(t2)
# pyre-ignore[6]: tsfeatures_params is not a positional arg
mlp.pred(t2, **tsfeatures_params)
feature2 = feature.copy()
mlp.pred_by_feature(feature)
# Test if the target TimeSeriesData keeps its original value
Expand Down Expand Up @@ -457,8 +470,10 @@ def test_default_models(self) -> None:
mlhpt.build_network()
mlhpt.train()
# Test case for time series with nan features
_ = (mlhpt.pred(t1).parameters[0],)
_ = mlhpt.pred(t2)
# pyre-ignore[6]: tsfeatures_params is not a positional arg
_ = (mlhpt.pred(t1, **tsfeatures_params).parameters[0],)
# pyre-ignore[6]: tsfeatures_params is not a positional arg
_ = mlhpt.pred(t2, **tsfeatures_params)
mlhpt.pred_by_feature(feature1)
mlhpt.pred_by_feature(feature2)
mlhpt.pred_by_feature(feature3)
Expand Down Expand Up @@ -487,6 +502,7 @@ def test_customized_models(self) -> None:
# Test customized model
mlhpt = MetaLearnHPT(x, y, ["p"], ["d", "q"])
self.assertRaises(ValueError, mlhpt.build_network)
mlhpt.build_network([40], [[5]], [10, 20])
mlhpt.build_network([num_features], [[5]], [10, 20])
mlhpt.train()
mlhpt.pred(t2)
# pyre-ignore[6]: tsfeatures_params is not a positional arg
mlhpt.pred(t2, **tsfeatures_params)
4 changes: 0 additions & 4 deletions kats/tests/models/test_models_dummy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4696,7 +4696,6 @@
"diff2y_pacf5": 1.8304505333773173,
"seas_acf1": 0.16081850213286256,
"seas_pacf1": 0.22189491849213927,
"firstmin_ac": 1,
"firstzero_ac": 3,
"holt_alpha": 0.21052631578947367,
"holt_beta": 0.21052631578947367,
Expand Down Expand Up @@ -4808,7 +4807,6 @@
"diff2y_pacf5": 1.2927400320509825,
"seas_acf1": 0.00474904222956029,
"seas_pacf1": 0.015773034095832947,
"firstmin_ac": 1,
"firstzero_ac": 4,
"holt_alpha": 0.20073207666635207,
"holt_beta": 0.20073038111605043,
Expand Down Expand Up @@ -4853,7 +4851,6 @@
0.21705932,
1.36665286,
-0.59686006,
0.94514967,
-0.11579573,
0.43509245,
0.92586754,
Expand Down Expand Up @@ -4895,7 +4892,6 @@
-0.96291282,
-0.89634859,
0.47413525,
1.66061776,
0.41800094,
-0.50182222,
1.00137551,
Expand Down

0 comments on commit 56d1d47

Please sign in to comment.