Skip to content

Commit

Permalink
Merge branch 'main' into fix_ext_rev_features_chunksize
Browse files (browse the repository at this point in the history)
  • Loading branch information
nils-braun authored Jul 14, 2024
2 parents 2d3488d + 1a71969 commit 893033c
Show file tree
Hide file tree
Showing 10 changed files with 103 additions and 107 deletions.
10 changes: 5 additions & 5 deletions tests/units/feature_extraction/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def test_f(chunk):
)

def test_with_wrong_input(self):
test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": np.NaN}])
test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": np.nan}])
self.assertRaises(ValueError, to_tsdata, test_df, "id", "kind", "value", "sort")

test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": 1}])
Expand Down Expand Up @@ -489,10 +489,10 @@ def test_with_wrong_input(self):
ValueError, to_tsdata, test_df, "id", "strange_kind", "value", "sort"
)

test_df = pd.DataFrame([{"id": np.NaN, "kind": "a", "value": 3, "sort": 1}])
test_df = pd.DataFrame([{"id": np.nan, "kind": "a", "value": 3, "sort": 1}])
self.assertRaises(ValueError, to_tsdata, test_df, "id", "kind", "value", "sort")

test_df = pd.DataFrame([{"id": 0, "kind": np.NaN, "value": 3, "sort": 1}])
test_df = pd.DataFrame([{"id": 0, "kind": np.nan, "value": 3, "sort": 1}])
self.assertRaises(ValueError, to_tsdata, test_df, "id", "kind", "value", "sort")

test_df = pd.DataFrame([{"id": 2}, {"id": 1}])
Expand All @@ -518,10 +518,10 @@ def test_with_wrong_input(self):
# If there is more than one column, the algorithm cannot choose the correct column
self.assertRaises(ValueError, to_tsdata, test_dict, "id", None, None, None)

test_df = pd.DataFrame([{"id": 0, "value": np.NaN}])
test_df = pd.DataFrame([{"id": 0, "value": np.nan}])
self.assertRaises(ValueError, to_tsdata, test_df, "id", None, "value", None)

test_df = pd.DataFrame([{"id": 0, "value": np.NaN}])
test_df = pd.DataFrame([{"id": 0, "value": np.nan}])
self.assertRaises(ValueError, to_tsdata, test_df, None, None, "value", None)

test_df = pd.DataFrame([{"id": 0, "a_": 3, "b": 5, "sort": 1}])
Expand Down
62 changes: 29 additions & 33 deletions tests/units/feature_extraction/test_feature_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def tearDown(self):
warnings.resetwarnings()

def assertIsNaN(self, result):
self.assertTrue(np.isnan(result), msg="{} is not np.NaN")
self.assertTrue(np.isnan(result), msg="{} is not np.nan")

def assertEqualOnAllArrayTypes(self, f, input_to_f, result, *args, **kwargs):
expected_result = f(input_to_f, *args, **kwargs)
Expand Down Expand Up @@ -575,7 +575,7 @@ def test_length(self):
self.assertEqualOnAllArrayTypes(length, [1, 2, 3, 4], 4)
self.assertEqualOnAllArrayTypes(length, [1, 2, 3], 3)
self.assertEqualOnAllArrayTypes(length, [1, 2], 2)
self.assertEqualOnAllArrayTypes(length, [1, 2, 3, np.NaN], 4)
self.assertEqualOnAllArrayTypes(length, [1, 2, 3, np.nan], 4)
self.assertEqualOnAllArrayTypes(length, [], 0)

def test_standard_deviation(self):
Expand Down Expand Up @@ -1180,26 +1180,26 @@ def test_binned_entropy(self):
self.assertAlmostEqualOnAllArrayTypes(
binned_entropy,
[10] * 10 + [1],
-(10 / 11 * np.math.log(10 / 11) + 1 / 11 * np.math.log(1 / 11)),
-(10 / 11 * math.log(10 / 11) + 1 / 11 * math.log(1 / 11)),
10,
)
self.assertAlmostEqualOnAllArrayTypes(
binned_entropy,
[10] * 10 + [1],
-(10 / 11 * np.math.log(10 / 11) + 1 / 11 * np.math.log(1 / 11)),
-(10 / 11 * math.log(10 / 11) + 1 / 11 * math.log(1 / 11)),
10,
)
self.assertAlmostEqualOnAllArrayTypes(
binned_entropy,
[10] * 10 + [1],
-(10 / 11 * np.math.log(10 / 11) + 1 / 11 * np.math.log(1 / 11)),
-(10 / 11 * math.log(10 / 11) + 1 / 11 * math.log(1 / 11)),
100,
)
self.assertAlmostEqualOnAllArrayTypes(
binned_entropy, list(range(10)), -np.math.log(1 / 10), 100
binned_entropy, list(range(10)), -math.log(1 / 10), 100
)
self.assertAlmostEqualOnAllArrayTypes(
binned_entropy, list(range(100)), -np.math.log(1 / 2), 2
binned_entropy, list(range(100)), -math.log(1 / 2), 2
)

def test_sample_entropy(self):
Expand Down Expand Up @@ -1512,24 +1512,22 @@ def test_value_count(self):
self.assertEqualPandasSeriesWrapper(value_count, [1] * 10, 10, value=1)
self.assertEqualPandasSeriesWrapper(value_count, list(range(10)), 1, value=0)
self.assertEqualPandasSeriesWrapper(value_count, [1] * 10, 0, value=0)
self.assertEqualPandasSeriesWrapper(value_count, [np.NaN, 0, 1] * 3, 3, value=0)
self.assertEqualPandasSeriesWrapper(value_count, [np.nan, 0, 1] * 3, 3, value=0)
self.assertEqualPandasSeriesWrapper(
value_count, [np.NINF, 0, 1] * 3, 3, value=0
)
self.assertEqualPandasSeriesWrapper(
value_count, [np.PINF, 0, 1] * 3, 3, value=0
value_count, [-np.inf, 0, 1] * 3, 3, value=0
)
self.assertEqualPandasSeriesWrapper(value_count, [np.inf, 0, 1] * 3, 3, value=0)
self.assertEqualPandasSeriesWrapper(
value_count, [0.1, 0.2, 0.3] * 3, 3, value=0.2
)
self.assertEqualPandasSeriesWrapper(
value_count, [np.NaN, 0, 1] * 3, 3, value=np.NaN
value_count, [np.nan, 0, 1] * 3, 3, value=np.nan
)
self.assertEqualPandasSeriesWrapper(
value_count, [np.NINF, 0, 1] * 3, 3, value=np.NINF
value_count, [-np.inf, 0, 1] * 3, 3, value=-np.inf
)
self.assertEqualPandasSeriesWrapper(
value_count, [np.PINF, 0, 1] * 3, 3, value=np.PINF
value_count, [np.inf, 0, 1] * 3, 3, value=np.inf
)

def test_range_count(self):
Expand All @@ -1546,7 +1544,7 @@ def test_range_count(self):
range_count, list(range(0, -10, -1)), 9, min=-10, max=0
)
self.assertEqualPandasSeriesWrapper(
range_count, [np.NaN, np.PINF, np.NINF] + list(range(10)), 10, min=0, max=10
range_count, [np.nan, np.inf, -np.inf] + list(range(10)), 10, min=0, max=10
)

def test_approximate_entropy(self):
Expand Down Expand Up @@ -1685,7 +1683,7 @@ def test__aggregate_on_chunks(self):
)
self.assertListEqual(
_aggregate_on_chunks(
x=pd.Series([0, 1, 2, np.NaN, 5]), f_agg="median", chunk_len=2
x=pd.Series([0, 1, 2, np.nan, 5]), f_agg="median", chunk_len=2
),
[0.5, 2, 5],
)
Expand Down Expand Up @@ -1728,7 +1726,7 @@ def test_agg_linear_trend(self):
self.assertAlmostEqual(res['attr_"intercept"__chunk_len_3__f_agg_"median"'], 1)
self.assertAlmostEqual(res['attr_"slope"__chunk_len_3__f_agg_"median"'], 3)

x = pd.Series([np.NaN, np.NaN, np.NaN, -3, -3, -3])
x = pd.Series([np.nan, np.nan, np.nan, -3, -3, -3])
res = agg_linear_trend(x=x, param=param)

res = pd.Series(dict(res))
Expand All @@ -1742,7 +1740,7 @@ def test_agg_linear_trend(self):
self.assertIsNaN(res['attr_"intercept"__chunk_len_3__f_agg_"median"'])
self.assertIsNaN(res['attr_"slope"__chunk_len_3__f_agg_"median"'])

x = pd.Series([np.NaN, np.NaN, -3, -3, -3, -3])
x = pd.Series([np.nan, np.nan, -3, -3, -3, -3])
res = agg_linear_trend(x=x, param=param)

res = pd.Series(dict(res))
Expand Down Expand Up @@ -1946,19 +1944,19 @@ def test_count_above(self):
self.assertEqualPandasSeriesWrapper(
count_above, [0.1, 0.2, 0.3] * 3, 2 / 3, t=0.2
)
self.assertEqualPandasSeriesWrapper(count_above, [np.NaN, 0, 1] * 3, 2 / 3, t=0)
self.assertEqualPandasSeriesWrapper(count_above, [np.nan, 0, 1] * 3, 2 / 3, t=0)
self.assertEqualPandasSeriesWrapper(
count_above, [np.NINF, 0, 1] * 3, 2 / 3, t=0
count_above, [-np.inf, 0, 1] * 3, 2 / 3, t=0
)
self.assertEqualPandasSeriesWrapper(count_above, [np.PINF, 0, 1] * 3, 1, t=0)
self.assertEqualPandasSeriesWrapper(count_above, [np.inf, 0, 1] * 3, 1, t=0)
self.assertEqualPandasSeriesWrapper(
count_above, [np.NaN, 0, 1] * 3, 0, t=np.NaN
count_above, [np.nan, 0, 1] * 3, 0, t=np.nan
)
self.assertEqualPandasSeriesWrapper(
count_above, [np.NINF, 0, np.PINF] * 3, 1, t=np.NINF
count_above, [-np.inf, 0, np.inf] * 3, 1, t=-np.inf
)
self.assertEqualPandasSeriesWrapper(
count_above, [np.PINF, 0, 1] * 3, 1 / 3, t=np.PINF
count_above, [np.inf, 0, 1] * 3, 1 / 3, t=np.inf
)

def test_count_below(self):
Expand All @@ -1968,21 +1966,19 @@ def test_count_below(self):
self.assertEqualPandasSeriesWrapper(
count_below, [0.1, 0.2, 0.3] * 3, 2 / 3, t=0.2
)
self.assertEqualPandasSeriesWrapper(count_below, [np.NaN, 0, 1] * 3, 1 / 3, t=0)
self.assertEqualPandasSeriesWrapper(
count_below, [np.NINF, 0, 1] * 3, 2 / 3, t=0
)
self.assertEqualPandasSeriesWrapper(count_below, [np.nan, 0, 1] * 3, 1 / 3, t=0)
self.assertEqualPandasSeriesWrapper(
count_below, [np.PINF, 0, 1] * 3, 1 / 3, t=0
count_below, [-np.inf, 0, 1] * 3, 2 / 3, t=0
)
self.assertEqualPandasSeriesWrapper(count_below, [np.inf, 0, 1] * 3, 1 / 3, t=0)
self.assertEqualPandasSeriesWrapper(
count_below, [np.NaN, 0, 1] * 3, 0, t=np.NaN
count_below, [np.nan, 0, 1] * 3, 0, t=np.nan
)
self.assertEqualPandasSeriesWrapper(
count_below, [np.NINF, 0, np.PINF] * 3, 1 / 3, t=np.NINF
count_below, [-np.inf, 0, np.inf] * 3, 1 / 3, t=-np.inf
)
self.assertEqualPandasSeriesWrapper(
count_below, [np.PINF, 0, 1] * 3, 1, t=np.PINF
count_below, [np.inf, 0, 1] * 3, 1, t=np.inf
)

def test_benford_correlation(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/units/feature_extraction/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def test_from_column_correct_for_selected_columns(self):

self.assertEqual(
kind_to_fc_parameters[tsn]["value_count"],
[{"value": np.PINF}, {"value": np.NINF}, {"value": np.NaN}],
[{"value": np.inf}, {"value": -np.inf}, {"value": np.nan}],
)

def test_from_column_correct_for_comprehensive_fc_parameters(self):
Expand Down
4 changes: 2 additions & 2 deletions tests/units/feature_selection/test_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@

@pytest.fixture()
def binary_series_with_nan():
return pd.Series([np.NaN, 1, 1])
return pd.Series([np.nan, 1, 1])


@pytest.fixture()
def real_series_with_nan():
return pd.Series([np.NaN, 1, 2])
return pd.Series([np.nan, 1, 2])


@pytest.fixture()
Expand Down
24 changes: 12 additions & 12 deletions tests/units/transformers/test_per_column_imputer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def test_only_nans_and_infs(self):
X = pd.DataFrame(index=list(range(100)))

X["NaNs"] = np.nan * np.ones(100)
X["PINF"] = np.PINF * np.ones(100)
X["NINF"] = np.NINF * np.ones(100)
X["PINF"] = np.inf * np.ones(100)
X["NINF"] = -np.inf * np.ones(100)

with warnings.catch_warnings(record=True) as w:
imputer.fit(X)
Expand All @@ -53,8 +53,8 @@ def test_with_numpy_array(self):
X = pd.DataFrame(index=list(range(100)))

X["NaNs"] = np.nan * np.ones(100)
X["PINF"] = np.PINF * np.ones(100)
X["NINF"] = np.NINF * np.ones(100)
X["PINF"] = np.inf * np.ones(100)
X["NINF"] = -np.inf * np.ones(100)

X_numpy = X.values.copy()

Expand Down Expand Up @@ -87,7 +87,7 @@ def test_with_numpy_array(self):
def test_standard_replacement_behavior(self):
imputer = PerColumnImputer()

data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0]
data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
X = pd.DataFrame({"a": data})
true_X = pd.DataFrame({"a": truth})
Expand All @@ -98,7 +98,7 @@ def test_standard_replacement_behavior(self):
pdt.assert_frame_equal(selected_X, true_X)

def test_partial_preset_col_to_NINF_given(self):
data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0]
data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
X = pd.DataFrame({"a": data})
true_X = pd.DataFrame({"a": truth})
Expand All @@ -112,7 +112,7 @@ def test_partial_preset_col_to_NINF_given(self):
pdt.assert_frame_equal(selected_X, true_X)

def test_partial_preset_col_to_PINF_given(self):
data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0]
data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
X = pd.DataFrame({"a": data})
true_X = pd.DataFrame({"a": truth})
Expand All @@ -126,7 +126,7 @@ def test_partial_preset_col_to_PINF_given(self):
pdt.assert_frame_equal(selected_X, true_X)

def test_partial_preset_col_to_NAN_given(self):
data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0]
data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
X = pd.DataFrame({"a": data})
true_X = pd.DataFrame({"a": truth})
Expand All @@ -151,7 +151,7 @@ def test_different_shapes_fitted_and_transformed(self):
self.assertRaises(ValueError, imputer.transform, X)

def test_preset_has_higher_priority_than_fit(self):
data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0]
data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
truth = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]

X = pd.DataFrame({"a": data})
Expand All @@ -166,8 +166,8 @@ def test_preset_has_higher_priority_than_fit(self):
pdt.assert_frame_equal(selected_X, true_X)

def test_only_parameters_of_last_fit_count(self):
data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0]
data_2 = [np.NINF, np.PINF, np.nan, 10.0, -10.0, 3.0, 3.0]
data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
data_2 = [-np.inf, np.inf, np.nan, 10.0, -10.0, 3.0, 3.0]
truth_a = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]
truth_b = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]

Expand All @@ -185,7 +185,7 @@ def test_only_parameters_of_last_fit_count(self):
pdt.assert_frame_equal(selected_X, true_X)

def test_only_subset_of_columns_given(self):
data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0]
data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
truth_a = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]
truth_b = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
X = pd.DataFrame({"a": data, "b": data})
Expand Down
Loading

0 comments on commit 893033c

Please sign in to comment.