From f90fd14f8199095d6de09d657d78f15721c04179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Wed, 5 Jun 2024 00:04:08 +0200 Subject: [PATCH] Add first 4 bins test --- tests/BinDisc_unittest.cpp | 54 ++++++++++++++++++++++++++------------ tests/testKbins.py | 10 +++++++ 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/tests/BinDisc_unittest.cpp b/tests/BinDisc_unittest.cpp index 44f9ead..2125d00 100644 --- a/tests/BinDisc_unittest.cpp +++ b/tests/BinDisc_unittest.cpp @@ -2,15 +2,22 @@ #include "../BinDisc.h" namespace mdlp { + const float margin = 1e-4; class TestBinDisc3U : public BinDisc, public testing::Test { public: TestBinDisc3U(int n_bins = 3) : BinDisc(n_bins, strategy_t::UNIFORM) {}; - float margin = 1e-4; }; class TestBinDisc3Q : public BinDisc, public testing::Test { public: TestBinDisc3Q(int n_bins = 3) : BinDisc(n_bins, strategy_t::QUANTILE) {}; - float margin = 1e-4; + }; + class TestBinDisc4U : public BinDisc, public testing::Test { + public: + TestBinDisc4U(int n_bins = 4) : BinDisc(n_bins, strategy_t::UNIFORM) {}; + }; + class TestBinDisc4Q : public BinDisc, public testing::Test { + public: + TestBinDisc4Q(int n_bins = 4) : BinDisc(n_bins, strategy_t::QUANTILE) {}; }; TEST_F(TestBinDisc3U, Easy3BinsUniform) { @@ -165,21 +172,34 @@ namespace mdlp { EXPECT_EQ(expected, labels); EXPECT_EQ(3.0, X[0]); // X is not modified } - // TEST(TestBinDisc_Gen, Easy4Bins) - // { - // auto disc = BinDisc(4); - // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; - // disc.fit(X); - // auto cuts = disc.getCutPoints(); - // EXPECT_EQ(3.0, cuts[0]); - // EXPECT_EQ(6.0, cuts[1]); - // EXPECT_EQ(9.0, cuts[2]); - // EXPECT_EQ(numeric_limits::max(), cuts[3]); - // EXPECT_EQ(4, cuts.size()); - // auto labels = disc.transform(X); - // labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; - // EXPECT_EQ(expected, labels); - // } + TEST_F(TestBinDisc4U, Easy4BinsUniform) + { + 
samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(3.75, cuts[0]); + EXPECT_EQ(6.5, cuts[1]); + EXPECT_EQ(9.25, cuts[2]); + EXPECT_EQ(numeric_limits<float>::max(), cuts[3]); + EXPECT_EQ(4, cuts.size()); + auto labels = transform(X); + labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; + EXPECT_EQ(expected, labels); + } + TEST_F(TestBinDisc4Q, Easy4BinsQuantile) + { + samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(3.75, cuts[0]); + EXPECT_EQ(6.5, cuts[1]); + EXPECT_EQ(9.25, cuts[2]); + EXPECT_EQ(numeric_limits<float>::max(), cuts[3]); + EXPECT_EQ(4, cuts.size()); + auto labels = transform(X); + labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; + EXPECT_EQ(expected, labels); + } // TEST(TestBinDisc_Gen, X13Bins) // { // auto disc = BinDisc(4); diff --git a/tests/testKbins.py b/tests/testKbins.py index 411b157..8309146 100644 --- a/tests/testKbins.py +++ b/tests/testKbins.py @@ -20,6 +20,12 @@ def test(clf, X, expected, title): clf3q = KBinsDiscretizer( n_bins=3, encode="ordinal", strategy="quantile", subsample=200_000 ) +clf4u = KBinsDiscretizer( + n_bins=4, encode="ordinal", strategy="uniform", subsample=200_000 +) +clf4q = KBinsDiscretizer( + n_bins=4, encode="ordinal", strategy="quantile", subsample=200_000 +) # X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] labels = [0, 0, 0, 1, 1, 1, 2, 2, 2] @@ -51,6 +57,10 @@ def test(clf, X, expected, title): test(clf3u, X, labels, title="EasyRepeatedUniform") test(clf3q, X, labels2, title="EasyRepeatedQuantile") # +X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0] +labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] +test(clf4u, X, labels, title="Easy4BinsUniform") +test(clf4q, X, labels, title="Easy4BinsQuantile") # X = [15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] # X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 
7.0, 8.0, 9.0, 10.0] # X = [[x] for x in X]