From 042d0788ad22649a316ad7c96b237fde6cc40748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Wed, 5 Jun 2024 10:33:01 +0200 Subject: [PATCH] Complete test BinDisc --- BinDisc.cpp | 2 +- tests/BinDisc_unittest.cpp | 210 +++++++++++++++------- tests/teestkbin.ipynb | 195 -------------------- tests/testKbins.py | 360 +++++++++++++++++++++++++++++++++++-- 4 files changed, 490 insertions(+), 277 deletions(-) delete mode 100644 tests/teestkbin.ipynb diff --git a/BinDisc.cpp b/BinDisc.cpp index 36b0350..4865fdc 100644 --- a/BinDisc.cpp +++ b/BinDisc.cpp @@ -55,7 +55,7 @@ namespace mdlp { const auto indexLower = clip(i, 0, data.size() - 1); const double percentI = static_cast(indexLower) / static_cast(data.size() - 1); const double fraction = - (percentile / 100. - percentI) / + (percentile / 100.0 - percentI) / (static_cast(indexLower + 1) / static_cast(data.size() - 1) - percentI); const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction; if (value != results.back()) diff --git a/tests/BinDisc_unittest.cpp b/tests/BinDisc_unittest.cpp index 2125d00..f6e78dc 100644 --- a/tests/BinDisc_unittest.cpp +++ b/tests/BinDisc_unittest.cpp @@ -1,8 +1,23 @@ +#include +#include +#include #include "gtest/gtest.h" +#include "ArffFiles.h" #include "../BinDisc.h" namespace mdlp { const float margin = 1e-4; + static std::string set_data_path() + { + std::string path = "../datasets/"; + std::ifstream file(path + "iris.arff"); + if (file.is_open()) { + file.close(); + return path; + } + return "../../tests/datasets/"; + } + const std::string data_path = set_data_path(); class TestBinDisc3U : public BinDisc, public testing::Test { public: TestBinDisc3U(int n_bins = 3) : BinDisc(n_bins, strategy_t::UNIFORM) {}; @@ -200,66 +215,137 @@ namespace mdlp { labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; EXPECT_EQ(expected, labels); } - // TEST(TestBinDisc_Gen, X13Bins) - // { - // auto disc = BinDisc(4); - // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; - // disc.fit(X); - // auto cuts = disc.getCutPoints(); - // EXPECT_EQ(3.0, cuts[0]); - // EXPECT_EQ(6.0, cuts[1]); - // EXPECT_EQ(9.0, cuts[2]); - // EXPECT_EQ(numeric_limits::max(), cuts[3]); - // EXPECT_EQ(4, cuts.size()); - // auto labels = disc.transform(X); - // labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; - // EXPECT_EQ(expected, labels); - // } - // TEST(TestBinDisc_Gen, X14Bins) - // { - // auto disc = BinDisc(4); - // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; - // disc.fit(X); - // auto cuts = disc.getCutPoints(); - // EXPECT_EQ(3.0, cuts[0]); - // EXPECT_EQ(6.0, cuts[1]); - // EXPECT_EQ(9.0, cuts[2]); - // EXPECT_EQ(numeric_limits::max(), cuts[3]); - // EXPECT_EQ(4, cuts.size()); - // auto labels = disc.transform(X); - // labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3 }; - // EXPECT_EQ(expected, labels); - // } - // TEST(TestBinDisc_Gen, X15Bins) - // { - // auto disc = BinDisc(4); - // samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; - // disc.fit(X); - // auto cuts = disc.getCutPoints(); - // EXPECT_EQ(3.0, cuts[0]); - // EXPECT_EQ(6.0, cuts[1]); - // EXPECT_EQ(9.0, cuts[2]); - // EXPECT_EQ(numeric_limits::max(), cuts[3]); - // EXPECT_EQ(4, cuts.size()); - // EXPECT_EQ(15, X[0]); // X is not modified - // auto labels = disc.transform(X); - // labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 }; - // EXPECT_EQ(expected, labels); - // } - // TEST(TestBinDisc_Gen, RepeatedValues) - // { - // auto disc = BinDisc(4); - // samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 }; - // // 0 1 2 3 4 5 6 7 8 9 - // disc.fit(X); - // auto cuts = disc.getCutPoints(); - // EXPECT_EQ(1.0, cuts[0]); - // EXPECT_EQ(2.0, cuts[1]); - // EXPECT_EQ(3.0, cuts[2]); - // EXPECT_EQ(numeric_limits::max(), cuts[3]); - // EXPECT_EQ(4, cuts.size()); - // auto labels = disc.transform(X); - // labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 }; - // EXPECT_EQ(expected, labels); - // } + TEST_F(TestBinDisc4U, X13BinsUniform) + { + samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(4.0, cuts[0]); + EXPECT_EQ(7.0, cuts[1]); + EXPECT_EQ(10.0, cuts[2]); + EXPECT_EQ(numeric_limits::max(), cuts[3]); + EXPECT_EQ(4, cuts.size()); + auto labels = transform(X); + labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; + EXPECT_EQ(expected, labels); + } + TEST_F(TestBinDisc4Q, X13BinsQuantile) + { + samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(4.0, cuts[0]); + EXPECT_EQ(7.0, cuts[1]); + EXPECT_EQ(10.0, cuts[2]); + EXPECT_EQ(numeric_limits::max(), cuts[3]); + EXPECT_EQ(4, cuts.size()); + auto labels = transform(X); + labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; + EXPECT_EQ(expected, labels); + } + TEST_F(TestBinDisc4U, X14BinsUniform) + { + samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(4.25, cuts[0]); + EXPECT_EQ(7.5, cuts[1]); + EXPECT_EQ(10.75, cuts[2]); + EXPECT_EQ(numeric_limits::max(), cuts[3]); + EXPECT_EQ(4, cuts.size()); + auto labels = transform(X); + labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; + EXPECT_EQ(expected, labels); + } + TEST_F(TestBinDisc4Q, X14BinsQuantile) + { + samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(4.25, cuts[0]); + EXPECT_EQ(7.5, cuts[1]); + EXPECT_EQ(10.75, cuts[2]); + EXPECT_EQ(numeric_limits::max(), cuts[3]); + EXPECT_EQ(4, cuts.size()); + auto labels = transform(X); + labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; + EXPECT_EQ(expected, labels); + } + TEST_F(TestBinDisc4U, X15BinsUniform) + { + samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(4.5, cuts[0]); + EXPECT_EQ(8, cuts[1]); + EXPECT_EQ(11.5, cuts[2]); + EXPECT_EQ(numeric_limits::max(), cuts[3]); + EXPECT_EQ(4, cuts.size()); + auto labels = transform(X); + labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 }; + EXPECT_EQ(expected, labels); + } + TEST_F(TestBinDisc4Q, X15BinsQuantile) + { + samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(4.5, cuts[0]); + EXPECT_EQ(8, cuts[1]); + EXPECT_EQ(11.5, cuts[2]); + EXPECT_EQ(numeric_limits::max(), cuts[3]); + EXPECT_EQ(4, cuts.size()); + auto labels = transform(X); + labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 }; + EXPECT_EQ(expected, labels); + } + TEST_F(TestBinDisc4U, RepeatedValuesUniform) + { + samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 }; + // 0 1 2 3 4 5 6 7 8 9 + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(1.0, cuts[0]); + EXPECT_EQ(2.0, cuts[1]); + EXPECT_EQ(3.0, cuts[2]); + EXPECT_EQ(numeric_limits::max(), cuts[3]); + EXPECT_EQ(4, cuts.size()); + auto labels = transform(X); + labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 }; + EXPECT_EQ(expected, labels); + } + TEST_F(TestBinDisc4Q, RepeatedValuesQuantile) + { + samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 }; + // 0 1 2 3 4 5 6 7 8 9 + fit(X); + auto cuts = getCutPoints(); + EXPECT_EQ(2.0, cuts[0]); + EXPECT_EQ(3.0, cuts[1]); + EXPECT_EQ(numeric_limits::max(), cuts[2]); + EXPECT_EQ(3, cuts.size()); + auto labels = transform(X); + labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 }; + EXPECT_EQ(expected, labels); + } + TEST_F(TestBinDisc4U, irisUniform) + { + ArffFiles file; + file.load(data_path + "iris.arff", true); + vector& X = file.getX(); + fit(X[0]); + auto Xt = transform(X[0]); + labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; + EXPECT_EQ(expected, Xt); + } + TEST_F(TestBinDisc4Q, irisQuantile) + { + ArffFiles file; + file.load(data_path + "iris.arff", true); + vector& X = file.getX(); + fit(X[0]); + auto Xt = transform(X[0]); + labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; + EXPECT_EQ(expected, Xt); + } } diff --git a/tests/teestkbin.ipynb b/tests/teestkbin.ipynb deleted file mode 100644 index 5cb3541..0000000 --- a/tests/teestkbin.ipynb +++ /dev/null @@ -1,195 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.preprocessing import KBinsDiscretizer" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[array([ 1., 4., 7., 10.])]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/rmontanana/miniconda3/lib/python3.11/site-packages/sklearn/preprocessing/_discretization.py:248: FutureWarning: In version 1.5 onwards, subsample=200_000 will be used by default. Set subsample explicitly to silence this warning in the mean time. Set subsample=None to disable subsampling explicitly.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "X = [15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]\n", - "X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]\n", - "X = [[x] for x in X]\n", - "disc = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n", - "disc.fit(X)\n", - "print(disc.bin_edges_)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0.]\n", - " [0.]\n", - " [0.]\n", - " [1.]\n", - " [1.]\n", - " [1.]\n", - " [2.]\n", - " [2.]\n", - " [2.]\n", - " [2.]]\n" - ] - } - ], - "source": [ - "print(disc.transform(X))" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[array([1. , 3.66666667, 6.33333333, 9. ])]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/rmontanana/miniconda3/lib/python3.11/site-packages/sklearn/preprocessing/_discretization.py:248: FutureWarning: In version 1.5 onwards, subsample=200_000 will be used by default. Set subsample explicitly to silence this warning in the mean time. Set subsample=None to disable subsampling explicitly.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "X=[1,2,3,4,5,6,7,8,9]\n", - "X = [[x] for x in X]\n", - "z = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]\n", - "z = [[x] for x in z]\n", - "disc = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n", - "disc.fit(X)\n", - "print(disc.bin_edges_)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0.]\n", - " [0.]\n", - " [0.]\n", - " [1.]\n", - " [1.]\n", - " [1.]\n", - " [2.]\n", - " [2.]\n", - " [2.]]\n" - ] - } - ], - "source": [ - "print(disc.transform(X))" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 - quantiles=array([ 0. , 33.33333333, 66.66666667, 100. ])\n", - "1 - bin_edges=array([1. , 1. , 1.66666667, 3. ])\n", - "2 - bin_edges=array([1. , 1.66666667, 3. ])\n", - "3 - transform=array([1, 0, 0, 1, 0, 0, 1, 0, 0])\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "X = [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]\n", - "quantiles = np.linspace(0, 100, 4)\n", - "print(f\"0 - {quantiles=}\")\n", - "bin_edges= np.percentile(X, quantiles)\n", - "print(f\"1 - {bin_edges=}\")\n", - "mask = np.ediff1d(bin_edges, to_begin=np.inf) > 1e-8\n", - "bin_edges = bin_edges[mask]\n", - "print(f\"2 - {bin_edges=}\")\n", - "transform = np.searchsorted(bin_edges[1:-1], X, side=\"right\")\n", - "print(f\"3 - {transform=}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "const auto i = static_cast(std::floor(static_cast(inArray.size() - 1) * inPercentile / 100.));\n", - "const auto indexLower = clip(i, 0, inArray.size() - 2);\n", - "const double percentI =static_cast(indexLower) / static_cast(inArray.size() - 1);\n", - "const double fraction =\n", - " (inPercentile / 100. - percentI) /\n", - " (static_cast(indexLower + 1) / static_cast(inArray.size() - 1) - percentI);\n", - "\n", - "NdArray returnArray = {\n", - " arrayCopy[indexLower] + (arrayCopy[indexLower + 1] - arrayCopy[indexLower]) * fraction\n", - "};\n", - "return returnArray;" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tests/testKbins.py b/tests/testKbins.py index 8309146..e2f8fea 100644 --- a/tests/testKbins.py +++ b/tests/testKbins.py @@ -1,4 +1,4 @@ -from turtle import title +from scipy.io.arff import loadarff from sklearn.preprocessing import KBinsDiscretizer @@ -61,22 +61,344 @@ def test(clf, X, expected, title): labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] test(clf4u, X, labels, title="Easy4BinsUniform") test(clf4q, X, labels, title="Easy4BinsQuantile") -# X = [15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] -# X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] -# X = [[x] for x in X] -# disc = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform') -# disc.fit(X) -# print(disc.bin_edges_) - - -# print(disc.transform(X)) - -# X=[1,2,3,4,5,6,7,8,9] -# X = [[x] for x in X] -# z = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] -# z = [[x] for x in z] -# disc = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform') -# disc.fit(X) -# print(disc.bin_edges_) +# +X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0] +labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3] +test(clf4u, X, labels, title="X13BinsUniform") +test(clf4q, X, labels, title="X13BinsQuantile") +# +X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0] +labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3] +test(clf4u, X, labels, title="X14BinsUniform") +test(clf4q, X, labels, title="X14BinsQuantile") +# +X1 = [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] +X2 = [15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] +labels1 = [3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0] +labels2 = [3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0] +test(clf4u, X1, labels1, title="X15BinsUniform") +test(clf4q, X2, labels2, title="X15BinsQuantile") +# +X = [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0] +labels = [0, 1, 1, 1, 2, 2, 3, 3, 3, 3] +test(clf4u, X, labels, title="RepeatedValuesUniform") +test(clf4q, X, labels, title="RepeatedValuesQuantile") -# print(disc.transform(X)) +print(f"Uniform {clf4u.bin_edges_=}") +print(f"Quaintile {clf4q.bin_edges_=}") +print("-" * 80) +# +data, meta = loadarff("tests/datasets/iris.arff") +labelsu = [ + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 1, + 1, + 1, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 3, + 2, + 2, + 1, + 2, + 1, + 2, + 0, + 2, + 1, + 0, + 1, + 1, + 2, + 1, + 2, + 1, + 1, + 2, + 1, + 1, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 1, + 1, + 1, + 2, + 1, + 0, + 1, + 1, + 1, + 2, + 0, + 1, + 2, + 1, + 3, + 2, + 2, + 3, + 0, + 3, + 2, + 3, + 2, + 2, + 2, + 1, + 1, + 2, + 2, + 3, + 3, + 1, + 2, + 1, + 3, + 2, + 2, + 3, + 2, + 2, + 2, + 3, + 3, + 3, + 2, + 2, + 2, + 3, + 2, + 2, + 1, + 2, + 2, + 2, + 1, + 2, + 2, + 2, + 2, + 2, + 2, + 1, +] +labelsq = [ + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 1, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 3, + 3, + 3, + 1, + 3, + 1, + 2, + 0, + 3, + 1, + 0, + 2, + 2, + 2, + 1, + 3, + 1, + 2, + 2, + 1, + 2, + 2, + 2, + 2, + 3, + 3, + 3, + 3, + 2, + 1, + 1, + 1, + 2, + 2, + 1, + 2, + 3, + 2, + 1, + 1, + 1, + 2, + 2, + 0, + 1, + 1, + 1, + 2, + 1, + 1, + 2, + 2, + 3, + 2, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 1, + 2, + 3, + 3, + 3, + 3, + 2, + 3, + 1, + 3, + 2, + 3, + 3, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 2, + 2, + 3, + 2, + 3, + 2, + 3, + 3, + 3, + 2, + 3, + 3, + 3, + 2, + 3, + 2, + 2, +] +test(clf4u, data["sepallength"], labelsu, title="IrisUniform") +test(clf4q, data["sepallength"], labelsq, title="IrisQuantile") +# print("Labels") +# print(labels) +# print("Expected") +# print(expected) +# for i in range(len(labels)): +# if labels[i] != expected[i]: +# print(f"Error at {i} {labels[i]} != {expected[i]}")