From ee21d9471a6a40de2de37c77e8b920f6d1d73dd3 Mon Sep 17 00:00:00 2001
From: sadko4u <sadko4u@gmail.com>
Date: Mon, 28 Aug 2023 16:01:30 +0300
Subject: [PATCH] Split unit tests and performance tests, some AMD-related
 optimizations

---
 CHANGELOG                                     |   1 +
 include/private/dsp/arch/x86/cpuid.h          |  15 ++-
 include/private/dsp/arch/x86/features.h       |   3 +-
 src/main/x86/avx.cpp                          |  14 ++-
 src/main/x86/x86.cpp                          |  32 ++++-
 src/test/ptest/hmath/h_abs_sum.cpp            | 110 +++++++++++++++++
 .../ptest/hmath/{hsum.cpp => h_sqr_sum.cpp}   |  30 +----
 src/test/ptest/hmath/h_sum.cpp                | 110 +++++++++++++++++
 src/test/utest/hmath/h_abs_sum.cpp            | 116 ++++++++++++++++++
 .../utest/hmath/{hsum.cpp => h_sqr_sum.cpp}   |  21 +---
 src/test/utest/hmath/h_sum.cpp                | 116 ++++++++++++++++++
 11 files changed, 507 insertions(+), 61 deletions(-)
 create mode 100644 src/test/ptest/hmath/h_abs_sum.cpp
 rename src/test/ptest/hmath/{hsum.cpp => h_sqr_sum.cpp} (68%)
 create mode 100644 src/test/ptest/hmath/h_sum.cpp
 create mode 100644 src/test/utest/hmath/h_abs_sum.cpp
 rename src/test/utest/hmath/{hsum.cpp => h_sqr_sum.cpp} (77%)
 create mode 100644 src/test/utest/hmath/h_sum.cpp

diff --git a/CHANGELOG b/CHANGELOG
index 26a8dfef..4023c4da 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,7 @@
 === 1.0.15 ===
 * Fixed syntax error in C interface, covered with tests.
 * Bugfix in horizontal summing functions (invalid register clobber list).
+* Some AMD-related optimizations.
 
 === 1.0.14 ===
 * Implemented pcomplex_r2c instruction set.
diff --git a/include/private/dsp/arch/x86/cpuid.h b/include/private/dsp/arch/x86/cpuid.h
index 4909d13c..9b260f2c 100644
--- a/include/private/dsp/arch/x86/cpuid.h
+++ b/include/private/dsp/arch/x86/cpuid.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
- *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
+ * Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2023 Vladimir Sadovnikov <sadko4u@gmail.com>
  *
  * This file is part of lsp-dsp-lib
  * Created on: 31 мар. 2020 г.
@@ -116,12 +116,18 @@
 
 //-------------------------------------------------------------------------
 // Different processor families
+
+#define INTEL_FAMILY_686_CORE                   0x06
+
 #define AMD_FAMILY_K8_HAMMER                    0x0f
 #define AMD_FAMILY_K10                          0x10
 #define AMD_FAMILY_BOBCAT                       0x14
 #define AMD_FAMILY_BULLDOZER                    0x15
 #define AMD_FAMILY_JAGUAR                       0x16
 #define AMD_FAMILY_ZEN_1_2                      0x17
+#define AMD_FAMILY_DHYANA                       0x18
+#define AMD_FAMILY_ZEN_3_4                      0x19
+#define AMD_FAMILY_ZEN_5                        0x1a
 
 #define AMD_MODEL_ZEN_2                         0x31
 
@@ -191,8 +197,9 @@ namespace lsp
         }
 
         uint64_t read_xcr(umword_t xcr_id);
-    }
-}
+
+    } /* namespace x86 */
+} /* namespace lsp */
 
 
 #endif /* PRIVATE_DSP_ARCH_X86_CPUID_H_ */
diff --git a/include/private/dsp/arch/x86/features.h b/include/private/dsp/arch/x86/features.h
index cfe91a90..6455919c 100644
--- a/include/private/dsp/arch/x86/features.h
+++ b/include/private/dsp/arch/x86/features.h
@@ -89,7 +89,8 @@
             {
                 FEAT_FAST_MOVS,         // Processor implements optimized MOVS instruction
                 FEAT_FAST_AVX,          // Fast AVX implementation
-                FEAT_FAST_FMA3          // Fast FMA3 implementation
+                FEAT_FAST_FMA3,         // Fast FMA3 implementation
+                FEAT_BELOW_ZEN3         // CPU has AMD architecture and is below Zen3
             };
 
             /**
diff --git a/src/main/x86/avx.cpp b/src/main/x86/avx.cpp
index 2bf9c0f6..a7775f10 100644
--- a/src/main/x86/avx.cpp
+++ b/src/main/x86/avx.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
- *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
+ * Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2023 Vladimir Sadovnikov <sadko4u@gmail.com>
  *
  * This file is part of lsp-dsp-lib
  * Created on: 31 мар. 2020 г.
@@ -127,8 +127,9 @@
 
                 // This routine sucks on AMD Bulldozer processor family but is pretty great on Intel
                 // Not tested on AMD Processors above Bulldozer family
-                bool favx   = feature_check(f, FEAT_FAST_AVX);
-                bool ffma   = favx && feature_check(f, FEAT_FAST_FMA3);
+                bool favx       = feature_check(f, FEAT_FAST_AVX);
+                bool ffma       = favx && feature_check(f, FEAT_FAST_FMA3);
+                bool below_zen3 = feature_check(f, FEAT_BELOW_ZEN3);
 
                 CEXPORT2_X64(favx, reverse1, reverse1);
                 CEXPORT2_X64(favx, reverse2, reverse2);
@@ -448,7 +449,10 @@
                     CEXPORT2(favx, pcomplex_rdiv2, pcomplex_rdiv2_fma3);
                     CEXPORT2(favx, pcomplex_div3, pcomplex_div3_fma3);
 
-                    CEXPORT2(favx, h_sqr_sum, h_sqr_sum_fma3);
+                    if (!below_zen3)
+                    {
+                        CEXPORT2(favx, h_sqr_sum, h_sqr_sum_fma3);
+                    }
 
                     CEXPORT2(favx, direct_fft, direct_fft_fma3);
                     CEXPORT2(favx, reverse_fft, reverse_fft_fma3);
diff --git a/src/main/x86/x86.cpp b/src/main/x86/x86.cpp
index f53936e7..b6c1067b 100644
--- a/src/main/x86/x86.cpp
+++ b/src/main/x86/x86.cpp
@@ -516,21 +516,45 @@
                     case FEAT_FAST_MOVS:
                         if (f->vendor == CPU_VENDOR_INTEL)
                         {
-                            if ((f->family == 0x6) && (f->model >= 0x5e)) // Should be some Core i3 microarchitecture...
+                            // Should be some Core i3 microarchitecture...
+                            if ((f->family == INTEL_FAMILY_686_CORE) && (f->model >= 0x5e))
                                 return true;
                         }
                         break;
                     case FEAT_FAST_AVX:
                         if (f->vendor == CPU_VENDOR_INTEL) // Any Intel CPU is good enough with AVX
                             return true;
+                        // Only starting with ZEN 1 architecture AMD's implementation of AVX is fast enough
                         if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON))
-                            return (f->family >= AMD_FAMILY_ZEN_1_2); // Only starting with ZEN 1 architecture AMD's implementation of AVX is fast enough
+                        {
+                            if (f->family < AMD_FAMILY_ZEN_1_2)
+                                return false;
+                            if (f->family == AMD_FAMILY_DHYANA)
+                                return false;
+                            return true;
+                        }
                         break;
                     case FEAT_FAST_FMA3:
                         if (f->vendor == CPU_VENDOR_INTEL) // Any Intel CPU is good enough with AVX
                             return true;
-                        if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON)) // Starting with ZEN 2 FMA3 operations are fast enough on AMD
-                            return (f->family >= AMD_FAMILY_ZEN_1_2) && (f->model >= AMD_MODEL_ZEN_2);
+                        // Starting with ZEN 2 FMA3 operations are fast enough on AMD (ZEN 1 shares family 0x17, so the model is checked as well)
+                        if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON))
+                        {
+                            if (f->family < AMD_FAMILY_ZEN_1_2)
+                                return false; // Families older than ZEN
+                            if ((f->family == AMD_FAMILY_ZEN_1_2) && (f->model < AMD_MODEL_ZEN_2))
+                                return false; // ZEN 1 models of the shared ZEN 1/ZEN 2 family
+                            return (f->family != AMD_FAMILY_DHYANA); // Hygon Dhyana stays excluded, newer families pass regardless of model
+                        }
+                        break;
+
+                    case FEAT_BELOW_ZEN3: // Test that this is AMD and below Zen 3 architecture
+                        if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON)) // Hygon is handled the same way as AMD
+                        {
+                            if (f->family < AMD_FAMILY_ZEN_3_4) // Family-only comparison: AMD_FAMILY_DHYANA (0x18) also counts as below Zen 3
+                                return true;
+                            return false;
+                        }
+                        break; // Other vendors leave the switch; the value returned after it is outside this hunk
                     default:
                         break;
diff --git a/src/test/ptest/hmath/h_abs_sum.cpp b/src/test/ptest/hmath/h_abs_sum.cpp
new file mode 100644
index 00000000..7713d12a
--- /dev/null
+++ b/src/test/ptest/hmath/h_abs_sum.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2023 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 31 мар. 2020 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <lsp-plug.in/dsp/dsp.h>
+#include <lsp-plug.in/test-fw/ptest.h>
+#include <lsp-plug.in/test-fw/helpers.h>
+#include <lsp-plug.in/common/alloc.h>
+
+#define MIN_RANK 8
+#define MAX_RANK 16
+
+namespace lsp
+{
+    namespace generic
+    {
+        float h_abs_sum(const float *src, size_t count);
+    }
+
+    IF_ARCH_X86(
+        namespace sse
+        {
+            float h_abs_sum(const float *src, size_t count);
+        }
+
+        namespace avx
+        {
+            float h_abs_sum(const float *src, size_t count);
+        }
+    )
+
+    IF_ARCH_ARM(
+        namespace neon_d32
+        {
+            float h_abs_sum(const float *src, size_t count);
+        }
+    )
+
+    IF_ARCH_AARCH64(
+        namespace asimd
+        {
+            float h_abs_sum(const float *src, size_t count);
+        }
+    )
+
+    typedef float (* h_sum_t)(const float *src, size_t count);
+}
+
+PTEST_BEGIN("dsp.hmath", h_abs_sum, 5, 10000) // Test name matches this file and the function being measured
+
+    void call(const char *label, float *src, size_t count, h_sum_t func) // Benchmarks one implementation on 'count' samples
+    {
+        if (!PTEST_SUPPORTED(func))
+            return;
+
+        char buf[80];
+        sprintf(buf, "%s x %d", label, int(count)); // Benchmark title: "<function> x <sample count>"
+        printf("Testing %s numbers...\n", buf);
+
+        PTEST_LOOP(buf,
+            func(src, count);
+        );
+    }
+
+    PTEST_MAIN
+    {
+        size_t buf_size = 1 << MAX_RANK; // Largest buffer used below: 2^MAX_RANK samples
+        uint8_t *data   = NULL;
+        float *src      = alloc_aligned<float>(data, buf_size, 64);
+
+        for (size_t i=0; i < buf_size; ++i)
+            src[i]          = randf(0.0f, 1.0f);
+
+        #define CALL(func) \
+            call(#func, src, count, func)
+
+        for (size_t i=MIN_RANK; i <= MAX_RANK; ++i) // Power-of-two sizes from 2^MIN_RANK to 2^MAX_RANK
+        {
+            size_t count = 1 << i;
+
+            CALL(generic::h_abs_sum);
+            IF_ARCH_X86(CALL(sse::h_abs_sum));
+            IF_ARCH_X86(CALL(avx::h_abs_sum));
+            IF_ARCH_ARM(CALL(neon_d32::h_abs_sum));
+            IF_ARCH_AARCH64(CALL(asimd::h_abs_sum));
+            PTEST_SEPARATOR;
+        }
+
+        free_aligned(data);
+    }
+
+PTEST_END
+
diff --git a/src/test/ptest/hmath/hsum.cpp b/src/test/ptest/hmath/h_sqr_sum.cpp
similarity index 68%
rename from src/test/ptest/hmath/hsum.cpp
rename to src/test/ptest/hmath/h_sqr_sum.cpp
index e0f26125..5ce55b6b 100644
--- a/src/test/ptest/hmath/hsum.cpp
+++ b/src/test/ptest/hmath/h_sqr_sum.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
- *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
+ * Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2023 Vladimir Sadovnikov <sadko4u@gmail.com>
  *
  * This file is part of lsp-dsp-lib
  * Created on: 31 мар. 2020 г.
@@ -31,50 +31,40 @@ namespace lsp
 {
     namespace generic
     {
-        float h_sum(const float *src, size_t count);
         float h_sqr_sum(const float *src, size_t count);
-        float h_abs_sum(const float *src, size_t count);
     }
 
     IF_ARCH_X86(
         namespace sse
         {
-            float h_sum(const float *src, size_t count);
             float h_sqr_sum(const float *src, size_t count);
-            float h_abs_sum(const float *src, size_t count);
         }
 
         namespace avx
         {
-            float h_sum(const float *src, size_t count);
             float h_sqr_sum(const float *src, size_t count);
             float h_sqr_sum_fma3(const float *src, size_t count);
-            float h_abs_sum(const float *src, size_t count);
         }
     )
 
     IF_ARCH_ARM(
         namespace neon_d32
         {
-            float h_sum(const float *src, size_t count);
             float h_sqr_sum(const float *src, size_t count);
-            float h_abs_sum(const float *src, size_t count);
         }
     )
 
     IF_ARCH_AARCH64(
         namespace asimd
         {
-            float h_sum(const float *src, size_t count);
             float h_sqr_sum(const float *src, size_t count);
-            float h_abs_sum(const float *src, size_t count);
         }
     )
 
     typedef float (* h_sum_t)(const float *src, size_t count);
 }
 
-PTEST_BEGIN("dsp.hmath", hsum, 5, 10000)
+PTEST_BEGIN("dsp.hmath", h_sqr_sum, 5, 10000)
 
     void call(const char *label, float *src, size_t count, h_sum_t func)
     {
@@ -106,13 +96,6 @@ PTEST_BEGIN("dsp.hmath", hsum, 5, 10000)
         {
             size_t count = 1 << i;
 
-            CALL(generic::h_sum);
-            IF_ARCH_X86(CALL(sse::h_sum));
-            IF_ARCH_X86(CALL(avx::h_sum));
-            IF_ARCH_ARM(CALL(neon_d32::h_sum));
-            IF_ARCH_AARCH64(CALL(asimd::h_sum));
-            PTEST_SEPARATOR;
-
             CALL(generic::h_sqr_sum);
             IF_ARCH_X86(CALL(sse::h_sqr_sum));
             IF_ARCH_X86(CALL(avx::h_sqr_sum));
@@ -120,13 +103,6 @@ PTEST_BEGIN("dsp.hmath", hsum, 5, 10000)
             IF_ARCH_ARM(CALL(neon_d32::h_sqr_sum));
             IF_ARCH_AARCH64(CALL(asimd::h_sqr_sum));
             PTEST_SEPARATOR;
-
-            CALL(generic::h_abs_sum);
-            IF_ARCH_X86(CALL(sse::h_abs_sum));
-            IF_ARCH_X86(CALL(avx::h_abs_sum));
-            IF_ARCH_ARM(CALL(neon_d32::h_abs_sum));
-            IF_ARCH_AARCH64(CALL(asimd::h_abs_sum));
-            PTEST_SEPARATOR2;
         }
 
         free_aligned(data);
diff --git a/src/test/ptest/hmath/h_sum.cpp b/src/test/ptest/hmath/h_sum.cpp
new file mode 100644
index 00000000..aa3aebd7
--- /dev/null
+++ b/src/test/ptest/hmath/h_sum.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2023 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 31 мар. 2020 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <lsp-plug.in/dsp/dsp.h>
+#include <lsp-plug.in/test-fw/ptest.h>
+#include <lsp-plug.in/test-fw/helpers.h>
+#include <lsp-plug.in/common/alloc.h>
+
+#define MIN_RANK 8
+#define MAX_RANK 16
+
+namespace lsp
+{
+    namespace generic
+    {
+        float h_sum(const float *src, size_t count);
+    }
+
+    IF_ARCH_X86(
+        namespace sse
+        {
+            float h_sum(const float *src, size_t count);
+        }
+
+        namespace avx
+        {
+            float h_sum(const float *src, size_t count);
+        }
+    )
+
+    IF_ARCH_ARM(
+        namespace neon_d32
+        {
+            float h_sum(const float *src, size_t count);
+        }
+    )
+
+    IF_ARCH_AARCH64(
+        namespace asimd
+        {
+            float h_sum(const float *src, size_t count);
+        }
+    )
+
+    typedef float (* h_sum_t)(const float *src, size_t count);
+}
+
+PTEST_BEGIN("dsp.hmath", h_sum, 5, 10000)
+
+    void call(const char *label, float *src, size_t count, h_sum_t func)
+    {
+        if (!PTEST_SUPPORTED(func))
+            return;
+
+        char buf[80];
+        sprintf(buf, "%s x %d", label, int(count));
+        printf("Testing %s numbers...\n", buf);
+
+        PTEST_LOOP(buf,
+            func(src, count);
+        );
+    }
+
+    PTEST_MAIN
+    {
+        size_t buf_size = 1 << MAX_RANK;
+        uint8_t *data   = NULL;
+        float *src      = alloc_aligned<float>(data, buf_size, 64);
+
+        for (size_t i=0; i < buf_size; ++i)
+            src[i]          = randf(0.0f, 1.0f);
+
+        #define CALL(func) \
+            call(#func, src, count, func)
+
+        for (size_t i=MIN_RANK; i <= MAX_RANK; ++i)
+        {
+            size_t count = 1 << i;
+
+            CALL(generic::h_sum);
+            IF_ARCH_X86(CALL(sse::h_sum));
+            IF_ARCH_X86(CALL(avx::h_sum));
+            IF_ARCH_ARM(CALL(neon_d32::h_sum));
+            IF_ARCH_AARCH64(CALL(asimd::h_sum));
+            PTEST_SEPARATOR;
+        }
+
+        free_aligned(data);
+    }
+
+PTEST_END
+
diff --git a/src/test/utest/hmath/h_abs_sum.cpp b/src/test/utest/hmath/h_abs_sum.cpp
new file mode 100644
index 00000000..872373ed
--- /dev/null
+++ b/src/test/utest/hmath/h_abs_sum.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2023 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 31 мар. 2020 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <lsp-plug.in/common/types.h>
+#include <lsp-plug.in/test-fw/utest.h>
+#include <lsp-plug.in/test-fw/FloatBuffer.h>
+#include <lsp-plug.in/test-fw/helpers.h>
+
+#ifdef ARCH_ARM
+    #define TOLERANCE 1e-3
+#endif
+
+#ifndef TOLERANCE
+    #define TOLERANCE 1e-4
+#endif
+
+namespace lsp
+{
+    namespace generic
+    {
+        float h_abs_sum(const float *src, size_t count);
+    }
+
+    IF_ARCH_X86(
+        namespace sse
+        {
+            float h_abs_sum(const float *src, size_t count);
+        }
+
+        namespace avx
+        {
+            float h_abs_sum(const float *src, size_t count);
+        }
+    )
+
+    IF_ARCH_ARM(
+        namespace neon_d32
+        {
+            float h_abs_sum(const float *src, size_t count);
+        }
+    )
+
+    IF_ARCH_AARCH64(
+        namespace asimd
+        {
+            float h_abs_sum(const float *src, size_t count);
+        }
+    )
+
+    typedef float (* h_sum_t)(const float *src, size_t count);
+}
+
+UTEST_BEGIN("dsp.hmath", h_abs_sum)
+
+    void call(const char *label, size_t align, h_sum_t func1, h_sum_t func2)
+    {
+        if (!UTEST_SUPPORTED(func1))
+            return;
+        if (!UTEST_SUPPORTED(func2))
+            return;
+
+        UTEST_FOREACH(count, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                32, 64, 65, 100, 768, 999, 0x1fff)
+        {
+            for (size_t mask=0; mask <= 0x01; ++mask)
+            {
+                printf("Testing %s on input buffer of %d numbers, mask=0x%x...\n", label, int(count), int(mask));
+
+                FloatBuffer src(count, align, mask & 0x01);
+                src.randomize_sign();
+
+                // Call functions
+                float a = func1(src, count);
+                float b = func2(src, count);
+
+                UTEST_ASSERT_MSG(src.valid(), "Source buffer corrupted");
+
+                // Compare buffers
+                if (!float_equals_adaptive(a, b, TOLERANCE))
+                {
+                    src.dump("src1");
+                    UTEST_FAIL_MSG("Result of function 1 (%f) differs result of function 2 (%f)", a, b);
+                }
+            }
+        }
+    }
+
+    UTEST_MAIN
+    {
+        #define CALL(generic, func, align) \
+            call(#func, align, generic, func);
+
+        IF_ARCH_X86(CALL(generic::h_abs_sum, sse::h_abs_sum, 16));
+        IF_ARCH_X86(CALL(generic::h_abs_sum, avx::h_abs_sum, 32));
+        IF_ARCH_ARM(CALL(generic::h_abs_sum, neon_d32::h_abs_sum, 16));
+        IF_ARCH_AARCH64(CALL(generic::h_abs_sum, asimd::h_abs_sum, 16));
+    }
+UTEST_END
diff --git a/src/test/utest/hmath/hsum.cpp b/src/test/utest/hmath/h_sqr_sum.cpp
similarity index 77%
rename from src/test/utest/hmath/hsum.cpp
rename to src/test/utest/hmath/h_sqr_sum.cpp
index 5ee290a4..7979bd21 100644
--- a/src/test/utest/hmath/hsum.cpp
+++ b/src/test/utest/hmath/h_sqr_sum.cpp
@@ -36,25 +36,19 @@ namespace lsp
 {
     namespace generic
     {
-        float h_sum(const float *src, size_t count);
         float h_sqr_sum(const float *src, size_t count);
-        float h_abs_sum(const float *src, size_t count);
     }
 
     IF_ARCH_X86(
         namespace sse
         {
-            float h_sum(const float *src, size_t count);
             float h_sqr_sum(const float *src, size_t count);
-            float h_abs_sum(const float *src, size_t count);
         }
 
         namespace avx
         {
-            float h_sum(const float *src, size_t count);
             float h_sqr_sum(const float *src, size_t count);
             float h_sqr_sum_fma3(const float *src, size_t count);
-            float h_abs_sum(const float *src, size_t count);
         }
     )
 
@@ -70,16 +64,14 @@ namespace lsp
     IF_ARCH_AARCH64(
         namespace asimd
         {
-            float h_sum(const float *src, size_t count);
             float h_sqr_sum(const float *src, size_t count);
-            float h_abs_sum(const float *src, size_t count);
         }
     )
 
     typedef float (* h_sum_t)(const float *src, size_t count);
 }
 
-UTEST_BEGIN("dsp.hmath", hsum)
+UTEST_BEGIN("dsp.hmath", h_sqr_sum)
 
     void call(const char *label, size_t align, h_sum_t func1, h_sum_t func2)
     {
@@ -119,21 +111,10 @@ UTEST_BEGIN("dsp.hmath", hsum)
         #define CALL(generic, func, align) \
             call(#func, align, generic, func);
 
-        IF_ARCH_X86(CALL(generic::h_sum, sse::h_sum, 16));
         IF_ARCH_X86(CALL(generic::h_sqr_sum, sse::h_sqr_sum, 16));
-        IF_ARCH_X86(CALL(generic::h_abs_sum, sse::h_abs_sum, 16));
-
-        IF_ARCH_X86(CALL(generic::h_sum, avx::h_sum, 32));
         IF_ARCH_X86(CALL(generic::h_sqr_sum, avx::h_sqr_sum, 32));
         IF_ARCH_X86(CALL(generic::h_sqr_sum, avx::h_sqr_sum_fma3, 32));
-        IF_ARCH_X86(CALL(generic::h_abs_sum, avx::h_abs_sum, 32));
-
-        IF_ARCH_ARM(CALL(generic::h_sum, neon_d32::h_sum, 16));
         IF_ARCH_ARM(CALL(generic::h_sqr_sum, neon_d32::h_sqr_sum, 16));
-        IF_ARCH_ARM(CALL(generic::h_abs_sum, neon_d32::h_abs_sum, 16));
-
-        IF_ARCH_AARCH64(CALL(generic::h_sum, asimd::h_sum, 16));
         IF_ARCH_AARCH64(CALL(generic::h_sqr_sum, asimd::h_sqr_sum, 16));
-        IF_ARCH_AARCH64(CALL(generic::h_abs_sum, asimd::h_abs_sum, 16));
     }
 UTEST_END
diff --git a/src/test/utest/hmath/h_sum.cpp b/src/test/utest/hmath/h_sum.cpp
new file mode 100644
index 00000000..1d157aab
--- /dev/null
+++ b/src/test/utest/hmath/h_sum.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2023 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 31 мар. 2020 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <lsp-plug.in/common/types.h>
+#include <lsp-plug.in/test-fw/utest.h>
+#include <lsp-plug.in/test-fw/FloatBuffer.h>
+#include <lsp-plug.in/test-fw/helpers.h>
+
+#ifdef ARCH_ARM
+    #define TOLERANCE 1e-3
+#endif
+
+#ifndef TOLERANCE
+    #define TOLERANCE 1e-4
+#endif
+
+namespace lsp
+{
+    namespace generic
+    {
+        float h_sum(const float *src, size_t count);
+    }
+
+    IF_ARCH_X86(
+        namespace sse
+        {
+            float h_sum(const float *src, size_t count);
+        }
+
+        namespace avx
+        {
+            float h_sum(const float *src, size_t count);
+        }
+    )
+
+    IF_ARCH_ARM(
+        namespace neon_d32
+        {
+            float h_sum(const float *src, size_t count);
+        }
+    )
+
+    IF_ARCH_AARCH64(
+        namespace asimd
+        {
+            float h_sum(const float *src, size_t count);
+        }
+    )
+
+    typedef float (* h_sum_t)(const float *src, size_t count);
+}
+
+UTEST_BEGIN("dsp.hmath", h_sum)
+
+    void call(const char *label, size_t align, h_sum_t func1, h_sum_t func2)
+    {
+        if (!UTEST_SUPPORTED(func1))
+            return;
+        if (!UTEST_SUPPORTED(func2))
+            return;
+
+        UTEST_FOREACH(count, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                32, 64, 65, 100, 768, 999, 0x1fff)
+        {
+            for (size_t mask=0; mask <= 0x01; ++mask)
+            {
+                printf("Testing %s on input buffer of %d numbers, mask=0x%x...\n", label, int(count), int(mask));
+
+                FloatBuffer src(count, align, mask & 0x01);
+                src.randomize_sign();
+
+                // Call functions
+                float a = func1(src, count);
+                float b = func2(src, count);
+
+                UTEST_ASSERT_MSG(src.valid(), "Source buffer corrupted");
+
+                // Compare buffers
+                if (!float_equals_adaptive(a, b, TOLERANCE))
+                {
+                    src.dump("src1");
+                    UTEST_FAIL_MSG("Result of function 1 (%f) differs result of function 2 (%f)", a, b);
+                }
+            }
+        }
+    }
+
+    UTEST_MAIN
+    {
+        #define CALL(generic, func, align) \
+            call(#func, align, generic, func);
+
+        IF_ARCH_X86(CALL(generic::h_sum, sse::h_sum, 16));
+        IF_ARCH_X86(CALL(generic::h_sum, avx::h_sum, 32));
+        IF_ARCH_ARM(CALL(generic::h_sum, neon_d32::h_sum, 16));
+        IF_ARCH_AARCH64(CALL(generic::h_sum, asimd::h_sum, 16));
+    }
+UTEST_END