From ee21d9471a6a40de2de37c77e8b920f6d1d73dd3 Mon Sep 17 00:00:00 2001 From: sadko4u Date: Mon, 28 Aug 2023 16:01:30 +0300 Subject: [PATCH] Split unit tests and performance tests, some AMD-related optimizations --- CHANGELOG | 1 + include/private/dsp/arch/x86/cpuid.h | 15 ++- include/private/dsp/arch/x86/features.h | 3 +- src/main/x86/avx.cpp | 14 ++- src/main/x86/x86.cpp | 32 ++++- src/test/ptest/hmath/h_abs_sum.cpp | 110 +++++++++++++++++ .../ptest/hmath/{hsum.cpp => h_sqr_sum.cpp} | 30 +---- src/test/ptest/hmath/h_sum.cpp | 110 +++++++++++++++++ src/test/utest/hmath/h_abs_sum.cpp | 116 ++++++++++++++++++ .../utest/hmath/{hsum.cpp => h_sqr_sum.cpp} | 21 +--- src/test/utest/hmath/h_sum.cpp | 116 ++++++++++++++++++ 11 files changed, 507 insertions(+), 61 deletions(-) create mode 100644 src/test/ptest/hmath/h_abs_sum.cpp rename src/test/ptest/hmath/{hsum.cpp => h_sqr_sum.cpp} (68%) create mode 100644 src/test/ptest/hmath/h_sum.cpp create mode 100644 src/test/utest/hmath/h_abs_sum.cpp rename src/test/utest/hmath/{hsum.cpp => h_sqr_sum.cpp} (77%) create mode 100644 src/test/utest/hmath/h_sum.cpp diff --git a/CHANGELOG b/CHANGELOG index 26a8dfef..4023c4da 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,7 @@ === 1.0.15 === * Fixed syntax error in C interface, covered with tests. * Bugfix in horizontal summing functions (invalid register clobber list). +* Some AMD-related optimizations. === 1.0.14 === * Implemented pcomplex_r2c instruction set. diff --git a/include/private/dsp/arch/x86/cpuid.h b/include/private/dsp/arch/x86/cpuid.h index 4909d13c..9b260f2c 100644 --- a/include/private/dsp/arch/x86/cpuid.h +++ b/include/private/dsp/arch/x86/cpuid.h @@ -1,6 +1,6 @@ /* - * Copyright (C) 2020 Linux Studio Plugins Project - * (C) 2020 Vladimir Sadovnikov + * Copyright (C) 2023 Linux Studio Plugins Project + * (C) 2023 Vladimir Sadovnikov * * This file is part of lsp-dsp-lib * Created on: 31 мар. 2020 г. @@ -116,12 +116,18 @@ //------------------------------------------------------------------------- // Different processor families + +#define INTEL_FAMILY_686_CORE 0x06 + #define AMD_FAMILY_K8_HAMMER 0x0f #define AMD_FAMILY_K10 0x10 #define AMD_FAMILY_BOBCAT 0x14 #define AMD_FAMILY_BULLDOZER 0x15 #define AMD_FAMILY_JAGUAR 0x16 #define AMD_FAMILY_ZEN_1_2 0x17 +#define AMD_FAMILY_DHYANA 0x18 +#define AMD_FAMILY_ZEN_3_4 0x19 +#define AMD_FAMILY_ZEN_5 0x1a #define AMD_MODEL_ZEN_2 0x31 @@ -191,8 +197,9 @@ namespace lsp } uint64_t read_xcr(umword_t xcr_id); - } -} + + } /* namespace x86 */ +} /* namespace lsp */ #endif /* PRIVATE_DSP_ARCH_X86_CPUID_H_ */ diff --git a/include/private/dsp/arch/x86/features.h b/include/private/dsp/arch/x86/features.h index cfe91a90..6455919c 100644 --- a/include/private/dsp/arch/x86/features.h +++ b/include/private/dsp/arch/x86/features.h @@ -89,7 +89,8 @@ { FEAT_FAST_MOVS, // Processor implements optimized MOVS instruction FEAT_FAST_AVX, // Fast AVX implementation - FEAT_FAST_FMA3 // Fast FMA3 implementation + FEAT_FAST_FMA3, // Fast FMA3 implementation + FEAT_BELOW_ZEN3 // CPU has AMD architecture and is below Zen3 }; /** diff --git a/src/main/x86/avx.cpp b/src/main/x86/avx.cpp index 2bf9c0f6..a7775f10 100644 --- a/src/main/x86/avx.cpp +++ b/src/main/x86/avx.cpp @@ -1,6 +1,6 @@ /* - * Copyright (C) 2020 Linux Studio Plugins Project - * (C) 2020 Vladimir Sadovnikov + * Copyright (C) 2023 Linux Studio Plugins Project + * (C) 2023 Vladimir Sadovnikov * * This file is part of lsp-dsp-lib * Created on: 31 мар. 2020 г. @@ -127,8 +127,9 @@ // This routine sucks on AMD Bulldozer processor family but is pretty great on Intel // Not tested on AMD Processors above Bulldozer family - bool favx = feature_check(f, FEAT_FAST_AVX); - bool ffma = favx && feature_check(f, FEAT_FAST_FMA3); + bool favx = feature_check(f, FEAT_FAST_AVX); + bool ffma = favx && feature_check(f, FEAT_FAST_FMA3); + bool below_zen3 = feature_check(f, FEAT_BELOW_ZEN3); CEXPORT2_X64(favx, reverse1, reverse1); CEXPORT2_X64(favx, reverse2, reverse2); @@ -448,7 +449,10 @@ CEXPORT2(favx, pcomplex_rdiv2, pcomplex_rdiv2_fma3); CEXPORT2(favx, pcomplex_div3, pcomplex_div3_fma3); - CEXPORT2(favx, h_sqr_sum, h_sqr_sum_fma3); + if (!below_zen3) + { + CEXPORT2(favx, h_sqr_sum, h_sqr_sum_fma3); + } CEXPORT2(favx, direct_fft, direct_fft_fma3); CEXPORT2(favx, reverse_fft, reverse_fft_fma3); diff --git a/src/main/x86/x86.cpp b/src/main/x86/x86.cpp index f53936e7..b6c1067b 100644 --- a/src/main/x86/x86.cpp +++ b/src/main/x86/x86.cpp @@ -516,21 +516,45 @@ case FEAT_FAST_MOVS: if (f->vendor == CPU_VENDOR_INTEL) { - if ((f->family == 0x6) && (f->model >= 0x5e)) // Should be some Core i3 microarchitecture... + // Should be some Core i3 microarchitecture... + if ((f->family == INTEL_FAMILY_686_CORE) && (f->model >= 0x5e)) return true; } break; case FEAT_FAST_AVX: if (f->vendor == CPU_VENDOR_INTEL) // Any Intel CPU is good enough with AVX return true; + // Only starting with ZEN 1 architecture AMD's implementation of AVX is fast enough if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON)) - return (f->family >= AMD_FAMILY_ZEN_1_2); // Only starting with ZEN 1 architecture AMD's implementation of AVX is fast enough + { + if (f->family < AMD_FAMILY_ZEN_1_2) + return false; + if (f->family == AMD_FAMILY_DHYANA) + return false; + return true; + } break; case FEAT_FAST_FMA3: if (f->vendor == CPU_VENDOR_INTEL) // Any Intel CPU is good enough with AVX return true; - if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON)) // Starting with ZEN 2 FMA3 operations are fast enough on AMD - return (f->family >= AMD_FAMILY_ZEN_1_2) && (f->model >= AMD_MODEL_ZEN_2); + // Starting with ZEN 2 FMA3 operations are fast enough on AMD + if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON)) + { + if (f->family < AMD_FAMILY_ZEN_1_2) + return false; + if (f->family == AMD_FAMILY_DHYANA) + return false; + return true; + } + break; + + case FEAT_BELOW_ZEN3: // Test that this is AMD and below Zen 3 architecture + if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON)) + { + if (f->family < AMD_FAMILY_ZEN_3_4) + return true; + return false; + } break; default: break; diff --git a/src/test/ptest/hmath/h_abs_sum.cpp b/src/test/ptest/hmath/h_abs_sum.cpp new file mode 100644 index 00000000..7713d12a --- /dev/null +++ b/src/test/ptest/hmath/h_abs_sum.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2023 Linux Studio Plugins Project + * (C) 2023 Vladimir Sadovnikov + * + * This file is part of lsp-dsp-lib + * Created on: 31 мар. 2020 г. + * + * lsp-dsp-lib is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * lsp-dsp-lib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with lsp-dsp-lib. If not, see . + */ + +#include +#include +#include +#include + +#define MIN_RANK 8 +#define MAX_RANK 16 + +namespace lsp +{ + namespace generic + { + float h_abs_sum(const float *src, size_t count); + } + + IF_ARCH_X86( + namespace sse + { + float h_abs_sum(const float *src, size_t count); + } + + namespace avx + { + float h_abs_sum(const float *src, size_t count); + } + ) + + IF_ARCH_ARM( + namespace neon_d32 + { + float h_abs_sum(const float *src, size_t count); + } + ) + + IF_ARCH_AARCH64( + namespace asimd + { + float h_abs_sum(const float *src, size_t count); + } + ) + + typedef float (* h_sum_t)(const float *src, size_t count); +} + +PTEST_BEGIN("dsp.hmath", hsum, 5, 10000) + + void call(const char *label, float *src, size_t count, h_sum_t func) + { + if (!PTEST_SUPPORTED(func)) + return; + + char buf[80]; + sprintf(buf, "%s x %d", label, int(count)); + printf("Testing %s numbers...\n", buf); + + PTEST_LOOP(buf, + func(src, count); + ); + } + + PTEST_MAIN + { + size_t buf_size = 1 << MAX_RANK; + uint8_t *data = NULL; + float *src = alloc_aligned(data, buf_size, 64); + + for (size_t i=0; i < buf_size; ++i) + src[i] = randf(0.0f, 1.0f); + + #define CALL(func) \ + call(#func, src, count, func) + + for (size_t i=MIN_RANK; i <= MAX_RANK; ++i) + { + size_t count = 1 << i; + + CALL(generic::h_abs_sum); + IF_ARCH_X86(CALL(sse::h_abs_sum)); + IF_ARCH_X86(CALL(avx::h_abs_sum)); + IF_ARCH_ARM(CALL(neon_d32::h_abs_sum)); + IF_ARCH_AARCH64(CALL(asimd::h_abs_sum)); + PTEST_SEPARATOR; + } + + free_aligned(data); + } + +PTEST_END + diff --git a/src/test/ptest/hmath/hsum.cpp b/src/test/ptest/hmath/h_sqr_sum.cpp similarity index 68% rename from src/test/ptest/hmath/hsum.cpp rename to src/test/ptest/hmath/h_sqr_sum.cpp index e0f26125..5ce55b6b 100644 --- a/src/test/ptest/hmath/hsum.cpp +++ b/src/test/ptest/hmath/h_sqr_sum.cpp @@ -1,6 +1,6 @@ /* - * Copyright (C) 2020 Linux Studio Plugins Project - * (C) 2020 Vladimir Sadovnikov + * Copyright (C) 2023 Linux Studio Plugins Project + * (C) 2023 Vladimir Sadovnikov * * This file is part of lsp-dsp-lib * Created on: 31 мар. 2020 г. @@ -31,50 +31,40 @@ namespace lsp { namespace generic { - float h_sum(const float *src, size_t count); float h_sqr_sum(const float *src, size_t count); - float h_abs_sum(const float *src, size_t count); } IF_ARCH_X86( namespace sse { - float h_sum(const float *src, size_t count); float h_sqr_sum(const float *src, size_t count); - float h_abs_sum(const float *src, size_t count); } namespace avx { - float h_sum(const float *src, size_t count); float h_sqr_sum(const float *src, size_t count); float h_sqr_sum_fma3(const float *src, size_t count); - float h_abs_sum(const float *src, size_t count); } ) IF_ARCH_ARM( namespace neon_d32 { - float h_sum(const float *src, size_t count); float h_sqr_sum(const float *src, size_t count); - float h_abs_sum(const float *src, size_t count); } ) IF_ARCH_AARCH64( namespace asimd { - float h_sum(const float *src, size_t count); float h_sqr_sum(const float *src, size_t count); - float h_abs_sum(const float *src, size_t count); } ) typedef float (* h_sum_t)(const float *src, size_t count); } -PTEST_BEGIN("dsp.hmath", hsum, 5, 10000) +PTEST_BEGIN("dsp.hmath", h_sqr_sum, 5, 10000) void call(const char *label, float *src, size_t count, h_sum_t func) { @@ -106,13 +96,6 @@ PTEST_BEGIN("dsp.hmath", hsum, 5, 10000) { size_t count = 1 << i; - CALL(generic::h_sum); - IF_ARCH_X86(CALL(sse::h_sum)); - IF_ARCH_X86(CALL(avx::h_sum)); - IF_ARCH_ARM(CALL(neon_d32::h_sum)); - IF_ARCH_AARCH64(CALL(asimd::h_sum)); - PTEST_SEPARATOR; - CALL(generic::h_sqr_sum); IF_ARCH_X86(CALL(sse::h_sqr_sum)); IF_ARCH_X86(CALL(avx::h_sqr_sum)); @@ -120,13 +103,6 @@ PTEST_BEGIN("dsp.hmath", hsum, 5, 10000) IF_ARCH_ARM(CALL(neon_d32::h_sqr_sum)); IF_ARCH_AARCH64(CALL(asimd::h_sqr_sum)); PTEST_SEPARATOR; - - CALL(generic::h_abs_sum); - IF_ARCH_X86(CALL(sse::h_abs_sum)); - IF_ARCH_X86(CALL(avx::h_abs_sum)); - IF_ARCH_ARM(CALL(neon_d32::h_abs_sum)); - IF_ARCH_AARCH64(CALL(asimd::h_abs_sum)); - PTEST_SEPARATOR2; } free_aligned(data); diff --git a/src/test/ptest/hmath/h_sum.cpp b/src/test/ptest/hmath/h_sum.cpp new file mode 100644 index 00000000..aa3aebd7 --- /dev/null +++ b/src/test/ptest/hmath/h_sum.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2023 Linux Studio Plugins Project + * (C) 2023 Vladimir Sadovnikov + * + * This file is part of lsp-dsp-lib + * Created on: 31 мар. 2020 г. + * + * lsp-dsp-lib is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * lsp-dsp-lib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with lsp-dsp-lib. If not, see . + */ + +#include +#include +#include +#include + +#define MIN_RANK 8 +#define MAX_RANK 16 + +namespace lsp +{ + namespace generic + { + float h_sum(const float *src, size_t count); + } + + IF_ARCH_X86( + namespace sse + { + float h_sum(const float *src, size_t count); + } + + namespace avx + { + float h_sum(const float *src, size_t count); + } + ) + + IF_ARCH_ARM( + namespace neon_d32 + { + float h_sum(const float *src, size_t count); + } + ) + + IF_ARCH_AARCH64( + namespace asimd + { + float h_sum(const float *src, size_t count); + } + ) + + typedef float (* h_sum_t)(const float *src, size_t count); +} + +PTEST_BEGIN("dsp.hmath", h_sum, 5, 10000) + + void call(const char *label, float *src, size_t count, h_sum_t func) + { + if (!PTEST_SUPPORTED(func)) + return; + + char buf[80]; + sprintf(buf, "%s x %d", label, int(count)); + printf("Testing %s numbers...\n", buf); + + PTEST_LOOP(buf, + func(src, count); + ); + } + + PTEST_MAIN + { + size_t buf_size = 1 << MAX_RANK; + uint8_t *data = NULL; + float *src = alloc_aligned(data, buf_size, 64); + + for (size_t i=0; i < buf_size; ++i) + src[i] = randf(0.0f, 1.0f); + + #define CALL(func) \ + call(#func, src, count, func) + + for (size_t i=MIN_RANK; i <= MAX_RANK; ++i) + { + size_t count = 1 << i; + + CALL(generic::h_sum); + IF_ARCH_X86(CALL(sse::h_sum)); + IF_ARCH_X86(CALL(avx::h_sum)); + IF_ARCH_ARM(CALL(neon_d32::h_sum)); + IF_ARCH_AARCH64(CALL(asimd::h_sum)); + PTEST_SEPARATOR; + } + + free_aligned(data); + } + +PTEST_END + diff --git a/src/test/utest/hmath/h_abs_sum.cpp b/src/test/utest/hmath/h_abs_sum.cpp new file mode 100644 index 00000000..872373ed --- /dev/null +++ b/src/test/utest/hmath/h_abs_sum.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2023 Linux Studio Plugins Project + * (C) 2023 Vladimir Sadovnikov + * + * This file is part of lsp-dsp-lib + * Created on: 31 мар. 2020 г. + * + * lsp-dsp-lib is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * lsp-dsp-lib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with lsp-dsp-lib. If not, see . + */ + +#include +#include +#include +#include + +#ifdef ARCH_ARM + #define TOLERANCE 1e-3 +#endif + +#ifndef TOLERANCE + #define TOLERANCE 1e-4 +#endif + +namespace lsp +{ + namespace generic + { + float h_abs_sum(const float *src, size_t count); + } + + IF_ARCH_X86( + namespace sse + { + float h_abs_sum(const float *src, size_t count); + } + + namespace avx + { + float h_abs_sum(const float *src, size_t count); + } + ) + + IF_ARCH_ARM( + namespace neon_d32 + { + float h_abs_sum(const float *src, size_t count); + } + ) + + IF_ARCH_AARCH64( + namespace asimd + { + float h_abs_sum(const float *src, size_t count); + } + ) + + typedef float (* h_sum_t)(const float *src, size_t count); +} + +UTEST_BEGIN("dsp.hmath", h_abs_sum) + + void call(const char *label, size_t align, h_sum_t func1, h_sum_t func2) + { + if (!UTEST_SUPPORTED(func1)) + return; + if (!UTEST_SUPPORTED(func2)) + return; + + UTEST_FOREACH(count, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 32, 64, 65, 100, 768, 999, 0x1fff) + { + for (size_t mask=0; mask <= 0x01; ++mask) + { + printf("Testing %s on input buffer of %d numbers, mask=0x%x...\n", label, int(count), int(mask)); + + FloatBuffer src(count, align, mask & 0x01); + src.randomize_sign(); + + // Call functions + float a = func1(src, count); + float b = func2(src, count); + + UTEST_ASSERT_MSG(src.valid(), "Source buffer corrupted"); + + // Compare buffers + if (!float_equals_adaptive(a, b, TOLERANCE)) + { + src.dump("src1"); + UTEST_FAIL_MSG("Result of function 1 (%f) differs result of function 2 (%f)", a, b); + } + } + } + } + + UTEST_MAIN + { + #define CALL(generic, func, align) \ + call(#func, align, generic, func); + + IF_ARCH_X86(CALL(generic::h_abs_sum, sse::h_abs_sum, 16)); + IF_ARCH_X86(CALL(generic::h_abs_sum, avx::h_abs_sum, 32)); + IF_ARCH_ARM(CALL(generic::h_abs_sum, neon_d32::h_abs_sum, 16)); + IF_ARCH_AARCH64(CALL(generic::h_abs_sum, asimd::h_abs_sum, 16)); + } +UTEST_END diff --git a/src/test/utest/hmath/hsum.cpp b/src/test/utest/hmath/h_sqr_sum.cpp similarity index 77% rename from src/test/utest/hmath/hsum.cpp rename to src/test/utest/hmath/h_sqr_sum.cpp index 5ee290a4..7979bd21 100644 --- a/src/test/utest/hmath/hsum.cpp +++ b/src/test/utest/hmath/h_sqr_sum.cpp @@ -36,25 +36,19 @@ namespace lsp { namespace generic { - float h_sum(const float *src, size_t count); float h_sqr_sum(const float *src, size_t count); - float h_abs_sum(const float *src, size_t count); } IF_ARCH_X86( namespace sse { - float h_sum(const float *src, size_t count); float h_sqr_sum(const float *src, size_t count); - float h_abs_sum(const float *src, size_t count); } namespace avx { - float h_sum(const float *src, size_t count); float h_sqr_sum(const float *src, size_t count); float h_sqr_sum_fma3(const float *src, size_t count); - float h_abs_sum(const float *src, size_t count); } ) @@ -70,16 +64,14 @@ namespace lsp IF_ARCH_AARCH64( namespace asimd { - float h_sum(const float *src, size_t count); float h_sqr_sum(const float *src, size_t count); - float h_abs_sum(const float *src, size_t count); } ) typedef float (* h_sum_t)(const float *src, size_t count); } -UTEST_BEGIN("dsp.hmath", hsum) +UTEST_BEGIN("dsp.hmath", h_sqr_sum) void call(const char *label, size_t align, h_sum_t func1, h_sum_t func2) { @@ -119,21 +111,10 @@ UTEST_BEGIN("dsp.hmath", hsum) #define CALL(generic, func, align) \ call(#func, align, generic, func); - IF_ARCH_X86(CALL(generic::h_sum, sse::h_sum, 16)); IF_ARCH_X86(CALL(generic::h_sqr_sum, sse::h_sqr_sum, 16)); - IF_ARCH_X86(CALL(generic::h_abs_sum, sse::h_abs_sum, 16)); - - IF_ARCH_X86(CALL(generic::h_sum, avx::h_sum, 32)); IF_ARCH_X86(CALL(generic::h_sqr_sum, avx::h_sqr_sum, 32)); IF_ARCH_X86(CALL(generic::h_sqr_sum, avx::h_sqr_sum_fma3, 32)); - IF_ARCH_X86(CALL(generic::h_abs_sum, avx::h_abs_sum, 32)); - - IF_ARCH_ARM(CALL(generic::h_sum, neon_d32::h_sum, 16)); IF_ARCH_ARM(CALL(generic::h_sqr_sum, neon_d32::h_sqr_sum, 16)); - IF_ARCH_ARM(CALL(generic::h_abs_sum, neon_d32::h_abs_sum, 16)); - - IF_ARCH_AARCH64(CALL(generic::h_sum, asimd::h_sum, 16)); IF_ARCH_AARCH64(CALL(generic::h_sqr_sum, asimd::h_sqr_sum, 16)); - IF_ARCH_AARCH64(CALL(generic::h_abs_sum, asimd::h_abs_sum, 16)); } UTEST_END diff --git a/src/test/utest/hmath/h_sum.cpp b/src/test/utest/hmath/h_sum.cpp new file mode 100644 index 00000000..1d157aab --- /dev/null +++ b/src/test/utest/hmath/h_sum.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2023 Linux Studio Plugins Project + * (C) 2023 Vladimir Sadovnikov + * + * This file is part of lsp-dsp-lib + * Created on: 31 мар. 2020 г. + * + * lsp-dsp-lib is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * lsp-dsp-lib is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with lsp-dsp-lib. If not, see . + */ + +#include +#include +#include +#include + +#ifdef ARCH_ARM + #define TOLERANCE 1e-3 +#endif + +#ifndef TOLERANCE + #define TOLERANCE 1e-4 +#endif + +namespace lsp +{ + namespace generic + { + float h_sum(const float *src, size_t count); + } + + IF_ARCH_X86( + namespace sse + { + float h_sum(const float *src, size_t count); + } + + namespace avx + { + float h_sum(const float *src, size_t count); + } + ) + + IF_ARCH_ARM( + namespace neon_d32 + { + float h_sum(const float *src, size_t count); + } + ) + + IF_ARCH_AARCH64( + namespace asimd + { + float h_sum(const float *src, size_t count); + } + ) + + typedef float (* h_sum_t)(const float *src, size_t count); +} + +UTEST_BEGIN("dsp.hmath", h_sum) + + void call(const char *label, size_t align, h_sum_t func1, h_sum_t func2) + { + if (!UTEST_SUPPORTED(func1)) + return; + if (!UTEST_SUPPORTED(func2)) + return; + + UTEST_FOREACH(count, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 32, 64, 65, 100, 768, 999, 0x1fff) + { + for (size_t mask=0; mask <= 0x01; ++mask) + { + printf("Testing %s on input buffer of %d numbers, mask=0x%x...\n", label, int(count), int(mask)); + + FloatBuffer src(count, align, mask & 0x01); + src.randomize_sign(); + + // Call functions + float a = func1(src, count); + float b = func2(src, count); + + UTEST_ASSERT_MSG(src.valid(), "Source buffer corrupted"); + + // Compare buffers + if (!float_equals_adaptive(a, b, TOLERANCE)) + { + src.dump("src1"); + UTEST_FAIL_MSG("Result of function 1 (%f) differs result of function 2 (%f)", a, b); + } + } + } + } + + UTEST_MAIN + { + #define CALL(generic, func, align) \ + call(#func, align, generic, func); + + IF_ARCH_X86(CALL(generic::h_sum, sse::h_sum, 16)); + IF_ARCH_X86(CALL(generic::h_sum, avx::h_sum, 32)); + IF_ARCH_ARM(CALL(generic::h_sum, neon_d32::h_sum, 16)); + IF_ARCH_AARCH64(CALL(generic::h_sum, asimd::h_sum, 16)); + } +UTEST_END