diff --git a/CHANGELOG b/CHANGELOG index 1ea096fc..fde4dd16 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,7 +3,7 @@ ******************************************************************************* === 1.0.28 === - +* Implemented abs_max2, abs_min2, abs_max3 and abs_min3 functions. === 1.0.27 === * Updated build scripts. diff --git a/include/private/dsp/arch/generic/pmath/abs_vv.h b/include/private/dsp/arch/generic/pmath/abs_vv.h index 725aeba1..6dcf2752 100644 --- a/include/private/dsp/arch/generic/pmath/abs_vv.h +++ b/include/private/dsp/arch/generic/pmath/abs_vv.h @@ -87,7 +87,7 @@ namespace lsp void abs_min2(float *dst, const float *src, size_t count) { for (size_t i=0; i - * (C) 2020 Vladimir Sadovnikov + * Copyright (C) 2024 Linux Studio Plugins Project + * (C) 2024 Vladimir Sadovnikov * * This file is part of lsp-dsp-lib * Created on: 31 мар. 2020 г. @@ -213,6 +213,38 @@ namespace lsp ); } + void x64_abs_max2(float *dst, const float *src, size_t count) + { + IF_ARCH_X86_64(size_t off); + ARCH_X86_64_ASM + ( + ABS_V4_CORE("dst", "dst", "src", "vmax", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r"(dst), [src] "r"(src), + [SIGN] "m" (abs_vv_const) + : "cc", "memory", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9" + ); + } + + void x64_abs_min2(float *dst, const float *src, size_t count) + { + IF_ARCH_X86_64(size_t off); + ARCH_X86_64_ASM + ( + ABS_V4_CORE("dst", "dst", "src", "vmin", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r"(dst), [src] "r"(src), + [SIGN] "m" (abs_vv_const) + : "cc", "memory", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9" + ); + } + void x64_abs_add3(float *dst, const float *src1, const float *src2, size_t count) { IF_ARCH_X86_64(size_t off); @@ -309,6 +341,38 @@ namespace lsp ); } + void x64_abs_max3(float *dst, const float *src1, const float *src2, size_t count) + { + IF_ARCH_X86_64(size_t off); + 
ARCH_X86_64_ASM + ( + ABS_V4_CORE("dst", "src1", "src2", "vmax", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r"(dst), [src1] "r" (src1), [src2] "r" (src2), + [SIGN] "m" (abs_vv_const) + : "cc", "memory", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9" + ); + } + + void x64_abs_min3(float *dst, const float *src1, const float *src2, size_t count) + { + IF_ARCH_X86_64(size_t off); + ARCH_X86_64_ASM + ( + ABS_V4_CORE("dst", "src1", "src2", "vmin", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r"(dst), [src1] "r" (src1), [src2] "r" (src2), + [SIGN] "m" (abs_vv_const) + : "cc", "memory", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9" + ); + } + #undef ABS_V4_CORE #define ABS_CORE(DST, SRC) \ @@ -425,7 +489,7 @@ namespace lsp #undef OP_DSEL #undef OP_RSEL - } -} + } /* namespace avx */ +} /* namespace lsp */ #endif /* PRIVATE_DSP_ARCH_X86_AVX_PMATH_ABS_VV_H_ */ diff --git a/include/private/dsp/arch/x86/avx512/pmath/abs_vv.h b/include/private/dsp/arch/x86/avx512/pmath/abs_vv.h index 5bc453cb..c96d1e56 100644 --- a/include/private/dsp/arch/x86/avx512/pmath/abs_vv.h +++ b/include/private/dsp/arch/x86/avx512/pmath/abs_vv.h @@ -1,6 +1,6 @@ /* - * Copyright (C) 2023 Linux Studio Plugins Project - * (C) 2023 Vladimir Sadovnikov + * Copyright (C) 2024 Linux Studio Plugins Project + * (C) 2024 Vladimir Sadovnikov * * This file is part of lsp-dsp-lib * Created on: 24 окт. 2023 г. 
@@ -207,6 +207,36 @@ namespace lsp ); } + void abs_max2(float *dst, const float *src, size_t count) + { + IF_ARCH_X86_64(size_t off); + ARCH_X86_64_ASM + ( + ABS_OP_CORE("dst", "dst", "src", "vmax", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r"(dst), [src] "r"(src), + [SIGN] "m" (abs_vv_const) + : "cc", "memory", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7" + ); + } + + void abs_min2(float *dst, const float *src, size_t count) + { + IF_ARCH_X86_64(size_t off); + ARCH_X86_64_ASM + ( + ABS_OP_CORE("dst", "dst", "src", "vmin", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r"(dst), [src] "r"(src), + [SIGN] "m" (abs_vv_const) + : "cc", "memory", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7" + ); + } + void abs_add3(float *dst, const float *src1, const float *src2, size_t count) { IF_ARCH_X86_64(size_t off); @@ -297,6 +327,36 @@ namespace lsp ); } + void abs_max3(float *dst, const float *src1, const float *src2, size_t count) + { + IF_ARCH_X86_64(size_t off); + ARCH_X86_64_ASM + ( + ABS_OP_CORE("dst", "src1", "src2", "vmax", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r"(dst), [src1] "r" (src1), [src2] "r" (src2), + [SIGN] "m" (abs_vv_const) + : "cc", "memory", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7" + ); + } + + void abs_min3(float *dst, const float *src1, const float *src2, size_t count) + { + IF_ARCH_X86_64(size_t off); + ARCH_X86_64_ASM + ( + ABS_OP_CORE("dst", "src1", "src2", "vmin", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r"(dst), [src1] "r" (src1), [src2] "r" (src2), + [SIGN] "m" (abs_vv_const) + : "cc", "memory", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7" + ); + } + #undef ABS_OP_CORE #define ABS_CORE(DST, SRC) \ diff --git a/include/private/dsp/arch/x86/sse/pmath/abs_vv.h b/include/private/dsp/arch/x86/sse/pmath/abs_vv.h index 25e44bbf..cc71ab92 100644 --- 
a/include/private/dsp/arch/x86/sse/pmath/abs_vv.h +++ b/include/private/dsp/arch/x86/sse/pmath/abs_vv.h @@ -1,6 +1,6 @@ /* - * Copyright (C) 2020 Linux Studio Plugins Project - * (C) 2020 Vladimir Sadovnikov + * Copyright (C) 2024 Linux Studio Plugins Project + * (C) 2024 Vladimir Sadovnikov * * This file is part of lsp-dsp-lib * Created on: 31 мар. 2020 г. @@ -303,6 +303,34 @@ namespace lsp ); } + void abs_max2(float *dst, const float *src, size_t count) + { + IF_ARCH_X86(size_t off); + ARCH_X86_ASM + ( + ABS_OP2_CORE("max", "dst", "dst", "src", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r" (dst), [src] "r" (src), + [X_SIGN] "m" (abs_vv_const) + : "cc", "memory", /* asm stores results through dst */ "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7" + ); + } + + void abs_min2(float *dst, const float *src, size_t count) + { + IF_ARCH_X86(size_t off); + ARCH_X86_ASM + ( + ABS_OP2_CORE("min", "dst", "dst", "src", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r" (dst), [src] "r" (src), + [X_SIGN] "m" (abs_vv_const) + : "cc", "memory", /* asm stores results through dst */ "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7" + ); + } + void abs_add3(float *dst, const float *src1, const float *src2, size_t count) { IF_ARCH_X86(size_t off); @@ -387,11 +415,39 @@ namespace lsp ); } + void abs_max3(float *dst, const float *src1, const float *src2, size_t count) + { + IF_ARCH_X86(size_t off); + ARCH_X86_ASM + ( + ABS_OP2_CORE("max", "dst", "src1", "src2", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r" (dst), [src1] "r" (src1), [src2] "r" (src2), + [X_SIGN] "m" (abs_vv_const) + : "cc", "memory", /* asm stores results through dst */ "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7" + ); + } + + void abs_min3(float *dst, const float *src1, const float *src2, size_t count) + { + IF_ARCH_X86(size_t off); + ARCH_X86_ASM + ( + ABS_OP2_CORE("min", "dst", "src1", "src2", OP_DSEL) + : [off] "=&r" (off), [count] "+r" (count) + : [dst] "r" (dst), [src1] "r" (src1), [src2] "r" (src2), + [X_SIGN] "m" (abs_vv_const) + :
"cc", "memory", /* asm stores results through dst */ "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7" + ); + } + #undef ABS_OP2_CORE #undef OP_DSEL #undef OP_RSEL - } -} + } /* namespace sse */ +} /* namespace lsp */ #endif /* PRIVATE_DSP_ARCH_X86_SSE_PMATH_ABS_VV_H_ */ diff --git a/src/main/x86/avx.cpp b/src/main/x86/avx.cpp index 54e517a7..b6fb180c 100644 --- a/src/main/x86/avx.cpp +++ b/src/main/x86/avx.cpp @@ -258,6 +258,8 @@ CEXPORT2_X64(favx, abs_mul2, x64_abs_mul2); CEXPORT2_X64(favx, abs_div2, x64_abs_div2); CEXPORT2_X64(favx, abs_rdiv2, x64_abs_rdiv2); + CEXPORT2_X64(favx, abs_max2, x64_abs_max2); + CEXPORT2_X64(favx, abs_min2, x64_abs_min2); CEXPORT2_X64(favx, abs_add3, x64_abs_add3); CEXPORT2_X64(favx, abs_sub3, x64_abs_sub3); @@ -265,6 +267,8 @@ CEXPORT2_X64(favx, abs_mul3, x64_abs_mul3); CEXPORT2_X64(favx, abs_div3, x64_abs_div3); CEXPORT2_X64(favx, abs_rdiv3, x64_abs_rdiv3); + CEXPORT2_X64(favx, abs_max3, x64_abs_max3); + CEXPORT2_X64(favx, abs_min3, x64_abs_min3); CEXPORT2_X64(favx, abs1, x64_abs1); CEXPORT2_X64(favx, abs2, x64_abs2); diff --git a/src/main/x86/avx512.cpp b/src/main/x86/avx512.cpp index cd89d334..9cfa8a67 100644 --- a/src/main/x86/avx512.cpp +++ b/src/main/x86/avx512.cpp @@ -95,6 +95,8 @@ CEXPORT1(vl, abs_mul2); CEXPORT1(vl, abs_div2); CEXPORT1(vl, abs_rdiv2); + CEXPORT1(vl, abs_max2); + CEXPORT1(vl, abs_min2); CEXPORT1(vl, abs_add3); CEXPORT1(vl, abs_sub3); @@ -102,6 +104,8 @@ CEXPORT1(vl, abs_mul3); CEXPORT1(vl, abs_div3); CEXPORT1(vl, abs_rdiv3); + CEXPORT1(vl, abs_max3); + CEXPORT1(vl, abs_min3); CEXPORT1(vl, exp1); CEXPORT1(vl, exp2); diff --git a/src/main/x86/sse.cpp b/src/main/x86/sse.cpp index 7211c429..d680730f 100644 --- a/src/main/x86/sse.cpp +++ b/src/main/x86/sse.cpp @@ -157,6 +157,8 @@ EXPORT1(abs_mul2); EXPORT1(abs_div2); EXPORT1(abs_rdiv2); + EXPORT1(abs_max2); + EXPORT1(abs_min2); EXPORT1(abs_add3); EXPORT1(abs_sub3); @@ -164,6 +166,8 @@ EXPORT1(abs_mul3); EXPORT1(abs_div3); EXPORT1(abs_rdiv3); + EXPORT1(abs_max3); + EXPORT1(abs_min3);
EXPORT1(min); EXPORT1(max); diff --git a/src/test/ptest/pmath/abs_op2.cpp b/src/test/ptest/pmath/abs_op2.cpp index f3eb3608..e205ba97 100644 --- a/src/test/ptest/pmath/abs_op2.cpp +++ b/src/test/ptest/pmath/abs_op2.cpp @@ -38,6 +38,8 @@ namespace lsp void abs_mul2(float *dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + void abs_min2(float *dst, const float *src, size_t count); } IF_ARCH_X86( @@ -49,6 +51,8 @@ namespace lsp void abs_mul2(float *dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + void abs_min2(float *dst, const float *src, size_t count); } namespace avx512 @@ -59,6 +63,8 @@ namespace lsp void abs_mul2(float *dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + void abs_min2(float *dst, const float *src, size_t count); } ) @@ -71,6 +77,8 @@ namespace lsp void x64_abs_mul2(float *dst, const float *src, size_t count); void x64_abs_div2(float *dst, const float *src, size_t count); void x64_abs_rdiv2(float *dst, const float *src, size_t count); + void x64_abs_max2(float *dst, const float *src, size_t count); + void x64_abs_min2(float *dst, const float *src, size_t count); } ) @@ -83,6 +91,8 @@ namespace lsp void abs_mul2(float *dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + void abs_min2(float *dst, const float *src, size_t count); } ) @@ -95,6 +105,8 @@ namespace lsp void abs_mul2(float 
*dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + void abs_min2(float *dst, const float *src, size_t count); } ) @@ -183,6 +195,22 @@ PTEST_BEGIN("dsp.pmath", abs_op2, 5, 1000) IF_ARCH_X86(CALL(avx512::abs_rdiv2)); IF_ARCH_ARM(CALL(neon_d32::abs_rdiv2)); IF_ARCH_AARCH64(CALL(asimd::abs_rdiv2)); + PTEST_SEPARATOR; + + CALL(generic::abs_max2); + IF_ARCH_X86(CALL(sse::abs_max2)); + IF_ARCH_X86_64(CALL(avx::x64_abs_max2)); + IF_ARCH_X86(CALL(avx512::abs_max2)); + IF_ARCH_ARM(CALL(neon_d32::abs_max2)); + IF_ARCH_AARCH64(CALL(asimd::abs_max2)); + PTEST_SEPARATOR; + + CALL(generic::abs_min2); + IF_ARCH_X86(CALL(sse::abs_min2)); + IF_ARCH_X86_64(CALL(avx::x64_abs_min2)); + IF_ARCH_X86(CALL(avx512::abs_min2)); + IF_ARCH_ARM(CALL(neon_d32::abs_min2)); + IF_ARCH_AARCH64(CALL(asimd::abs_min2)); PTEST_SEPARATOR2; } diff --git a/src/test/ptest/pmath/abs_op3.cpp b/src/test/ptest/pmath/abs_op3.cpp index c2a0e042..53a2020b 100644 --- a/src/test/ptest/pmath/abs_op3.cpp +++ b/src/test/ptest/pmath/abs_op3.cpp @@ -38,6 +38,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float *dst, const float *src1, const float *src2, size_t count); } IF_ARCH_X86( @@ -49,6 +51,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float 
*dst, const float *src1, const float *src2, size_t count); } namespace avx512 @@ -59,6 +63,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float *dst, const float *src1, const float *src2, size_t count); } ) @@ -71,6 +77,8 @@ namespace lsp void x64_abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void x64_abs_div3(float *dst, const float *src1, const float *src2, size_t count); void x64_abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void x64_abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void x64_abs_min3(float *dst, const float *src1, const float *src2, size_t count); } ) @@ -83,6 +91,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float *dst, const float *src1, const float *src2, size_t count); } ) @@ -95,6 +105,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float *dst, const float *src1, const float *src2, size_t count); } ) @@ -183,6 +195,22 @@ PTEST_BEGIN("dsp.pmath", abs_op3, 5, 1000) IF_ARCH_X86(CALL(avx512::abs_rdiv3)); IF_ARCH_ARM(CALL(neon_d32::abs_rdiv3)); 
IF_ARCH_AARCH64(CALL(asimd::abs_rdiv3)); + PTEST_SEPARATOR; + + CALL(generic::abs_max3); + IF_ARCH_X86(CALL(sse::abs_max3)); + IF_ARCH_X86_64(CALL(avx::x64_abs_max3)); + IF_ARCH_X86(CALL(avx512::abs_max3)); + IF_ARCH_ARM(CALL(neon_d32::abs_max3)); + IF_ARCH_AARCH64(CALL(asimd::abs_max3)); + PTEST_SEPARATOR; + + CALL(generic::abs_min3); + IF_ARCH_X86(CALL(sse::abs_min3)); + IF_ARCH_X86_64(CALL(avx::x64_abs_min3)); + IF_ARCH_X86(CALL(avx512::abs_min3)); + IF_ARCH_ARM(CALL(neon_d32::abs_min3)); + IF_ARCH_AARCH64(CALL(asimd::abs_min3)); PTEST_SEPARATOR2; } diff --git a/src/test/utest/pmath/abs_op2.cpp b/src/test/utest/pmath/abs_op2.cpp index c30da2f1..46e51bf3 100644 --- a/src/test/utest/pmath/abs_op2.cpp +++ b/src/test/utest/pmath/abs_op2.cpp @@ -1,6 +1,6 @@ /* - * Copyright (C) 2020 Linux Studio Plugins Project - * (C) 2020 Vladimir Sadovnikov + * Copyright (C) 2024 Linux Studio Plugins Project + * (C) 2024 Vladimir Sadovnikov * * This file is part of lsp-dsp-lib * Created on: 31 мар. 2020 г. 
@@ -36,6 +36,8 @@ namespace lsp void abs_mul2(float *dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + void abs_min2(float *dst, const float *src, size_t count); } IF_ARCH_X86( @@ -47,6 +49,8 @@ namespace lsp void abs_mul2(float *dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + void abs_min2(float *dst, const float *src, size_t count); } namespace avx512 @@ -57,6 +61,8 @@ namespace lsp void abs_mul2(float *dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + void abs_min2(float *dst, const float *src, size_t count); } ) @@ -69,6 +75,8 @@ namespace lsp void x64_abs_mul2(float *dst, const float *src, size_t count); void x64_abs_div2(float *dst, const float *src, size_t count); void x64_abs_rdiv2(float *dst, const float *src, size_t count); + void x64_abs_max2(float *dst, const float *src, size_t count); + void x64_abs_min2(float *dst, const float *src, size_t count); } ) @@ -81,6 +89,8 @@ namespace lsp void abs_mul2(float *dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + void abs_min2(float *dst, const float *src, size_t count); } ) @@ -93,6 +103,8 @@ namespace lsp void abs_mul2(float *dst, const float *src, size_t count); void abs_div2(float *dst, const float *src, size_t count); void abs_rdiv2(float *dst, const float *src, size_t count); + void abs_max2(float *dst, const float *src, size_t count); + 
void abs_min2(float *dst, const float *src, size_t count); } ) } @@ -159,6 +171,8 @@ UTEST_BEGIN("dsp.pmath", abs_op2) IF_ARCH_X86(CALL(generic::abs_mul2, sse::abs_mul2, 16)); IF_ARCH_X86(CALL(generic::abs_div2, sse::abs_div2, 16)); IF_ARCH_X86(CALL(generic::abs_rdiv2, sse::abs_rdiv2, 16)); + IF_ARCH_X86(CALL(generic::abs_max2, sse::abs_max2, 16)); + IF_ARCH_X86(CALL(generic::abs_min2, sse::abs_min2, 16)); IF_ARCH_X86_64(CALL(generic::abs_add2, avx::x64_abs_add2, 32)); IF_ARCH_X86_64(CALL(generic::abs_sub2, avx::x64_abs_sub2, 32)); @@ -166,6 +180,8 @@ UTEST_BEGIN("dsp.pmath", abs_op2) IF_ARCH_X86_64(CALL(generic::abs_mul2, avx::x64_abs_mul2, 32)); IF_ARCH_X86_64(CALL(generic::abs_div2, avx::x64_abs_div2, 32)); IF_ARCH_X86_64(CALL(generic::abs_rdiv2, avx::x64_abs_rdiv2, 32)); + IF_ARCH_X86_64(CALL(generic::abs_max2, avx::x64_abs_max2, 32)); + IF_ARCH_X86_64(CALL(generic::abs_min2, avx::x64_abs_min2, 32)); IF_ARCH_X86(CALL(generic::abs_add2, avx512::abs_add2, 64)); IF_ARCH_X86(CALL(generic::abs_sub2, avx512::abs_sub2, 64)); @@ -173,6 +189,8 @@ UTEST_BEGIN("dsp.pmath", abs_op2) IF_ARCH_X86(CALL(generic::abs_mul2, avx512::abs_mul2, 64)); IF_ARCH_X86(CALL(generic::abs_div2, avx512::abs_div2, 64)); IF_ARCH_X86(CALL(generic::abs_rdiv2, avx512::abs_rdiv2, 64)); + IF_ARCH_X86(CALL(generic::abs_max2, avx512::abs_max2, 64)); + IF_ARCH_X86(CALL(generic::abs_min2, avx512::abs_min2, 64)); IF_ARCH_ARM(CALL(generic::abs_add2, neon_d32::abs_add2, 16)); IF_ARCH_ARM(CALL(generic::abs_sub2, neon_d32::abs_sub2, 16)); @@ -180,6 +198,8 @@ UTEST_BEGIN("dsp.pmath", abs_op2) IF_ARCH_ARM(CALL(generic::abs_mul2, neon_d32::abs_mul2, 16)); IF_ARCH_ARM(CALL(generic::abs_div2, neon_d32::abs_div2, 16)); IF_ARCH_ARM(CALL(generic::abs_rdiv2, neon_d32::abs_rdiv2, 16)); + IF_ARCH_ARM(CALL(generic::abs_max2, neon_d32::abs_max2, 16)); + IF_ARCH_ARM(CALL(generic::abs_min2, neon_d32::abs_min2, 16)); IF_ARCH_AARCH64(CALL(generic::abs_add2, asimd::abs_add2, 16)); IF_ARCH_AARCH64(CALL(generic::abs_sub2, 
asimd::abs_sub2, 16)); @@ -187,6 +207,8 @@ UTEST_BEGIN("dsp.pmath", abs_op2) IF_ARCH_AARCH64(CALL(generic::abs_mul2, asimd::abs_mul2, 16)); IF_ARCH_AARCH64(CALL(generic::abs_div2, asimd::abs_div2, 16)); IF_ARCH_AARCH64(CALL(generic::abs_rdiv2, asimd::abs_rdiv2, 16)); + IF_ARCH_AARCH64(CALL(generic::abs_max2, asimd::abs_max2, 16)); + IF_ARCH_AARCH64(CALL(generic::abs_min2, asimd::abs_min2, 16)); } UTEST_END diff --git a/src/test/utest/pmath/abs_op3.cpp b/src/test/utest/pmath/abs_op3.cpp index 19439828..91713d40 100644 --- a/src/test/utest/pmath/abs_op3.cpp +++ b/src/test/utest/pmath/abs_op3.cpp @@ -1,6 +1,6 @@ /* - * Copyright (C) 2020 Linux Studio Plugins Project - * (C) 2020 Vladimir Sadovnikov + * Copyright (C) 2024 Linux Studio Plugins Project + * (C) 2024 Vladimir Sadovnikov * * This file is part of lsp-dsp-lib * Created on: 31 мар. 2020 г. @@ -36,6 +36,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float *dst, const float *src1, const float *src2, size_t count); } IF_ARCH_X86( @@ -47,6 +49,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float *dst, const float *src1, const float *src2, size_t count); } namespace avx512 @@ -57,6 +61,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const 
float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float *dst, const float *src1, const float *src2, size_t count); } ) @@ -69,6 +75,8 @@ namespace lsp void x64_abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void x64_abs_div3(float *dst, const float *src1, const float *src2, size_t count); void x64_abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void x64_abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void x64_abs_min3(float *dst, const float *src1, const float *src2, size_t count); } ) @@ -81,6 +89,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float *dst, const float *src1, const float *src2, size_t count); } ) @@ -93,6 +103,8 @@ namespace lsp void abs_mul3(float *dst, const float *src1, const float *src2, size_t count); void abs_div3(float *dst, const float *src1, const float *src2, size_t count); void abs_rdiv3(float *dst, const float *src1, const float *src2, size_t count); + void abs_max3(float *dst, const float *src1, const float *src2, size_t count); + void abs_min3(float *dst, const float *src1, const float *src2, size_t count); } ) } @@ -158,6 +170,8 @@ UTEST_BEGIN("dsp.pmath", abs_op3) IF_ARCH_X86(CALL(generic::abs_mul3, sse::abs_mul3, 16)); IF_ARCH_X86(CALL(generic::abs_div3, sse::abs_div3, 16)); IF_ARCH_X86(CALL(generic::abs_rdiv3, sse::abs_rdiv3, 16)); + IF_ARCH_X86(CALL(generic::abs_max3, sse::abs_max3, 16)); + IF_ARCH_X86(CALL(generic::abs_min3, sse::abs_min3, 16)); IF_ARCH_X86_64(CALL(generic::abs_add3, avx::x64_abs_add3, 32)); IF_ARCH_X86_64(CALL(generic::abs_sub3, avx::x64_abs_sub3, 32)); 
@@ -165,6 +179,8 @@ UTEST_BEGIN("dsp.pmath", abs_op3) IF_ARCH_X86_64(CALL(generic::abs_mul3, avx::x64_abs_mul3, 32)); IF_ARCH_X86_64(CALL(generic::abs_div3, avx::x64_abs_div3, 32)); IF_ARCH_X86_64(CALL(generic::abs_rdiv3, avx::x64_abs_rdiv3, 32)); + IF_ARCH_X86_64(CALL(generic::abs_max3, avx::x64_abs_max3, 32)); + IF_ARCH_X86_64(CALL(generic::abs_min3, avx::x64_abs_min3, 32)); IF_ARCH_X86(CALL(generic::abs_add3, avx512::abs_add3, 64)); IF_ARCH_X86(CALL(generic::abs_sub3, avx512::abs_sub3, 64)); @@ -172,6 +188,8 @@ UTEST_BEGIN("dsp.pmath", abs_op3) IF_ARCH_X86(CALL(generic::abs_mul3, avx512::abs_mul3, 64)); IF_ARCH_X86(CALL(generic::abs_div3, avx512::abs_div3, 64)); IF_ARCH_X86(CALL(generic::abs_rdiv3, avx512::abs_rdiv3, 64)); + IF_ARCH_X86(CALL(generic::abs_max3, avx512::abs_max3, 64)); + IF_ARCH_X86(CALL(generic::abs_min3, avx512::abs_min3, 64)); IF_ARCH_ARM(CALL(generic::abs_add3, neon_d32::abs_add3, 16)); IF_ARCH_ARM(CALL(generic::abs_sub3, neon_d32::abs_sub3, 16)); @@ -179,6 +197,8 @@ UTEST_BEGIN("dsp.pmath", abs_op3) IF_ARCH_ARM(CALL(generic::abs_mul3, neon_d32::abs_mul3, 16)); IF_ARCH_ARM(CALL(generic::abs_div3, neon_d32::abs_div3, 16)); IF_ARCH_ARM(CALL(generic::abs_rdiv3, neon_d32::abs_rdiv3, 16)); + IF_ARCH_ARM(CALL(generic::abs_max3, neon_d32::abs_max3, 16)); + IF_ARCH_ARM(CALL(generic::abs_min3, neon_d32::abs_min3, 16)); IF_ARCH_AARCH64(CALL(generic::abs_add3, asimd::abs_add3, 16)); IF_ARCH_AARCH64(CALL(generic::abs_sub3, asimd::abs_sub3, 16)); @@ -186,6 +206,8 @@ UTEST_BEGIN("dsp.pmath", abs_op3) IF_ARCH_AARCH64(CALL(generic::abs_mul3, asimd::abs_mul3, 16)); IF_ARCH_AARCH64(CALL(generic::abs_div3, asimd::abs_div3, 16)); IF_ARCH_AARCH64(CALL(generic::abs_rdiv3, asimd::abs_rdiv3, 16)); + IF_ARCH_AARCH64(CALL(generic::abs_max3, asimd::abs_max3, 16)); + IF_ARCH_AARCH64(CALL(generic::abs_min3, asimd::abs_min3, 16)); } UTEST_END