Skip to content

Commit

Permalink
Implementation of x86 abs_max and abs_min functions
Browse files Browse the repository at this point in the history
  • Loading branch information
sadko4u committed Nov 2, 2024
1 parent ad05221 commit 3ec2043
Show file tree
Hide file tree
Showing 10 changed files with 252 additions and 16 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*******************************************************************************

=== 1.0.28 ===

* Implemented abs_max2, abs_min2, abs_max3 and abs_min3 functions.

=== 1.0.27 ===
* Updated build scripts.
Expand Down
2 changes: 1 addition & 1 deletion include/private/dsp/arch/generic/pmath/abs_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ namespace lsp
/**
 * Generic (portable) implementation of abs_min2.
 *
 * For every index i in [0, count) stores the smaller of the current dst[i]
 * and the absolute value of src[i] back into dst[i]:
 *     dst[i] = min(dst[i], |src[i]|)
 *
 * @param dst   in/out buffer, read and overwritten element by element
 * @param src   input buffer whose absolute values are compared against dst
 * @param count number of elements to process; nothing is touched when 0
 */
void abs_min2(float *dst, const float *src, size_t count)
{
    // Only the minimum is taken: the earlier lsp_max(...) statement belonged
    // to abs_max2 semantics and must not be executed here.
    for (size_t i=0; i<count; ++i)
        dst[i] = lsp_min(dst[i], fabsf(src[i]));
}

void abs_add3(float *dst, const float *src1, const float *src2, size_t count)
Expand Down
72 changes: 68 additions & 4 deletions include/private/dsp/arch/x86/avx/pmath/abs_vv.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2020 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
Expand Down Expand Up @@ -213,6 +213,38 @@ namespace lsp
);
}

/**
 * AVX (x86_64 only) implementation exported as abs_max2 through
 * CEXPORT2_X64(favx, abs_max2, x64_abs_max2).
 *
 * Presumably computes dst[i] = max(dst[i], |src[i]|) for count elements:
 * the instruction stream is produced by ABS_V4_CORE with the "vmax"
 * mnemonic prefix, but that macro is defined outside this view -
 * TODO confirm operand roles and NaN behaviour against the macro.
 *
 * @param dst   in/out buffer (passed to the macro as both destination and first source)
 * @param src   second source buffer
 * @param count number of elements to process
 */
void x64_abs_max2(float *dst, const float *src, size_t count)
{
// Scratch offset register; wrapped in IF_ARCH_X86_64, presumably so that it
// is only declared when building for x86_64.
IF_ARCH_X86_64(size_t off);
ARCH_X86_64_ASM
(
ABS_V4_CORE("dst", "dst", "src", "vmax", OP_DSEL)
// Outputs: off is an early-clobber scratch register, count is read and modified.
: [off] "=&r" (off), [count] "+r" (count)
// Inputs: buffer pointers in registers, abs_vv_const as memory operand SIGN.
: [dst] "r"(dst), [src] "r"(src),
[SIGN] "m" (abs_vv_const)
// Clobbers: flags, memory (results are written through dst) and xmm0-xmm9.
: "cc", "memory",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9"
);
}

/**
 * AVX (x86_64 only) implementation exported as abs_min2 through
 * CEXPORT2_X64(favx, abs_min2, x64_abs_min2).
 *
 * Presumably computes dst[i] = min(dst[i], |src[i]|) for count elements,
 * matching the generic abs_min2; the instruction stream is produced by
 * ABS_V4_CORE with the "vmin" mnemonic prefix, and that macro is defined
 * outside this view - TODO confirm operand roles against the macro.
 *
 * @param dst   in/out buffer (passed to the macro as both destination and first source)
 * @param src   second source buffer
 * @param count number of elements to process
 */
void x64_abs_min2(float *dst, const float *src, size_t count)
{
// Scratch offset register; wrapped in IF_ARCH_X86_64, presumably so that it
// is only declared when building for x86_64.
IF_ARCH_X86_64(size_t off);
ARCH_X86_64_ASM
(
ABS_V4_CORE("dst", "dst", "src", "vmin", OP_DSEL)
// Outputs: off is an early-clobber scratch register, count is read and modified.
: [off] "=&r" (off), [count] "+r" (count)
// Inputs: buffer pointers in registers, abs_vv_const as memory operand SIGN.
: [dst] "r"(dst), [src] "r"(src),
[SIGN] "m" (abs_vv_const)
// Clobbers: flags, memory (results are written through dst) and xmm0-xmm9.
: "cc", "memory",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9"
);
}

void x64_abs_add3(float *dst, const float *src1, const float *src2, size_t count)
{
IF_ARCH_X86_64(size_t off);
Expand Down Expand Up @@ -309,6 +341,38 @@ namespace lsp
);
}

/**
 * AVX (x86_64 only) implementation exported as abs_max3 through
 * CEXPORT2_X64(favx, abs_max3, x64_abs_max3).
 *
 * Three-buffer form of abs_max2: presumably dst[i] = max(src1[i], |src2[i]|)
 * for count elements. The exact operand roles are decided by ABS_V4_CORE and
 * OP_DSEL, which are defined outside this view - TODO confirm.
 *
 * @param dst   destination buffer
 * @param src1  first source buffer
 * @param src2  second source buffer
 * @param count number of elements to process
 */
void x64_abs_max3(float *dst, const float *src1, const float *src2, size_t count)
{
// Scratch offset register; wrapped in IF_ARCH_X86_64, presumably so that it
// is only declared when building for x86_64.
IF_ARCH_X86_64(size_t off);
ARCH_X86_64_ASM
(
ABS_V4_CORE("dst", "src1", "src2", "vmax", OP_DSEL)
// Outputs: off is an early-clobber scratch register, count is read and modified.
: [off] "=&r" (off), [count] "+r" (count)
// Inputs: buffer pointers in registers, abs_vv_const as memory operand SIGN.
: [dst] "r"(dst), [src1] "r" (src1), [src2] "r" (src2),
[SIGN] "m" (abs_vv_const)
// Clobbers: flags, memory (results are written through dst) and xmm0-xmm9.
: "cc", "memory",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9"
);
}

/**
 * AVX (x86_64 only) implementation exported as abs_min3 through
 * CEXPORT2_X64(favx, abs_min3, x64_abs_min3).
 *
 * Three-buffer form of abs_min2: presumably dst[i] = min(src1[i], |src2[i]|)
 * for count elements. The exact operand roles are decided by ABS_V4_CORE and
 * OP_DSEL, which are defined outside this view - TODO confirm.
 *
 * @param dst   destination buffer
 * @param src1  first source buffer
 * @param src2  second source buffer
 * @param count number of elements to process
 */
void x64_abs_min3(float *dst, const float *src1, const float *src2, size_t count)
{
// Scratch offset register; wrapped in IF_ARCH_X86_64, presumably so that it
// is only declared when building for x86_64.
IF_ARCH_X86_64(size_t off);
ARCH_X86_64_ASM
(
ABS_V4_CORE("dst", "src1", "src2", "vmin", OP_DSEL)
// Outputs: off is an early-clobber scratch register, count is read and modified.
: [off] "=&r" (off), [count] "+r" (count)
// Inputs: buffer pointers in registers, abs_vv_const as memory operand SIGN.
: [dst] "r"(dst), [src1] "r" (src1), [src2] "r" (src2),
[SIGN] "m" (abs_vv_const)
// Clobbers: flags, memory (results are written through dst) and xmm0-xmm9.
: "cc", "memory",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
"%xmm8", "%xmm9"
);
}

#undef ABS_V4_CORE

#define ABS_CORE(DST, SRC) \
Expand Down Expand Up @@ -425,7 +489,7 @@ namespace lsp

#undef OP_DSEL
#undef OP_RSEL
}
}
} /* namespace avx */
} /* namespace lsp */

#endif /* PRIVATE_DSP_ARCH_X86_AVX_PMATH_ABS_VV_H_ */
64 changes: 62 additions & 2 deletions include/private/dsp/arch/x86/avx512/pmath/abs_vv.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 24 окт. 2023 г.
Expand Down Expand Up @@ -207,6 +207,36 @@ namespace lsp
);
}

/**
 * AVX-512 implementation of abs_max2, registered via CEXPORT1(vl, abs_max2).
 *
 * Presumably computes dst[i] = max(dst[i], |src[i]|) for count elements:
 * the instruction stream is produced by ABS_OP_CORE with the "vmax"
 * mnemonic prefix, but that macro is defined outside this view -
 * TODO confirm operand roles and NaN behaviour against the macro.
 *
 * @param dst   in/out buffer (passed to the macro as both destination and first source)
 * @param src   second source buffer
 * @param count number of elements to process
 */
void abs_max2(float *dst, const float *src, size_t count)
{
// Scratch offset register; wrapped in IF_ARCH_X86_64, presumably so that it
// is only declared when building for x86_64.
IF_ARCH_X86_64(size_t off);
ARCH_X86_64_ASM
(
ABS_OP_CORE("dst", "dst", "src", "vmax", OP_DSEL)
// Outputs: off is an early-clobber scratch register, count is read and modified.
: [off] "=&r" (off), [count] "+r" (count)
// Inputs: buffer pointers in registers, abs_vv_const as memory operand SIGN.
: [dst] "r"(dst), [src] "r"(src),
[SIGN] "m" (abs_vv_const)
// Clobbers: flags, memory (results are written through dst) and xmm0-xmm7.
: "cc", "memory",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
}

/**
 * AVX-512 implementation of abs_min2, registered via CEXPORT1(vl, abs_min2).
 *
 * Presumably computes dst[i] = min(dst[i], |src[i]|) for count elements,
 * matching the generic abs_min2; the instruction stream is produced by
 * ABS_OP_CORE with the "vmin" mnemonic prefix, and that macro is defined
 * outside this view - TODO confirm operand roles against the macro.
 *
 * @param dst   in/out buffer (passed to the macro as both destination and first source)
 * @param src   second source buffer
 * @param count number of elements to process
 */
void abs_min2(float *dst, const float *src, size_t count)
{
// Scratch offset register; wrapped in IF_ARCH_X86_64, presumably so that it
// is only declared when building for x86_64.
IF_ARCH_X86_64(size_t off);
ARCH_X86_64_ASM
(
ABS_OP_CORE("dst", "dst", "src", "vmin", OP_DSEL)
// Outputs: off is an early-clobber scratch register, count is read and modified.
: [off] "=&r" (off), [count] "+r" (count)
// Inputs: buffer pointers in registers, abs_vv_const as memory operand SIGN.
: [dst] "r"(dst), [src] "r"(src),
[SIGN] "m" (abs_vv_const)
// Clobbers: flags, memory (results are written through dst) and xmm0-xmm7.
: "cc", "memory",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
}

void abs_add3(float *dst, const float *src1, const float *src2, size_t count)
{
IF_ARCH_X86_64(size_t off);
Expand Down Expand Up @@ -297,6 +327,36 @@ namespace lsp
);
}

/**
 * AVX-512 implementation of abs_max3, registered via CEXPORT1(vl, abs_max3).
 *
 * Three-buffer form of abs_max2: presumably dst[i] = max(src1[i], |src2[i]|)
 * for count elements. The exact operand roles are decided by ABS_OP_CORE and
 * OP_DSEL, which are defined outside this view - TODO confirm.
 *
 * @param dst   destination buffer
 * @param src1  first source buffer
 * @param src2  second source buffer
 * @param count number of elements to process
 */
void abs_max3(float *dst, const float *src1, const float *src2, size_t count)
{
// Scratch offset register; wrapped in IF_ARCH_X86_64, presumably so that it
// is only declared when building for x86_64.
IF_ARCH_X86_64(size_t off);
ARCH_X86_64_ASM
(
ABS_OP_CORE("dst", "src1", "src2", "vmax", OP_DSEL)
// Outputs: off is an early-clobber scratch register, count is read and modified.
: [off] "=&r" (off), [count] "+r" (count)
// Inputs: buffer pointers in registers, abs_vv_const as memory operand SIGN.
: [dst] "r"(dst), [src1] "r" (src1), [src2] "r" (src2),
[SIGN] "m" (abs_vv_const)
// Clobbers: flags, memory (results are written through dst) and xmm0-xmm7.
: "cc", "memory",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
}

/**
 * AVX-512 implementation of abs_min3, registered via CEXPORT1(vl, abs_min3).
 *
 * Three-buffer form of abs_min2: presumably dst[i] = min(src1[i], |src2[i]|)
 * for count elements. The exact operand roles are decided by ABS_OP_CORE and
 * OP_DSEL, which are defined outside this view - TODO confirm.
 *
 * @param dst   destination buffer
 * @param src1  first source buffer
 * @param src2  second source buffer
 * @param count number of elements to process
 */
void abs_min3(float *dst, const float *src1, const float *src2, size_t count)
{
// Scratch offset register; wrapped in IF_ARCH_X86_64, presumably so that it
// is only declared when building for x86_64.
IF_ARCH_X86_64(size_t off);
ARCH_X86_64_ASM
(
ABS_OP_CORE("dst", "src1", "src2", "vmin", OP_DSEL)
// Outputs: off is an early-clobber scratch register, count is read and modified.
: [off] "=&r" (off), [count] "+r" (count)
// Inputs: buffer pointers in registers, abs_vv_const as memory operand SIGN.
: [dst] "r"(dst), [src1] "r" (src1), [src2] "r" (src2),
[SIGN] "m" (abs_vv_const)
// Clobbers: flags, memory (results are written through dst) and xmm0-xmm7.
: "cc", "memory",
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
}

#undef ABS_OP_CORE

#define ABS_CORE(DST, SRC) \
Expand Down
64 changes: 60 additions & 4 deletions include/private/dsp/arch/x86/sse/pmath/abs_vv.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2020 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
Expand Down Expand Up @@ -303,6 +303,34 @@ namespace lsp
);
}

/**
 * SSE implementation of abs_max2, registered via EXPORT1(abs_max2).
 *
 * Presumably computes dst[i] = max(dst[i], |src[i]|) for count elements:
 * the instruction stream is produced by ABS_OP2_CORE with the "max"
 * operation name, but that macro is defined outside this view -
 * TODO confirm operand roles and NaN behaviour against the macro.
 *
 * @param dst   in/out buffer (passed to the macro as both destination and first source)
 * @param src   second source buffer
 * @param count number of elements to process
 */
void abs_max2(float *dst, const float *src, size_t count)
{
    // Scratch offset register; wrapped in IF_ARCH_X86, presumably so that it
    // is only declared when building for x86.
    IF_ARCH_X86(size_t off);
    ARCH_X86_ASM
    (
        ABS_OP2_CORE("max", "dst", "dst", "src", OP_DSEL)
        // Outputs: off is an early-clobber scratch register, count is read and modified.
        : [off] "=&r" (off), [count] "+r" (count)
        // Inputs: buffer pointers in registers, abs_vv_const as memory operand X_SIGN.
        : [dst] "r" (dst), [src] "r" (src),
          [X_SIGN] "m" (abs_vv_const)
        // dst is only a register input, so the stores performed through it are
        // invisible to the compiler unless "memory" is clobbered; "cc" covers
        // flag-modifying instructions. Both match the AVX and AVX-512 variants.
        : "cc", "memory",
          "%xmm0", "%xmm1", "%xmm2", "%xmm3",
          "%xmm4", "%xmm5", "%xmm6", "%xmm7"
    );
}

/**
 * SSE implementation of abs_min2, registered via EXPORT1(abs_min2).
 *
 * Presumably computes dst[i] = min(dst[i], |src[i]|) for count elements,
 * matching the generic abs_min2; the instruction stream is produced by
 * ABS_OP2_CORE with the "min" operation name, and that macro is defined
 * outside this view - TODO confirm operand roles against the macro.
 *
 * @param dst   in/out buffer (passed to the macro as both destination and first source)
 * @param src   second source buffer
 * @param count number of elements to process
 */
void abs_min2(float *dst, const float *src, size_t count)
{
    // Scratch offset register; wrapped in IF_ARCH_X86, presumably so that it
    // is only declared when building for x86.
    IF_ARCH_X86(size_t off);
    ARCH_X86_ASM
    (
        ABS_OP2_CORE("min", "dst", "dst", "src", OP_DSEL)
        // Outputs: off is an early-clobber scratch register, count is read and modified.
        : [off] "=&r" (off), [count] "+r" (count)
        // Inputs: buffer pointers in registers, abs_vv_const as memory operand X_SIGN.
        : [dst] "r" (dst), [src] "r" (src),
          [X_SIGN] "m" (abs_vv_const)
        // dst is only a register input, so the stores performed through it are
        // invisible to the compiler unless "memory" is clobbered; "cc" covers
        // flag-modifying instructions. Both match the AVX and AVX-512 variants.
        : "cc", "memory",
          "%xmm0", "%xmm1", "%xmm2", "%xmm3",
          "%xmm4", "%xmm5", "%xmm6", "%xmm7"
    );
}

void abs_add3(float *dst, const float *src1, const float *src2, size_t count)
{
IF_ARCH_X86(size_t off);
Expand Down Expand Up @@ -387,11 +415,39 @@ namespace lsp
);
}

/**
 * SSE implementation of abs_max3, registered via EXPORT1(abs_max3).
 *
 * Three-buffer form of abs_max2: presumably dst[i] = max(src1[i], |src2[i]|)
 * for count elements. The exact operand roles are decided by ABS_OP2_CORE and
 * OP_DSEL, which are defined outside this view - TODO confirm.
 *
 * @param dst   destination buffer
 * @param src1  first source buffer
 * @param src2  second source buffer
 * @param count number of elements to process
 */
void abs_max3(float *dst, const float *src1, const float *src2, size_t count)
{
    // Scratch offset register; wrapped in IF_ARCH_X86, presumably so that it
    // is only declared when building for x86.
    IF_ARCH_X86(size_t off);
    ARCH_X86_ASM
    (
        ABS_OP2_CORE("max", "dst", "src1", "src2", OP_DSEL)
        // Outputs: off is an early-clobber scratch register, count is read and modified.
        : [off] "=&r" (off), [count] "+r" (count)
        // Inputs: buffer pointers in registers, abs_vv_const as memory operand X_SIGN.
        : [dst] "r" (dst), [src1] "r" (src1), [src2] "r" (src2),
          [X_SIGN] "m" (abs_vv_const)
        // dst is only a register input, so the stores performed through it are
        // invisible to the compiler unless "memory" is clobbered; "cc" covers
        // flag-modifying instructions. Both match the AVX and AVX-512 variants.
        : "cc", "memory",
          "%xmm0", "%xmm1", "%xmm2", "%xmm3",
          "%xmm4", "%xmm5", "%xmm6", "%xmm7"
    );
}

/**
 * SSE implementation of abs_min3, registered via EXPORT1(abs_min3).
 *
 * Three-buffer form of abs_min2: presumably dst[i] = min(src1[i], |src2[i]|)
 * for count elements. The exact operand roles are decided by ABS_OP2_CORE and
 * OP_DSEL, which are defined outside this view - TODO confirm.
 *
 * @param dst   destination buffer
 * @param src1  first source buffer
 * @param src2  second source buffer
 * @param count number of elements to process
 */
void abs_min3(float *dst, const float *src1, const float *src2, size_t count)
{
    // Scratch offset register; wrapped in IF_ARCH_X86, presumably so that it
    // is only declared when building for x86.
    IF_ARCH_X86(size_t off);
    ARCH_X86_ASM
    (
        ABS_OP2_CORE("min", "dst", "src1", "src2", OP_DSEL)
        // Outputs: off is an early-clobber scratch register, count is read and modified.
        : [off] "=&r" (off), [count] "+r" (count)
        // Inputs: buffer pointers in registers, abs_vv_const as memory operand X_SIGN.
        : [dst] "r" (dst), [src1] "r" (src1), [src2] "r" (src2),
          [X_SIGN] "m" (abs_vv_const)
        // dst is only a register input, so the stores performed through it are
        // invisible to the compiler unless "memory" is clobbered; "cc" covers
        // flag-modifying instructions. Both match the AVX and AVX-512 variants.
        : "cc", "memory",
          "%xmm0", "%xmm1", "%xmm2", "%xmm3",
          "%xmm4", "%xmm5", "%xmm6", "%xmm7"
    );
}

#undef ABS_OP2_CORE

#undef OP_DSEL
#undef OP_RSEL
}
}
} /* namespace sse */
} /* namespace lsp */

#endif /* PRIVATE_DSP_ARCH_X86_SSE_PMATH_ABS_VV_H_ */
4 changes: 4 additions & 0 deletions src/main/x86/avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,17 @@
CEXPORT2_X64(favx, abs_mul2, x64_abs_mul2);
CEXPORT2_X64(favx, abs_div2, x64_abs_div2);
CEXPORT2_X64(favx, abs_rdiv2, x64_abs_rdiv2);
CEXPORT2_X64(favx, abs_max2, x64_abs_max2);
CEXPORT2_X64(favx, abs_min2, x64_abs_min2);

CEXPORT2_X64(favx, abs_add3, x64_abs_add3);
CEXPORT2_X64(favx, abs_sub3, x64_abs_sub3);
CEXPORT2_X64(favx, abs_rsub3, x64_abs_rsub3);
CEXPORT2_X64(favx, abs_mul3, x64_abs_mul3);
CEXPORT2_X64(favx, abs_div3, x64_abs_div3);
CEXPORT2_X64(favx, abs_rdiv3, x64_abs_rdiv3);
CEXPORT2_X64(favx, abs_max3, x64_abs_max3);
CEXPORT2_X64(favx, abs_min3, x64_abs_min3);

CEXPORT2_X64(favx, abs1, x64_abs1);
CEXPORT2_X64(favx, abs2, x64_abs2);
Expand Down
4 changes: 4 additions & 0 deletions src/main/x86/avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,17 @@
CEXPORT1(vl, abs_mul2);
CEXPORT1(vl, abs_div2);
CEXPORT1(vl, abs_rdiv2);
CEXPORT1(vl, abs_max2);
CEXPORT1(vl, abs_min2);

CEXPORT1(vl, abs_add3);
CEXPORT1(vl, abs_sub3);
CEXPORT1(vl, abs_rsub3);
CEXPORT1(vl, abs_mul3);
CEXPORT1(vl, abs_div3);
CEXPORT1(vl, abs_rdiv3);
CEXPORT1(vl, abs_max3);
CEXPORT1(vl, abs_min3);

CEXPORT1(vl, exp1);
CEXPORT1(vl, exp2);
Expand Down
4 changes: 4 additions & 0 deletions src/main/x86/sse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,17 @@
EXPORT1(abs_mul2);
EXPORT1(abs_div2);
EXPORT1(abs_rdiv2);
EXPORT1(abs_max2);
EXPORT1(abs_min2);

EXPORT1(abs_add3);
EXPORT1(abs_sub3);
EXPORT1(abs_rsub3);
EXPORT1(abs_mul3);
EXPORT1(abs_div3);
EXPORT1(abs_rdiv3);
EXPORT1(abs_max3);
EXPORT1(abs_min3);

EXPORT1(min);
EXPORT1(max);
Expand Down
Loading

0 comments on commit 3ec2043

Please sign in to comment.