diff --git a/include/private/dsp/arch/aarch64/asimd/pmath/abs_vv.h b/include/private/dsp/arch/aarch64/asimd/pmath/abs_vv.h
index d32c96c3..5624681f 100644
--- a/include/private/dsp/arch/aarch64/asimd/pmath/abs_vv.h
+++ b/include/private/dsp/arch/aarch64/asimd/pmath/abs_vv.h
@@ -1,6 +1,6 @@
/*
- * Copyright (C) 2020 Linux Studio Plugins Project
- * (C) 2020 Vladimir Sadovnikov
+ * Copyright (C) 2024 Linux Studio Plugins Project
+ * (C) 2024 Vladimir Sadovnikov
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
@@ -324,59 +324,87 @@ namespace lsp
void abs_add2(float *dst, const float *src, size_t count)
{
- ARCH_AARCH64_ASM
- (
- OP_ABS_VV2_CORE("dst", "src", "fadd", OP_DSEL)
- : [dst] "+r" (dst), [src] "+r" (src),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
- );
- }
-
- void abs_sub2(float *dst, const float *src, size_t count)
- {
- ARCH_AARCH64_ASM
- (
- OP_ABS_VV2_CORE("dst", "src", "fsub", OP_DSEL)
- : [dst] "+r" (dst), [src] "+r" (src),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
- );
- }
-
- void abs_rsub2(float *dst, const float *src, size_t count)
- {
- ARCH_AARCH64_ASM
- (
- OP_ABS_VV2_CORE("dst", "src", "fsub", OP_RSEL)
- : [dst] "+r" (dst), [src] "+r" (src),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
- );
- }
-
- void abs_mul2(float *dst, const float *src, size_t count)
- {
- ARCH_AARCH64_ASM
- (
- OP_ABS_VV2_CORE("dst", "src", "fmul", OP_DSEL)
- : [dst] "+r" (dst), [src] "+r" (src),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
- );
- }
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV2_CORE("dst", "src", "fadd", OP_DSEL)
+ : [dst] "+r" (dst), [src] "+r" (src),
+ [count] "+r" (count)
+ :
+ : "cc", "memory",
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_sub2(float *dst, const float *src, size_t count) // Two-operand kernel; presumably dst[i] = dst[i] - |src[i]| for count floats -- TODO confirm against OP_ABS_VV2_CORE (defined outside this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV2_CORE("dst", "src", "fsub", OP_DSEL) // Shared asm body macro, instantiated with "fsub" as the combining instruction and the OP_DSEL operand selector.
+ : [dst] "+r" (dst), [src] "+r" (src), // "+r": both pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line).
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_rsub2(float *dst, const float *src, size_t count) // Two-operand reverse-subtract kernel; presumably dst[i] = |src[i]| - dst[i] for count floats -- TODO confirm against OP_ABS_VV2_CORE (defined outside this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV2_CORE("dst", "src", "fsub", OP_RSEL) // Same "fsub" instruction as abs_sub2; the only difference is the OP_RSEL selector instead of OP_DSEL.
+ : [dst] "+r" (dst), [src] "+r" (src), // "+r": both pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line).
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_mul2(float *dst, const float *src, size_t count) // Two-operand kernel; presumably dst[i] = dst[i] * |src[i]| for count floats -- TODO confirm against OP_ABS_VV2_CORE (defined outside this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV2_CORE("dst", "src", "fmul", OP_DSEL) // Shared asm body macro, instantiated with "fmul" as the combining instruction and the OP_DSEL operand selector.
+ : [dst] "+r" (dst), [src] "+r" (src), // "+r": both pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line).
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_max2(float *dst, const float *src, size_t count) // New two-operand kernel; presumably dst[i] = max(dst[i], |src[i]|) for count floats -- TODO confirm against OP_ABS_VV2_CORE (defined outside this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV2_CORE("dst", "src", "fmax", OP_DSEL) // Shared asm body macro, instantiated with "fmax" as the combining instruction and the OP_DSEL operand selector.
+ : [dst] "+r" (dst), [src] "+r" (src), // "+r": both pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line); same list as the other OP_ABS_VV2_CORE users.
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_min2(float *dst, const float *src, size_t count) // New two-operand kernel; presumably dst[i] = min(dst[i], |src[i]|) for count floats -- TODO confirm against OP_ABS_VV2_CORE (defined outside this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV2_CORE("dst", "src", "fmin", OP_DSEL) // Shared asm body macro, instantiated with "fmin" as the combining instruction and the OP_DSEL operand selector.
+ : [dst] "+r" (dst), [src] "+r" (src), // "+r": both pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line); same list as the other OP_ABS_VV2_CORE users.
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
#undef OP_ABS_VV2_CORE
@@ -485,35 +513,35 @@ namespace lsp
__ASM_EMIT("bge 7b") \
__ASM_EMIT("8:")
- void abs_div2(float *dst, const float *src, size_t count)
- {
- ARCH_AARCH64_ASM
- (
- OP_ABS_DIV2_CORE("dst", "src", OP_DSEL)
- : [dst] "+r" (dst), [src] "+r" (src),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19",
- "v24", "v25", "v26", "v27"
- );
- }
-
- void abs_rdiv2(float *dst, const float *src, size_t count)
- {
- ARCH_AARCH64_ASM
- (
- OP_ABS_DIV2_CORE("dst", "src", OP_RSEL)
- : [dst] "+r" (dst), [src] "+r" (src),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19",
- "v24", "v25", "v26", "v27"
- );
- }
+ void abs_div2(float *dst, const float *src, size_t count) // Two-operand division kernel; presumably dst[i] = dst[i] / |src[i]| for count floats -- TODO confirm against OP_ABS_DIV2_CORE (defined above this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_DIV2_CORE("dst", "src", OP_DSEL) // Dedicated division body macro: takes no instruction mnemonic, only the operand names and the OP_DSEL selector.
+ : [dst] "+r" (dst), [src] "+r" (src), // "+r": both pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next two lines).
+ "v16", "v17", "v18", "v19",
+ "v24", "v25", "v26", "v27" // Unlike the OP_ABS_VV2_CORE users, this list names v24-v27 rather than v20-v23.
+ );
+ }
+
+ void abs_rdiv2(float *dst, const float *src, size_t count) // Two-operand reverse-division kernel; presumably dst[i] = |src[i]| / dst[i] for count floats -- TODO confirm against OP_ABS_DIV2_CORE (defined above this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_DIV2_CORE("dst", "src", OP_RSEL) // Same body macro as abs_div2; the only difference is the OP_RSEL selector instead of OP_DSEL.
+ : [dst] "+r" (dst), [src] "+r" (src), // "+r": both pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next two lines).
+ "v16", "v17", "v18", "v19",
+ "v24", "v25", "v26", "v27" // Unlike the OP_ABS_VV2_CORE users, this list names v24-v27 rather than v20-v23.
+ );
+ }
#undef OP_ABS_DIV2_CORE
@@ -622,61 +650,89 @@ namespace lsp
__ASM_EMIT("bge 9b") \
__ASM_EMIT("10:")
- void abs_add3(float *dst, const float *src1, const float *src2, size_t count)
- {
- ARCH_AARCH64_ASM
- (
- OP_ABS_VV3_CORE("dst", "src1", "src2", "fadd", OP_DSEL)
- : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
- );
- }
-
- void abs_sub3(float *dst, const float *src1, const float *src2, size_t count)
- {
- ARCH_AARCH64_ASM
- (
- OP_ABS_VV3_CORE("dst", "src1", "src2", "fsub", OP_DSEL)
- : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
- );
- }
-
- void abs_rsub3(float *dst, const float *src1, const float *src2, size_t count)
- {
- ARCH_AARCH64_ASM
- (
- OP_ABS_VV3_CORE("dst", "src1", "src2", "fsub", OP_RSEL)
- : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
- );
- }
-
- void abs_mul3(float *dst, const float *src1, const float *src2, size_t count)
- {
- ARCH_AARCH64_ASM
- (
- OP_ABS_VV3_CORE("dst", "src1", "src2", "fmul", OP_DSEL)
- : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2),
- [count] "+r" (count)
- :
- : "cc", "memory",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
- );
- }
+ void abs_add3(float *dst, const float *src1, const float *src2, size_t count) // Three-operand kernel; presumably dst[i] = src1[i] + |src2[i]| for count floats -- TODO confirm against OP_ABS_VV3_CORE (defined above this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV3_CORE("dst", "src1", "src2", "fadd", OP_DSEL) // Shared asm body macro, instantiated with "fadd" as the combining instruction and the OP_DSEL operand selector.
+ : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2), // "+r": all three pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line).
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_sub3(float *dst, const float *src1, const float *src2, size_t count) // Three-operand kernel; presumably dst[i] = src1[i] - |src2[i]| for count floats -- TODO confirm against OP_ABS_VV3_CORE (defined above this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV3_CORE("dst", "src1", "src2", "fsub", OP_DSEL) // Shared asm body macro, instantiated with "fsub" as the combining instruction and the OP_DSEL operand selector.
+ : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2), // "+r": all three pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line).
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_rsub3(float *dst, const float *src1, const float *src2, size_t count) // Three-operand reverse-subtract kernel; presumably dst[i] = |src2[i]| - src1[i] for count floats -- TODO confirm against OP_ABS_VV3_CORE (defined above this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV3_CORE("dst", "src1", "src2", "fsub", OP_RSEL) // Same "fsub" instruction as abs_sub3; the only difference is the OP_RSEL selector instead of OP_DSEL.
+ : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2), // "+r": all three pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line).
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_mul3(float *dst, const float *src1, const float *src2, size_t count) // Three-operand kernel; presumably dst[i] = src1[i] * |src2[i]| for count floats -- TODO confirm against OP_ABS_VV3_CORE (defined above this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV3_CORE("dst", "src1", "src2", "fmul", OP_DSEL) // Shared asm body macro, instantiated with "fmul" as the combining instruction and the OP_DSEL operand selector.
+ : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2), // "+r": all three pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line).
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_max3(float *dst, const float *src1, const float *src2, size_t count) // New three-operand kernel; presumably dst[i] = max(src1[i], |src2[i]|) for count floats -- TODO confirm against OP_ABS_VV3_CORE (defined above this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV3_CORE("dst", "src1", "src2", "fmax", OP_DSEL) // Shared asm body macro, instantiated with "fmax" as the combining instruction and the OP_DSEL operand selector.
+ : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2), // "+r": all three pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line); same list as the other OP_ABS_VV3_CORE users.
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
+
+ void abs_min3(float *dst, const float *src1, const float *src2, size_t count) // New three-operand kernel; presumably dst[i] = min(src1[i], |src2[i]|) for count floats -- TODO confirm against OP_ABS_VV3_CORE (defined above this hunk).
+ {
+ ARCH_AARCH64_ASM
+ (
+ OP_ABS_VV3_CORE("dst", "src1", "src2", "fmin", OP_DSEL) // Shared asm body macro, instantiated with "fmin" as the combining instruction and the OP_DSEL operand selector.
+ : [dst] "+r" (dst), [src1] "+r" (src1), [src2] "+r" (src2), // "+r": all three pointers are read-write register operands of the asm statement.
+ [count] "+r" (count) // The element counter is a read-write register operand as well.
+ :
+ : "cc", "memory", // Clobbers: condition flags and memory.
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Vector registers declared as clobbered (continues on the next line); same list as the other OP_ABS_VV3_CORE users.
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
+ );
+ }
#undef OP_ABS_VV3_CORE
@@ -823,7 +879,8 @@ namespace lsp
#undef OP_DSEL
#undef OP_RSEL
- }
-}
+
+ } /* namespace asimd */
+} /* namespace lsp */
#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_PMATH_ABS_VV_H_ */
diff --git a/src/main/aarch64/aarch64.cpp b/src/main/aarch64/aarch64.cpp
index ae2b8b56..b21b23ad 100644
--- a/src/main/aarch64/aarch64.cpp
+++ b/src/main/aarch64/aarch64.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (C) 2020 Linux Studio Plugins Project
- * (C) 2020 Vladimir Sadovnikov
+ * Copyright (C) 2024 Linux Studio Plugins Project
+ * (C) 2024 Vladimir Sadovnikov
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
@@ -334,8 +334,8 @@ namespace lsp
// Initialize Advanced SIMD support
asimd::dsp_init(f);
}
- }
-}
+ } /* namespace aarch64 */
+} /* namespace lsp */
#endif /* ARCH_AARCH64 */
diff --git a/src/main/aarch64/asimd.cpp b/src/main/aarch64/asimd.cpp
index a5eac977..670073e9 100644
--- a/src/main/aarch64/asimd.cpp
+++ b/src/main/aarch64/asimd.cpp
@@ -264,6 +264,8 @@
EXPORT1(abs_mul2);
EXPORT1(abs_div2);
EXPORT1(abs_rdiv2);
+ EXPORT1(abs_max2);
+ EXPORT1(abs_min2);
EXPORT1(abs_add3);
EXPORT1(abs_sub3);
@@ -271,6 +273,8 @@
EXPORT1(abs_mul3);
EXPORT1(abs_div3);
EXPORT1(abs_rdiv3);
+ EXPORT1(abs_max3);
+ EXPORT1(abs_min3);
EXPORT1(h_sum);
EXPORT1(h_sqr_sum);