-
-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Split unit tests and performance tests, some AMD-related optimizations
- Loading branch information
Showing
11 changed files
with
507 additions
and
61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/* | ||
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> | ||
* (C) 2020 Vladimir Sadovnikov <[email protected]> | ||
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/> | ||
* (C) 2023 Vladimir Sadovnikov <[email protected]> | ||
* | ||
* This file is part of lsp-dsp-lib | ||
* Created on: 31 мар. 2020 г. | ||
|
@@ -116,12 +116,18 @@ | |
|
||
//------------------------------------------------------------------------- | ||
// Different processor families | ||
|
||
#define INTEL_FAMILY_686_CORE 0x06 | ||
|
||
#define AMD_FAMILY_K8_HAMMER 0x0f | ||
#define AMD_FAMILY_K10 0x10 | ||
#define AMD_FAMILY_BOBCAT 0x14 | ||
#define AMD_FAMILY_BULLDOZER 0x15 | ||
#define AMD_FAMILY_JAGUAR 0x16 | ||
#define AMD_FAMILY_ZEN_1_2 0x17 | ||
#define AMD_FAMILY_DHYANA 0x18 | ||
#define AMD_FAMILY_ZEN_3_4 0x19 | ||
#define AMD_FAMILY_ZEN_5 0x1a | ||
|
||
#define AMD_MODEL_ZEN_2 0x31 | ||
|
||
|
@@ -191,8 +197,9 @@ namespace lsp | |
} | ||
|
||
uint64_t read_xcr(umword_t xcr_id); | ||
} | ||
} | ||
|
||
} /* namespace x86 */ | ||
} /* namespace lsp */ | ||
|
||
|
||
#endif /* PRIVATE_DSP_ARCH_X86_CPUID_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/* | ||
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> | ||
* (C) 2020 Vladimir Sadovnikov <[email protected]> | ||
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/> | ||
* (C) 2023 Vladimir Sadovnikov <[email protected]> | ||
* | ||
* This file is part of lsp-dsp-lib | ||
* Created on: 31 мар. 2020 г. | ||
|
@@ -127,8 +127,9 @@ | |
|
||
// This routine sucks on AMD Bulldozer processor family but is pretty great on Intel | ||
// Not tested on AMD Processors above Bulldozer family | ||
bool favx = feature_check(f, FEAT_FAST_AVX); | ||
bool ffma = favx && feature_check(f, FEAT_FAST_FMA3); | ||
bool favx = feature_check(f, FEAT_FAST_AVX); | ||
bool ffma = favx && feature_check(f, FEAT_FAST_FMA3); | ||
bool below_zen3 = feature_check(f, FEAT_BELOW_ZEN3); | ||
|
||
CEXPORT2_X64(favx, reverse1, reverse1); | ||
CEXPORT2_X64(favx, reverse2, reverse2); | ||
|
@@ -448,7 +449,10 @@ | |
CEXPORT2(favx, pcomplex_rdiv2, pcomplex_rdiv2_fma3); | ||
CEXPORT2(favx, pcomplex_div3, pcomplex_div3_fma3); | ||
|
||
CEXPORT2(favx, h_sqr_sum, h_sqr_sum_fma3); | ||
if (!below_zen3) | ||
{ | ||
CEXPORT2(favx, h_sqr_sum, h_sqr_sum_fma3); | ||
} | ||
|
||
CEXPORT2(favx, direct_fft, direct_fft_fma3); | ||
CEXPORT2(favx, reverse_fft, reverse_fft_fma3); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/* | ||
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/> | ||
* (C) 2023 Vladimir Sadovnikov <[email protected]> | ||
* | ||
* This file is part of lsp-dsp-lib | ||
* Created on: 31 мар. 2020 г. | ||
* | ||
* lsp-dsp-lib is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* any later version. | ||
* | ||
* lsp-dsp-lib is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include <lsp-plug.in/dsp/dsp.h> | ||
#include <lsp-plug.in/test-fw/ptest.h> | ||
#include <lsp-plug.in/test-fw/helpers.h> | ||
#include <lsp-plug.in/common/alloc.h> | ||
|
||
#define MIN_RANK 8 | ||
#define MAX_RANK 16 | ||
|
||
// Forward declarations of the h_abs_sum implementations exercised by this | ||
// performance test. Each implementation lives in its own namespace; the | ||
// architecture-specific ones are wrapped in IF_ARCH_* macros (presumably so | ||
// that they are only declared for the matching target - confirm against the | ||
// macro definitions). | ||
namespace lsp | ||
{ | ||
namespace generic | ||
{ | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
|
||
IF_ARCH_X86( | ||
namespace sse | ||
{ | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
|
||
namespace avx | ||
{ | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
) | ||
|
||
IF_ARCH_ARM( | ||
namespace neon_d32 | ||
{ | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
) | ||
|
||
IF_ARCH_AARCH64( | ||
namespace asimd | ||
{ | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
) | ||
|
||
// Common function pointer type: every implementation declared above takes a | ||
// source buffer plus an element count and returns a single float. | ||
typedef float (* h_sum_t)(const float *src, size_t count); | ||
} | ||
|
||
// Performance test that runs every declared h_abs_sum implementation over | ||
// buffers of 2^MIN_RANK .. 2^MAX_RANK floats. | ||
// NOTE(review): the test is registered under the name `hsum` although it only | ||
// exercises h_abs_sum; confirm the name is intended and does not collide with | ||
// another performance test in the "dsp.hmath" group. | ||
// The trailing arguments (5, 10000) are presumably the time and iteration | ||
// limits of the test framework - TODO confirm against ptest.h. | ||
PTEST_BEGIN("dsp.hmath", hsum, 5, 10000) | ||
|
||
// Benchmark a single implementation. | ||
//   label - printable name of the implementation (used in the result key) | ||
//   src   - input buffer, must hold at least `count` floats | ||
//   count - number of elements passed to the implementation | ||
//   func  - implementation under test | ||
void call(const char *label, float *src, size_t count, h_sum_t func) | ||
{ | ||
// Skip the implementation when PTEST_SUPPORTED rejects it (presumably it is | ||
// not available on the current CPU - confirm against the test framework). | ||
if (!PTEST_SUPPORTED(func)) | ||
return; | ||
|
||
// Build the result key. snprintf bounds the write to the buffer size, so an | ||
// unexpectedly long label is truncated instead of overflowing buf. | ||
char buf[80]; | ||
snprintf(buf, sizeof(buf), "%s x %d", label, int(count)); | ||
printf("Testing %s numbers...\n", buf); | ||
|
||
PTEST_LOOP(buf, | ||
func(src, count); | ||
); | ||
} | ||
|
||
PTEST_MAIN | ||
{ | ||
// One buffer sized for the largest rank is shared by all runs; smaller | ||
// ranks simply use a prefix of it. | ||
size_t buf_size = 1 << MAX_RANK; | ||
uint8_t *data = NULL; | ||
float *src = alloc_aligned<float>(data, buf_size, 64); | ||
|
||
// Fill the input with values produced by randf(0.0f, 1.0f). | ||
for (size_t i=0; i < buf_size; ++i) | ||
src[i] = randf(0.0f, 1.0f); | ||
|
||
// CALL stringifies the function name so it can be used as the label. | ||
#define CALL(func) \ | ||
call(#func, src, count, func) | ||
|
||
for (size_t i=MIN_RANK; i <= MAX_RANK; ++i) | ||
{ | ||
size_t count = 1 << i; | ||
|
||
CALL(generic::h_abs_sum); | ||
IF_ARCH_X86(CALL(sse::h_abs_sum)); | ||
IF_ARCH_X86(CALL(avx::h_abs_sum)); | ||
IF_ARCH_ARM(CALL(neon_d32::h_abs_sum)); | ||
IF_ARCH_AARCH64(CALL(asimd::h_abs_sum)); | ||
PTEST_SEPARATOR; | ||
} | ||
|
||
// Release through `data`, the pointer that alloc_aligned filled in. | ||
free_aligned(data); | ||
} | ||
|
||
PTEST_END | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/* | ||
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> | ||
* (C) 2020 Vladimir Sadovnikov <[email protected]> | ||
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/> | ||
* (C) 2023 Vladimir Sadovnikov <[email protected]> | ||
* | ||
* This file is part of lsp-dsp-lib | ||
* Created on: 31 мар. 2020 г. | ||
|
@@ -31,50 +31,40 @@ namespace lsp | |
{ | ||
namespace generic | ||
{ | ||
float h_sum(const float *src, size_t count); | ||
float h_sqr_sum(const float *src, size_t count); | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
|
||
IF_ARCH_X86( | ||
namespace sse | ||
{ | ||
float h_sum(const float *src, size_t count); | ||
float h_sqr_sum(const float *src, size_t count); | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
|
||
namespace avx | ||
{ | ||
float h_sum(const float *src, size_t count); | ||
float h_sqr_sum(const float *src, size_t count); | ||
float h_sqr_sum_fma3(const float *src, size_t count); | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
) | ||
|
||
IF_ARCH_ARM( | ||
namespace neon_d32 | ||
{ | ||
float h_sum(const float *src, size_t count); | ||
float h_sqr_sum(const float *src, size_t count); | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
) | ||
|
||
IF_ARCH_AARCH64( | ||
namespace asimd | ||
{ | ||
float h_sum(const float *src, size_t count); | ||
float h_sqr_sum(const float *src, size_t count); | ||
float h_abs_sum(const float *src, size_t count); | ||
} | ||
) | ||
|
||
typedef float (* h_sum_t)(const float *src, size_t count); | ||
} | ||
|
||
PTEST_BEGIN("dsp.hmath", hsum, 5, 10000) | ||
PTEST_BEGIN("dsp.hmath", h_sqr_sum, 5, 10000) | ||
|
||
void call(const char *label, float *src, size_t count, h_sum_t func) | ||
{ | ||
|
@@ -106,27 +96,13 @@ PTEST_BEGIN("dsp.hmath", hsum, 5, 10000) | |
{ | ||
size_t count = 1 << i; | ||
|
||
CALL(generic::h_sum); | ||
IF_ARCH_X86(CALL(sse::h_sum)); | ||
IF_ARCH_X86(CALL(avx::h_sum)); | ||
IF_ARCH_ARM(CALL(neon_d32::h_sum)); | ||
IF_ARCH_AARCH64(CALL(asimd::h_sum)); | ||
PTEST_SEPARATOR; | ||
|
||
CALL(generic::h_sqr_sum); | ||
IF_ARCH_X86(CALL(sse::h_sqr_sum)); | ||
IF_ARCH_X86(CALL(avx::h_sqr_sum)); | ||
IF_ARCH_X86(CALL(avx::h_sqr_sum_fma3)); | ||
IF_ARCH_ARM(CALL(neon_d32::h_sqr_sum)); | ||
IF_ARCH_AARCH64(CALL(asimd::h_sqr_sum)); | ||
PTEST_SEPARATOR; | ||
|
||
CALL(generic::h_abs_sum); | ||
IF_ARCH_X86(CALL(sse::h_abs_sum)); | ||
IF_ARCH_X86(CALL(avx::h_abs_sum)); | ||
IF_ARCH_ARM(CALL(neon_d32::h_abs_sum)); | ||
IF_ARCH_AARCH64(CALL(asimd::h_abs_sum)); | ||
PTEST_SEPARATOR2; | ||
} | ||
|
||
free_aligned(data); | ||
|
Oops, something went wrong.