Skip to content

Commit

Permalink
Implemented SSE-optimized pcomplex_r2c functions
Browse files Browse the repository at this point in the history
  • Loading branch information
sadko4u committed Aug 9, 2023
1 parent b2dc99f commit 8aa9fd5
Show file tree
Hide file tree
Showing 15 changed files with 1,836 additions and 1 deletion.
2 changes: 1 addition & 1 deletion include/private/dsp/arch/generic/pcomplex.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ namespace lsp
{
for (size_t i=0; i<count; ++i)
{
float k = src[0] / (dst[0] * dst[0] + dst[1] * dst[1]);
float k = src[i] / (dst[0] * dst[0] + dst[1] * dst[1]);

dst[0] = k * dst[0];
dst[1] = -k * dst[1];
Expand Down
412 changes: 412 additions & 0 deletions include/private/dsp/arch/x86/sse/pcomplex.h

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions src/main/x86/sse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,13 @@
// EXPORT1(complex_cvt2modarg);
// EXPORT1(complex_cvt2reim);

EXPORT1(pcomplex_r2c_add2);
EXPORT1(pcomplex_r2c_sub2);
EXPORT1(pcomplex_r2c_rsub2);
EXPORT1(pcomplex_r2c_mul2);
EXPORT1(pcomplex_r2c_div2);
EXPORT1(pcomplex_r2c_rdiv2);

EXPORT1(lr_to_ms);
EXPORT1(lr_to_mid);
EXPORT1(lr_to_side);
Expand Down
120 changes: 120 additions & 0 deletions src/test/ptest/pcomplex/r2c_add.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 9 авг. 2023 г.
*
* lsp-dsp-lib is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* lsp-dsp-lib is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
*/

#include <lsp-plug.in/common/alloc.h>
#include <lsp-plug.in/common/types.h>
#include <lsp-plug.in/dsp/dsp.h>
#include <lsp-plug.in/test-fw/helpers.h>
#include <lsp-plug.in/test-fw/ptest.h>

#define MIN_RANK 6
#define MAX_RANK 16

// Forward declarations of the pcomplex_r2c_add2 implementations that are
// benchmarked by the performance test in this file. All variants share the
// same signature: (float *dst, const float *src, size_t count).
namespace lsp
{
// Generic implementation; declared unconditionally, without any IF_ARCH_* guard
namespace generic
{
void pcomplex_r2c_add2(float *dst, const float *src, size_t count);
}

// NOTE(review): IF_ARCH_X86 presumably emits its argument on x86 targets only - confirm
IF_ARCH_X86(
namespace sse
{
void pcomplex_r2c_add2(float *dst, const float *src, size_t count);
}

// namespace avx
// {
// void pcomplex_r2c_add2(float *dst, const float *src, size_t count);
// }
)

// The AVX, NEON and ASIMD declarations are commented out in this file
// (presumably those implementations are not available yet - confirm)

// IF_ARCH_ARM(
// namespace neon_d32
// {
// void pcomplex_r2c_add2(float *dst, const float *src, size_t count);
// }
// )

// IF_ARCH_AARCH64(
// namespace asimd
// {
// void pcomplex_r2c_add2(float *dst, const float *src, size_t count);
// }
// )

// Pointer to any of the routines declared above; used to pass the routine under test
typedef void (* pcomplex_r2c_op2_t)(float *dst, const float *src, size_t count);
}

//-----------------------------------------------------------------------------
// Performance test for pcomplex_r2c_add2: in-place addition applied to a
// packed complex destination buffer and a real source buffer
PTEST_BEGIN("dsp.pcomplex", r2c_add, 5, 1000)

    /**
     * Benchmark a single implementation of the operation.
     *
     * @param label textual name of the implementation, used in the report key
     * @param dst buffer passed to the routine as destination
     * @param src buffer passed to the routine as source
     * @param count element count passed to the routine
     * @param op routine to benchmark; nothing is done if PTEST_SUPPORTED(op) is false
     */
    void call(const char *label, float *dst, const float *src, size_t count, pcomplex_r2c_op2_t op)
    {
        if (!PTEST_SUPPORTED(op))
            return;

        // Use bounded formatting: the label is a stringified expression of
        // arbitrary length and must never overflow the fixed-size buffer
        char buf[80];
        snprintf(buf, sizeof(buf), "%s x %d", label, int(count));
        printf("Testing %s numbers...\n", buf);

        PTEST_LOOP(buf,
            op(dst, src, count);
        );
    }

    PTEST_MAIN
    {
        size_t buf_size = 1 << MAX_RANK;
        uint8_t *data   = NULL;

        // Single aligned allocation of buf_size*6 floats split into three regions:
        //   out    : buf_size*2 floats (destination of the routine)
        //   in     : buf_size*1 floats (source of the routine)
        //   backup : buf_size*3 floats (pristine copy of out + in)
        float *out      = alloc_aligned<float>(data, buf_size*6, 64);
        float *in       = &out[buf_size*2];
        float *backup   = &in[buf_size*1];

        // Fill out and in at once (they are contiguous) and remember the initial state
        randomize_sign(out, buf_size*3);
        dsp::copy(backup, out, buf_size*3);

        // Restore the initial contents of out and in, then benchmark the routine.
        // The macro relies on the local variable 'count' of the loop below.
        #define CALL(func) \
            dsp::copy(out, backup, buf_size*3); \
            call(#func, out, in, count, func)

        for (size_t i=MIN_RANK; i <= MAX_RANK; ++i)
        {
            size_t count = 1 << i;

            CALL(generic::pcomplex_r2c_add2);
            IF_ARCH_X86(CALL(sse::pcomplex_r2c_add2));
//            IF_ARCH_X86(CALL(avx::pcomplex_r2c_add2));
//            IF_ARCH_ARM(CALL(neon_d32::pcomplex_r2c_add2));
//            IF_ARCH_AARCH64(CALL(asimd::pcomplex_r2c_add2));

            PTEST_SEPARATOR;
        }

        free_aligned(data);
    }
PTEST_END





120 changes: 120 additions & 0 deletions src/test/ptest/pcomplex/r2c_div.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 9 авг. 2023 г.
*
* lsp-dsp-lib is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* lsp-dsp-lib is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
*/

#include <lsp-plug.in/common/alloc.h>
#include <lsp-plug.in/common/types.h>
#include <lsp-plug.in/dsp/dsp.h>
#include <lsp-plug.in/test-fw/helpers.h>
#include <lsp-plug.in/test-fw/ptest.h>

#define MIN_RANK 6
#define MAX_RANK 16

// Forward declarations of the pcomplex_r2c_div2 implementations that are
// benchmarked by the performance test in this file. All variants share the
// same signature: (float *dst, const float *src, size_t count).
namespace lsp
{
// Generic implementation; declared unconditionally, without any IF_ARCH_* guard
namespace generic
{
void pcomplex_r2c_div2(float *dst, const float *src, size_t count);
}

// NOTE(review): IF_ARCH_X86 presumably emits its argument on x86 targets only - confirm
IF_ARCH_X86(
namespace sse
{
void pcomplex_r2c_div2(float *dst, const float *src, size_t count);
}

// namespace avx
// {
// void pcomplex_r2c_div2(float *dst, const float *src, size_t count);
// }
)

// The AVX, NEON and ASIMD declarations are commented out in this file
// (presumably those implementations are not available yet - confirm)

// IF_ARCH_ARM(
// namespace neon_d32
// {
// void pcomplex_r2c_div2(float *dst, const float *src, size_t count);
// }
// )

// IF_ARCH_AARCH64(
// namespace asimd
// {
// void pcomplex_r2c_div2(float *dst, const float *src, size_t count);
// }
// )

// Pointer to any of the routines declared above; used to pass the routine under test
typedef void (* pcomplex_r2c_op2_t)(float *dst, const float *src, size_t count);
}

//-----------------------------------------------------------------------------
// Performance test for pcomplex_r2c_div2: in-place division applied to a
// packed complex destination buffer and a real source buffer
PTEST_BEGIN("dsp.pcomplex", r2c_div, 5, 1000)

    /**
     * Benchmark a single implementation of the operation.
     *
     * @param label textual name of the implementation, used in the report key
     * @param dst buffer passed to the routine as destination
     * @param src buffer passed to the routine as source
     * @param count element count passed to the routine
     * @param op routine to benchmark; nothing is done if PTEST_SUPPORTED(op) is false
     */
    void call(const char *label, float *dst, const float *src, size_t count, pcomplex_r2c_op2_t op)
    {
        if (!PTEST_SUPPORTED(op))
            return;

        // Use bounded formatting: the label is a stringified expression of
        // arbitrary length and must never overflow the fixed-size buffer
        char buf[80];
        snprintf(buf, sizeof(buf), "%s x %d", label, int(count));
        printf("Testing %s numbers...\n", buf);

        PTEST_LOOP(buf,
            op(dst, src, count);
        );
    }

    PTEST_MAIN
    {
        size_t buf_size = 1 << MAX_RANK;
        uint8_t *data   = NULL;

        // Single aligned allocation of buf_size*6 floats split into three regions:
        //   out    : buf_size*2 floats (destination of the routine)
        //   in     : buf_size*1 floats (source of the routine)
        //   backup : buf_size*3 floats (pristine copy of out + in)
        float *out      = alloc_aligned<float>(data, buf_size*6, 64);
        float *in       = &out[buf_size*2];
        float *backup   = &in[buf_size*1];

        // Fill out and in at once (they are contiguous) and remember the initial state
        randomize_sign(out, buf_size*3);
        dsp::copy(backup, out, buf_size*3);

        // Restore the initial contents of out and in, then benchmark the routine.
        // The macro relies on the local variable 'count' of the loop below.
        #define CALL(func) \
            dsp::copy(out, backup, buf_size*3); \
            call(#func, out, in, count, func)

        for (size_t i=MIN_RANK; i <= MAX_RANK; ++i)
        {
            size_t count = 1 << i;

            CALL(generic::pcomplex_r2c_div2);
            IF_ARCH_X86(CALL(sse::pcomplex_r2c_div2));
//            IF_ARCH_X86(CALL(avx::pcomplex_r2c_div2));
//            IF_ARCH_ARM(CALL(neon_d32::pcomplex_r2c_div2));
//            IF_ARCH_AARCH64(CALL(asimd::pcomplex_r2c_div2));

            PTEST_SEPARATOR;
        }

        free_aligned(data);
    }
PTEST_END





120 changes: 120 additions & 0 deletions src/test/ptest/pcomplex/r2c_mul.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 9 авг. 2023 г.
*
* lsp-dsp-lib is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* lsp-dsp-lib is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
*/

#include <lsp-plug.in/common/alloc.h>
#include <lsp-plug.in/common/types.h>
#include <lsp-plug.in/dsp/dsp.h>
#include <lsp-plug.in/test-fw/helpers.h>
#include <lsp-plug.in/test-fw/ptest.h>

#define MIN_RANK 6
#define MAX_RANK 16

// Forward declarations of the pcomplex_r2c_mul2 implementations that are
// benchmarked by the performance test in this file. All variants share the
// same signature: (float *dst, const float *src, size_t count).
namespace lsp
{
// Generic implementation; declared unconditionally, without any IF_ARCH_* guard
namespace generic
{
void pcomplex_r2c_mul2(float *dst, const float *src, size_t count);
}

// NOTE(review): IF_ARCH_X86 presumably emits its argument on x86 targets only - confirm
IF_ARCH_X86(
namespace sse
{
void pcomplex_r2c_mul2(float *dst, const float *src, size_t count);
}

// namespace avx
// {
// void pcomplex_r2c_mul2(float *dst, const float *src, size_t count);
// }
)

// The AVX, NEON and ASIMD declarations are commented out in this file
// (presumably those implementations are not available yet - confirm)

// IF_ARCH_ARM(
// namespace neon_d32
// {
// void pcomplex_r2c_mul2(float *dst, const float *src, size_t count);
// }
// )

// IF_ARCH_AARCH64(
// namespace asimd
// {
// void pcomplex_r2c_mul2(float *dst, const float *src, size_t count);
// }
// )

// Pointer to any of the routines declared above; used to pass the routine under test
typedef void (* pcomplex_r2c_op2_t)(float *dst, const float *src, size_t count);
}

//-----------------------------------------------------------------------------
// Performance test for pcomplex_r2c_mul2: in-place multiplication applied to a
// packed complex destination buffer and a real source buffer
PTEST_BEGIN("dsp.pcomplex", r2c_mul, 5, 1000)

    /**
     * Benchmark a single implementation of the operation.
     *
     * @param label textual name of the implementation, used in the report key
     * @param dst buffer passed to the routine as destination
     * @param src buffer passed to the routine as source
     * @param count element count passed to the routine
     * @param op routine to benchmark; nothing is done if PTEST_SUPPORTED(op) is false
     */
    void call(const char *label, float *dst, const float *src, size_t count, pcomplex_r2c_op2_t op)
    {
        if (!PTEST_SUPPORTED(op))
            return;

        // Use bounded formatting: the label is a stringified expression of
        // arbitrary length and must never overflow the fixed-size buffer
        char buf[80];
        snprintf(buf, sizeof(buf), "%s x %d", label, int(count));
        printf("Testing %s numbers...\n", buf);

        PTEST_LOOP(buf,
            op(dst, src, count);
        );
    }

    PTEST_MAIN
    {
        size_t buf_size = 1 << MAX_RANK;
        uint8_t *data   = NULL;

        // Single aligned allocation of buf_size*6 floats split into three regions:
        //   out    : buf_size*2 floats (destination of the routine)
        //   in     : buf_size*1 floats (source of the routine)
        //   backup : buf_size*3 floats (pristine copy of out + in)
        float *out      = alloc_aligned<float>(data, buf_size*6, 64);
        float *in       = &out[buf_size*2];
        float *backup   = &in[buf_size*1];

        // Fill out and in at once (they are contiguous) and remember the initial state
        randomize_sign(out, buf_size*3);
        dsp::copy(backup, out, buf_size*3);

        // Restore the initial contents of out and in, then benchmark the routine.
        // The macro relies on the local variable 'count' of the loop below.
        #define CALL(func) \
            dsp::copy(out, backup, buf_size*3); \
            call(#func, out, in, count, func)

        for (size_t i=MIN_RANK; i <= MAX_RANK; ++i)
        {
            size_t count = 1 << i;

            CALL(generic::pcomplex_r2c_mul2);
            IF_ARCH_X86(CALL(sse::pcomplex_r2c_mul2));
//            IF_ARCH_X86(CALL(avx::pcomplex_r2c_mul2));
//            IF_ARCH_ARM(CALL(neon_d32::pcomplex_r2c_mul2));
//            IF_ARCH_AARCH64(CALL(asimd::pcomplex_r2c_mul2));

            PTEST_SEPARATOR;
        }

        free_aligned(data);
    }
PTEST_END





Loading

0 comments on commit 8aa9fd5

Please sign in to comment.