Skip to content

Commit

Permalink
opus: fix reset state cleaning (avoid cache false sharing)
Browse files Browse the repository at this point in the history
  • Loading branch information
rasky committed Nov 1, 2024
1 parent e93802a commit 6b7978b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/audio/libopus_rsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ void rsp_opus_memmove(celt_sig *dst, celt_sig *src, opus_int32 len) {
rspq_flush();
}

/** @brief Clear output buffer with RSP */
/** @brief Clear output buffer with RSP. Len must be in 32-bit samples (not bytes) */
void rsp_opus_clear(celt_sig *dst, opus_int32 len) {
rsp_cmd_clear(dst, len);
rspq_flush();
Expand Down
26 changes: 22 additions & 4 deletions src/audio/opus/celt_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@
/** Decoder state
@brief Decoder state
*/
#ifdef N64
__attribute__((aligned(16)))
#endif
struct OpusCustomDecoder {
const OpusCustomMode *mode;
int overlap;
Expand Down Expand Up @@ -107,9 +110,16 @@ struct OpusCustomDecoder {
int postfilter_tapset;
int postfilter_tapset_old;

#ifdef N64
/* Everything beyond this point, before lpc, is cleared with RSP on reset.
* It is aligned so that it sits on cachelines of its own: sharing a cacheline
* with CPU-written fields (false sharing) would cause cache bugs. */
__attribute__((aligned(16)))
#endif
celt_sig preemph_memD[2];
celt_sig __padding[2]; /* make sure the size of the RSP-cleared area is a multiple of 16 bytes */

celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
/**************** Everything beyond this point is CPU cleared on reset */
/* opus_val16 lpc[], Size = channels*LPC_ORDER */
/* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
/* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
Expand Down Expand Up @@ -174,7 +184,11 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int
CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error)
{
int ret;
#ifdef N64
CELTDecoder *st = (CELTDecoder *)memalign(16, opus_custom_decoder_get_size(mode, channels));
#else
CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels));
#endif
ret = opus_custom_decoder_init(st, mode, channels);
if (ret != OPUS_OK)
{
Expand Down Expand Up @@ -1358,13 +1372,17 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
OPUS_CLEAR((char*)lpc,
(char*)st + st_size - (char*)lpc);
#ifdef N64
data_cache_hit_writeback_invalidate(st->preemph_memD, sizeof(st->preemph_memD));
data_cache_hit_writeback_invalidate(st->_decode_mem, (DECODE_BUFFER_SIZE+st->overlap)*st->channels*sizeof(celt_sig));
void *rsp_clear_data_start = (char*)&st->preemph_memD[0];
void *rsp_clear_data_end = (char*)lpc;
int rsp_clear_data_size = rsp_clear_data_end - rsp_clear_data_start;
assert((uint32_t)rsp_clear_data_start % 16 == 0);
assert(rsp_clear_data_size % 16 == 0);
data_cache_hit_writeback_invalidate(rsp_clear_data_start, rsp_clear_data_size);
rspq_highpri_begin();
rsp_opus_clear(st->preemph_memD, 2);
rsp_opus_clear(st->_decode_mem, (DECODE_BUFFER_SIZE+st->overlap)*st->channels);
rsp_opus_clear(rsp_clear_data_start, rsp_clear_data_size/4);
rspq_highpri_end();
#else
OPUS_CLEAR(st->_preemph_memD, 2);
OPUS_CLEAR(st->_decode_mem, (DECODE_BUFFER_SIZE+st->overlap)*st->channels);
#endif

Expand Down

0 comments on commit 6b7978b

Please sign in to comment.