From 2cf8f62379029da943bf7e6735081740145c7b82 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 7 Aug 2023 16:39:08 +0200 Subject: [PATCH] VADPCM: complete stereo support --- src/audio/rsp_mixer.S | 78 ++++++++++++++++++++-------------- src/audio/wav64.c | 57 +++++++++++++++++-------- tools/audioconv64/conv_wav64.c | 30 ++++++------- 3 files changed, 98 insertions(+), 67 deletions(-) diff --git a/src/audio/rsp_mixer.S b/src/audio/rsp_mixer.S index 4a6836b6f9..969290862f 100644 --- a/src/audio/rsp_mixer.S +++ b/src/audio/rsp_mixer.S @@ -1214,6 +1214,8 @@ VADPCM_STATE: .space 16*2 # state, left/right #define input_aligned s6 #define next_input_aligned s7 #define stereo_toggle k1 +#define input_incr v0 +#define output_incr v1 #define vscale $v01 #define vstatef $v02 @@ -1275,21 +1277,29 @@ VADPCM_Decompress: addiu nframes, 1 li dmem_input, %lo(VADPCM_INPUT) + li input_incr, 9 li dmem_output, %lo(VADPCM_OUTPUT) + li output_incr, 32 + bgez a2, VADPCM_FetchFirstFrame + move stereo_toggle, a2 + + # If the waveform is stereo, double increments + sll input_incr, 1 + sll output_incr, 1 + +VADPCM_FetchFirstFrame: # Fetch the first frame. 
Wait for the transfer to finish move s4, dmem_input move s0, a0 jal DMAIn - li t0, DMA_SIZE(9, 1) - addiu a0, 9 + addiu t0, input_incr, -1 + add a0, input_incr + move next_input_aligned, s4 lqv vstate1, 0x00,dmem_state lqv vstate3, 0x10,dmem_state - move next_input_aligned, s4 - move stereo_toggle, a2 - ################################################################## VADPCM_DecompressLoop: @@ -1302,11 +1312,11 @@ VADPCM_DecompressLoop: xori dmem_input, 32 # swap mtc0 dmem_input, COP0_DMA_SPADDR mtc0 a0, COP0_DMA_RAMADDR - li t0, DMA_SIZE(9, 1) + addiu t0, input_incr, -1 mtc0 t0, COP0_DMA_READ andi t0, a0, 7 add next_input_aligned, dmem_input, t0 - addiu a0, 9 + add a0, input_incr VADPCM_DecompressMono: # Read from input: control byte, plus residuals @@ -1387,43 +1397,47 @@ VADPCM_DecompressMono: # Stereo deinterleave addiu dmem_output, -64 + lqv vstate0, 0x00,dmem_output + lqv vstate1, 0x10,dmem_output + lqv vstate2, 0x20,dmem_output + lqv vstate3, 0x30,dmem_output VADPCM_InterleaveStereo: - lqv vstate1, 0x00,dmem_output - lqv vstate3, 0x20,dmem_output - ssv vstate1.e0, 0x00,dmem_output - ssv vstate3.e0, 0x02,dmem_output - ssv vstate1.e1, 0x04,dmem_output - ssv vstate3.e1, 0x06,dmem_output - ssv vstate1.e2, 0x08,dmem_output - ssv vstate3.e2, 0x0A,dmem_output - ssv vstate1.e3, 0x0C,dmem_output - ssv vstate3.e3, 0x0E,dmem_output - - ssv vstate1.e4, 0x10,dmem_output - ssv vstate3.e4, 0x12,dmem_output - ssv vstate1.e5, 0x14,dmem_output - ssv vstate3.e5, 0x16,dmem_output - ssv vstate1.e6, 0x18,dmem_output - ssv vstate3.e6, 0x1A,dmem_output - ssv vstate1.e7, 0x1C,dmem_output - ssv vstate3.e7, 0x1E,dmem_output - - addiu dmem_output, 0x10 + ssv vstate0.e0, 0x00,dmem_output + ssv vstate2.e0, 0x02,dmem_output + ssv vstate0.e1, 0x04,dmem_output + ssv vstate2.e1, 0x06,dmem_output + ssv vstate0.e2, 0x08,dmem_output + ssv vstate2.e2, 0x0A,dmem_output + ssv vstate0.e3, 0x0C,dmem_output + ssv vstate2.e3, 0x0E,dmem_output + + ssv vstate0.e4, 0x10,dmem_output + ssv vstate2.e4, 
0x12,dmem_output + ssv vstate0.e5, 0x14,dmem_output + ssv vstate2.e5, 0x16,dmem_output + ssv vstate0.e6, 0x18,dmem_output + ssv vstate2.e6, 0x1A,dmem_output + ssv vstate0.e7, 0x1C,dmem_output + ssv vstate2.e7, 0x1E,dmem_output + + vcopy vstate0, vstate1 + vcopy vstate2, vstate3 + addiu dmem_output, 0x20 bltz stereo_toggle, VADPCM_InterleaveStereo xor stereo_toggle, a2 - addiu dmem_output, -0x20 + addiu dmem_output, -0x40 # Write output into RDRAM (async) VADPCM_Output: mfc0 t0, COP0_DMA_FULL bnez t0, VADPCM_Output - li t0, DMA_SIZE(32, 1) + addiu t0, output_incr, -1 mtc0 dmem_output, COP0_DMA_SPADDR mtc0 a1, COP0_DMA_RAMADDR mtc0 t0, COP0_DMA_WRITE - addiu a1, 32 + add a1, output_incr xori dmem_output, 128 # swap addiu nframes, -1 @@ -1432,8 +1446,6 @@ VADPCM_Output: ################################################################## - emux_trace_stop - # Save back state sqv vstate1, 0x00,dmem_state sqv vstate3, 0x10,dmem_state diff --git a/src/audio/wav64.c b/src/audio/wav64.c index 8575ec0bd1..5564efaaa3 100644 --- a/src/audio/wav64.c +++ b/src/audio/wav64.c @@ -23,7 +23,7 @@ #include #include -/** @brief Set to 1 to use the refernece C decode for VADPCM */ +/** @brief Set to 1 to use the reference C decode for VADPCM */ #define VADPCM_REFERENCE_DECODER 0 /** ID of a standard WAV file */ @@ -138,10 +138,6 @@ static inline void rsp_vadpcm_decompress(void *input, int16_t *output, bool ster wav64_vadpcm_vector_t *state, wav64_vadpcm_vector_t *codebook) { assert(nframes > 0 && nframes <= 256); - if (stereo) { - assert(nframes % 2 == 0); - nframes /= 2; - } rspq_write(__mixer_overlay_id, 0x1, PhysicalAddr(input), PhysicalAddr(output) | (nframes-1) << 24, @@ -196,29 +192,54 @@ static void waveform_vadpcm_read(void *ctx, samplebuffer_t *sbuf, int wpos, int assert(nframes <= 256); nframes = MIN(nframes, 256); + // Acquire destination buffer from the sample buffer int16_t *dest = (int16_t*)samplebuffer_append(sbuf, nframes*16); - assert((nframes & 1) == 0); - dma_read(dest 
+ wlen - 9*nframes/2, vhead->current_rom_addr, 9*nframes); + // Calculate source pointer at the end of the destination buffer. + // VADPCM decoding can be safely done in-place, so no auxiliary buffer + // is necessary. + int src_bytes = 9 * nframes * wav->wave.channels; + void *src = (void*)dest + ((nframes*16) << SAMPLES_BPS_SHIFT(sbuf)) - src_bytes; + + // Fetch compressed data + dma_read(src, vhead->current_rom_addr, src_bytes); + vhead->current_rom_addr += src_bytes; #if VADPCM_REFERENCE_DECODER - vadpcm_error err = vadpcm_decode( - vhead->npredictors, vhead->order, vhead->codebook, &vhead->state, - nframes, dest, dest + wlen - 9*nframes/2); - assertf(err == 0, "VADPCM decoding error: %d\n", err); + if (wav->wave.channels == 1) { + vadpcm_error err = vadpcm_decode( + vhead->npredictors, vhead->order, vhead->codebook, vhead->state, + nframes, dest, src); + assertf(err == 0, "VADPCM decoding error: %d\n", err); + } else { + assert(wav->wave.channels == 2); + int16_t uncomp[2][16]; + int16_t *dst = dest; + + for (int i=0; inpredictors, vhead->order, vhead->codebook + 8*j, &vhead->state[j], + 1, uncomp[j], src); + assertf(err == 0, "VADPCM decoding error: %d\n", err); + src += 9; + } + for (int j=0; j<16; j++) { + *dst++ = uncomp[0][j]; + *dst++ = uncomp[1][j]; + } + } + } #else // Switch to highpri as late as possible if (!highpri) { rspq_highpri_begin(); highpri = true; } - rsp_vadpcm_decompress(dest + wlen - 9*nframes/2, dest, wav->wave.channels==2, nframes, - &vhead->state, vhead->codebook); + rsp_vadpcm_decompress(src, dest, wav->wave.channels==2, nframes, vhead->state, vhead->codebook); #endif - vhead->current_rom_addr += 9*nframes; wlen -= 16*nframes; - dest += 16*nframes; } if (highpri) @@ -260,9 +281,11 @@ void wav64_open(wav64_t *wav, const char *fn) { wav64_header_vadpcm_t vhead = {0}; dfs_read(&vhead, 1, sizeof(vhead), fh); - void *ext = malloc_uncached(sizeof(vhead) + vhead.npredictors * vhead.order * sizeof(wav64_vadpcm_vector_t)); + int codebook_size 
= vhead.npredictors * vhead.order * head.channels * sizeof(wav64_vadpcm_vector_t); + + void *ext = malloc_uncached(sizeof(vhead) + codebook_size); memcpy(ext, &vhead, sizeof(vhead)); - dfs_read(ext + sizeof(vhead), 1, vhead.npredictors * vhead.order * sizeof(wav64_vadpcm_vector_t), fh); + dfs_read(ext + sizeof(vhead), 1, codebook_size, fh); wav->ext = ext; wav->wave.read = waveform_vadpcm_read; wav->wave.ctx = wav; diff --git a/tools/audioconv64/conv_wav64.c b/tools/audioconv64/conv_wav64.c index db5e63615d..00e7580e12 100644 --- a/tools/audioconv64/conv_wav64.c +++ b/tools/audioconv64/conv_wav64.c @@ -24,11 +24,6 @@ int wav_convert(const char *infn, const char *outfn) { fprintf(stderr, "Converting: %s => %s (%d bits, %d Hz, %d channels, %s)\n", infn, outfn, wav.bitsPerSample, wav.sampleRate, wav.channels, flag_wav_compress ? "vadpcm" : "raw"); - if (flag_wav_compress == 1 && wav.channels != 1) { - fprintf(stderr, "ERROR: VADPCM compression only support mono files\n"); - return 1; - } - // Decode the samples as 16bit big-endian. This will decode everything including // compressed formats so that we're able to read any kind of WAV file, though // it will end up as an uncompressed file. 
@@ -114,32 +109,33 @@ int wav_convert(const char *infn, const char *outfn) { } break; case 1: { // vadpcm - if ((cnt / wav.channels) % kVADPCMFrameSampleCount) { - int newcnt = (cnt / wav.channels + kVADPCMFrameSampleCount - 1) / kVADPCMFrameSampleCount * kVADPCMFrameSampleCount * wav.channels; - samples = realloc(samples, newcnt * sizeof(int16_t)); - memset(samples + cnt, 0, (newcnt - cnt) * sizeof(int16_t)); + if (cnt % kVADPCMFrameSampleCount) { + int newcnt = (cnt + kVADPCMFrameSampleCount - 1) / kVADPCMFrameSampleCount * kVADPCMFrameSampleCount; + samples = realloc(samples, newcnt * wav.channels * sizeof(int16_t)); + memset(samples + cnt, 0, (newcnt - cnt) * wav.channels * sizeof(int16_t)); cnt = newcnt; } enum { kPREDICTORS = 4 }; + assert(cnt % kVADPCMFrameSampleCount == 0); int nframes = cnt / kVADPCMFrameSampleCount; void *scratch = malloc(vadpcm_encode_scratch_size(nframes)); struct vadpcm_vector *codebook = alloca(kPREDICTORS * kVADPCMEncodeOrder * wav.channels * sizeof(struct vadpcm_vector)); struct vadpcm_params parms = { .predictor_count = kPREDICTORS }; - void *dest = malloc(nframes * kVADPCMFrameByteSize); + void *dest = malloc(nframes * kVADPCMFrameByteSize * wav.channels); - int16_t *schan = malloc(cnt / wav.channels * sizeof(int16_t));; + int16_t *schan = malloc(cnt * sizeof(int16_t)); uint8_t *destchan = dest; - for (int i=0;i