mp3 config decode and bug fix (#358)

* mp3 config decode and bug fix * revert audio transrater changes * review comments * revert ts fix in audiodecoder worker * return error message
medooze · Oct 16, 2024 · b4e640b · b4e640b
1 parent 1bd336c
commit b4e640b
Show file tree

Hide file tree

Showing 7 changed files with 243 additions and 15 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -206,6 +206,7 @@ add_executable(MediaServerUnitTest
     ${CMAKE_CURRENT_LIST_DIR}/test/unit/TestAAC.cpp
     ${CMAKE_CURRENT_LIST_DIR}/test/unit/TestMP3.cpp
     ${CMAKE_CURRENT_LIST_DIR}/test/unit/TestOpus.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/test/unit/TestMP3Config.cpp
     ${CMAKE_CURRENT_LIST_DIR}/test/unit/TestAudioPipe.cpp
     ${CMAKE_CURRENT_LIST_DIR}/test/unit/TestAMFNumber.cpp
     ${CMAKE_CURRENT_LIST_DIR}/test/unit/TestVideoLayersAllocation.cpp

diff --git a/include/AudioBuffer.h b/include/AudioBuffer.h
@@ -85,7 +85,7 @@ class AudioBuffer
 	uint16_t numSamplesPerFrame;
 	uint8_t  numChannels;
 	uint64_t ts = 0;
-	uint16_t clockRate = 0;
+	uint32_t clockRate = 0;
 	std::vector<int16_t> pcmBuffer;
 };
 

diff --git a/src/aac/AACDecoder.cpp b/src/aac/AACDecoder.cpp
@@ -63,14 +63,15 @@ AACDecoder::~AACDecoder()
 
 bool AACDecoder::SetConfig(const uint8_t* data,const size_t size)
 {
+	if (inited) return true;
 	AACSpecificConfig config;
 
 	Debug("-AACDecoder::SetConfig()\n");
 
 	//Decode it
 	if (!config.Decode(data,size))
 		//Error
-		return false;
+		return Error("AACDecoder::SetConfig() Could not parse AAC config\n");;
 
 	//Set data
 	ctx->channels		= config.GetChannels();
@@ -83,7 +84,6 @@ bool AACDecoder::SetConfig(const uint8_t* data,const size_t size)
 	memcpy(side,data,size);
 	//We are inited
 	inited = true;
-
 	//Done
 	return true;
 }
@@ -93,10 +93,7 @@ int AACDecoder::Decode(const AudioFrame::const_shared& audioFrame)
 	//Check we have config
 	if (audioFrame->HasCodecConfig())
 		SetConfig(audioFrame->GetCodecConfigData(), audioFrame->GetCodecConfigSize());
-
-	if (!inited)
-		return Error("-AACDecoder::Decode() Not inited\n");
-
+
 	//Set data
 	packet->data = audioFrame ? (uint8_t*)audioFrame->GetData() : nullptr;
 	packet->size = audioFrame ? audioFrame->GetLength() : 0;

diff --git a/src/mp3/MP3Config.h b/src/mp3/MP3Config.h
@@ -0,0 +1,133 @@
+#ifndef MP3CONFIG_H
+#define	MP3CONFIG_H
+
+#include "config.h"
+#include "bitstream/BitReader.h"
+#include "bitstream/BitWriter.h"
+
+#include <array>
+#include <exception>
+
+/**
+ * Holder and parser for the 4-byte MPEG audio frame header used as MP3 codec config.
+ *
+ * Decode() only accepts headers whose version field is 2 (MPEG-2) or 3 (MPEG-1),
+ * whose layer field is 1 (Layer III) and whose bitrate index is neither 0 (free
+ * format) nor 15. Any other header makes Decode() log an error and return false.
+ */
+class MP3Config
+{
+public:
+	enum class MPEGAudioVersion {
+		MPEGVersion2,
+		MPEGVersion1,
+		MPEGVersionReserved
+	};
+	enum class MPEGAudioLayer {
+		MPEGLayer3,
+		MPEGLayerReserved
+	};
+
+	//Lookup tables indexed by the header sampling rate / bitrate index fields (bitrates in kbps)
+	static constexpr std::array<DWORD, 3> Mpeg1SamplingRates = {44100, 48000, 32000};
+	static constexpr std::array<DWORD, 15> Mpeg1Bitrates = {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320};
+
+	static constexpr std::array<DWORD, 3> Mpeg2SamplingRates = {22050, 24000, 16000};
+	static constexpr std::array<DWORD, 15> Mpeg2Bitrates = {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160};
+public:
+
+	MP3Config() = default;
+
+	/**
+	 * Build a config from an already known sampling rate and channel count.
+	 * All the other fields keep their default values.
+	 */
+	MP3Config(DWORD rate, BYTE channels) :
+		samplingRate(rate),
+		channels(channels)
+	{
+	}
+
+	/**
+	 * Parse an MPEG audio frame header.
+	 *
+	 * @param data	Pointer to the header bytes
+	 * @param size	Number of bytes available, must be exactly 4
+	 * @return	true if the header was parsed and is supported, false otherwise.
+	 *		Fields parsed before a failure is detected are left updated.
+	 */
+	bool Decode(const BYTE* data,const DWORD size)
+	{
+		if (size != 4)
+			return Error("-MP3Config::Decode() config data must be size of 4 bytes\n");
+
+		//Create bit reader
+		BufferReader reader(data,size);
+		BitReader r(reader);
+
+		try
+		{
+			//Frame sync: 11 bits, all of them set
+			const auto frameSync = r.Get(11);
+			if (frameSync != 0x7FF)
+				return Error("-MP3Config::Decode() Wrong FrameSync\n");
+
+			//Version id: 2 bits, only values 2 and 3 are supported
+			const auto ver = r.Get(2);
+			switch (ver)
+			{
+				case 2:
+					audioVersion = MPEGAudioVersion::MPEGVersion2;
+					break;
+				case 3:
+					audioVersion = MPEGAudioVersion::MPEGVersion1;
+					break;
+				default:
+					return Error("-MP3Config::Decode() MPEG audio version not supported\n");
+			}
+			const bool isMpeg1 = audioVersion == MPEGAudioVersion::MPEGVersion1;
+
+			//Layer: 2 bits, only value 1 (layer III) is supported
+			const auto layer = r.Get(2);
+			if (layer != 1)
+				return Error("-MP3Config::Decode() MPEG audio layer not supported\n");
+			audioLayer = MPEGAudioLayer::MPEGLayer3;
+			//Samples per frame depend on the MPEG version
+			frameLength = isMpeg1 ? 1152 : 576;
+
+			//Skip the bit between layer and bitrate index (protection bit in the MPEG audio header layout)
+			r.Skip(1);
+
+			//Bitrate index: 4 bits
+			const auto bitrateIdx = r.Get(4);
+			if (bitrateIdx >= Mpeg1Bitrates.size())
+				return Error("-MP3Config::Decode() bitrate idx out of range\n");
+			else if (bitrateIdx == 0)
+				return Error("-MP3Config::Decode() free bitrate idx not supported\n");
+
+			bitrate = isMpeg1 ? Mpeg1Bitrates[bitrateIdx] : Mpeg2Bitrates[bitrateIdx];
+
+			//Sampling rate index: 2 bits
+			const auto samplingRateIdx = r.Get(2);
+			if (samplingRateIdx >= Mpeg1SamplingRates.size())
+				return Error("-MP3Config::Decode() invalid sampling rate idx\n");
+
+			samplingRate = isMpeg1 ? Mpeg1SamplingRates[samplingRateIdx] : Mpeg2SamplingRates[samplingRateIdx];
+
+			//Padding flag: 1 bit. Always refresh the size so no stale value survives a new Decode()
+			padding = r.Get(1) != 0;
+			paddingSize = padding ? 1 : 0;
+
+			//Skip the bit between padding and channel mode (private bit in the MPEG audio header layout)
+			r.Skip(1);
+
+			//Channel mode: 2 bits, value 3 is handled as mono and any other value as two channels
+			const auto channelMode = r.Get(2);
+			channels = channelMode == 3 ? 1 : 2;
+		}
+		catch (const std::exception&)
+		{
+			//Log it like every other failure path instead of failing silently
+			return Error("-MP3Config::Decode() Error reading MP3 header\n");
+		}
+		return true;
+	}
+
+	/**
+	 * Log the current values through Debug().
+	 */
+	void Dump() const
+	{
+		Debug("[MP3Config \n");
+		//Scoped enums are converted explicitly before going through the variadic call
+		Debug("\t mpegVersion=%u\n"	, static_cast<unsigned>(audioVersion));
+		Debug("\t mpegLayer=%u\n"	, static_cast<unsigned>(audioLayer));
+		Debug("\t rate=%u\n"		, samplingRate);
+		Debug("\t channels=%u\n"	, channels);
+		Debug("\t paddingSize=%u\n"	, GetPaddingSize());
+		Debug("\t frameLength=%u\n"	, frameLength);
+		Debug("\t bitrate=%u\n"		, GetBitrate());
+		Debug("/]\n");
+	}
+
+	MPEGAudioVersion GetAudioVersion() const	{ return audioVersion;			}
+	MPEGAudioLayer GetAudioLayer() const		{ return audioLayer;			}
+	DWORD GetRate() const				{ return samplingRate;			}
+	BYTE  GetChannels() const			{ return channels;			}
+	//Padding size, or 0 when the padding flag is not set
+	DWORD GetPaddingSize() const			{ return padding ? paddingSize : 0;	}
+	//Frame length as set by Decode(): 1152 for MPEG-1, 576 for MPEG-2
+	DWORD GetFrameLength() const			{ return frameLength;			}
+	//Bitrate table value (kbps) multiplied by 1000
+	DWORD GetBitrate() const			{ return bitrate * 1000;		}
+
+private:
+	MPEGAudioVersion audioVersion = MPEGAudioVersion::MPEGVersionReserved;
+	MPEGAudioLayer audioLayer = MPEGAudioLayer::MPEGLayerReserved;
+	//Initialized so the getters and Dump() never read indeterminate values before Decode()
+	bool padding = false;
+	BYTE paddingSize = 0;
+	DWORD samplingRate = 0;
+	DWORD bitrate = 0;
+	BYTE channels = 0;
+	DWORD frameLength = 0;
+};
+#endif	/* MP3CONFIG_H */
diff --git a/src/mp3/MP3Decoder.cpp b/src/mp3/MP3Decoder.cpp
@@ -3,7 +3,7 @@ extern "C" {
 }
 #include "log.h"
 #include "MP3Decoder.h"
-
+#include "MP3Config.h"
 
 
 MP3Decoder::MP3Decoder()
@@ -34,9 +34,6 @@ MP3Decoder::MP3Decoder()
 	packet = av_packet_alloc();
 	//Allocate frame
 	frame = av_frame_alloc();
-
-	//Get the number of samples
-	numFrameSamples = 1024;
 }
 
 MP3Decoder::~MP3Decoder()
@@ -59,14 +56,24 @@ MP3Decoder::~MP3Decoder()
 
 bool MP3Decoder::SetConfig(const uint8_t* data,const size_t size)
 {
+	if (inited) return true;
+	MP3Config config;
 
-
+	Debug("-MP3Decoder::SetConfig()\n");
+
+	//Decode it
+	if (!config.Decode(data, size))
+		//Error
+		return Error("MP3Decoder::SetConfig() Could not parse MP3 config\n");;
 	//Set side data pon packet
 	uint8_t *side = av_packet_new_side_data(packet, AV_PKT_DATA_NEW_EXTRADATA, size);
 	//Copy it
 	memcpy(side,data,size);
 	//We are inited
 	inited = true;
+
+	ctx->channels = config.GetChannels();
+	ctx->sample_rate = config.GetRate();
 
 	//Done
 	return true;
@@ -78,9 +85,6 @@ int MP3Decoder::Decode(const AudioFrame::const_shared& audioFrame)
 	//Check we have config
 	if (audioFrame->HasCodecConfig())
 		SetConfig(audioFrame->GetCodecConfigData(), audioFrame->GetCodecConfigSize());
-
-	if (!inited)
-		return Error("-MP3Decoder::Decode() Not inited\n");
 
 	//Set data
 	packet->data = audioFrame ? (uint8_t*)audioFrame->GetData() : nullptr;

diff --git a/test/unit/TestAudioPipe.cpp b/test/unit/TestAudioPipe.cpp
@@ -35,6 +35,7 @@ void helperTestAudioPipe(const AudioPipeParam& playParam, const AudioPipeParam&
             auto audioBuffer = std::make_shared<AudioBuffer>(playFrameSize, numChannels);
             audioBuffer->SetSamples(inLoc, playFrameSize*numChannels);
             audioBuffer->SetTimestamp(playPTS[i]);
+            audioBuffer->SetClockRate(playSampleRate);
             audPipe.PlayBuffer(audioBuffer);
             inLoc += playFrameSize*numChannels;
         };

diff --git a/test/unit/TestMP3Config.cpp b/test/unit/TestMP3Config.cpp
@@ -0,0 +1,92 @@
+#include "TestCommon.h"
+
+#include "config.h"
+#include "log.h"
+#include "mp3/MP3Config.h"
+#include <algorithm>
+
+// Headers that MP3Config::Decode() must accept, with every decoded field checked
+TEST(TestMP3Config, SupportedMP3Config)
+{
+	{
+		BYTE mp3[4] = { 0xff, 0xfb, 0x74, 0xc4 };
+		MP3Config config;
+		ASSERT_TRUE(config.Decode(mp3, sizeof(mp3)));
+		/*
+		[MP3Config
+			mpegVersion = 1
+			mpegLayer = layerIII
+			rate = 48000
+			channels = 1
+			paddingSize = 0
+			frameLength = 1152
+			bitrate = 96000
+		/ ]
+		*/
+		ASSERT_EQ(config.GetAudioVersion()	, MP3Config::MPEGAudioVersion::MPEGVersion1);
+		ASSERT_EQ(config.GetAudioLayer()	, MP3Config::MPEGAudioLayer::MPEGLayer3);
+		ASSERT_EQ(config.GetRate()		, 48000);
+		ASSERT_EQ(config.GetChannels()		, 1);
+		ASSERT_EQ(config.GetPaddingSize()	, 0);
+		ASSERT_EQ(config.GetFrameLength()	, 1152);
+		ASSERT_EQ(config.GetBitrate()		, 96000);
+	}
+	{
+		BYTE mp3[4] = { 0xff, 0xf3, 0x44, 0xc4 };
+		MP3Config config;
+		ASSERT_TRUE(config.Decode(mp3, sizeof(mp3)));
+		/*
+		[MP3Config
+			mpegVersion = 2
+			mpegLayer = layerIII
+			rate = 24000
+			channels = 1
+			paddingSize = 0
+			frameLength = 576
+			bitrate = 32000
+		/ ]
+		*/
+		ASSERT_EQ(config.GetAudioVersion()	, MP3Config::MPEGAudioVersion::MPEGVersion2);
+		ASSERT_EQ(config.GetAudioLayer()	, MP3Config::MPEGAudioLayer::MPEGLayer3);
+		ASSERT_EQ(config.GetRate()		, 24000);
+		ASSERT_EQ(config.GetChannels()		, 1);
+		ASSERT_EQ(config.GetPaddingSize()	, 0);
+		ASSERT_EQ(config.GetFrameLength()	, 576);
+		// Bitrate index 4 maps to 32 kbps in the MPEG-2 table
+		ASSERT_EQ(config.GetBitrate()		, 32000);
+	}
+	{
+		BYTE mp3[4] = { 0xff, 0xfb, 0x70, 0x64 };
+		MP3Config config;
+		ASSERT_TRUE(config.Decode(mp3, sizeof(mp3)));
+		/*
+		[MP3Config
+			mpegVersion = 1
+			mpegLayer = layerIII
+			rate = 44100
+			channels = 2
+			paddingSize = 0
+			frameLength = 1152
+			bitrate = 96000
+		/ ]
+		*/
+		ASSERT_EQ(config.GetAudioVersion()	, MP3Config::MPEGAudioVersion::MPEGVersion1);
+		ASSERT_EQ(config.GetAudioLayer()	, MP3Config::MPEGAudioLayer::MPEGLayer3);
+		ASSERT_EQ(config.GetRate()		, 44100);
+		ASSERT_EQ(config.GetChannels()		, 2);
+		ASSERT_EQ(config.GetPaddingSize()	, 0);
+		ASSERT_EQ(config.GetFrameLength()	, 1152);
+		ASSERT_EQ(config.GetBitrate()		, 96000);
+	}
+}
+
+// Headers that MP3Config::Decode() must reject
+TEST(TestMP3Config, UnsupportedMP3Config)
+{
+	// Version bits are 00 (mpeg2extension): Decode() only accepts version values 2 and 3
+	BYTE unsupportedVersion[4] = { 0xff, 0xe3, 0x74, 0xc4 };
+	MP3Config versionConfig;
+	ASSERT_FALSE(versionConfig.Decode(unsupportedVersion, sizeof(unsupportedVersion)));
+
+	// Layer bits are 11 (mpeg layer I): Decode() only accepts layer value 1 (layer III)
+	BYTE unsupportedLayer[4] = { 0xff, 0xf7, 0x44, 0xc4 };
+	MP3Config layerConfig;
+	ASSERT_FALSE(layerConfig.Decode(unsupportedLayer, sizeof(unsupportedLayer)));
+}