AudioDecoder: New method SampleRateHz, plus implementations for our codecs
This will let NetEq (and the factory, and every layer in between) keep
track of just the decoder, instead of both the decoder and its sample rate.
BUG=webrtc:5801
Review-Url: https://codereview.webrtc.org/2024633002
Cr-Original-Commit-Position: refs/heads/master@{#12968}
Cr-Mirrored-From: https://chromium.googlesource.com/external/webrtc
Cr-Mirrored-Commit: 6c2eab34f8ff1f7a724622e3473858a4c0724bd8
diff --git a/modules/audio_coding/codecs/audio_decoder.cc b/modules/audio_coding/codecs/audio_decoder.cc
index 442ddc1..e91161e 100644
--- a/modules/audio_coding/codecs/audio_decoder.cc
+++ b/modules/audio_coding/codecs/audio_decoder.cc
@@ -82,6 +82,10 @@
return false;
}
+int AudioDecoder::SampleRateHz() const {
+ return -1;
+}
+
AudioDecoder::SpeechType AudioDecoder::ConvertSpeechType(int16_t type) {
switch (type) {
case 0: // TODO(hlundin): Both iSAC and Opus return 0 for speech.
diff --git a/modules/audio_coding/codecs/audio_decoder.h b/modules/audio_coding/codecs/audio_decoder.h
index 580ddbf..c77a069 100644
--- a/modules/audio_coding/codecs/audio_decoder.h
+++ b/modules/audio_coding/codecs/audio_decoder.h
@@ -93,6 +93,14 @@
// Returns true if the packet has FEC and false otherwise.
virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
+ // Returns the actual sample rate of the decoder's output.
+ // NOTE: For now, this has a default implementation that returns an unusable
+ // value (-1). That default implementation will go away soon, and at the same
+ // time callers will start relying on the return value, so make sure you
+ // override it with something that returns a correct value!
+ // TODO(kwiberg): Remove the default implementation.
+ virtual int SampleRateHz() const;
+
virtual size_t Channels() const = 0;
protected:
diff --git a/modules/audio_coding/codecs/builtin_audio_decoder_factory.cc b/modules/audio_coding/codecs/builtin_audio_decoder_factory.cc
index deb1b1f..48b2f5d 100644
--- a/modules/audio_coding/codecs/builtin_audio_decoder_factory.cc
+++ b/modules/audio_coding/codecs/builtin_audio_decoder_factory.cc
@@ -74,7 +74,7 @@
{"isac",
[](const SdpAudioFormat& format) {
return format.clockrate_hz == 16000 && format.num_channels == 1
- ? Unique(new AudioDecoderIsacFix)
+ ? Unique(new AudioDecoderIsacFix(format.clockrate_hz))
: nullptr;
}},
#elif defined(WEBRTC_CODEC_ISAC)
@@ -82,14 +82,15 @@
[](const SdpAudioFormat& format) {
return (format.clockrate_hz == 16000 || format.clockrate_hz == 32000) &&
format.num_channels == 1
- ? Unique(new AudioDecoderIsac)
+ ? Unique(new AudioDecoderIsac(format.clockrate_hz))
: nullptr;
}},
#endif
{"l16",
[](const SdpAudioFormat& format) {
return format.num_channels >= 1
- ? Unique(new AudioDecoderPcm16B(format.num_channels))
+ ? Unique(new AudioDecoderPcm16B(format.clockrate_hz,
+ format.num_channels))
: nullptr;
}},
#ifdef WEBRTC_CODEC_G722
@@ -136,7 +137,15 @@
const SdpAudioFormat& format) override {
for (const auto& dc : decoder_constructors) {
if (STR_CASE_CMP(format.name.c_str(), dc.name) == 0) {
- return std::unique_ptr<AudioDecoder>(dc.constructor(format));
+ std::unique_ptr<AudioDecoder> dec = dc.constructor(format);
+ if (dec) {
+ const int expected_sample_rate_hz =
+ STR_CASE_CMP(format.name.c_str(), "g722") == 0
+ ? 2 * format.clockrate_hz
+ : format.clockrate_hz;
+ RTC_CHECK_EQ(expected_sample_rate_hz, dec->SampleRateHz());
+ }
+ return dec;
}
}
return nullptr;
diff --git a/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc b/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc
index 9757b4a..af164c4 100644
--- a/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc
+++ b/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc
@@ -16,6 +16,10 @@
void AudioDecoderPcmU::Reset() {}
+int AudioDecoderPcmU::SampleRateHz() const {
+ return 8000;
+}
+
size_t AudioDecoderPcmU::Channels() const {
return num_channels_;
}
@@ -25,7 +29,7 @@
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
- RTC_DCHECK_EQ(sample_rate_hz, 8000);
+ RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz);
int16_t temp_type = 1; // Default is speech.
size_t ret = WebRtcG711_DecodeU(encoded, encoded_len, decoded, &temp_type);
*speech_type = ConvertSpeechType(temp_type);
@@ -40,6 +44,10 @@
void AudioDecoderPcmA::Reset() {}
+int AudioDecoderPcmA::SampleRateHz() const {
+ return 8000;
+}
+
size_t AudioDecoderPcmA::Channels() const {
return num_channels_;
}
@@ -49,7 +57,7 @@
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
- RTC_DCHECK_EQ(sample_rate_hz, 8000);
+ RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz);
int16_t temp_type = 1; // Default is speech.
size_t ret = WebRtcG711_DecodeA(encoded, encoded_len, decoded, &temp_type);
*speech_type = ConvertSpeechType(temp_type);
diff --git a/modules/audio_coding/codecs/g711/audio_decoder_pcm.h b/modules/audio_coding/codecs/g711/audio_decoder_pcm.h
index 7a627e7..7fdc359 100644
--- a/modules/audio_coding/codecs/g711/audio_decoder_pcm.h
+++ b/modules/audio_coding/codecs/g711/audio_decoder_pcm.h
@@ -24,6 +24,7 @@
}
void Reset() override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
+ int SampleRateHz() const override;
size_t Channels() const override;
protected:
@@ -45,6 +46,7 @@
}
void Reset() override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
+ int SampleRateHz() const override;
size_t Channels() const override;
protected:
diff --git a/modules/audio_coding/codecs/g722/audio_decoder_g722.cc b/modules/audio_coding/codecs/g722/audio_decoder_g722.cc
index 7676e90..379293b 100644
--- a/modules/audio_coding/codecs/g722/audio_decoder_g722.cc
+++ b/modules/audio_coding/codecs/g722/audio_decoder_g722.cc
@@ -35,7 +35,7 @@
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
- RTC_DCHECK_EQ(sample_rate_hz, 16000);
+ RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz);
int16_t temp_type = 1; // Default is speech.
size_t ret =
WebRtcG722_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type);
@@ -53,6 +53,10 @@
return static_cast<int>(2 * encoded_len / Channels());
}
+int AudioDecoderG722::SampleRateHz() const {
+ return 16000;
+}
+
size_t AudioDecoderG722::Channels() const {
return 1;
}
@@ -74,7 +78,7 @@
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
- RTC_DCHECK_EQ(sample_rate_hz, 16000);
+ RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz);
int16_t temp_type = 1; // Default is speech.
// De-interleave the bit-stream into two separate payloads.
uint8_t* encoded_deinterleaved = new uint8_t[encoded_len];
@@ -100,6 +104,10 @@
return static_cast<int>(ret);
}
+int AudioDecoderG722Stereo::SampleRateHz() const {
+ return 16000;
+}
+
size_t AudioDecoderG722Stereo::Channels() const {
return 2;
}
diff --git a/modules/audio_coding/codecs/g722/audio_decoder_g722.h b/modules/audio_coding/codecs/g722/audio_decoder_g722.h
index 1837ffa..ccca73d 100644
--- a/modules/audio_coding/codecs/g722/audio_decoder_g722.h
+++ b/modules/audio_coding/codecs/g722/audio_decoder_g722.h
@@ -25,6 +25,7 @@
bool HasDecodePlc() const override;
void Reset() override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
+ int SampleRateHz() const override;
size_t Channels() const override;
protected:
@@ -44,6 +45,8 @@
AudioDecoderG722Stereo();
~AudioDecoderG722Stereo() override;
void Reset() override;
+ int SampleRateHz() const override;
+ size_t Channels() const override;
protected:
int DecodeInternal(const uint8_t* encoded,
@@ -51,7 +54,6 @@
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
- size_t Channels() const override;
private:
// Splits the stereo-interleaved payload in |encoded| into separate payloads
diff --git a/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc b/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc
index 9ae0e1a..dab5805 100644
--- a/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc
+++ b/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc
@@ -49,6 +49,10 @@
WebRtcIlbcfix_Decoderinit30Ms(dec_state_);
}
+int AudioDecoderIlbc::SampleRateHz() const {
+ return 8000;
+}
+
size_t AudioDecoderIlbc::Channels() const {
return 1;
}
diff --git a/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h b/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h
index 036c11f..1083479 100644
--- a/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h
+++ b/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h
@@ -25,6 +25,7 @@
bool HasDecodePlc() const override;
size_t DecodePlc(size_t num_frames, int16_t* decoded) override;
void Reset() override;
+ int SampleRateHz() const override;
size_t Channels() const override;
protected:
diff --git a/modules/audio_coding/codecs/isac/audio_decoder_isac_t.h b/modules/audio_coding/codecs/isac/audio_decoder_isac_t.h
index b1907bb..264cca2 100644
--- a/modules/audio_coding/codecs/isac/audio_decoder_isac_t.h
+++ b/modules/audio_coding/codecs/isac/audio_decoder_isac_t.h
@@ -14,18 +14,24 @@
#include <vector>
#include "webrtc/base/constructormagic.h"
+#include "webrtc/base/optional.h"
#include "webrtc/base/scoped_ref_ptr.h"
#include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/codecs/isac/locked_bandwidth_info.h"
namespace webrtc {
+// TODO(kwiberg): Remove the possibility of not specifying the sample rate at
+// object creation time.
template <typename T>
class AudioDecoderIsacT final : public AudioDecoder {
public:
AudioDecoderIsacT();
explicit AudioDecoderIsacT(
const rtc::scoped_refptr<LockedIsacBandwidthInfo>& bwinfo);
+ explicit AudioDecoderIsacT(int sample_rate_hz);
+ AudioDecoderIsacT(int sample_rate_hz,
+ const rtc::scoped_refptr<LockedIsacBandwidthInfo>& bwinfo);
~AudioDecoderIsacT() override;
bool HasDecodePlc() const override;
@@ -37,6 +43,7 @@
uint32_t rtp_timestamp,
uint32_t arrival_timestamp) override;
int ErrorCode() override;
+ int SampleRateHz() const override;
size_t Channels() const override;
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
@@ -45,9 +52,12 @@
SpeechType* speech_type) override;
private:
+ AudioDecoderIsacT(rtc::Optional<int> sample_rate_hz,
+ const rtc::scoped_refptr<LockedIsacBandwidthInfo>& bwinfo);
+
typename T::instance_type* isac_state_;
+ rtc::Optional<int> sample_rate_hz_;
rtc::scoped_refptr<LockedIsacBandwidthInfo> bwinfo_;
- int decoder_sample_rate_hz_;
RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoderIsacT);
};
diff --git a/modules/audio_coding/codecs/isac/audio_decoder_isac_t_impl.h b/modules/audio_coding/codecs/isac/audio_decoder_isac_t_impl.h
index 4998fea..4316321 100644
--- a/modules/audio_coding/codecs/isac/audio_decoder_isac_t_impl.h
+++ b/modules/audio_coding/codecs/isac/audio_decoder_isac_t_impl.h
@@ -19,12 +19,31 @@
template <typename T>
AudioDecoderIsacT<T>::AudioDecoderIsacT()
- : AudioDecoderIsacT(nullptr) {}
+ : AudioDecoderIsacT(rtc::Optional<int>(), nullptr) {}
template <typename T>
AudioDecoderIsacT<T>::AudioDecoderIsacT(
const rtc::scoped_refptr<LockedIsacBandwidthInfo>& bwinfo)
- : bwinfo_(bwinfo), decoder_sample_rate_hz_(-1) {
+ : AudioDecoderIsacT(rtc::Optional<int>(), bwinfo) {}
+
+template <typename T>
+AudioDecoderIsacT<T>::AudioDecoderIsacT(int sample_rate_hz)
+ : AudioDecoderIsacT(rtc::Optional<int>(sample_rate_hz), nullptr) {}
+
+template <typename T>
+AudioDecoderIsacT<T>::AudioDecoderIsacT(
+ int sample_rate_hz,
+ const rtc::scoped_refptr<LockedIsacBandwidthInfo>& bwinfo)
+ : AudioDecoderIsacT(rtc::Optional<int>(sample_rate_hz), bwinfo) {}
+
+template <typename T>
+AudioDecoderIsacT<T>::AudioDecoderIsacT(
+ rtc::Optional<int> sample_rate_hz,
+ const rtc::scoped_refptr<LockedIsacBandwidthInfo>& bwinfo)
+ : sample_rate_hz_(sample_rate_hz), bwinfo_(bwinfo) {
+ RTC_CHECK(!sample_rate_hz || *sample_rate_hz == 16000 ||
+ *sample_rate_hz == 32000)
+ << "Unsupported sample rate " << *sample_rate_hz;
RTC_CHECK_EQ(0, T::Create(&isac_state_));
T::DecoderInit(isac_state_);
if (bwinfo_) {
@@ -32,6 +51,9 @@
T::GetBandwidthInfo(isac_state_, &bi);
bwinfo_->Set(bi);
}
+ if (sample_rate_hz_) {
+ RTC_CHECK_EQ(0, T::SetDecSampRate(isac_state_, *sample_rate_hz_));
+ }
}
template <typename T>
@@ -45,11 +67,13 @@
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
- RTC_CHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000)
- << "Unsupported sample rate " << sample_rate_hz;
- if (sample_rate_hz != decoder_sample_rate_hz_) {
- RTC_CHECK_EQ(0, T::SetDecSampRate(isac_state_, sample_rate_hz));
- decoder_sample_rate_hz_ = sample_rate_hz;
+ if (sample_rate_hz_) {
+ RTC_CHECK_EQ(*sample_rate_hz_, sample_rate_hz);
+ } else {
+ RTC_CHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000)
+ << "Unsupported sample rate " << sample_rate_hz;
+ sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz);
+ RTC_CHECK_EQ(0, T::SetDecSampRate(isac_state_, *sample_rate_hz_));
}
int16_t temp_type = 1; // Default is speech.
int ret =
@@ -96,6 +120,12 @@
}
template <typename T>
+int AudioDecoderIsacT<T>::SampleRateHz() const {
+ RTC_CHECK(sample_rate_hz_) << "Sample rate not set yet!";
+ return *sample_rate_hz_;
+}
+
+template <typename T>
size_t AudioDecoderIsacT<T>::Channels() const {
return 1;
}
diff --git a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
index f64e811..42abd0a 100644
--- a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
+++ b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
@@ -87,6 +87,10 @@
return (fec == 1);
}
+int AudioDecoderOpus::SampleRateHz() const {
+ return 48000;
+}
+
size_t AudioDecoderOpus::Channels() const {
return channels_;
}
diff --git a/modules/audio_coding/codecs/opus/audio_decoder_opus.h b/modules/audio_coding/codecs/opus/audio_decoder_opus.h
index be48ca9..c222041 100644
--- a/modules/audio_coding/codecs/opus/audio_decoder_opus.h
+++ b/modules/audio_coding/codecs/opus/audio_decoder_opus.h
@@ -27,6 +27,7 @@
int PacketDurationRedundant(const uint8_t* encoded,
size_t encoded_len) const override;
bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override;
+ int SampleRateHz() const override;
size_t Channels() const override;
protected:
diff --git a/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc b/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc
index 834c070..dce5f4c 100644
--- a/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc
+++ b/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc
@@ -15,13 +15,20 @@
namespace webrtc {
-AudioDecoderPcm16B::AudioDecoderPcm16B(size_t num_channels)
- : num_channels_(num_channels) {
+AudioDecoderPcm16B::AudioDecoderPcm16B(int sample_rate_hz, size_t num_channels)
+ : sample_rate_hz_(sample_rate_hz), num_channels_(num_channels) {
+ RTC_DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 ||
+ sample_rate_hz == 32000 || sample_rate_hz == 48000)
+ << "Unsupported sample rate " << sample_rate_hz;
RTC_DCHECK_GE(num_channels, 1u);
}
void AudioDecoderPcm16B::Reset() {}
+int AudioDecoderPcm16B::SampleRateHz() const {
+ return sample_rate_hz_;
+}
+
size_t AudioDecoderPcm16B::Channels() const {
return num_channels_;
}
@@ -31,9 +38,7 @@
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
- RTC_DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 ||
- sample_rate_hz == 32000 || sample_rate_hz == 48000)
- << "Unsupported sample rate " << sample_rate_hz;
+ RTC_DCHECK_EQ(sample_rate_hz_, sample_rate_hz);
size_t ret = WebRtcPcm16b_Decode(encoded, encoded_len, decoded);
*speech_type = ConvertSpeechType(1);
return static_cast<int>(ret);
diff --git a/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h b/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h
index 692cb94..df94a6a 100644
--- a/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h
+++ b/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h
@@ -18,9 +18,10 @@
class AudioDecoderPcm16B final : public AudioDecoder {
public:
- explicit AudioDecoderPcm16B(size_t num_channels);
+ AudioDecoderPcm16B(int sample_rate_hz, size_t num_channels);
void Reset() override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
+ int SampleRateHz() const override;
size_t Channels() const override;
protected:
@@ -31,6 +32,7 @@
SpeechType* speech_type) override;
private:
+ const int sample_rate_hz_;
const size_t num_channels_;
RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcm16B);
};
diff --git a/modules/audio_coding/neteq/audio_decoder_unittest.cc b/modules/audio_coding/neteq/audio_decoder_unittest.cc
index 1d0802e..f83afcc 100644
--- a/modules/audio_coding/neteq/audio_decoder_unittest.cc
+++ b/modules/audio_coding/neteq/audio_decoder_unittest.cc
@@ -310,7 +310,7 @@
codec_input_rate_hz_ = 16000;
frame_size_ = 20 * codec_input_rate_hz_ / 1000;
data_length_ = 10 * frame_size_;
- decoder_ = new AudioDecoderPcm16B(1);
+ decoder_ = new AudioDecoderPcm16B(codec_input_rate_hz_, 1);
assert(decoder_);
AudioEncoderPcm16B::Config config;
config.sample_rate_hz = codec_input_rate_hz_;
@@ -370,7 +370,7 @@
config.frame_size_ms =
1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_;
audio_encoder_.reset(new AudioEncoderIsac(config));
- decoder_ = new AudioDecoderIsac();
+ decoder_ = new AudioDecoderIsac(codec_input_rate_hz_);
}
};
@@ -387,7 +387,7 @@
config.frame_size_ms =
1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_;
audio_encoder_.reset(new AudioEncoderIsac(config));
- decoder_ = new AudioDecoderIsac();
+ decoder_ = new AudioDecoderIsac(codec_input_rate_hz_);
}
};
@@ -404,7 +404,7 @@
config.frame_size_ms =
1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_;
audio_encoder_.reset(new AudioEncoderIsacFix(config));
- decoder_ = new AudioDecoderIsacFix();
+ decoder_ = new AudioDecoderIsacFix(codec_input_rate_hz_);
}
};