Handle padded audio packets correctly
RTP packets can be padded with extra data at the end of the payload. The usable
payload length of the packet should then be reduced with the padding length,
since the padding must be discarded. This was not the case; instead, the entire
payload, including padding data, was forwarded to the audio channel and in the
end to the decoder.
A special case of padding is packets which are empty except for the padding.
That is, they carry no usable payload. These packets are sometimes used for
probing the network and were discarded in
RTPReceiverAudio::ParseAudioCodecSpecific. The result is that NetEq never sees
those empty packets, just the holes in the sequence number series; this can
throw off the target buffer calculations.
With this change, the empty (after removing the padding) packets are let through,
all the way down to NetEq, to a new method called NetEq::InsertEmptyPacket. This
method notifies the DelayManager that an empty packet was received.
BUG=webrtc:7610, webrtc:7625
Review-Url: https://codereview.webrtc.org/2870043003
Cr-Commit-Position: refs/heads/master@{#18083}
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
index f6f1786..2eacefd 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
@@ -77,6 +77,11 @@
uint32_t receive_timestamp = 0;
const RTPHeader* header = &rtp_header.header; // Just a shorthand.
+ if (incoming_payload.empty()) {
+ neteq_->InsertEmptyPacket(rtp_header.header);
+ return 0;
+ }
+
{
rtc::CritScope lock(&crit_sect_);
diff --git a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
index e22eb5a..551ae05 100644
--- a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
+++ b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
@@ -1079,6 +1079,7 @@
int AudioCodingModuleImpl::IncomingPacket(const uint8_t* incoming_payload,
const size_t payload_length,
const WebRtcRTPHeader& rtp_header) {
+ RTC_DCHECK_EQ(payload_length == 0, incoming_payload == nullptr);
return receiver_.InsertPacket(
rtp_header,
rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));
diff --git a/webrtc/modules/audio_coding/neteq/delay_manager.cc b/webrtc/modules/audio_coding/neteq/delay_manager.cc
index 1f6cb05..2941126 100644
--- a/webrtc/modules/audio_coding/neteq/delay_manager.cc
+++ b/webrtc/modules/audio_coding/neteq/delay_manager.cc
@@ -374,6 +374,10 @@
}
}
+void DelayManager::RegisterEmptyPacket() {
+ ++last_seq_no_;
+}
+
bool DelayManager::SetMinimumDelay(int delay_ms) {
// Minimum delay shouldn't be more than maximum delay, if any maximum is set.
// Also, if possible check |delay| to less than 75% of
diff --git a/webrtc/modules/audio_coding/neteq/delay_manager.h b/webrtc/modules/audio_coding/neteq/delay_manager.h
index 7871572..f91c1de 100644
--- a/webrtc/modules/audio_coding/neteq/delay_manager.h
+++ b/webrtc/modules/audio_coding/neteq/delay_manager.h
@@ -95,6 +95,11 @@
// speech.
virtual void LastDecodedWasCngOrDtmf(bool it_was);
+ // Notify the delay manager that empty packets have been received. These are
+ // packets that are part of the sequence number series, so that an empty
+ // packet will shift the sequence numbers for the following packets.
+ virtual void RegisterEmptyPacket();
+
// Accessors and mutators.
// Assuming |delay| is in valid range.
virtual bool SetMinimumDelay(int delay_ms);
diff --git a/webrtc/modules/audio_coding/neteq/delay_manager_unittest.cc b/webrtc/modules/audio_coding/neteq/delay_manager_unittest.cc
index 84ceb88..a6ee58d 100644
--- a/webrtc/modules/audio_coding/neteq/delay_manager_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/delay_manager_unittest.cc
@@ -266,6 +266,58 @@
EXPECT_EQ(kMinDelayPackets << 8, dm_->TargetLevel());
}
+// Tests that skipped sequence numbers (simulating empty packets) are handled
+// correctly.
+TEST_F(DelayManagerTest, EmptyPacketsReported) {
+ SetPacketAudioLength(kFrameSizeMs);
+ // First packet arrival.
+ InsertNextPacket();
+
+ // Advance time by one frame size.
+ IncreaseTime(kFrameSizeMs);
+
+ // Advance the sequence number by 5, simulating that 5 empty packets were
+ // received, but never inserted.
+ seq_no_ += 10;
+ for (int j = 0; j < 10; ++j) {
+ dm_->RegisterEmptyPacket();
+ }
+
+ // Second packet arrival.
+ // Expect detector update method to be called once with inter-arrival time
+ // equal to 1 packet, and (base) target level equal to 1 as well.
+ // Return false to indicate no peaks found.
+ EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(false));
+ InsertNextPacket();
+
+ EXPECT_EQ(1 << 8, dm_->TargetLevel()); // In Q8.
+}
+
+// Same as above, but do not call RegisterEmptyPacket. Observe the target level
+// increase dramatically.
+TEST_F(DelayManagerTest, EmptyPacketsNotReported) {
+ SetPacketAudioLength(kFrameSizeMs);
+ // First packet arrival.
+ InsertNextPacket();
+
+ // Advance time by one frame size.
+ IncreaseTime(kFrameSizeMs);
+
+ // Advance the sequence number by 5, simulating that 5 empty packets were
+ // received, but never inserted.
+ seq_no_ += 10;
+
+ // Second packet arrival.
+ // Expect detector update method to be called once with inter-arrival time
+ // equal to 1 packet, and (base) target level equal to 1 as well.
+ // Return false to indicate no peaks found.
+ EXPECT_CALL(detector_, Update(10, 10)).WillOnce(Return(false));
+ InsertNextPacket();
+
+ // Note 10 times higher target value.
+ EXPECT_EQ(10 * 1 << 8, dm_->TargetLevel()); // In Q8.
+}
+
TEST_F(DelayManagerTest, Failures) {
// Wrong sample rate.
EXPECT_EQ(-1, dm_->Update(0, 0, -1));
diff --git a/webrtc/modules/audio_coding/neteq/include/neteq.h b/webrtc/modules/audio_coding/neteq/include/neteq.h
index f034580..45aae11 100644
--- a/webrtc/modules/audio_coding/neteq/include/neteq.h
+++ b/webrtc/modules/audio_coding/neteq/include/neteq.h
@@ -145,6 +145,12 @@
rtc::ArrayView<const uint8_t> payload,
uint32_t receive_timestamp) = 0;
+ // Lets NetEq know that a packet arrived with an empty payload. This typically
+ // happens when empty packets are used for probing the network channel, and
+ // these packets use RTP sequence numbers from the same series as the actual
+ // audio packets.
+ virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0;
+
// Instructs NetEq to deliver 10 ms of audio data. The data is written to
// |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
// |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
diff --git a/webrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h b/webrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h
index 7a66b68..85ed71d 100644
--- a/webrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h
+++ b/webrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h
@@ -45,6 +45,7 @@
void(int* lower_limit, int* higher_limit));
MOCK_CONST_METHOD0(TargetLevel,
int());
+ MOCK_METHOD0(RegisterEmptyPacket, void());
MOCK_METHOD1(set_extra_delay_ms,
void(int16_t delay));
MOCK_CONST_METHOD0(base_target_level,
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index 2d91b8c..f512d75 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -146,6 +146,14 @@
return kOK;
}
+void NetEqImpl::InsertEmptyPacket(const RTPHeader& /*rtp_header*/) {
+ // TODO(henrik.lundin) Handle NACK as well. This will make use of the
+ // rtp_header parameter.
+ // https://bugs.chromium.org/p/webrtc/issues/detail?id=7611
+ rtc::CritScope lock(&crit_sect_);
+ delay_manager_->RegisterEmptyPacket();
+}
+
namespace {
void SetAudioFrameActivityAndType(bool vad_enabled,
NetEqImpl::OutputType type,
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h
index 8a789bc..3436765 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h
@@ -109,6 +109,8 @@
rtc::ArrayView<const uint8_t> payload,
uint32_t receive_timestamp) override;
+ void InsertEmptyPacket(const RTPHeader& rtp_header) override;
+
int GetAudio(AudioFrame* audio_frame, bool* muted) override;
void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
index fb87111..d1703e9 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -1284,6 +1284,21 @@
EXPECT_EQ(17 * 30, neteq_->TargetDelayMs());
}
+TEST_F(NetEqImplTest, InsertEmptyPacket) {
+ UseNoMocks();
+ use_mock_delay_manager_ = true;
+ CreateInstance();
+
+ RTPHeader rtp_header;
+ rtp_header.payloadType = 17;
+ rtp_header.sequenceNumber = 0x1234;
+ rtp_header.timestamp = 0x12345678;
+ rtp_header.ssrc = 0x87654321;
+
+ EXPECT_CALL(*mock_delay_manager_, RegisterEmptyPacket());
+ neteq_->InsertEmptyPacket(rtp_header);
+}
+
class Decoder120ms : public AudioDecoder {
public:
Decoder120ms(int sample_rate_hz, SpeechType speech_type)
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.cc b/webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.cc
index acc5926..7b66fd3 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.cc
@@ -212,9 +212,13 @@
size_t payload_length,
const AudioPayload& audio_specific,
bool is_red) {
-
- if (payload_length == 0) {
- return 0;
+ RTC_DCHECK_GE(payload_length, rtp_header->header.paddingLength);
+ const size_t payload_data_length =
+ payload_length - rtp_header->header.paddingLength;
+ if (payload_data_length == 0) {
+ rtp_header->type.Audio.isCNG = false;
+ rtp_header->frameType = kEmptyFrame;
+ return data_callback_->OnReceivedPayloadData(nullptr, 0, rtp_header);
}
bool telephone_event_packet =
@@ -229,16 +233,17 @@
// | event |E|R| volume | duration |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
- if (payload_length % 4 != 0) {
+ if (payload_data_length % 4 != 0) {
return -1;
}
- size_t number_of_events = payload_length / 4;
+ size_t number_of_events = payload_data_length / 4;
// sanity
if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
}
for (size_t n = 0; n < number_of_events; ++n) {
+ RTC_DCHECK_GE(payload_data_length, (4 * n) + 2);
bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;
std::set<uint8_t>::iterator event =
@@ -291,17 +296,18 @@
}
}
// TODO(holmer): Break this out to have RED parsing handled generically.
+ RTC_DCHECK_GT(payload_data_length, 0);
if (is_red && !(payload_data[0] & 0x80)) {
// we recive only one frame packed in a RED packet remove the RED wrapper
rtp_header->header.payloadType = payload_data[0];
// only one frame in the RED strip the one byte to help NetEq
return data_callback_->OnReceivedPayloadData(
- payload_data + 1, payload_length - 1, rtp_header);
+ payload_data + 1, payload_data_length - 1, rtp_header);
}
rtp_header->type.Audio.channel = audio_specific.channels;
- return data_callback_->OnReceivedPayloadData(
- payload_data, payload_length, rtp_header);
+ return data_callback_->OnReceivedPayloadData(payload_data,
+ payload_data_length, rtp_header);
}
} // namespace webrtc