blob: c4c7dbb4cd3eafaf811573115f302af8bafb84b4 [file] [log] [blame]
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h"
#include <assert.h> // assert
#include <math.h> // pow()
#include <string.h> // memcpy()
#include "webrtc/base/logging.h"
#include "webrtc/base/trace_event.h"
#include "webrtc/system_wrappers/include/critical_section_wrapper.h"
namespace webrtc {
// Factory for the audio variant of RTPReceiverStrategy.
//
// Both callbacks are forwarded unchanged to the RTPReceiverAudio constructor.
// The object is allocated with new and returned as a raw pointer.
RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy(
    RtpData* data_callback,
    RtpAudioFeedback* incoming_messages_callback) {
  RTPReceiverStrategy* const audio_strategy =
      new RTPReceiverAudio(data_callback, incoming_messages_callback);
  return audio_strategy;
}
// Builds an audio receiver strategy.
//
// |data_callback| is handed to the RTPReceiverStrategy base class and
// |incoming_messages_callback| is stored in cb_audio_feedback_.
// The last received frequency starts at 8000, every payload type member
// starts at -1, and the remote energy state starts out empty.
RTPReceiverAudio::RTPReceiverAudio(RtpData* data_callback,
                                   RtpAudioFeedback* incoming_messages_callback)
    : RTPReceiverStrategy(data_callback),
      TelephoneEventHandler(),
      last_received_frequency_(8000),
      telephone_event_forward_to_decoder_(false),
      telephone_event_payload_type_(-1),
      cng_nb_payload_type_(-1),
      cng_wb_payload_type_(-1),
      cng_swb_payload_type_(-1),
      cng_fb_payload_type_(-1),
      cng_payload_type_(-1),
      g722_payload_type_(-1),
      last_received_g722_(false),
      num_energy_(0),
      current_remote_energy_(),
      cb_audio_feedback_(incoming_messages_callback) {
  // Explicitly clear the energy buffer in addition to the value
  // initialization in the initializer list above.
  memset(current_remote_energy_, 0, sizeof(current_remote_energy_));

  // The last payload is treated as single-channel until set otherwise.
  last_payload_.Audio.channels = 1;
}
// Outband TelephoneEvent (DTMF) detection.
//
// Stores, under crit_sect_, whether telephone-event packets should be
// forwarded to the decoder.
void RTPReceiverAudio::SetTelephoneEventForwardToDecoder(
    bool forward_to_decoder) {
  CriticalSectionScoped scoped_lock(crit_sect_.get());
  telephone_event_forward_to_decoder_ = forward_to_decoder;
}
// Reports whether forwarding of outband telephone events to the decoder is
// currently switched on. The flag is read while holding crit_sect_.
bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const {
  CriticalSectionScoped scoped_lock(crit_sect_.get());
  const bool forwarding_enabled = telephone_event_forward_to_decoder_;
  return forwarding_enabled;
}
// Returns true when |payload_type| equals the stored telephone-event payload
// type. The comparison is made while holding crit_sect_.
bool RTPReceiverAudio::TelephoneEventPayloadType(
    int8_t payload_type) const {
  CriticalSectionScoped scoped_lock(crit_sect_.get());
  const bool is_telephone_event =
      (payload_type == telephone_event_payload_type_);
  return is_telephone_event;
}
// Classifies |payload_type| as comfort noise (CNG) or not.
//
// When |payload_type| matches one of the stored CNG payload types:
//   - |*frequency| is set to the matching rate (8000, 16000, 32000 or 48000;
//     the 16000 entry reports 8000 while last_received_g722_ is set),
//   - |*cng_payload_type_has_changed| is set to true if a different CNG
//     payload type had been seen before,
//   - cng_payload_type_ is updated and true is returned.
// Otherwise last_received_g722_ is refreshed from |payload_type|,
// |*frequency| is left untouched and false is returned.
// |*cng_payload_type_has_changed| is always written (false by default).
bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type,
                                      uint32_t* frequency,
                                      bool* cng_payload_type_has_changed) {
  CriticalSectionScoped lock(crit_sect_.get());
  *cng_payload_type_has_changed = false;

  // We can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz. The checks
  // are ordered narrow-band first so the first matching entry wins.
  uint32_t cng_frequency = 0;
  if (payload_type == cng_nb_payload_type_) {
    cng_frequency = 8000;
  } else if (payload_type == cng_wb_payload_type_) {
    // If the last received codec is G.722 we must use frequency 8000.
    cng_frequency = last_received_g722_ ? 8000 : 16000;
  } else if (payload_type == cng_swb_payload_type_) {
    cng_frequency = 32000;
  } else if (payload_type == cng_fb_payload_type_) {
    cng_frequency = 48000;
  } else {
    // Not CNG: only remember whether this packet was G.722.
    last_received_g722_ = (payload_type == g722_payload_type_);
    return false;
  }

  *frequency = cng_frequency;
  // |payload_type| equals the matched CNG member here, so comparing against
  // and storing |payload_type| is the same as using that member.
  if (cng_payload_type_ != -1 && cng_payload_type_ != payload_type) {
    *cng_payload_type_has_changed = true;
  }
  cng_payload_type_ = payload_type;
  return true;
}
// CSRC changes are reported for every payload type except the telephone-event
// (DTMF) payload type.
bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const {
  const bool is_dtmf = TelephoneEventPayloadType(payload_type);
  return !is_dtmf;
}
// - Sample based or frame based codecs based on RFC 3551
// -
// - NOTE! There is one error in the RFC, stating G.722 uses 8 bits/sample.
// - The correct rate is 4 bits/sample.
// -
// - name of sampling default
// - encoding sample/frame bits/sample rate ms/frame ms/packet
// -
// - Sample based audio codecs
// - DVI4 sample 4 var. 20
// - G722 sample 4 16,000 20
// - G726-40 sample 5 8,000 20
// - G726-32 sample 4 8,000 20
// - G726-24 sample 3 8,000 20
// - G726-16 sample 2 8,000 20
// - L8 sample 8 var. 20
// - L16 sample 16 var. 20
// - PCMA sample 8 var. 20
// - PCMU sample 8 var. 20
// -
// - Frame based audio codecs
// - G723 frame N/A 8,000 30 30
// - G728 frame N/A 8,000 2.5 20
// - G729 frame N/A 8,000 10 20
// - G729D frame N/A 8,000 10 20
// - G729E frame N/A 8,000 10 20
// - GSM frame N/A 8,000 20 20
// - GSM-EFR frame N/A 8,000 20 20
// - LPC frame N/A 8,000 20 20
// - MPA frame N/A var. var.
// -
// - G7221 frame N/A
// Records the payload types that need special handling in this class.
//
// - A name matching "telephone-event" stores |payload_type| as the
//   telephone-event payload type.
// - A name matching "cn" stores |payload_type| in the CNG slot selected by
//   |frequency| (8000, 16000, 32000 or 48000).
//
// Returns 0 on success. A "cn" payload with any other frequency triggers an
// assert and returns -1.
int32_t RTPReceiverAudio::OnNewPayloadTypeCreated(
    const char payload_name[RTP_PAYLOAD_NAME_SIZE],
    int8_t payload_type,
    uint32_t frequency) {
  CriticalSectionScoped lock(crit_sect_.get());

  if (RtpUtility::StringCompare(payload_name, "telephone-event", 15)) {
    telephone_event_payload_type_ = payload_type;
  }

  if (RtpUtility::StringCompare(payload_name, "cn", 2)) {
    // We can have four CNG: on 8000Hz, 16000Hz, 32000Hz and 48000Hz.
    switch (frequency) {
      case 8000:
        cng_nb_payload_type_ = payload_type;
        break;
      case 16000:
        cng_wb_payload_type_ = payload_type;
        break;
      case 32000:
        cng_swb_payload_type_ = payload_type;
        break;
      case 48000:
        cng_fb_payload_type_ = payload_type;
        break;
      default:
        assert(false);
        return -1;
    }
  }
  return 0;
}
// Entry point for an incoming audio RTP packet.
//
// Copies the number of CSRCs from the RTP header into the audio-specific
// header as the energy count, caches that count and the energy values in
// this object, and then passes the packet on to ParseAudioCodecSpecific().
//
// |timestamp_ms| and |is_first_packet| are not used by this implementation.
// Returns whatever ParseAudioCodecSpecific() returns.
int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header,
                                         const PayloadUnion& specific_payload,
                                         bool is_red,
                                         const uint8_t* payload,
                                         size_t payload_length,
                                         int64_t timestamp_ms,
                                         bool is_first_packet) {
  TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"), "Audio::ParseRtp",
               "seqnum", rtp_header->header.sequenceNumber, "timestamp",
               rtp_header->header.timestamp);
  rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs;

  {
    // Energy() reads num_energy_ and current_remote_energy_ while holding
    // crit_sect_, so they must be written under the same lock; otherwise a
    // concurrent reader can observe a count that does not match the array.
    // The lock is confined to this scope because no critsect may be held
    // when data_callback_ is invoked further down the call chain.
    CriticalSectionScoped lock(crit_sect_.get());
    num_energy_ = rtp_header->type.Audio.numEnergy;
    if (rtp_header->type.Audio.numEnergy > 0 &&
        rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) {
      memcpy(current_remote_energy_,
             rtp_header->type.Audio.arrOfEnergy,
             rtp_header->type.Audio.numEnergy);
    }
  }

  return ParseAudioCodecSpecific(rtp_header,
                                 payload,
                                 payload_length,
                                 specific_payload.Audio,
                                 is_red);
}
// Returns the frequency of the most recently received payload: 8000 whenever
// last_received_g722_ is set, otherwise last_received_frequency_.
// Both members are read while holding crit_sect_.
int RTPReceiverAudio::GetPayloadTypeFrequency() const {
  CriticalSectionScoped lock(crit_sect_.get());
  if (!last_received_g722_) {
    return last_received_frequency_;
  }
  return 8000;
}
// Maps the length of the last received payload to an alive state.
//
// Our CNG is 9 bytes, so a payload shorter than 10 bytes is a likely CNG
// packet and yields kRtpNoRtp; the receiver then needs to check kRtpNoRtp
// against NetEq speech_type kOutputPLCtoCNG. Any longer payload yields
// kRtpDead.
RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive(
    uint16_t last_payload_length) const {
  return (last_payload_length < 10) ? kRtpNoRtp : kRtpDead;
}
// Decides whether a payload type change should be discarded rather than
// reported.
//
// |*should_discard_changes| is set to true for telephone-event (DTMF) and
// CNG payload types, and to false for everything else. For non-DTMF payload
// types the CNG check is always run; when it matches it also updates
// |specific_payload->Audio.frequency|.
void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type,
                                           PayloadUnion* specific_payload,
                                           bool* should_discard_changes) {
  // Don't do callbacks for DTMF packets.
  if (TelephoneEventPayloadType(payload_type)) {
    *should_discard_changes = true;
    return;
  }

  // Required by CNGPayloadType(); the value is not used here.
  bool cng_type_changed = false;
  const bool is_cng = CNGPayloadType(
      payload_type, &specific_payload->Audio.frequency, &cng_type_changed);

  // Don't do callbacks for CNG packets either; everything else is reported.
  *should_discard_changes = is_cng;
}
// Copies the cached remote energy values into |array_of_energy| and returns
// how many values were cached (num_energy_). Nothing is copied when the count
// is zero. The cached state is read while holding crit_sect_.
int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const {
  CriticalSectionScoped cs(crit_sect_.get());
  assert(num_energy_ <= kRtpCsrcSize);

  if (num_energy_ > 0) {
    const size_t byte_count = num_energy_ * sizeof(uint8_t);
    memcpy(array_of_energy, current_remote_energy_, byte_count);
  }
  return num_energy_;
}
// Asks |callback| to initialize a decoder for the given audio payload,
// passing the frequency, channel count and rate from
// |specific_payload.Audio|.
//
// Returns 0 on success. If the callback returns -1, an error is logged and
// -1 is returned.
int32_t RTPReceiverAudio::InvokeOnInitializeDecoder(
    RtpFeedback* callback,
    int8_t payload_type,
    const char payload_name[RTP_PAYLOAD_NAME_SIZE],
    const PayloadUnion& specific_payload) const {
  if (callback->OnInitializeDecoder(payload_type,
                                    payload_name,
                                    specific_payload.Audio.frequency,
                                    specific_payload.Audio.channels,
                                    specific_payload.Audio.rate) != -1) {
    return 0;
  }

  LOG(LS_ERROR) << "Failed to create decoder for payload type: "
                << payload_name << "/" << static_cast<int>(payload_type);
  return -1;
}
// Handles the audio-specific part of an incoming RTP packet and delivers the
// payload to data_callback_.
//
// We are not allowed to have any critsects when calling data_callback, so
// every lock below lives in its own scope that ends before the callback.
//
// Steps:
//   1. Empty payloads are ignored (returns 0).
//   2. Telephone-event payloads update the set of currently reported events.
//      A payload whose length is not a multiple of 4 returns -1.
//   3. The last received frequency (non telephone-event packets only) and the
//      CNG/speech classification in |rtp_header| are updated.
//   4. Telephone events are dropped (returns 0) when forwarding to the
//      decoder is off, or when the lowest reported event code is above 15.
//   5. A RED packet carrying a single frame has its one-byte RED header
//      stripped before delivery; everything else is delivered unchanged.
//
// Returns the result of data_callback_->OnReceivedPayloadData() when the
// payload is delivered.
int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
    WebRtcRTPHeader* rtp_header,
    const uint8_t* payload_data,
    size_t payload_length,
    const AudioPayload& audio_specific,
    bool is_red) {
  if (payload_length == 0) {
    return 0;
  }

  const bool is_telephone_event =
      TelephoneEventPayloadType(rtp_header->header.payloadType);

  if (is_telephone_event) {
    CriticalSectionScoped lock(crit_sect_.get());

    // RFC 4733 2.3
    // 0                   1                   2                   3
    // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    // |     event     |E|R| volume    |          duration             |
    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    //
    // Each event occupies exactly four bytes.
    if (payload_length % 4 != 0) {
      return -1;
    }

    // Sanity: never look at more events than we support in parallel.
    size_t event_count = payload_length / 4;
    if (event_count > MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
      event_count = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
    }

    for (size_t i = 0; i < event_count; ++i) {
      const uint8_t* event_block = payload_data + (4 * i);
      const uint8_t event_code = event_block[0];
      const bool is_end = (event_block[1] & 0x80) != 0;
      if (is_end) {
        // End of a tone: forget the event if we had seen it. An end marker
        // for an event we never saw is not added.
        telephone_event_reported_.erase(event_code);
      } else {
        // Ongoing tone: remember it. Inserting an event we have already seen
        // leaves the set unchanged.
        telephone_event_reported_.insert(event_code);
      }
    }
    // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
    // should not be a problem since we don't care about the duration.
    // RFC 4733 See 2.5.1.5. & 2.5.2.4. Multiple Events in a Packet
  }

  {
    CriticalSectionScoped lock(crit_sect_.get());

    if (!is_telephone_event) {
      last_received_frequency_ = audio_specific.frequency;
    }

    // Check if this is a CNG packet, the receiver might want to know. The
    // two out-values are required by CNGPayloadType() but not needed here.
    uint32_t unused_frequency = 0;
    bool unused_type_changed = false;
    const bool is_cng = CNGPayloadType(rtp_header->header.payloadType,
                                       &unused_frequency,
                                       &unused_type_changed);
    rtp_header->type.Audio.isCNG = is_cng;
    rtp_header->frameType = is_cng ? kAudioFrameCN : kAudioFrameSpeech;

    // Check if it's a DTMF event, hence something we can play out.
    if (is_telephone_event) {
      if (!telephone_event_forward_to_decoder_) {
        // Don't forward the event to the decoder.
        return 0;
      }
      if (!telephone_event_reported_.empty() &&
          *telephone_event_reported_.begin() > 15) {
        // Don't forward non-DTMF events.
        return 0;
      }
    }
  }

  // TODO(holmer): Break this out to have RED parsing handled generically.
  const bool single_frame_red = is_red && !(payload_data[0] & 0x80);
  if (single_frame_red) {
    // We received only one frame packed in a RED packet: remove the RED
    // wrapper by taking the payload type from the first byte and stripping
    // that byte, to help NetEq.
    rtp_header->header.payloadType = payload_data[0];
    return data_callback_->OnReceivedPayloadData(
        payload_data + 1, payload_length - 1, rtp_header);
  }

  rtp_header->type.Audio.channel = audio_specific.channels;
  return data_callback_->OnReceivedPayloadData(
      payload_data, payload_length, rtp_header);
}
} // namespace webrtc