blob: 804294ac5408547e9e347bd5a743348e9f26dc67 [file] [log] [blame]
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/rtp_rtcp/source/rtp_sender_audio.h"
#include <string.h>
#include "webrtc/base/trace_event.h"
#include "webrtc/modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
#include "webrtc/system_wrappers/include/tick_util.h"
namespace webrtc {
// Sample rate in Hz reported by AudioFrequency() and used by SendAudio() to
// convert a DTMF tone length from milliseconds to samples.
static const int kDtmfFrequencyHz = 8000;
// Stores |clock| and |rtpSender| for later use, creates the critical section
// that guards the configuration fields, and starts with no DTMF event active.
// Every payload type starts at -1, which the rest of this file treats as
// "not configured" / "nothing sent yet".
RTPSenderAudio::RTPSenderAudio(Clock* clock, RTPSender* rtpSender)
: _clock(clock),
_rtpSender(rtpSender),
_sendAudioCritsect(CriticalSectionWrapper::CreateCriticalSection()),
// Packet size in samples used until SetAudioPacketSize() overrides it
// (160 samples would be 20 ms at 8 kHz -- TODO confirm that is the intent).
_packetSizeSamples(160),
// DTMF (telephone-event) state; no event is in progress initially.
_dtmfEventIsOn(false),
_dtmfEventFirstPacketSent(false),
_dtmfPayloadType(-1),
_dtmfTimestamp(0),
_dtmfKey(0),
_dtmfLengthSamples(0),
_dtmfLevel(0),
_dtmfTimeLastSent(0),
_dtmfTimestampLastSent(0),
// RED (redundant audio) is disabled until SetRED() is called.
_REDPayloadType(-1),
_inbandVADactive(false),
// Comfort-noise payload types, filled in by RegisterAudioPayload().
_cngNBPayloadType(-1),
_cngWBPayloadType(-1),
_cngSWBPayloadType(-1),
_cngFBPayloadType(-1),
// -1 lets MarkerBit() recognise the very first packet.
_lastPayloadType(-1),
_audioLevel_dBov(0) {}
// Empty body: nothing is released explicitly here.
RTPSenderAudio::~RTPSenderAudio() {}
// Returns the fixed audio frequency, in Hz, that this sender reports
// (the same constant used for DTMF duration calculations).
int RTPSenderAudio::AudioFrequency() const {
  return kDtmfFrequencyHz;
}
// Records the audio packet size in samples. SendAudio() uses it to decide
// when the next DTMF packet is due while only empty frames arrive (silence /
// CNG). Always returns 0.
int32_t RTPSenderAudio::SetAudioPacketSize(uint16_t packetSizeSamples) {
  {
    CriticalSectionScoped lock(_sendAudioCritsect.get());
    _packetSizeSamples = packetSizeSamples;
  }
  return 0;
}
// Registers an audio payload type.
//
// - "cn" (comfort noise): remembers |payloadType| for the matching sample
//   rate (8, 16, 32 or 48 kHz) and then creates a payload entry like any
//   other codec. Any other rate is rejected with -1.
// - "telephone-event": remembers |payloadType| as the DTMF payload type and
//   returns 0 WITHOUT writing to |*payload|, so DTMF never becomes a regular
//   send payload.
// - anything else: allocates a new RtpUtility::Payload, fills in the audio
//   parameters and the (always NUL-terminated) name, and stores it in
//   |*payload|. The entry is allocated with new; the caller receives the raw
//   pointer.
//
// Returns 0 on success, -1 for an unsupported comfort-noise frequency.
int32_t RTPSenderAudio::RegisterAudioPayload(
    const char payloadName[RTP_PAYLOAD_NAME_SIZE],
    const int8_t payloadType,
    const uint32_t frequency,
    const size_t channels,
    const uint32_t rate,
    RtpUtility::Payload** payload) {
  if (RtpUtility::StringCompare(payloadName, "cn", 2)) {
    CriticalSectionScoped lock(_sendAudioCritsect.get());
    // Several CNG payload types may coexist, one per sample rate.
    if (frequency == 8000) {
      _cngNBPayloadType = payloadType;
    } else if (frequency == 16000) {
      _cngWBPayloadType = payloadType;
    } else if (frequency == 32000) {
      _cngSWBPayloadType = payloadType;
    } else if (frequency == 48000) {
      _cngFBPayloadType = payloadType;
    } else {
      return -1;
    }
  } else if (RtpUtility::StringCompare(payloadName, "telephone-event", 15)) {
    CriticalSectionScoped lock(_sendAudioCritsect.get());
    // Deliberately not turned into a payload entry: sending ordinary audio
    // with the DTMF payload type must not be possible.
    // The default timestamp rate is 8000 Hz, but other rates may be defined.
    _dtmfPayloadType = payloadType;
    return 0;
  }

  RtpUtility::Payload* entry = new RtpUtility::Payload;
  entry->audio = true;
  entry->typeSpecific.Audio.frequency = frequency;
  entry->typeSpecific.Audio.channels = channels;
  entry->typeSpecific.Audio.rate = rate;
  // strncpy does not terminate on truncation, so the last byte is forced
  // to NUL explicitly.
  strncpy(entry->name, payloadName, RTP_PAYLOAD_NAME_SIZE - 1);
  entry->name[RTP_PAYLOAD_NAME_SIZE - 1] = '\0';
  *payload = entry;
  return 0;
}
// Decides whether the RTP marker bit should be set for the next audio packet.
// For audio the marker flags the first packet of a speech burst:
//   - the very first packet, unless it is comfort noise;
//   - a change of payload type to a non-CNG payload type;
//   - the first non-CN frame following one or more CN frames (in-band VAD,
//     as used by e.g. G.723, G.729, AMR).
// Also updates the in-band VAD tracking flag as a side effect.
bool RTPSenderAudio::MarkerBit(FrameType frameType, int8_t payload_type) {
  CriticalSectionScoped lock(_sendAudioCritsect.get());

  const bool payload_type_changed = (_lastPayloadType != payload_type);
  if (payload_type_changed) {
    const bool switched_to_cng =
        payload_type != -1 && (_cngNBPayloadType == payload_type ||
                               _cngWBPayloadType == payload_type ||
                               _cngSWBPayloadType == payload_type ||
                               _cngFBPayloadType == payload_type);
    if (switched_to_cng) {
      // Switching *to* a CNG payload type never sets the marker.
      return false;
    }
    if (_lastPayloadType == -1) {
      // Very first packet.
      if (frameType == kAudioFrameCN) {
        _inbandVADactive = true;
        return false;
      }
      return true;
    }
  }

  // Reaching this point with a changed payload type means: not the first
  // packet, not a CNG payload type -> the change itself sets the marker.
  bool marker = payload_type_changed;

  if (frameType == kAudioFrameCN) {
    _inbandVADactive = true;
  } else if (_inbandVADactive) {
    // First non-CN frame after in-band comfort noise starts a new burst.
    _inbandVADactive = false;
    marker = true;
  }
  return marker;
}
// Sends one audio frame as an RTP packet, or, while a telephone event (DTMF)
// is pending or in progress, sends the corresponding event packet instead
// (events and coded audio for the same time span are not mixed).
//
// Parameters:
//   frameType        kEmptyFrame carries no audio and only drives DTMF
//                    timing; kAudioFrameCN / kAudioFrameSpeech influence the
//                    marker bit and the voice-activity flag of the audio
//                    level extension.
//   payloadType      RTP payload type of the audio data.
//   captureTimeStamp Timestamp passed to the RTP header builder; also used
//                    to measure DTMF durations in samples.
//   payloadData      Encoded audio. NULL / empty is accepted only together
//                    with kEmptyFrame.
//   dataSize         Number of bytes in |payloadData|.
//   fragmentation    Optional fragment description. With RED configured and
//                    exactly two fragments, fragment 1 is sent as the
//                    redundant block and fragment 0 as the primary block.
//
// Returns 0 when nothing had to be sent, -1 on error, otherwise the result
// of the underlying send call.
int32_t RTPSenderAudio::SendAudio(FrameType frameType,
                                  int8_t payloadType,
                                  uint32_t captureTimeStamp,
                                  const uint8_t* payloadData,
                                  size_t dataSize,
                                  const RTPFragmentationHeader* fragmentation) {
  // TODO(pwestin) Breakup function in smaller functions.
  size_t payloadSize = dataSize;
  size_t maxPayloadLength = _rtpSender->MaxPayloadLength();
  uint16_t dtmfLengthMS = 0;
  uint8_t key = 0;
  int red_payload_type;
  uint8_t audio_level_dbov;
  int8_t dtmf_payload_type;
  uint16_t packet_size_samples;
  {
    // Take a consistent snapshot of the configuration under the lock.
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    red_payload_type = _REDPayloadType;
    audio_level_dbov = _audioLevel_dBov;
    dtmf_payload_type = _dtmfPayloadType;
    packet_size_samples = _packetSizeSamples;
  }

  // Check if we have pending DTMFs to send
  if (!_dtmfEventIsOn && PendingDTMF()) {
    int64_t delaySinceLastDTMF =
        _clock->TimeInMilliseconds() - _dtmfTimeLastSent;

    // Keep at least 100 ms between the end of one tone and the next.
    if (delaySinceLastDTMF > 100) {
      // New tone to play
      _dtmfTimestamp = captureTimeStamp;
      if (NextDTMF(&key, &dtmfLengthMS, &_dtmfLevel) >= 0) {
        _dtmfEventFirstPacketSent = false;
        _dtmfKey = key;
        _dtmfLengthSamples = (kDtmfFrequencyHz / 1000) * dtmfLengthMS;
        _dtmfEventIsOn = true;
      }
    }
  }

  // A source MAY send events and coded audio packets for the same time
  // but we don't support it
  if (_dtmfEventIsOn) {
    if (frameType == kEmptyFrame) {
      // kEmptyFrame is used to drive the DTMF when in CN mode
      // it can be triggered more frequently than we want to send the
      // DTMF packets.
      if (packet_size_samples > (captureTimeStamp - _dtmfTimestampLastSent)) {
        // not time to send yet
        return 0;
      }
    }
    _dtmfTimestampLastSent = captureTimeStamp;
    uint32_t dtmfDurationSamples = captureTimeStamp - _dtmfTimestamp;
    bool ended = false;
    bool send = true;

    if (_dtmfLengthSamples > dtmfDurationSamples) {
      // The duration is unsigned, so "no time elapsed" is exactly zero.
      if (dtmfDurationSamples == 0) {
        // Skip send packet at start, since we shouldn't use duration 0
        send = false;
      }
    } else {
      ended = true;
      _dtmfEventIsOn = false;
      _dtmfTimeLastSent = _clock->TimeInMilliseconds();
    }
    if (send) {
      if (dtmfDurationSamples > 0xffff) {
        // RFC 4733 2.5.2.3 Long-Duration Events
        SendTelephoneEventPacket(ended, dtmf_payload_type, _dtmfTimestamp,
                                 static_cast<uint16_t>(0xffff), false);

        // set new timestamp for this segment
        _dtmfTimestamp = captureTimeStamp;
        dtmfDurationSamples -= 0xffff;
        _dtmfLengthSamples -= 0xffff;

        return SendTelephoneEventPacket(
            ended, dtmf_payload_type, _dtmfTimestamp,
            static_cast<uint16_t>(dtmfDurationSamples), false);
      } else {
        if (SendTelephoneEventPacket(ended, dtmf_payload_type, _dtmfTimestamp,
                                     static_cast<uint16_t>(dtmfDurationSamples),
                                     !_dtmfEventFirstPacketSent) != 0) {
          return -1;
        }
        _dtmfEventFirstPacketSent = true;
        return 0;
      }
    }
    return 0;
  }

  if (payloadSize == 0 || payloadData == NULL) {
    if (frameType == kEmptyFrame) {
      // we don't send empty audio RTP packets
      // no error since we use it to drive DTMF when we use VAD
      return 0;
    }
    return -1;
  }

  uint8_t dataBuffer[IP_PACKET_SIZE];
  bool markerBit = MarkerBit(frameType, payloadType);

  int32_t rtpHeaderLength = 0;
  uint16_t timestampOffset = 0;

  // RED is used only when it is configured, the frame carries more than one
  // fragment, and this is not the first packet of a talk spurt.
  const bool useRed = red_payload_type >= 0 && fragmentation && !markerBit &&
                      fragmentation->fragmentationVectorSize > 1;

  if (useRed) {
    // have we configured RED? use its payload type
    // we need to get the current timestamp to calc the diff
    uint32_t oldTimeStamp = _rtpSender->Timestamp();
    rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, red_payload_type,
                                                 markerBit, captureTimeStamp,
                                                 _clock->TimeInMilliseconds());

    timestampOffset = uint16_t(_rtpSender->Timestamp() - oldTimeStamp);
  } else {
    rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, payloadType,
                                                 markerBit, captureTimeStamp,
                                                 _clock->TimeInMilliseconds());
  }
  if (rtpHeaderLength <= 0) {
    return -1;
  }
  if (maxPayloadLength < (rtpHeaderLength + payloadSize)) {
    // Too large payload buffer.
    return -1;
  }

  if (useRed) {
    if (timestampOffset <= 0x3fff) {
      if (fragmentation->fragmentationVectorSize != 2) {
        // we only support 2 codecs when using RED
        return -1;
      }
      // RFC 2198 block header for the redundant block (4 bytes):
      //   F(1) | block PT(7) | timestamp offset(14) | block length(10)
      // only 0x80 if we have multiple blocks
      dataBuffer[rtpHeaderLength++] =
          0x80 + fragmentation->fragmentationPlType[1];
      size_t blockLength = fragmentation->fragmentationLength[1];

      // sanity blockLength
      if (blockLength > 0x3ff) {  // block length 10 bits 1023 bytes
        return -1;
      }
      // The offset/length field is exactly 24 bits wide, so write 3 bytes.
      // A 4-byte write would emit a leading zero byte and the low length
      // byte would then be overwritten by the primary payload type below.
      uint32_t REDheader = (timestampOffset << 10) + blockLength;
      ByteWriter<uint32_t, 3>::WriteBigEndian(dataBuffer + rtpHeaderLength,
                                              REDheader);
      rtpHeaderLength += 3;

      // Final (primary) block header: F = 0 followed by the payload type.
      dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
      // copy the RED data
      memcpy(dataBuffer + rtpHeaderLength,
             payloadData + fragmentation->fragmentationOffset[1],
             fragmentation->fragmentationLength[1]);

      // copy the normal data
      memcpy(
          dataBuffer + rtpHeaderLength + fragmentation->fragmentationLength[1],
          payloadData + fragmentation->fragmentationOffset[0],
          fragmentation->fragmentationLength[0]);

      payloadSize = fragmentation->fragmentationLength[0] +
                    fragmentation->fragmentationLength[1];
    } else {
      // silence for too long send only new data
      dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
      memcpy(dataBuffer + rtpHeaderLength,
             payloadData + fragmentation->fragmentationOffset[0],
             fragmentation->fragmentationLength[0]);

      payloadSize = fragmentation->fragmentationLength[0];
    }
  } else {
    if (fragmentation && fragmentation->fragmentationVectorSize > 0) {
      // use the fragment info if we have one
      dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
      memcpy(dataBuffer + rtpHeaderLength,
             payloadData + fragmentation->fragmentationOffset[0],
             fragmentation->fragmentationLength[0]);

      payloadSize = fragmentation->fragmentationLength[0];
    } else {
      memcpy(dataBuffer + rtpHeaderLength, payloadData, payloadSize);
    }
  }
  {
    // Remembered so MarkerBit() can detect the next payload type change.
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    _lastPayloadType = payloadType;
  }
  // Update audio level extension, if included.
  size_t packetSize = payloadSize + rtpHeaderLength;
  RtpUtility::RtpHeaderParser rtp_parser(dataBuffer, packetSize);
  RTPHeader rtp_header;
  rtp_parser.Parse(&rtp_header);
  _rtpSender->UpdateAudioLevel(dataBuffer, packetSize, rtp_header,
                               (frameType == kAudioFrameSpeech),
                               audio_level_dbov);
  TRACE_EVENT_ASYNC_END2("webrtc", "Audio", captureTimeStamp, "timestamp",
                         _rtpSender->Timestamp(), "seqnum",
                         _rtpSender->SequenceNumber());
  return _rtpSender->SendToNetwork(dataBuffer, payloadSize, rtpHeaderLength,
                                   TickTime::MillisecondTimestamp(),
                                   kAllowRetransmission,
                                   RtpPacketSender::kHighPriority);
}
// Stores the audio level (in dBov) that SendAudio() passes to the audio level
// header-extension update for each outgoing packet. Values above 127 are
// rejected with -1; otherwise returns 0.
int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dBov) {
  const uint8_t kMaxLevelDbov = 127;
  if (level_dBov <= kMaxLevelDbov) {
    CriticalSectionScoped lock(_sendAudioCritsect.get());
    _audioLevel_dBov = level_dBov;
    return 0;
  }
  return -1;
}
// Sets the payload type used for Redundant Audio Data (RFC 2198).
// -1 means "not configured"; anything below -1 is rejected with -1.
// Returns 0 on success.
int32_t RTPSenderAudio::SetRED(int8_t payloadType) {
  const bool acceptable = payloadType >= -1;
  if (!acceptable) {
    return -1;
  }
  CriticalSectionScoped lock(_sendAudioCritsect.get());
  _REDPayloadType = payloadType;
  return 0;
}
// Reads the payload type configured for Redundant Audio Data (RFC 2198).
// Writes it to |*payloadType| and returns 0, or returns -1 and leaves
// |*payloadType| untouched when RED is not configured.
int32_t RTPSenderAudio::RED(int8_t* payloadType) const {
  CriticalSectionScoped lock(_sendAudioCritsect.get());
  if (_REDPayloadType != -1) {
    *payloadType = _REDPayloadType;
    return 0;
  }
  // not configured
  return -1;
}
// Queues a telephone-event (DTMF) tone, RFC 2833 / RFC 4733.
// Fails with -1 when no telephone-event payload type has been registered;
// otherwise forwards |key|, |time_ms| and |level| to AddDTMF() and returns
// its result.
int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key,
                                           uint16_t time_ms,
                                           uint8_t level) {
  bool payload_type_configured;
  {
    // Only the configuration check happens under the lock; it is released
    // before AddDTMF() is called.
    CriticalSectionScoped lock(_sendAudioCritsect.get());
    payload_type_configured = !(_dtmfPayloadType < 0);
  }
  if (!payload_type_configured) {
    // TelephoneEvent payloadtype not configured
    return -1;
  }
  return AddDTMF(key, time_ms, level);
}
int32_t RTPSenderAudio::SendTelephoneEventPacket(bool ended,
int8_t dtmf_payload_type,
uint32_t dtmfTimeStamp,
uint16_t duration,
bool markerBit) {
uint8_t dtmfbuffer[IP_PACKET_SIZE];
uint8_t sendCount = 1;
int32_t retVal = 0;
if (ended) {
// resend last packet in an event 3 times
sendCount = 3;
}
do {
// Send DTMF data
_rtpSender->BuildRTPheader(dtmfbuffer, dtmf_payload_type, markerBit,
dtmfTimeStamp, _clock->TimeInMilliseconds());
// reset CSRC and X bit
dtmfbuffer[0] &= 0xe0;
// Create DTMF data
/* From RFC 2833:
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| event |E|R| volume | duration |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
// R bit always cleared
uint8_t R = 0x00;
uint8_t volume = _dtmfLevel;
// First packet un-ended
uint8_t E = ended ? 0x80 : 0x00;
// First byte is Event number, equals key number
dtmfbuffer[12] = _dtmfKey;
dtmfbuffer[13] = E | R | volume;
ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 14, duration);
TRACE_EVENT_INSTANT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"),
"Audio::SendTelephoneEvent", "timestamp",
dtmfTimeStamp, "seqnum", _rtpSender->SequenceNumber());
retVal = _rtpSender->SendToNetwork(
dtmfbuffer, 4, 12, TickTime::MillisecondTimestamp(),
kAllowRetransmission, RtpPacketSender::kHighPriority);
sendCount--;
} while (sendCount > 0 && retVal == 0);
return retVal;
}
} // namespace webrtc