blob: e5eb592efa1347565b62b0b19da75f71a3200dd3 [file] [log] [blame]
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/delay_manager.h"
#include <assert.h>
#include <math.h>
#include <algorithm> // max, min
#include <numeric>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/delay_peak_detector.h"
#include "modules/include/module_common_types.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "system_wrappers/include/field_trial.h"
namespace {
constexpr int kLimitProbability = 53687091; // 1/20 in Q30.
constexpr int kLimitProbabilityStreaming = 536871; // 1/2000 in Q30.
constexpr int kMaxStreamingPeakPeriodMs = 600000; // 10 minutes in ms.
constexpr int kCumulativeSumDrift = 2; // Drift term for cumulative sum
// |iat_cumulative_sum_|.
// Steady-state forgetting factor for |iat_vector_|, 0.9993 in Q15.
constexpr int kIatFactor_ = 32745;
constexpr int kMaxIat = 64; // Max inter-arrival time to register.
absl::optional<int> GetForcedLimitProbability() {
constexpr char kForceTargetDelayPercentileFieldTrial[] =
"WebRTC-Audio-NetEqForceTargetDelayPercentile";
const bool use_forced_target_delay_percentile =
webrtc::field_trial::IsEnabled(kForceTargetDelayPercentileFieldTrial);
if (use_forced_target_delay_percentile) {
const std::string field_trial_string = webrtc::field_trial::FindFullName(
kForceTargetDelayPercentileFieldTrial);
double percentile = -1.0;
if (sscanf(field_trial_string.c_str(), "Enabled-%lf", &percentile) == 1 &&
percentile >= 0.0 && percentile <= 100.0) {
return absl::make_optional<int>(static_cast<int>(
(1 << 30) * (100.0 - percentile) / 100.0 + 0.5)); // in Q30.
} else {
RTC_LOG(LS_WARNING) << "Invalid parameter for "
<< kForceTargetDelayPercentileFieldTrial
<< ", ignored.";
}
}
return absl::nullopt;
}
} // namespace
namespace webrtc {
DelayManager::DelayManager(size_t max_packets_in_buffer,
DelayPeakDetector* peak_detector,
const TickTimer* tick_timer)
: first_packet_received_(false),
max_packets_in_buffer_(max_packets_in_buffer),
iat_vector_(kMaxIat + 1, 0),
iat_factor_(0),
tick_timer_(tick_timer),
base_target_level_(4), // In Q0 domain.
target_level_(base_target_level_ << 8), // In Q8 domain.
packet_len_ms_(0),
streaming_mode_(false),
last_seq_no_(0),
last_timestamp_(0),
minimum_delay_ms_(0),
maximum_delay_ms_(target_level_),
iat_cumulative_sum_(0),
max_iat_cumulative_sum_(0),
peak_detector_(*peak_detector),
last_pack_cng_or_dtmf_(1),
frame_length_change_experiment_(
field_trial::IsEnabled("WebRTC-Audio-NetEqFramelengthExperiment")),
forced_limit_probability_(GetForcedLimitProbability()) {
assert(peak_detector); // Should never be NULL.
Reset();
}
DelayManager::~DelayManager() {}
const DelayManager::IATVector& DelayManager::iat_vector() const {
return iat_vector_;
}
// Set the histogram vector to an exponentially decaying distribution
// iat_vector_[i] = 0.5^(i+1), i = 0, 1, 2, ...
// iat_vector_ is in Q30.
void DelayManager::ResetHistogram() {
// Set temp_prob to (slightly more than) 1 in Q14. This ensures that the sum
// of iat_vector_ is 1.
uint16_t temp_prob = 0x4002; // 16384 + 2 = 100000000000010 binary.
IATVector::iterator it = iat_vector_.begin();
for (; it < iat_vector_.end(); it++) {
temp_prob >>= 1;
(*it) = temp_prob << 16;
}
base_target_level_ = 4;
target_level_ = base_target_level_ << 8;
}
int DelayManager::Update(uint16_t sequence_number,
uint32_t timestamp,
int sample_rate_hz) {
if (sample_rate_hz <= 0) {
return -1;
}
if (!first_packet_received_) {
// Prepare for next packet arrival.
packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
last_seq_no_ = sequence_number;
last_timestamp_ = timestamp;
first_packet_received_ = true;
return 0;
}
// Try calculating packet length from current and previous timestamps.
int packet_len_ms;
if (!IsNewerTimestamp(timestamp, last_timestamp_) ||
!IsNewerSequenceNumber(sequence_number, last_seq_no_)) {
// Wrong timestamp or sequence order; use stored value.
packet_len_ms = packet_len_ms_;
} else {
// Calculate timestamps per packet and derive packet length in ms.
int64_t packet_len_samp =
static_cast<uint32_t>(timestamp - last_timestamp_) /
static_cast<uint16_t>(sequence_number - last_seq_no_);
packet_len_ms =
rtc::saturated_cast<int>(1000 * packet_len_samp / sample_rate_hz);
}
if (packet_len_ms > 0) {
// Cannot update statistics unless |packet_len_ms| is valid.
// Calculate inter-arrival time (IAT) in integer "packet times"
// (rounding down). This is the value used as index to the histogram
// vector |iat_vector_|.
int iat_packets = packet_iat_stopwatch_->ElapsedMs() / packet_len_ms;
if (streaming_mode_) {
UpdateCumulativeSums(packet_len_ms, sequence_number);
}
// Check for discontinuous packet sequence and re-ordering.
if (IsNewerSequenceNumber(sequence_number, last_seq_no_ + 1)) {
// Compensate for gap in the sequence numbers. Reduce IAT with the
// expected extra time due to lost packets, but ensure that the IAT is
// not negative.
iat_packets -= static_cast<uint16_t>(sequence_number - last_seq_no_ - 1);
iat_packets = std::max(iat_packets, 0);
} else if (!IsNewerSequenceNumber(sequence_number, last_seq_no_)) {
iat_packets += static_cast<uint16_t>(last_seq_no_ + 1 - sequence_number);
}
// Saturate IAT at maximum value.
const int max_iat = kMaxIat;
iat_packets = std::min(iat_packets, max_iat);
UpdateHistogram(iat_packets);
// Calculate new |target_level_| based on updated statistics.
target_level_ = CalculateTargetLevel(iat_packets);
if (streaming_mode_) {
target_level_ = std::max(target_level_, max_iat_cumulative_sum_);
}
LimitTargetLevel();
} // End if (packet_len_ms > 0).
// Prepare for next packet arrival.
packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
last_seq_no_ = sequence_number;
last_timestamp_ = timestamp;
return 0;
}
void DelayManager::UpdateCumulativeSums(int packet_len_ms,
uint16_t sequence_number) {
// Calculate IAT in Q8, including fractions of a packet (i.e., more
// accurate than |iat_packets|.
int iat_packets_q8 =
(packet_iat_stopwatch_->ElapsedMs() << 8) / packet_len_ms;
// Calculate cumulative sum IAT with sequence number compensation. The sum
// is zero if there is no clock-drift.
iat_cumulative_sum_ +=
(iat_packets_q8 -
(static_cast<int>(sequence_number - last_seq_no_) << 8));
// Subtract drift term.
iat_cumulative_sum_ -= kCumulativeSumDrift;
// Ensure not negative.
iat_cumulative_sum_ = std::max(iat_cumulative_sum_, 0);
if (iat_cumulative_sum_ > max_iat_cumulative_sum_) {
// Found a new maximum.
max_iat_cumulative_sum_ = iat_cumulative_sum_;
max_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
}
if (max_iat_stopwatch_->ElapsedMs() > kMaxStreamingPeakPeriodMs) {
// Too long since the last maximum was observed; decrease max value.
max_iat_cumulative_sum_ -= kCumulativeSumDrift;
}
}
// Each element in the vector is first multiplied by the forgetting factor
// |iat_factor_|. Then the vector element indicated by |iat_packets| is then
// increased (additive) by 1 - |iat_factor_|. This way, the probability of
// |iat_packets| is slightly increased, while the sum of the histogram remains
// constant (=1).
// Due to inaccuracies in the fixed-point arithmetic, the histogram may no
// longer sum up to 1 (in Q30) after the update. To correct this, a correction
// term is added or subtracted from the first element (or elements) of the
// vector.
// The forgetting factor |iat_factor_| is also updated. When the DelayManager
// is reset, the factor is set to 0 to facilitate rapid convergence in the
// beginning. With each update of the histogram, the factor is increased towards
// the steady-state value |kIatFactor_|.
void DelayManager::UpdateHistogram(size_t iat_packets) {
assert(iat_packets < iat_vector_.size());
int vector_sum = 0; // Sum up the vector elements as they are processed.
// Multiply each element in |iat_vector_| with |iat_factor_|.
for (IATVector::iterator it = iat_vector_.begin(); it != iat_vector_.end();
++it) {
*it = (static_cast<int64_t>(*it) * iat_factor_) >> 15;
vector_sum += *it;
}
// Increase the probability for the currently observed inter-arrival time
// by 1 - |iat_factor_|. The factor is in Q15, |iat_vector_| in Q30.
// Thus, left-shift 15 steps to obtain result in Q30.
iat_vector_[iat_packets] += (32768 - iat_factor_) << 15;
vector_sum += (32768 - iat_factor_) << 15; // Add to vector sum.
// |iat_vector_| should sum up to 1 (in Q30), but it may not due to
// fixed-point rounding errors.
vector_sum -= 1 << 30; // Should be zero. Compensate if not.
if (vector_sum != 0) {
// Modify a few values early in |iat_vector_|.
int flip_sign = vector_sum > 0 ? -1 : 1;
IATVector::iterator it = iat_vector_.begin();
while (it != iat_vector_.end() && abs(vector_sum) > 0) {
// Add/subtract 1/16 of the element, but not more than |vector_sum|.
int correction = flip_sign * std::min(abs(vector_sum), (*it) >> 4);
*it += correction;
vector_sum += correction;
++it;
}
}
assert(vector_sum == 0); // Verify that the above is correct.
// Update |iat_factor_| (changes only during the first seconds after a reset).
// The factor converges to |kIatFactor_|.
iat_factor_ += (kIatFactor_ - iat_factor_ + 3) >> 2;
}
// Enforces upper and lower limits for |target_level_|. The upper limit is
// chosen to be minimum of i) 75% of |max_packets_in_buffer_|, to leave some
// headroom for natural fluctuations around the target, and ii) equivalent of
// |maximum_delay_ms_| in packets. Note that in practice, if no
// |maximum_delay_ms_| is specified, this does not have any impact, since the
// target level is far below the buffer capacity in all reasonable cases.
// The lower limit is equivalent of |minimum_delay_ms_| in packets. We update
// |least_required_level_| while the above limits are applied.
// TODO(hlundin): Move this check to the buffer logistics class.
void DelayManager::LimitTargetLevel() {
if (packet_len_ms_ > 0 && minimum_delay_ms_ > 0) {
int minimum_delay_packet_q8 = (minimum_delay_ms_ << 8) / packet_len_ms_;
target_level_ = std::max(target_level_, minimum_delay_packet_q8);
}
if (maximum_delay_ms_ > 0 && packet_len_ms_ > 0) {
int maximum_delay_packet_q8 = (maximum_delay_ms_ << 8) / packet_len_ms_;
target_level_ = std::min(target_level_, maximum_delay_packet_q8);
}
// Shift to Q8, then 75%.;
int max_buffer_packets_q8 =
static_cast<int>((3 * (max_packets_in_buffer_ << 8)) / 4);
target_level_ = std::min(target_level_, max_buffer_packets_q8);
// Sanity check, at least 1 packet (in Q8).
target_level_ = std::max(target_level_, 1 << 8);
}
int DelayManager::CalculateTargetLevel(int iat_packets) {
int limit_probability = forced_limit_probability_.value_or(kLimitProbability);
if (streaming_mode_) {
limit_probability = kLimitProbabilityStreaming;
}
// Calculate target buffer level from inter-arrival time histogram.
// Find the |iat_index| for which the probability of observing an
// inter-arrival time larger than or equal to |iat_index| is less than or
// equal to |limit_probability|. The sought probability is estimated using
// the histogram as the reverse cumulant PDF, i.e., the sum of elements from
// the end up until |iat_index|. Now, since the sum of all elements is 1
// (in Q30) by definition, and since the solution is often a low value for
// |iat_index|, it is more efficient to start with |sum| = 1 and subtract
// elements from the start of the histogram.
size_t index = 0; // Start from the beginning of |iat_vector_|.
int sum = 1 << 30; // Assign to 1 in Q30.
sum -= iat_vector_[index]; // Ensure that target level is >= 1.
do {
// Subtract the probabilities one by one until the sum is no longer greater
// than limit_probability.
++index;
sum -= iat_vector_[index];
} while ((sum > limit_probability) && (index < iat_vector_.size() - 1));
// This is the base value for the target buffer level.
int target_level = static_cast<int>(index);
base_target_level_ = static_cast<int>(index);
// Update detector for delay peaks.
bool delay_peak_found = peak_detector_.Update(iat_packets, target_level);
if (delay_peak_found) {
target_level = std::max(target_level, peak_detector_.MaxPeakHeight());
}
// Sanity check. |target_level| must be strictly positive.
target_level = std::max(target_level, 1);
// Scale to Q8 and assign to member variable.
target_level_ = target_level << 8;
return target_level_;
}
int DelayManager::SetPacketAudioLength(int length_ms) {
if (length_ms <= 0) {
RTC_LOG_F(LS_ERROR) << "length_ms = " << length_ms;
return -1;
}
if (frame_length_change_experiment_ && packet_len_ms_ != length_ms) {
iat_vector_ = ScaleHistogram(iat_vector_, packet_len_ms_, length_ms);
}
packet_len_ms_ = length_ms;
peak_detector_.SetPacketAudioLength(packet_len_ms_);
packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
last_pack_cng_or_dtmf_ = 1; // TODO(hlundin): Legacy. Remove?
return 0;
}
void DelayManager::Reset() {
packet_len_ms_ = 0; // Packet size unknown.
streaming_mode_ = false;
peak_detector_.Reset();
ResetHistogram(); // Resets target levels too.
iat_factor_ = 0; // Adapt the histogram faster for the first few packets.
packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
max_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
iat_cumulative_sum_ = 0;
max_iat_cumulative_sum_ = 0;
last_pack_cng_or_dtmf_ = 1;
}
double DelayManager::EstimatedClockDriftPpm() const {
double sum = 0.0;
// Calculate the expected value based on the probabilities in |iat_vector_|.
for (size_t i = 0; i < iat_vector_.size(); ++i) {
sum += static_cast<double>(iat_vector_[i]) * i;
}
// The probabilities in |iat_vector_| are in Q30. Divide by 1 << 30 to convert
// to Q0; subtract the nominal inter-arrival time (1) to make a zero
// clockdrift represent as 0; mulitply by 1000000 to produce parts-per-million
// (ppm).
return (sum / (1 << 30) - 1) * 1e6;
}
bool DelayManager::PeakFound() const {
return peak_detector_.peak_found();
}
void DelayManager::ResetPacketIatCount() {
packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
}
// Note that |low_limit| and |higher_limit| are not assigned to
// |minimum_delay_ms_| and |maximum_delay_ms_| defined by the client of this
// class. They are computed from |target_level_| and used for decision making.
void DelayManager::BufferLimits(int* lower_limit, int* higher_limit) const {
if (!lower_limit || !higher_limit) {
RTC_LOG_F(LS_ERROR) << "NULL pointers supplied as input";
assert(false);
return;
}
int window_20ms = 0x7FFF; // Default large value for legacy bit-exactness.
if (packet_len_ms_ > 0) {
window_20ms = (20 << 8) / packet_len_ms_;
}
// |target_level_| is in Q8 already.
*lower_limit = (target_level_ * 3) / 4;
// |higher_limit| is equal to |target_level_|, but should at
// least be 20 ms higher than |lower_limit_|.
*higher_limit = std::max(target_level_, *lower_limit + window_20ms);
}
int DelayManager::TargetLevel() const {
return target_level_;
}
void DelayManager::LastDecodedWasCngOrDtmf(bool it_was) {
if (it_was) {
last_pack_cng_or_dtmf_ = 1;
} else if (last_pack_cng_or_dtmf_ != 0) {
last_pack_cng_or_dtmf_ = -1;
}
}
void DelayManager::RegisterEmptyPacket() {
++last_seq_no_;
}
DelayManager::IATVector DelayManager::ScaleHistogram(const IATVector& histogram,
int old_packet_length,
int new_packet_length) {
if (old_packet_length == 0) {
// If we don't know the previous frame length, don't make any changes to the
// histogram.
return histogram;
}
RTC_DCHECK_GT(new_packet_length, 0);
RTC_DCHECK_EQ(old_packet_length % 10, 0);
RTC_DCHECK_EQ(new_packet_length % 10, 0);
IATVector new_histogram(histogram.size(), 0);
int64_t acc = 0;
int time_counter = 0;
size_t new_histogram_idx = 0;
for (size_t i = 0; i < histogram.size(); i++) {
acc += histogram[i];
time_counter += old_packet_length;
// The bins should be scaled, to ensure the histogram still sums to one.
const int64_t scaled_acc = acc * new_packet_length / time_counter;
int64_t actually_used_acc = 0;
while (time_counter >= new_packet_length) {
const int64_t old_histogram_val = new_histogram[new_histogram_idx];
new_histogram[new_histogram_idx] =
rtc::saturated_cast<int>(old_histogram_val + scaled_acc);
actually_used_acc += new_histogram[new_histogram_idx] - old_histogram_val;
new_histogram_idx =
std::min(new_histogram_idx + 1, new_histogram.size() - 1);
time_counter -= new_packet_length;
}
// Only subtract the part that was succesfully written to the new histogram.
acc -= actually_used_acc;
}
// If there is anything left in acc (due to rounding errors), add it to the
// last bin. If we cannot add everything to the last bin we need to add as
// much as possible to the bins after the last bin (this is only possible
// when compressing a histogram).
while (acc > 0 && new_histogram_idx < new_histogram.size()) {
const int64_t old_histogram_val = new_histogram[new_histogram_idx];
new_histogram[new_histogram_idx] =
rtc::saturated_cast<int>(old_histogram_val + acc);
acc -= new_histogram[new_histogram_idx] - old_histogram_val;
new_histogram_idx++;
}
RTC_DCHECK_EQ(histogram.size(), new_histogram.size());
if (acc == 0) {
// If acc is non-zero, we were not able to add everything to the new
// histogram, so this check will not hold.
RTC_DCHECK_EQ(accumulate(histogram.begin(), histogram.end(), 0ll),
accumulate(new_histogram.begin(), new_histogram.end(), 0ll));
}
return new_histogram;
}
bool DelayManager::SetMinimumDelay(int delay_ms) {
// Minimum delay shouldn't be more than maximum delay, if any maximum is set.
// Also, if possible check |delay| to less than 75% of
// |max_packets_in_buffer_|.
if ((maximum_delay_ms_ > 0 && delay_ms > maximum_delay_ms_) ||
(packet_len_ms_ > 0 &&
delay_ms >
static_cast<int>(3 * max_packets_in_buffer_ * packet_len_ms_ / 4))) {
return false;
}
minimum_delay_ms_ = delay_ms;
return true;
}
bool DelayManager::SetMaximumDelay(int delay_ms) {
if (delay_ms == 0) {
// Zero input unsets the maximum delay.
maximum_delay_ms_ = 0;
return true;
} else if (delay_ms < minimum_delay_ms_ || delay_ms < packet_len_ms_) {
// Maximum delay shouldn't be less than minimum delay or less than a packet.
return false;
}
maximum_delay_ms_ = delay_ms;
return true;
}
int DelayManager::base_target_level() const {
return base_target_level_;
}
void DelayManager::set_streaming_mode(bool value) {
streaming_mode_ = value;
}
int DelayManager::last_pack_cng_or_dtmf() const {
return last_pack_cng_or_dtmf_;
}
void DelayManager::set_last_pack_cng_or_dtmf(int value) {
last_pack_cng_or_dtmf_ = value;
}
} // namespace webrtc