| /* |
| * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "modules/audio_coding/neteq/decision_logic.h" |
| |
| #include <assert.h> |
| #include <stdio.h> |
| #include <string> |
| |
| #include "absl/types/optional.h" |
| #include "modules/audio_coding/neteq/buffer_level_filter.h" |
| #include "modules/audio_coding/neteq/decoder_database.h" |
| #include "modules/audio_coding/neteq/delay_manager.h" |
| #include "modules/audio_coding/neteq/expand.h" |
| #include "modules/audio_coding/neteq/packet_buffer.h" |
| #include "modules/audio_coding/neteq/sync_buffer.h" |
| #include "rtc_base/checks.h" |
| #include "rtc_base/experiments/field_trial_parser.h" |
| #include "rtc_base/logging.h" |
| #include "rtc_base/numerics/safe_conversions.h" |
| #include "system_wrappers/include/field_trial.h" |
| |
namespace {

// Percentage of the target buffer level (target level times packet length)
// that the buffered span is compared against when deciding whether to keep
// expanding after an expand/PLC period; see DecisionLogic::GetDecision().
constexpr int kPostponeDecodingLevel = 50;

// Default value, in milliseconds, of the "target_level_window" field trial
// parameter.
constexpr int kDefaultTargetLevelWindowMs = 100;

}  // namespace
| |
| namespace webrtc { |
| |
| DecisionLogic* DecisionLogic::Create(int fs_hz, |
| size_t output_size_samples, |
| bool disallow_time_stretching, |
| DecoderDatabase* decoder_database, |
| const PacketBuffer& packet_buffer, |
| DelayManager* delay_manager, |
| BufferLevelFilter* buffer_level_filter, |
| const TickTimer* tick_timer) { |
| return new DecisionLogic(fs_hz, output_size_samples, disallow_time_stretching, |
| decoder_database, packet_buffer, delay_manager, |
| buffer_level_filter, tick_timer); |
| } |
| |
| DecisionLogic::DecisionLogic(int fs_hz, |
| size_t output_size_samples, |
| bool disallow_time_stretching, |
| DecoderDatabase* decoder_database, |
| const PacketBuffer& packet_buffer, |
| DelayManager* delay_manager, |
| BufferLevelFilter* buffer_level_filter, |
| const TickTimer* tick_timer) |
| : decoder_database_(decoder_database), |
| packet_buffer_(packet_buffer), |
| delay_manager_(delay_manager), |
| buffer_level_filter_(buffer_level_filter), |
| tick_timer_(tick_timer), |
| cng_state_(kCngOff), |
| packet_length_samples_(0), |
| sample_memory_(0), |
| prev_time_scale_(false), |
| disallow_time_stretching_(disallow_time_stretching), |
| timescale_countdown_( |
| tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)), |
| num_consecutive_expands_(0), |
| time_stretched_cn_samples_(0), |
| estimate_dtx_delay_("estimate_dtx_delay", false), |
| time_stretch_cn_("time_stretch_cn", false), |
| target_level_window_ms_("target_level_window", |
| kDefaultTargetLevelWindowMs, |
| 0, |
| absl::nullopt) { |
| SetSampleRate(fs_hz, output_size_samples); |
| const std::string field_trial_name = |
| field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings"); |
| ParseFieldTrial( |
| {&estimate_dtx_delay_, &time_stretch_cn_, &target_level_window_ms_}, |
| field_trial_name); |
| RTC_LOG(LS_INFO) << "NetEq decision logic settings:" |
| << " estimate_dtx_delay=" << estimate_dtx_delay_ |
| << " time_stretch_cn=" << time_stretch_cn_ |
| << " target_level_window_ms=" << target_level_window_ms_; |
| } |
| |
| DecisionLogic::~DecisionLogic() = default; |
| |
| void DecisionLogic::Reset() { |
| cng_state_ = kCngOff; |
| noise_fast_forward_ = 0; |
| packet_length_samples_ = 0; |
| sample_memory_ = 0; |
| prev_time_scale_ = false; |
| timescale_countdown_.reset(); |
| num_consecutive_expands_ = 0; |
| time_stretched_cn_samples_ = 0; |
| } |
| |
| void DecisionLogic::SoftReset() { |
| packet_length_samples_ = 0; |
| sample_memory_ = 0; |
| prev_time_scale_ = false; |
| timescale_countdown_ = |
| tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1); |
| time_stretched_cn_samples_ = 0; |
| } |
| |
| void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) { |
| // TODO(hlundin): Change to an enumerator and skip assert. |
| assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000); |
| sample_rate_ = fs_hz; |
| output_size_samples_ = output_size_samples; |
| } |
| |
| Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer, |
| const Expand& expand, |
| size_t decoder_frame_length, |
| const Packet* next_packet, |
| Modes prev_mode, |
| bool play_dtmf, |
| size_t generated_noise_samples, |
| bool* reset_decoder) { |
| // If last mode was CNG (or Expand, since this could be covering up for |
| // a lost CNG packet), remember that CNG is on. This is needed if comfort |
| // noise is interrupted by DTMF. |
| if (prev_mode == kModeRfc3389Cng) { |
| cng_state_ = kCngRfc3389On; |
| } else if (prev_mode == kModeCodecInternalCng) { |
| cng_state_ = kCngInternalOn; |
| } |
| |
| size_t cur_size_samples = |
| estimate_dtx_delay_ |
| ? packet_buffer_.GetSpanSamples(decoder_frame_length, sample_rate_, |
| true) |
| : packet_buffer_.NumSamplesInBuffer(decoder_frame_length); |
| |
| prev_time_scale_ = |
| prev_time_scale_ && (prev_mode == kModeAccelerateSuccess || |
| prev_mode == kModeAccelerateLowEnergy || |
| prev_mode == kModePreemptiveExpandSuccess || |
| prev_mode == kModePreemptiveExpandLowEnergy); |
| |
| // Do not update buffer history if currently playing CNG since it will bias |
| // the filtered buffer level. |
| if (prev_mode != kModeRfc3389Cng && prev_mode != kModeCodecInternalCng && |
| !(next_packet && next_packet->frame && |
| next_packet->frame->IsDtxPacket() && !estimate_dtx_delay_)) { |
| FilterBufferLevel(cur_size_samples); |
| } |
| |
| // Guard for errors, to avoid getting stuck in error mode. |
| if (prev_mode == kModeError) { |
| if (!next_packet) { |
| return kExpand; |
| } else { |
| return kUndefined; // Use kUndefined to flag for a reset. |
| } |
| } |
| |
| uint32_t target_timestamp = sync_buffer.end_timestamp(); |
| uint32_t available_timestamp = 0; |
| bool is_cng_packet = false; |
| if (next_packet) { |
| available_timestamp = next_packet->timestamp; |
| is_cng_packet = |
| decoder_database_->IsComfortNoise(next_packet->payload_type); |
| } |
| |
| if (is_cng_packet) { |
| return CngOperation(prev_mode, target_timestamp, available_timestamp, |
| generated_noise_samples); |
| } |
| |
| // Handle the case with no packet at all available (except maybe DTMF). |
| if (!next_packet) { |
| return NoPacket(play_dtmf); |
| } |
| |
| // If the expand period was very long, reset NetEQ since it is likely that the |
| // sender was restarted. |
| if (num_consecutive_expands_ > kReinitAfterExpands) { |
| *reset_decoder = true; |
| return kNormal; |
| } |
| |
| // Make sure we don't restart audio too soon after an expansion to avoid |
| // running out of data right away again. We should only wait if there are no |
| // DTX or CNG packets in the buffer (otherwise we should just play out what we |
| // have, since we cannot know the exact duration of DTX or CNG packets), and |
| // if the mute factor is low enough (otherwise the expansion was short enough |
| // to not be noticable). |
| // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1. |
| size_t current_span = packet_buffer_.GetSpanSamples( |
| decoder_frame_length, sample_rate_, estimate_dtx_delay_); |
| if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) && |
| expand.MuteFactor(0) < 16384 / 2 && |
| current_span < static_cast<size_t>(delay_manager_->TargetLevel() * |
| packet_length_samples_ * |
| kPostponeDecodingLevel / 100)>> 8 && |
| !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_)) { |
| return kExpand; |
| } |
| |
| const uint32_t five_seconds_samples = static_cast<uint32_t>(5 * sample_rate_); |
| // Check if the required packet is available. |
| if (target_timestamp == available_timestamp) { |
| return ExpectedPacketAvailable(prev_mode, play_dtmf); |
| } else if (!PacketBuffer::IsObsoleteTimestamp( |
| available_timestamp, target_timestamp, five_seconds_samples)) { |
| return FuturePacketAvailable(decoder_frame_length, prev_mode, |
| target_timestamp, available_timestamp, |
| play_dtmf, generated_noise_samples); |
| } else { |
| // This implies that available_timestamp < target_timestamp, which can |
| // happen when a new stream or codec is received. Signal for a reset. |
| return kUndefined; |
| } |
| } |
| |
| void DecisionLogic::ExpandDecision(Operations operation) { |
| if (operation == kExpand) { |
| num_consecutive_expands_++; |
| } else { |
| num_consecutive_expands_ = 0; |
| } |
| } |
| |
| void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) { |
| buffer_level_filter_->SetTargetBufferLevel( |
| delay_manager_->base_target_level()); |
| |
| int time_stretched_samples = time_stretched_cn_samples_; |
| if (prev_time_scale_) { |
| time_stretched_samples += sample_memory_; |
| timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval); |
| } |
| |
| buffer_level_filter_->Update(buffer_size_samples, time_stretched_samples); |
| prev_time_scale_ = false; |
| time_stretched_cn_samples_ = 0; |
| } |
| |
| Operations DecisionLogic::CngOperation(Modes prev_mode, |
| uint32_t target_timestamp, |
| uint32_t available_timestamp, |
| size_t generated_noise_samples) { |
| // Signed difference between target and available timestamp. |
| int32_t timestamp_diff = static_cast<int32_t>( |
| static_cast<uint32_t>(generated_noise_samples + target_timestamp) - |
| available_timestamp); |
| int32_t optimal_level_samp = static_cast<int32_t>( |
| (delay_manager_->TargetLevel() * packet_length_samples_) >> 8); |
| const int64_t excess_waiting_time_samp = |
| -static_cast<int64_t>(timestamp_diff) - optimal_level_samp; |
| |
| if (excess_waiting_time_samp > optimal_level_samp / 2) { |
| // The waiting time for this packet will be longer than 1.5 |
| // times the wanted buffer delay. Apply fast-forward to cut the |
| // waiting time down to the optimal. |
| noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ + |
| excess_waiting_time_samp); |
| timestamp_diff = |
| rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp); |
| } |
| |
| if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) { |
| // Not time to play this packet yet. Wait another round before using this |
| // packet. Keep on playing CNG from previous CNG parameters. |
| return kRfc3389CngNoPacket; |
| } else { |
| // Otherwise, go for the CNG packet now. |
| noise_fast_forward_ = 0; |
| return kRfc3389Cng; |
| } |
| } |
| |
| Operations DecisionLogic::NoPacket(bool play_dtmf) { |
| if (cng_state_ == kCngRfc3389On) { |
| // Keep on playing comfort noise. |
| return kRfc3389CngNoPacket; |
| } else if (cng_state_ == kCngInternalOn) { |
| // Keep on playing codec internal comfort noise. |
| return kCodecInternalCng; |
| } else if (play_dtmf) { |
| return kDtmf; |
| } else { |
| // Nothing to play, do expand. |
| return kExpand; |
| } |
| } |
| |
| Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode, |
| bool play_dtmf) { |
| if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) { |
| // Check criterion for time-stretching. The values are in number of packets |
| // in Q8. |
| int low_limit, high_limit; |
| delay_manager_->BufferLimits(&low_limit, &high_limit); |
| int buffer_level_packets = 0; |
| if (packet_length_samples_ > 0) { |
| buffer_level_packets = |
| ((1 << 8) * buffer_level_filter_->filtered_current_level()) / |
| packet_length_samples_; |
| } |
| if (buffer_level_packets >= high_limit << 2) |
| return kFastAccelerate; |
| if (TimescaleAllowed()) { |
| if (buffer_level_packets >= high_limit) |
| return kAccelerate; |
| if (buffer_level_packets < low_limit) |
| return kPreemptiveExpand; |
| } |
| } |
| return kNormal; |
| } |
| |
| Operations DecisionLogic::FuturePacketAvailable( |
| size_t decoder_frame_length, |
| Modes prev_mode, |
| uint32_t target_timestamp, |
| uint32_t available_timestamp, |
| bool play_dtmf, |
| size_t generated_noise_samples) { |
| // Required packet is not available, but a future packet is. |
| // Check if we should continue with an ongoing expand because the new packet |
| // is too far into the future. |
| uint32_t timestamp_leap = available_timestamp - target_timestamp; |
| if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) && |
| !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() && |
| PacketTooEarly(timestamp_leap) && UnderTargetLevel()) { |
| if (play_dtmf) { |
| // Still have DTMF to play, so do not do expand. |
| return kDtmf; |
| } else { |
| // Nothing to play. |
| return kExpand; |
| } |
| } |
| |
| if (prev_mode == kModeCodecPlc) { |
| return kNormal; |
| } |
| |
| // If previous was comfort noise, then no merge is needed. |
| if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) { |
| size_t cur_size_samples = |
| estimate_dtx_delay_ |
| ? cur_size_samples = packet_buffer_.GetSpanSamples( |
| decoder_frame_length, sample_rate_, true) |
| : packet_buffer_.NumPacketsInBuffer() * decoder_frame_length; |
| // Target level is in number of packets in Q8. |
| const size_t target_level_samples = |
| (delay_manager_->TargetLevel() * packet_length_samples_) >> 8; |
| const bool generated_enough_noise = |
| static_cast<uint32_t>(generated_noise_samples + target_timestamp) >= |
| available_timestamp; |
| |
| if (time_stretch_cn_) { |
| const size_t target_threshold_samples = |
| target_level_window_ms_ / 2 * (sample_rate_ / 1000); |
| const bool above_target_window = |
| cur_size_samples > target_level_samples + target_threshold_samples; |
| const bool below_target_window = |
| target_level_samples > target_threshold_samples && |
| cur_size_samples < target_level_samples - target_threshold_samples; |
| // Keep the delay same as before CNG, but make sure that it is within the |
| // target window. |
| if ((generated_enough_noise && !below_target_window) || |
| above_target_window) { |
| time_stretched_cn_samples_ = timestamp_leap - generated_noise_samples; |
| return kNormal; |
| } |
| } else { |
| // Keep the same delay as before the CNG, but make sure that the number of |
| // samples in buffer is no higher than 4 times the optimal level. |
| if (generated_enough_noise || |
| cur_size_samples > target_level_samples * 4) { |
| // Time to play this new packet. |
| return kNormal; |
| } |
| } |
| |
| // Too early to play this new packet; keep on playing comfort noise. |
| if (prev_mode == kModeRfc3389Cng) { |
| return kRfc3389CngNoPacket; |
| } |
| // prevPlayMode == kModeCodecInternalCng. |
| return kCodecInternalCng; |
| } |
| |
| // Do not merge unless we have done an expand before. |
| if (prev_mode == kModeExpand) { |
| return kMerge; |
| } else if (play_dtmf) { |
| // Play DTMF instead of expand. |
| return kDtmf; |
| } else { |
| return kExpand; |
| } |
| } |
| |
| bool DecisionLogic::UnderTargetLevel() const { |
| int buffer_level_packets = 0; |
| if (packet_length_samples_ > 0) { |
| buffer_level_packets = |
| ((1 << 8) * buffer_level_filter_->filtered_current_level()) / |
| packet_length_samples_; |
| } |
| return buffer_level_packets <= delay_manager_->TargetLevel(); |
| } |
| |
| bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const { |
| return timestamp_leap >= |
| static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands); |
| } |
| |
| bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const { |
| return timestamp_leap > |
| static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_); |
| } |
| |
| bool DecisionLogic::MaxWaitForPacket() const { |
| return num_consecutive_expands_ >= kMaxWaitForPacket; |
| } |
| |
| } // namespace webrtc |