|  | /* | 
|  | *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | 
|  | * | 
|  | *  Use of this source code is governed by a BSD-style license | 
|  | *  that can be found in the LICENSE file in the root of the source | 
|  | *  tree. An additional intellectual property rights grant can be found | 
|  | *  in the file PATENTS.  All contributing project authors may | 
|  | *  be found in the AUTHORS file in the root of the source tree. | 
|  | */ | 
|  |  | 
|  | #include "modules/audio_coding/neteq/decision_logic.h" | 
|  |  | 
|  | #include <assert.h> | 
|  | #include <stdio.h> | 
|  |  | 
|  | #include <string> | 
|  |  | 
|  | #include "absl/types/optional.h" | 
|  | #include "modules/audio_coding/neteq/packet_buffer.h" | 
|  | #include "rtc_base/checks.h" | 
|  | #include "rtc_base/experiments/field_trial_parser.h" | 
|  | #include "rtc_base/logging.h" | 
|  | #include "rtc_base/numerics/safe_conversions.h" | 
|  | #include "system_wrappers/include/field_trial.h" | 
|  |  | 
namespace {

// Percentage of the target level that the packet buffer span is compared
// against when deciding whether to keep expanding instead of resuming decoding.
constexpr int kPostponeDecodingLevel{50};
// Default value, in milliseconds, of the "target_level_window" field trial
// parameter.
constexpr int kDefaultTargetLevelWindowMs{100};
// Offset, in milliseconds, subtracted from the target level when computing the
// lower buffer limit used for the time-stretching decision.
constexpr int kDecelerationTargetLevelOffsetMs{85};

}  // namespace
|  |  | 
|  | namespace webrtc { | 
|  |  | 
|  | DecisionLogic::DecisionLogic(NetEqController::Config config) | 
|  | : delay_manager_(DelayManager::Create(config.max_packets_in_buffer, | 
|  | config.base_min_delay_ms, | 
|  | config.tick_timer)), | 
|  | tick_timer_(config.tick_timer), | 
|  | disallow_time_stretching_(!config.allow_time_stretching), | 
|  | timescale_countdown_( | 
|  | tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)), | 
|  | estimate_dtx_delay_("estimate_dtx_delay", false), | 
|  | time_stretch_cn_("time_stretch_cn", false), | 
|  | target_level_window_ms_("target_level_window", | 
|  | kDefaultTargetLevelWindowMs, | 
|  | 0, | 
|  | absl::nullopt) { | 
|  | const std::string field_trial_name = | 
|  | field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings"); | 
|  | ParseFieldTrial( | 
|  | {&estimate_dtx_delay_, &time_stretch_cn_, &target_level_window_ms_}, | 
|  | field_trial_name); | 
|  | RTC_LOG(LS_INFO) << "NetEq decision logic settings:" | 
|  | " estimate_dtx_delay=" | 
|  | << estimate_dtx_delay_ | 
|  | << " time_stretch_cn=" << time_stretch_cn_ | 
|  | << " target_level_window_ms=" << target_level_window_ms_; | 
|  | } | 
|  |  | 
// Destructor, defaulted here in the implementation file.
|  | DecisionLogic::~DecisionLogic() = default; | 
|  |  | 
|  | void DecisionLogic::Reset() { | 
|  | cng_state_ = kCngOff; | 
|  | noise_fast_forward_ = 0; | 
|  | packet_length_samples_ = 0; | 
|  | sample_memory_ = 0; | 
|  | prev_time_scale_ = false; | 
|  | last_pack_cng_or_dtmf_ = true; | 
|  | timescale_countdown_.reset(); | 
|  | num_consecutive_expands_ = 0; | 
|  | time_stretched_cn_samples_ = 0; | 
|  | } | 
|  |  | 
|  | void DecisionLogic::SoftReset() { | 
|  | packet_length_samples_ = 0; | 
|  | sample_memory_ = 0; | 
|  | prev_time_scale_ = false; | 
|  | last_pack_cng_or_dtmf_ = true; | 
|  | timescale_countdown_ = | 
|  | tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1); | 
|  | time_stretched_cn_samples_ = 0; | 
|  | delay_manager_->Reset(); | 
|  | buffer_level_filter_.Reset(); | 
|  | } | 
|  |  | 
|  | void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) { | 
|  | // TODO(hlundin): Change to an enumerator and skip assert. | 
|  | assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000); | 
|  | sample_rate_ = fs_hz; | 
|  | output_size_samples_ = output_size_samples; | 
|  | } | 
|  |  | 
|  | NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, | 
|  | bool* reset_decoder) { | 
|  | // If last mode was CNG (or Expand, since this could be covering up for | 
|  | // a lost CNG packet), remember that CNG is on. This is needed if comfort | 
|  | // noise is interrupted by DTMF. | 
|  | if (status.last_mode == NetEq::Mode::kRfc3389Cng) { | 
|  | cng_state_ = kCngRfc3389On; | 
|  | } else if (status.last_mode == NetEq::Mode::kCodecInternalCng) { | 
|  | cng_state_ = kCngInternalOn; | 
|  | } | 
|  |  | 
|  | size_t cur_size_samples = estimate_dtx_delay_ | 
|  | ? status.packet_buffer_info.span_samples | 
|  | : status.packet_buffer_info.num_samples; | 
|  | prev_time_scale_ = | 
|  | prev_time_scale_ && | 
|  | (status.last_mode == NetEq::Mode::kAccelerateSuccess || | 
|  | status.last_mode == NetEq::Mode::kAccelerateLowEnergy || | 
|  | status.last_mode == NetEq::Mode::kPreemptiveExpandSuccess || | 
|  | status.last_mode == NetEq::Mode::kPreemptiveExpandLowEnergy); | 
|  |  | 
|  | // Do not update buffer history if currently playing CNG since it will bias | 
|  | // the filtered buffer level. | 
|  | if (status.last_mode != NetEq::Mode::kRfc3389Cng && | 
|  | status.last_mode != NetEq::Mode::kCodecInternalCng && | 
|  | !(status.next_packet && status.next_packet->is_dtx && | 
|  | !estimate_dtx_delay_)) { | 
|  | FilterBufferLevel(cur_size_samples); | 
|  | } | 
|  |  | 
|  | // Guard for errors, to avoid getting stuck in error mode. | 
|  | if (status.last_mode == NetEq::Mode::kError) { | 
|  | if (!status.next_packet) { | 
|  | return NetEq::Operation::kExpand; | 
|  | } else { | 
|  | // Use kUndefined to flag for a reset. | 
|  | return NetEq::Operation::kUndefined; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (status.next_packet && status.next_packet->is_cng) { | 
|  | return CngOperation(status.last_mode, status.target_timestamp, | 
|  | status.next_packet->timestamp, | 
|  | status.generated_noise_samples); | 
|  | } | 
|  |  | 
|  | // Handle the case with no packet at all available (except maybe DTMF). | 
|  | if (!status.next_packet) { | 
|  | return NoPacket(status.play_dtmf); | 
|  | } | 
|  |  | 
|  | // If the expand period was very long, reset NetEQ since it is likely that the | 
|  | // sender was restarted. | 
|  | if (num_consecutive_expands_ > kReinitAfterExpands) { | 
|  | *reset_decoder = true; | 
|  | return NetEq::Operation::kNormal; | 
|  | } | 
|  |  | 
|  | // Make sure we don't restart audio too soon after an expansion to avoid | 
|  | // running out of data right away again. We should only wait if there are no | 
|  | // DTX or CNG packets in the buffer (otherwise we should just play out what we | 
|  | // have, since we cannot know the exact duration of DTX or CNG packets), and | 
|  | // if the mute factor is low enough (otherwise the expansion was short enough | 
|  | // to not be noticable). | 
|  | // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1. | 
|  | const size_t current_span = | 
|  | estimate_dtx_delay_ ? status.packet_buffer_info.span_samples | 
|  | : status.packet_buffer_info.span_samples_no_dtx; | 
|  | const int target_level_samples = | 
|  | delay_manager_->TargetDelayMs() * sample_rate_ / 1000; | 
|  | if ((status.last_mode == NetEq::Mode::kExpand || | 
|  | status.last_mode == NetEq::Mode::kCodecPlc) && | 
|  | status.expand_mutefactor < 16384 / 2 && | 
|  | current_span < static_cast<size_t>(target_level_samples * | 
|  | kPostponeDecodingLevel / 100) && | 
|  | !status.packet_buffer_info.dtx_or_cng) { | 
|  | return NetEq::Operation::kExpand; | 
|  | } | 
|  |  | 
|  | const uint32_t five_seconds_samples = static_cast<uint32_t>(5 * sample_rate_); | 
|  | // Check if the required packet is available. | 
|  | if (status.target_timestamp == status.next_packet->timestamp) { | 
|  | return ExpectedPacketAvailable(status.last_mode, status.play_dtmf); | 
|  | } else if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp, | 
|  | status.target_timestamp, | 
|  | five_seconds_samples)) { | 
|  | return FuturePacketAvailable( | 
|  | status.last_packet_samples, status.last_mode, status.target_timestamp, | 
|  | status.next_packet->timestamp, status.play_dtmf, | 
|  | status.generated_noise_samples, status.packet_buffer_info.span_samples, | 
|  | status.packet_buffer_info.num_packets); | 
|  | } else { | 
|  | // This implies that available_timestamp < target_timestamp, which can | 
|  | // happen when a new stream or codec is received. Signal for a reset. | 
|  | return NetEq::Operation::kUndefined; | 
|  | } | 
|  | } | 
|  |  | 
|  | void DecisionLogic::ExpandDecision(NetEq::Operation operation) { | 
|  | if (operation == NetEq::Operation::kExpand) { | 
|  | num_consecutive_expands_++; | 
|  | } else { | 
|  | num_consecutive_expands_ = 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | absl::optional<int> DecisionLogic::PacketArrived( | 
|  | int fs_hz, | 
|  | bool should_update_stats, | 
|  | const PacketArrivedInfo& info) { | 
|  | if (info.is_cng_or_dtmf) { | 
|  | last_pack_cng_or_dtmf_ = true; | 
|  | return absl::nullopt; | 
|  | } | 
|  | if (!should_update_stats) { | 
|  | return absl::nullopt; | 
|  | } | 
|  | if (info.packet_length_samples > 0 && fs_hz > 0 && | 
|  | info.packet_length_samples != packet_length_samples_) { | 
|  | packet_length_samples_ = info.packet_length_samples; | 
|  | delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 / fs_hz); | 
|  | } | 
|  | auto relative_delay = delay_manager_->Update( | 
|  | info.main_timestamp, fs_hz, /*reset=*/last_pack_cng_or_dtmf_); | 
|  | last_pack_cng_or_dtmf_ = false; | 
|  | return relative_delay; | 
|  | } | 
|  |  | 
|  | void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) { | 
|  | buffer_level_filter_.SetTargetBufferLevel(delay_manager_->TargetDelayMs()); | 
|  |  | 
|  | int time_stretched_samples = time_stretched_cn_samples_; | 
|  | if (prev_time_scale_) { | 
|  | time_stretched_samples += sample_memory_; | 
|  | timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval); | 
|  | } | 
|  |  | 
|  | buffer_level_filter_.Update(buffer_size_samples, time_stretched_samples); | 
|  | prev_time_scale_ = false; | 
|  | time_stretched_cn_samples_ = 0; | 
|  | } | 
|  |  | 
|  | NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode, | 
|  | uint32_t target_timestamp, | 
|  | uint32_t available_timestamp, | 
|  | size_t generated_noise_samples) { | 
|  | // Signed difference between target and available timestamp. | 
|  | int32_t timestamp_diff = static_cast<int32_t>( | 
|  | static_cast<uint32_t>(generated_noise_samples + target_timestamp) - | 
|  | available_timestamp); | 
|  | int optimal_level_samp = | 
|  | delay_manager_->TargetDelayMs() * sample_rate_ / 1000; | 
|  | const int64_t excess_waiting_time_samp = | 
|  | -static_cast<int64_t>(timestamp_diff) - optimal_level_samp; | 
|  |  | 
|  | if (excess_waiting_time_samp > optimal_level_samp / 2) { | 
|  | // The waiting time for this packet will be longer than 1.5 | 
|  | // times the wanted buffer delay. Apply fast-forward to cut the | 
|  | // waiting time down to the optimal. | 
|  | noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ + | 
|  | excess_waiting_time_samp); | 
|  | timestamp_diff = | 
|  | rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp); | 
|  | } | 
|  |  | 
|  | if (timestamp_diff < 0 && prev_mode == NetEq::Mode::kRfc3389Cng) { | 
|  | // Not time to play this packet yet. Wait another round before using this | 
|  | // packet. Keep on playing CNG from previous CNG parameters. | 
|  | return NetEq::Operation::kRfc3389CngNoPacket; | 
|  | } else { | 
|  | // Otherwise, go for the CNG packet now. | 
|  | noise_fast_forward_ = 0; | 
|  | return NetEq::Operation::kRfc3389Cng; | 
|  | } | 
|  | } | 
|  |  | 
|  | NetEq::Operation DecisionLogic::NoPacket(bool play_dtmf) { | 
|  | if (cng_state_ == kCngRfc3389On) { | 
|  | // Keep on playing comfort noise. | 
|  | return NetEq::Operation::kRfc3389CngNoPacket; | 
|  | } else if (cng_state_ == kCngInternalOn) { | 
|  | // Keep on playing codec internal comfort noise. | 
|  | return NetEq::Operation::kCodecInternalCng; | 
|  | } else if (play_dtmf) { | 
|  | return NetEq::Operation::kDtmf; | 
|  | } else { | 
|  | // Nothing to play, do expand. | 
|  | return NetEq::Operation::kExpand; | 
|  | } | 
|  | } | 
|  |  | 
|  | NetEq::Operation DecisionLogic::ExpectedPacketAvailable(NetEq::Mode prev_mode, | 
|  | bool play_dtmf) { | 
|  | if (!disallow_time_stretching_ && prev_mode != NetEq::Mode::kExpand && | 
|  | !play_dtmf) { | 
|  | const int samples_per_ms = sample_rate_ / 1000; | 
|  | const int target_level_samples = | 
|  | delay_manager_->TargetDelayMs() * samples_per_ms; | 
|  | const int low_limit = | 
|  | std::max(target_level_samples * 3 / 4, | 
|  | target_level_samples - | 
|  | kDecelerationTargetLevelOffsetMs * samples_per_ms); | 
|  | // |higher_limit| is equal to |target_level|, but should at | 
|  | // least be 20 ms higher than |lower_limit|. | 
|  | const int high_limit = | 
|  | std::max(target_level_samples, low_limit + 20 * samples_per_ms); | 
|  |  | 
|  | const int buffer_level_samples = | 
|  | buffer_level_filter_.filtered_current_level(); | 
|  | if (buffer_level_samples >= high_limit << 2) | 
|  | return NetEq::Operation::kFastAccelerate; | 
|  | if (TimescaleAllowed()) { | 
|  | if (buffer_level_samples >= high_limit) | 
|  | return NetEq::Operation::kAccelerate; | 
|  | if (buffer_level_samples < low_limit) | 
|  | return NetEq::Operation::kPreemptiveExpand; | 
|  | } | 
|  | } | 
|  | return NetEq::Operation::kNormal; | 
|  | } | 
|  |  | 
|  | NetEq::Operation DecisionLogic::FuturePacketAvailable( | 
|  | size_t decoder_frame_length, | 
|  | NetEq::Mode prev_mode, | 
|  | uint32_t target_timestamp, | 
|  | uint32_t available_timestamp, | 
|  | bool play_dtmf, | 
|  | size_t generated_noise_samples, | 
|  | size_t span_samples_in_packet_buffer, | 
|  | size_t num_packets_in_packet_buffer) { | 
|  | // Required packet is not available, but a future packet is. | 
|  | // Check if we should continue with an ongoing expand because the new packet | 
|  | // is too far into the future. | 
|  | uint32_t timestamp_leap = available_timestamp - target_timestamp; | 
|  | if ((prev_mode == NetEq::Mode::kExpand || | 
|  | prev_mode == NetEq::Mode::kCodecPlc) && | 
|  | !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() && | 
|  | PacketTooEarly(timestamp_leap) && UnderTargetLevel()) { | 
|  | if (play_dtmf) { | 
|  | // Still have DTMF to play, so do not do expand. | 
|  | return NetEq::Operation::kDtmf; | 
|  | } else { | 
|  | // Nothing to play. | 
|  | return NetEq::Operation::kExpand; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (prev_mode == NetEq::Mode::kCodecPlc) { | 
|  | return NetEq::Operation::kNormal; | 
|  | } | 
|  |  | 
|  | // If previous was comfort noise, then no merge is needed. | 
|  | if (prev_mode == NetEq::Mode::kRfc3389Cng || | 
|  | prev_mode == NetEq::Mode::kCodecInternalCng) { | 
|  | size_t cur_size_samples = | 
|  | estimate_dtx_delay_ | 
|  | ? span_samples_in_packet_buffer | 
|  | : num_packets_in_packet_buffer * decoder_frame_length; | 
|  | // Target level is in number of packets in Q8. | 
|  | const size_t target_level_samples = | 
|  | delay_manager_->TargetDelayMs() * sample_rate_ / 1000; | 
|  | const bool generated_enough_noise = | 
|  | static_cast<uint32_t>(generated_noise_samples + target_timestamp) >= | 
|  | available_timestamp; | 
|  |  | 
|  | if (time_stretch_cn_) { | 
|  | const size_t target_threshold_samples = | 
|  | target_level_window_ms_ / 2 * (sample_rate_ / 1000); | 
|  | const bool above_target_window = | 
|  | cur_size_samples > target_level_samples + target_threshold_samples; | 
|  | const bool below_target_window = | 
|  | target_level_samples > target_threshold_samples && | 
|  | cur_size_samples < target_level_samples - target_threshold_samples; | 
|  | // Keep the delay same as before CNG, but make sure that it is within the | 
|  | // target window. | 
|  | if ((generated_enough_noise && !below_target_window) || | 
|  | above_target_window) { | 
|  | time_stretched_cn_samples_ = timestamp_leap - generated_noise_samples; | 
|  | return NetEq::Operation::kNormal; | 
|  | } | 
|  | } else { | 
|  | // Keep the same delay as before the CNG, but make sure that the number of | 
|  | // samples in buffer is no higher than 4 times the optimal level. | 
|  | if (generated_enough_noise || | 
|  | cur_size_samples > target_level_samples * 4) { | 
|  | // Time to play this new packet. | 
|  | return NetEq::Operation::kNormal; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Too early to play this new packet; keep on playing comfort noise. | 
|  | if (prev_mode == NetEq::Mode::kRfc3389Cng) { | 
|  | return NetEq::Operation::kRfc3389CngNoPacket; | 
|  | } | 
|  | // prevPlayMode == kModeCodecInternalCng. | 
|  | return NetEq::Operation::kCodecInternalCng; | 
|  | } | 
|  |  | 
|  | // Do not merge unless we have done an expand before. | 
|  | if (prev_mode == NetEq::Mode::kExpand) { | 
|  | return NetEq::Operation::kMerge; | 
|  | } else if (play_dtmf) { | 
|  | // Play DTMF instead of expand. | 
|  | return NetEq::Operation::kDtmf; | 
|  | } else { | 
|  | return NetEq::Operation::kExpand; | 
|  | } | 
|  | } | 
|  |  | 
|  | bool DecisionLogic::UnderTargetLevel() const { | 
|  | return buffer_level_filter_.filtered_current_level() < | 
|  | delay_manager_->TargetDelayMs() * sample_rate_ / 1000; | 
|  | } | 
|  |  | 
|  | bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const { | 
|  | return timestamp_leap >= | 
|  | static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands); | 
|  | } | 
|  |  | 
|  | bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const { | 
|  | return timestamp_leap > | 
|  | static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_); | 
|  | } | 
|  |  | 
|  | bool DecisionLogic::MaxWaitForPacket() const { | 
|  | return num_consecutive_expands_ >= kMaxWaitForPacket; | 
|  | } | 
|  |  | 
|  | }  // namespace webrtc |