| /* |
| * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "modules/audio_coding/neteq/decision_logic.h" |
| |
| #include <assert.h> |
| #include <stdio.h> |
| |
| #include <string> |
| |
| #include "absl/types/optional.h" |
| #include "modules/audio_coding/neteq/packet_buffer.h" |
| #include "rtc_base/checks.h" |
| #include "rtc_base/experiments/field_trial_parser.h" |
| #include "rtc_base/logging.h" |
| #include "rtc_base/numerics/safe_conversions.h" |
| #include "system_wrappers/include/field_trial.h" |
| |
namespace {

// Percentage of the target level: after an expand, decoding is postponed while
// the buffered span is below this fraction of the target level.
constexpr int kPostponeDecodingLevel{50};

// Default value of the "target_level_window" field-trial parameter, in ms.
constexpr int kDefaultTargetLevelWindowMs{100};

// Offset (in ms) subtracted from the target level when computing the lower
// buffer limit below which preemptive expand is requested.
constexpr int kDecelerationTargetLevelOffsetMs{85};

}  // namespace
| |
| namespace webrtc { |
| |
| DecisionLogic::DecisionLogic(NetEqController::Config config) |
| : DecisionLogic(config, |
| DelayManager::Create(config.max_packets_in_buffer, |
| config.base_min_delay_ms, |
| config.tick_timer), |
| std::make_unique<BufferLevelFilter>()) {} |
| |
| DecisionLogic::DecisionLogic( |
| NetEqController::Config config, |
| std::unique_ptr<DelayManager> delay_manager, |
| std::unique_ptr<BufferLevelFilter> buffer_level_filter) |
| : delay_manager_(std::move(delay_manager)), |
| buffer_level_filter_(std::move(buffer_level_filter)), |
| tick_timer_(config.tick_timer), |
| disallow_time_stretching_(!config.allow_time_stretching), |
| timescale_countdown_( |
| tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)), |
| estimate_dtx_delay_("estimate_dtx_delay", true), |
| time_stretch_cn_("time_stretch_cn", true), |
| target_level_window_ms_("target_level_window", |
| kDefaultTargetLevelWindowMs, |
| 0, |
| absl::nullopt) { |
| const std::string field_trial_name = |
| field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings"); |
| ParseFieldTrial( |
| {&estimate_dtx_delay_, &time_stretch_cn_, &target_level_window_ms_}, |
| field_trial_name); |
| RTC_LOG(LS_INFO) << "NetEq decision logic settings:" |
| " estimate_dtx_delay=" |
| << estimate_dtx_delay_ |
| << " time_stretch_cn=" << time_stretch_cn_ |
| << " target_level_window_ms=" << target_level_window_ms_; |
| } |
| |
| DecisionLogic::~DecisionLogic() = default; |
| |
| void DecisionLogic::Reset() { |
| cng_state_ = kCngOff; |
| noise_fast_forward_ = 0; |
| packet_length_samples_ = 0; |
| sample_memory_ = 0; |
| prev_time_scale_ = false; |
| last_pack_cng_or_dtmf_ = true; |
| timescale_countdown_.reset(); |
| num_consecutive_expands_ = 0; |
| time_stretched_cn_samples_ = 0; |
| } |
| |
| void DecisionLogic::SoftReset() { |
| packet_length_samples_ = 0; |
| sample_memory_ = 0; |
| prev_time_scale_ = false; |
| last_pack_cng_or_dtmf_ = true; |
| timescale_countdown_ = |
| tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1); |
| time_stretched_cn_samples_ = 0; |
| delay_manager_->Reset(); |
| buffer_level_filter_->Reset(); |
| } |
| |
| void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) { |
| // TODO(hlundin): Change to an enumerator and skip assert. |
| assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000); |
| sample_rate_ = fs_hz; |
| output_size_samples_ = output_size_samples; |
| } |
| |
| NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, |
| bool* reset_decoder) { |
| // If last mode was CNG (or Expand, since this could be covering up for |
| // a lost CNG packet), remember that CNG is on. This is needed if comfort |
| // noise is interrupted by DTMF. |
| if (status.last_mode == NetEq::Mode::kRfc3389Cng) { |
| cng_state_ = kCngRfc3389On; |
| } else if (status.last_mode == NetEq::Mode::kCodecInternalCng) { |
| cng_state_ = kCngInternalOn; |
| } |
| |
| size_t cur_size_samples = estimate_dtx_delay_ |
| ? status.packet_buffer_info.span_samples |
| : status.packet_buffer_info.num_samples; |
| prev_time_scale_ = |
| prev_time_scale_ && |
| (status.last_mode == NetEq::Mode::kAccelerateSuccess || |
| status.last_mode == NetEq::Mode::kAccelerateLowEnergy || |
| status.last_mode == NetEq::Mode::kPreemptiveExpandSuccess || |
| status.last_mode == NetEq::Mode::kPreemptiveExpandLowEnergy); |
| |
| // Do not update buffer history if currently playing CNG since it will bias |
| // the filtered buffer level. |
| if (status.last_mode != NetEq::Mode::kRfc3389Cng && |
| status.last_mode != NetEq::Mode::kCodecInternalCng && |
| !(status.next_packet && status.next_packet->is_dtx && |
| !estimate_dtx_delay_)) { |
| FilterBufferLevel(cur_size_samples); |
| } |
| |
| // Guard for errors, to avoid getting stuck in error mode. |
| if (status.last_mode == NetEq::Mode::kError) { |
| if (!status.next_packet) { |
| return NetEq::Operation::kExpand; |
| } else { |
| // Use kUndefined to flag for a reset. |
| return NetEq::Operation::kUndefined; |
| } |
| } |
| |
| if (status.next_packet && status.next_packet->is_cng) { |
| return CngOperation(status.last_mode, status.target_timestamp, |
| status.next_packet->timestamp, |
| status.generated_noise_samples); |
| } |
| |
| // Handle the case with no packet at all available (except maybe DTMF). |
| if (!status.next_packet) { |
| return NoPacket(status.play_dtmf); |
| } |
| |
| // If the expand period was very long, reset NetEQ since it is likely that the |
| // sender was restarted. |
| if (num_consecutive_expands_ > kReinitAfterExpands) { |
| *reset_decoder = true; |
| return NetEq::Operation::kNormal; |
| } |
| |
| // Make sure we don't restart audio too soon after an expansion to avoid |
| // running out of data right away again. We should only wait if there are no |
| // DTX or CNG packets in the buffer (otherwise we should just play out what we |
| // have, since we cannot know the exact duration of DTX or CNG packets), and |
| // if the mute factor is low enough (otherwise the expansion was short enough |
| // to not be noticable). |
| // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1. |
| const size_t current_span = |
| estimate_dtx_delay_ ? status.packet_buffer_info.span_samples |
| : status.packet_buffer_info.span_samples_no_dtx; |
| const int target_level_samples = |
| delay_manager_->TargetDelayMs() * sample_rate_ / 1000; |
| if ((status.last_mode == NetEq::Mode::kExpand || |
| status.last_mode == NetEq::Mode::kCodecPlc) && |
| status.expand_mutefactor < 16384 / 2 && |
| current_span < static_cast<size_t>(target_level_samples * |
| kPostponeDecodingLevel / 100) && |
| !status.packet_buffer_info.dtx_or_cng) { |
| return NetEq::Operation::kExpand; |
| } |
| |
| const uint32_t five_seconds_samples = static_cast<uint32_t>(5 * sample_rate_); |
| // Check if the required packet is available. |
| if (status.target_timestamp == status.next_packet->timestamp) { |
| return ExpectedPacketAvailable(status.last_mode, status.play_dtmf); |
| } else if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp, |
| status.target_timestamp, |
| five_seconds_samples)) { |
| return FuturePacketAvailable( |
| status.last_packet_samples, status.last_mode, status.target_timestamp, |
| status.next_packet->timestamp, status.play_dtmf, |
| status.generated_noise_samples, status.packet_buffer_info.span_samples, |
| status.packet_buffer_info.num_packets); |
| } else { |
| // This implies that available_timestamp < target_timestamp, which can |
| // happen when a new stream or codec is received. Signal for a reset. |
| return NetEq::Operation::kUndefined; |
| } |
| } |
| |
| void DecisionLogic::ExpandDecision(NetEq::Operation operation) { |
| if (operation == NetEq::Operation::kExpand) { |
| num_consecutive_expands_++; |
| } else { |
| num_consecutive_expands_ = 0; |
| } |
| } |
| |
| absl::optional<int> DecisionLogic::PacketArrived( |
| int fs_hz, |
| bool should_update_stats, |
| const PacketArrivedInfo& info) { |
| buffer_flush_ = buffer_flush_ || info.buffer_flush; |
| if (info.is_cng_or_dtmf) { |
| last_pack_cng_or_dtmf_ = true; |
| return absl::nullopt; |
| } |
| if (!should_update_stats) { |
| return absl::nullopt; |
| } |
| if (info.packet_length_samples > 0 && fs_hz > 0 && |
| info.packet_length_samples != packet_length_samples_) { |
| packet_length_samples_ = info.packet_length_samples; |
| delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 / fs_hz); |
| } |
| auto relative_delay = delay_manager_->Update( |
| info.main_timestamp, fs_hz, /*reset=*/last_pack_cng_or_dtmf_); |
| last_pack_cng_or_dtmf_ = false; |
| return relative_delay; |
| } |
| |
| void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) { |
| buffer_level_filter_->SetTargetBufferLevel(delay_manager_->TargetDelayMs()); |
| |
| int time_stretched_samples = time_stretched_cn_samples_; |
| if (prev_time_scale_) { |
| time_stretched_samples += sample_memory_; |
| timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval); |
| } |
| |
| if (buffer_flush_) { |
| buffer_level_filter_->SetFilteredBufferLevel(buffer_size_samples); |
| buffer_flush_ = false; |
| } else { |
| buffer_level_filter_->Update(buffer_size_samples, time_stretched_samples); |
| } |
| prev_time_scale_ = false; |
| time_stretched_cn_samples_ = 0; |
| } |
| |
| NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode, |
| uint32_t target_timestamp, |
| uint32_t available_timestamp, |
| size_t generated_noise_samples) { |
| // Signed difference between target and available timestamp. |
| int32_t timestamp_diff = static_cast<int32_t>( |
| static_cast<uint32_t>(generated_noise_samples + target_timestamp) - |
| available_timestamp); |
| int optimal_level_samp = |
| delay_manager_->TargetDelayMs() * sample_rate_ / 1000; |
| const int64_t excess_waiting_time_samp = |
| -static_cast<int64_t>(timestamp_diff) - optimal_level_samp; |
| |
| if (excess_waiting_time_samp > optimal_level_samp / 2) { |
| // The waiting time for this packet will be longer than 1.5 |
| // times the wanted buffer delay. Apply fast-forward to cut the |
| // waiting time down to the optimal. |
| noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ + |
| excess_waiting_time_samp); |
| timestamp_diff = |
| rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp); |
| } |
| |
| if (timestamp_diff < 0 && prev_mode == NetEq::Mode::kRfc3389Cng) { |
| // Not time to play this packet yet. Wait another round before using this |
| // packet. Keep on playing CNG from previous CNG parameters. |
| return NetEq::Operation::kRfc3389CngNoPacket; |
| } else { |
| // Otherwise, go for the CNG packet now. |
| noise_fast_forward_ = 0; |
| return NetEq::Operation::kRfc3389Cng; |
| } |
| } |
| |
| NetEq::Operation DecisionLogic::NoPacket(bool play_dtmf) { |
| if (cng_state_ == kCngRfc3389On) { |
| // Keep on playing comfort noise. |
| return NetEq::Operation::kRfc3389CngNoPacket; |
| } else if (cng_state_ == kCngInternalOn) { |
| // Keep on playing codec internal comfort noise. |
| return NetEq::Operation::kCodecInternalCng; |
| } else if (play_dtmf) { |
| return NetEq::Operation::kDtmf; |
| } else { |
| // Nothing to play, do expand. |
| return NetEq::Operation::kExpand; |
| } |
| } |
| |
| NetEq::Operation DecisionLogic::ExpectedPacketAvailable(NetEq::Mode prev_mode, |
| bool play_dtmf) { |
| if (!disallow_time_stretching_ && prev_mode != NetEq::Mode::kExpand && |
| !play_dtmf) { |
| const int samples_per_ms = sample_rate_ / 1000; |
| const int target_level_samples = |
| delay_manager_->TargetDelayMs() * samples_per_ms; |
| const int low_limit = |
| std::max(target_level_samples * 3 / 4, |
| target_level_samples - |
| kDecelerationTargetLevelOffsetMs * samples_per_ms); |
| // |higher_limit| is equal to |target_level|, but should at |
| // least be 20 ms higher than |lower_limit|. |
| const int high_limit = |
| std::max(target_level_samples, low_limit + 20 * samples_per_ms); |
| |
| const int buffer_level_samples = |
| buffer_level_filter_->filtered_current_level(); |
| if (buffer_level_samples >= high_limit << 2) |
| return NetEq::Operation::kFastAccelerate; |
| if (TimescaleAllowed()) { |
| if (buffer_level_samples >= high_limit) |
| return NetEq::Operation::kAccelerate; |
| if (buffer_level_samples < low_limit) |
| return NetEq::Operation::kPreemptiveExpand; |
| } |
| } |
| return NetEq::Operation::kNormal; |
| } |
| |
| NetEq::Operation DecisionLogic::FuturePacketAvailable( |
| size_t decoder_frame_length, |
| NetEq::Mode prev_mode, |
| uint32_t target_timestamp, |
| uint32_t available_timestamp, |
| bool play_dtmf, |
| size_t generated_noise_samples, |
| size_t span_samples_in_packet_buffer, |
| size_t num_packets_in_packet_buffer) { |
| // Required packet is not available, but a future packet is. |
| // Check if we should continue with an ongoing expand because the new packet |
| // is too far into the future. |
| uint32_t timestamp_leap = available_timestamp - target_timestamp; |
| if ((prev_mode == NetEq::Mode::kExpand || |
| prev_mode == NetEq::Mode::kCodecPlc) && |
| !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() && |
| PacketTooEarly(timestamp_leap) && UnderTargetLevel()) { |
| if (play_dtmf) { |
| // Still have DTMF to play, so do not do expand. |
| return NetEq::Operation::kDtmf; |
| } else { |
| // Nothing to play. |
| return NetEq::Operation::kExpand; |
| } |
| } |
| |
| if (prev_mode == NetEq::Mode::kCodecPlc) { |
| return NetEq::Operation::kNormal; |
| } |
| |
| // If previous was comfort noise, then no merge is needed. |
| if (prev_mode == NetEq::Mode::kRfc3389Cng || |
| prev_mode == NetEq::Mode::kCodecInternalCng) { |
| size_t cur_size_samples = |
| estimate_dtx_delay_ |
| ? span_samples_in_packet_buffer |
| : num_packets_in_packet_buffer * decoder_frame_length; |
| // Target level is in number of packets in Q8. |
| const size_t target_level_samples = |
| delay_manager_->TargetDelayMs() * sample_rate_ / 1000; |
| const bool generated_enough_noise = |
| static_cast<uint32_t>(generated_noise_samples + target_timestamp) >= |
| available_timestamp; |
| |
| if (time_stretch_cn_) { |
| const size_t target_threshold_samples = |
| target_level_window_ms_ / 2 * (sample_rate_ / 1000); |
| const bool above_target_window = |
| cur_size_samples > target_level_samples + target_threshold_samples; |
| const bool below_target_window = |
| target_level_samples > target_threshold_samples && |
| cur_size_samples < target_level_samples - target_threshold_samples; |
| // Keep the delay same as before CNG, but make sure that it is within the |
| // target window. |
| if ((generated_enough_noise && !below_target_window) || |
| above_target_window) { |
| time_stretched_cn_samples_ = timestamp_leap - generated_noise_samples; |
| return NetEq::Operation::kNormal; |
| } |
| } else { |
| // Keep the same delay as before the CNG, but make sure that the number of |
| // samples in buffer is no higher than 4 times the optimal level. |
| if (generated_enough_noise || |
| cur_size_samples > target_level_samples * 4) { |
| // Time to play this new packet. |
| return NetEq::Operation::kNormal; |
| } |
| } |
| |
| // Too early to play this new packet; keep on playing comfort noise. |
| if (prev_mode == NetEq::Mode::kRfc3389Cng) { |
| return NetEq::Operation::kRfc3389CngNoPacket; |
| } |
| // prevPlayMode == kModeCodecInternalCng. |
| return NetEq::Operation::kCodecInternalCng; |
| } |
| |
| // Do not merge unless we have done an expand before. |
| if (prev_mode == NetEq::Mode::kExpand) { |
| return NetEq::Operation::kMerge; |
| } else if (play_dtmf) { |
| // Play DTMF instead of expand. |
| return NetEq::Operation::kDtmf; |
| } else { |
| return NetEq::Operation::kExpand; |
| } |
| } |
| |
| bool DecisionLogic::UnderTargetLevel() const { |
| return buffer_level_filter_->filtered_current_level() < |
| delay_manager_->TargetDelayMs() * sample_rate_ / 1000; |
| } |
| |
| bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const { |
| return timestamp_leap >= |
| static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands); |
| } |
| |
| bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const { |
| return timestamp_leap > |
| static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_); |
| } |
| |
| bool DecisionLogic::MaxWaitForPacket() const { |
| return num_consecutive_expands_ >= kMaxWaitForPacket; |
| } |
| |
| } // namespace webrtc |