JitterEstimator: add field trial overrides for max frame filter

This change adds a percentile filter that can replace the
"non-linear IIR" filter that is currently used to estimate the
max frame size (in bytes). The percentile filter is enabled through
the field trial, and it has two tuning parameters: the percentile
that is deemed the "max" frame, and the window length over which
the filter is applied.

Bug: webrtc:14151
Change-Id: I002609edb0a74161aaa6f0934892a1bec2ad8230
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/274167
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Rasmus Brandt <brandtr@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38047}
diff --git a/modules/video_coding/timing/BUILD.gn b/modules/video_coding/timing/BUILD.gn
index 33eb81f..985223f 100644
--- a/modules/video_coding/timing/BUILD.gn
+++ b/modules/video_coding/timing/BUILD.gn
@@ -59,6 +59,8 @@
     "../../../api/units:time_delta",
     "../../../api/units:timestamp",
     "../../../rtc_base",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:rtc_numerics",
     "../../../rtc_base:safe_conversions",
     "../../../rtc_base/experiments:field_trial_parser",
     "../../../system_wrappers",
diff --git a/modules/video_coding/timing/jitter_estimator.cc b/modules/video_coding/timing/jitter_estimator.cc
index ef5faa2..af3c9ab 100644
--- a/modules/video_coding/timing/jitter_estimator.cc
+++ b/modules/video_coding/timing/jitter_estimator.cc
@@ -23,6 +23,7 @@
 #include "api/units/time_delta.h"
 #include "api/units/timestamp.h"
 #include "modules/video_coding/timing/rtt_filter.h"
+#include "rtc_base/checks.h"
 #include "rtc_base/numerics/safe_conversions.h"
 #include "system_wrappers/include/clock.h"
 
@@ -43,6 +44,9 @@
 constexpr double kPhi = 0.97;
 // Time constant for max frame size filter.
 constexpr double kPsi = 0.9999;
+// Default constants for percentile frame size filter.
+constexpr double kDefaultMaxFrameSizePercentile = 0.95;
+constexpr int kDefaultMaxFrameSizeWindow = 30 * 10;
 
 // Outlier rejection constants.
 constexpr double kDefaultMaxTimestampDeviationInSigmas = 3.5;
@@ -84,6 +88,9 @@
 JitterEstimator::JitterEstimator(Clock* clock,
                                  const FieldTrialsView& field_trials)
     : config_(Config::Parse(field_trials.Lookup(Config::kFieldTrialsKey))),
+      max_frame_size_bytes_percentile_(
+          config_.max_frame_size_percentile.value_or(
+              kDefaultMaxFrameSizePercentile)),
       fps_counter_(30),  // TODO(sprang): Use an estimator with limit based
                          // on time, rather than number of samples.
       clock_(clock) {
@@ -97,6 +104,8 @@
   avg_frame_size_bytes_ = kInitialAvgAndMaxFrameSizeBytes;
   max_frame_size_bytes_ = kInitialAvgAndMaxFrameSizeBytes;
   var_frame_size_bytes2_ = 100;
+  max_frame_size_bytes_percentile_.Reset();
+  frame_sizes_in_percentile_filter_ = std::queue<int64_t>();
   last_update_time_ = absl::nullopt;
   prev_estimate_ = absl::nullopt;
   prev_frame_size_ = absl::nullopt;
@@ -147,10 +156,23 @@
       kPhi * var_frame_size_bytes2_ + (1 - kPhi) * (delta_bytes * delta_bytes),
       1.0);
 
-  // Update max_frame_size_bytes_ estimate.
+  // Update non-linear IIR estimate of max frame size.
   max_frame_size_bytes_ =
       std::max<double>(kPsi * max_frame_size_bytes_, frame_size.bytes());
 
+  // Maybe update percentile estimate of max frame size.
+  if (config_.MaxFrameSizePercentileEnabled()) {
+    frame_sizes_in_percentile_filter_.push(frame_size.bytes());
+    if (frame_sizes_in_percentile_filter_.size() >
+        static_cast<size_t>(config_.max_frame_size_window.value_or(
+            kDefaultMaxFrameSizeWindow))) {
+      max_frame_size_bytes_percentile_.Erase(
+          frame_sizes_in_percentile_filter_.front());
+      frame_sizes_in_percentile_filter_.pop();
+    }
+    max_frame_size_bytes_percentile_.Insert(frame_size.bytes());
+  }
+
   if (!prev_frame_size_) {
     prev_frame_size_ = frame_size;
     return;
@@ -190,11 +212,12 @@
     // delayed. The next frame is of normal size (delta frame), and thus deltaFS
     // will be << 0. This removes all frame samples which arrives after a key
     // frame.
+    double max_frame_size_bytes = GetMaxFrameSizeEstimateBytes();
     if (delta_frame_bytes >
-        GetCongestionRejectionFactor() * max_frame_size_bytes_) {
+        GetCongestionRejectionFactor() * max_frame_size_bytes) {
       // Update the Kalman filter with the new data
       kalman_filter_.PredictAndUpdate(frame_delay.ms(), delta_frame_bytes,
-                                      max_frame_size_bytes_, var_noise_ms2_);
+                                      max_frame_size_bytes, var_noise_ms2_);
     }
   } else {
     double num_stddev = (delay_deviation_ms >= 0) ? num_stddev_delay_outlier
@@ -225,6 +248,17 @@
   return config_;
 }
 
+double JitterEstimator::GetMaxFrameSizeEstimateBytes() const {
+  if (!config_.MaxFrameSizePercentileEnabled()) {
+    return max_frame_size_bytes_;  // Non-linear IIR estimate (the default).
+  }
+  const size_t num_frames = frame_sizes_in_percentile_filter_.size();
+  RTC_DCHECK_GT(num_frames, 1u);
+  RTC_DCHECK_LE(num_frames, config_.max_frame_size_window.value_or(
+                                kDefaultMaxFrameSizeWindow));
+  return max_frame_size_bytes_percentile_.GetPercentileValue();
+}
+
 double JitterEstimator::GetNumStddevDelayOutlier() const {
   return config_.num_stddev_delay_outlier.value_or(kNumStdDevDelayOutlier);
 }
@@ -298,8 +332,10 @@
 
 // Calculates the current jitter estimate from the filtered estimates.
 TimeDelta JitterEstimator::CalculateEstimate() {
+  double worst_case_frame_size_deviation_bytes =
+      GetMaxFrameSizeEstimateBytes() - avg_frame_size_bytes_;
   double ret_ms = kalman_filter_.GetFrameDelayVariationEstimateSizeBased(
-                      max_frame_size_bytes_ - avg_frame_size_bytes_) +
+                      worst_case_frame_size_deviation_bytes) +
                   NoiseThreshold();
   TimeDelta ret = TimeDelta::Millis(ret_ms);
 
diff --git a/modules/video_coding/timing/jitter_estimator.h b/modules/video_coding/timing/jitter_estimator.h
index 6ae4729..ae6b155 100644
--- a/modules/video_coding/timing/jitter_estimator.h
+++ b/modules/video_coding/timing/jitter_estimator.h
@@ -12,6 +12,7 @@
 #define MODULES_VIDEO_CODING_TIMING_JITTER_ESTIMATOR_H_
 
 #include <memory>
+#include <queue>
 
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
@@ -23,6 +24,7 @@
 #include "modules/video_coding/timing/frame_delay_variation_kalman_filter.h"
 #include "modules/video_coding/timing/rtt_filter.h"
 #include "rtc_base/experiments/struct_parameters_parser.h"
+#include "rtc_base/numerics/percentile_filter.h"
 #include "rtc_base/rolling_accumulator.h"
 
 namespace webrtc {
@@ -44,11 +46,24 @@
 
     std::unique_ptr<StructParametersParser> Parser() {
       return StructParametersParser::Create(
+          "max_frame_size_percentile", &max_frame_size_percentile,
+          "max_frame_size_window", &max_frame_size_window,
           "num_stddev_delay_outlier", &num_stddev_delay_outlier,
           "num_stddev_size_outlier", &num_stddev_size_outlier,
           "congestion_rejection_factor", &congestion_rejection_factor);
     }
 
+    bool MaxFrameSizePercentileEnabled() const {
+      return max_frame_size_percentile != absl::nullopt;
+    }
+
+    // If set, the "max" frame size is calculated as this percentile over a
+    // window of recent frame sizes.
+    absl::optional<double> max_frame_size_percentile;
+
+    // The length of the percentile filter's window, in number of frames.
+    absl::optional<int> max_frame_size_window;
+
     // A (relative) frame delay variation sample is an outlier if its absolute
     // deviation from the Kalman filter model falls outside this number of
     // sample standard deviations.
@@ -110,6 +125,7 @@
 
  private:
   // These functions return values that could be overriden through the config.
+  double GetMaxFrameSizeEstimateBytes() const;
   double GetNumStddevDelayOutlier() const;
   double GetNumStddevSizeOutlier() const;
   double GetCongestionRejectionFactor() const;
@@ -145,10 +161,14 @@
   // when api/units have sufficient precision.
   double avg_frame_size_bytes_;  // Average frame size
   double var_frame_size_bytes2_;  // Frame size variance. Unit is bytes^2.
-  // Largest frame size received (descending with a factor kPsi)
+  // Largest frame size received (descending with a factor kPsi).
+  // Used by default.
   // TODO(bugs.webrtc.org/14381): Update `max_frame_size_bytes_` to DataSize
   // when api/units have sufficient precision.
   double max_frame_size_bytes_;
+  // Percentile frame size received (over a window). Only used if configured.
+  PercentileFilter<int64_t> max_frame_size_bytes_percentile_;
+  std::queue<int64_t> frame_sizes_in_percentile_filter_;
   // TODO(bugs.webrtc.org/14381): Update `startup_frame_size_sum_bytes_` to
   // DataSize when api/units have sufficient precision.
   double startup_frame_size_sum_bytes_;
diff --git a/modules/video_coding/timing/jitter_estimator_unittest.cc b/modules/video_coding/timing/jitter_estimator_unittest.cc
index 612e7d7..4da6bf4 100644
--- a/modules/video_coding/timing/jitter_estimator_unittest.cc
+++ b/modules/video_coding/timing/jitter_estimator_unittest.cc
@@ -145,8 +145,27 @@
             *jitter_by_rtt_mult_cap[0].second.GetPercentile(1.0) * 1.25);
 }
 
+// Without the field trial, one oversized frame is enough to raise the estimate.
+TEST_F(JitterEstimatorTest, Single2xFrameSizeImpactsJitterEstimate) {
+  ValueGenerator generator(10);
+
+  // Reach steady state first.
+  Run(/*duration_s=*/60, /*framerate_fps=*/30, generator);
+  TimeDelta jitter_before =
+      estimator_.GetJitterEstimate(0, absl::nullopt);
+
+  // Feed one frame of twice the generated size...
+  estimator_.UpdateEstimate(generator.Delay(), 2 * generator.FrameSize());
+  TimeDelta jitter_after = estimator_.GetJitterEstimate(0, absl::nullopt);
+
+  // ...and expect the jitter estimate to increase.
+  EXPECT_GT(jitter_after.ms(), jitter_before.ms());
+}
+
 TEST_F(JitterEstimatorTest, EmptyFieldTrialsParsesToUnsetConfig) {
   JitterEstimator::Config config = estimator_.GetConfigForTest();
+  EXPECT_FALSE(config.max_frame_size_percentile.has_value());
+  EXPECT_FALSE(config.max_frame_size_window.has_value());
   EXPECT_FALSE(config.num_stddev_delay_outlier.has_value());
   EXPECT_FALSE(config.num_stddev_size_outlier.has_value());
   EXPECT_FALSE(config.congestion_rejection_factor.has_value());
@@ -157,6 +176,8 @@
   FieldTrialsOverriddenJitterEstimatorTest()
       : JitterEstimatorTest(
             "WebRTC-JitterEstimatorConfig/"
+            "max_frame_size_percentile:0.9,"
+            "max_frame_size_window:30,"
             "num_stddev_delay_outlier:2,"
             "num_stddev_size_outlier:3.1,"
             "congestion_rejection_factor:-1.55/") {}
@@ -165,6 +186,8 @@
 
 TEST_F(FieldTrialsOverriddenJitterEstimatorTest, FieldTrialsParsesCorrectly) {
   JitterEstimator::Config config = estimator_.GetConfigForTest();
+  EXPECT_EQ(*config.max_frame_size_percentile, 0.9);
+  EXPECT_EQ(*config.max_frame_size_window, 30);
   EXPECT_EQ(*config.num_stddev_delay_outlier, 2.0);
   EXPECT_EQ(*config.num_stddev_size_outlier, 3.1);
   EXPECT_EQ(*config.congestion_rejection_factor, -1.55);
@@ -187,5 +210,29 @@
   EXPECT_EQ(outlier_jitter.ms(), steady_state_jitter.ms());
 }
 
+// With `max_frame_size_percentile:0.9` and `max_frame_size_window:30`, the
+// estimator should be robust against the `(1 - 0.9) * 30 = 3` largest frames.
+TEST_F(FieldTrialsOverriddenJitterEstimatorTest,
+       Four2xFrameSizesImpactJitterEstimate) {
+  ValueGenerator generator(10);
+
+  // Reach steady state first.
+  Run(/*duration_s=*/60, /*framerate_fps=*/30, generator);
+  TimeDelta jitter_before =
+      estimator_.GetJitterEstimate(0, absl::nullopt);
+
+  // Three double-sized frames leave the estimate unchanged (in whole ms).
+  for (int num_outliers = 1; num_outliers <= 3; ++num_outliers) {
+    estimator_.UpdateEstimate(generator.Delay(), 2 * generator.FrameSize());
+  }
+  TimeDelta jitter_after_3 = estimator_.GetJitterEstimate(0, absl::nullopt);
+  EXPECT_EQ(jitter_after_3.ms(), jitter_before.ms());
+
+  // The fourth double-sized frame raises the jitter estimate.
+  estimator_.UpdateEstimate(generator.Delay(), 2 * generator.FrameSize());
+  TimeDelta jitter_after_4 = estimator_.GetJitterEstimate(0, absl::nullopt);
+  EXPECT_GT(jitter_after_4.ms(), jitter_after_3.ms());
+}
+
 }  // namespace
 }  // namespace webrtc