JitterEstimator: add field trial overrides for avg frame filter

This change adds a median filter that can replace the
IIR filter that is currently used to estimate the
avg frame size (in bytes). It is enabled through a boolean,
and reuses the window length from the max percentile filter.

The median filter is only used by the delay calculation in
`CalculateEstimate()`. It does not replaced the use of the
IIR estimate in the size outlier rejection heuristic.

Bug: webrtc:14151
Change-Id: I519b6b57a8bee3c41a300ed2e92a1981c61cca15
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/275121
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Rasmus Brandt <brandtr@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38077}
diff --git a/modules/video_coding/timing/jitter_estimator.cc b/modules/video_coding/timing/jitter_estimator.cc
index af3c9ab..f5d6fdf 100644
--- a/modules/video_coding/timing/jitter_estimator.cc
+++ b/modules/video_coding/timing/jitter_estimator.cc
@@ -46,7 +46,7 @@
 constexpr double kPsi = 0.9999;
 // Default constants for percentile frame size filter.
 constexpr double kDefaultMaxFrameSizePercentile = 0.95;
-constexpr int kDefaultMaxFrameSizeWindow = 30 * 10;
+constexpr int kDefaultFrameSizeWindow = 30 * 10;
 
 // Outlier rejection constants.
 constexpr double kDefaultMaxTimestampDeviationInSigmas = 3.5;
@@ -88,6 +88,8 @@
 JitterEstimator::JitterEstimator(Clock* clock,
                                  const FieldTrialsView& field_trials)
     : config_(Config::Parse(field_trials.Lookup(Config::kFieldTrialsKey))),
+      avg_frame_size_median_bytes_(static_cast<size_t>(
+          config_.frame_size_window.value_or(kDefaultFrameSizeWindow))),
       max_frame_size_bytes_percentile_(
           config_.max_frame_size_percentile.value_or(
               kDefaultMaxFrameSizePercentile)),
@@ -104,6 +106,7 @@
   avg_frame_size_bytes_ = kInitialAvgAndMaxFrameSizeBytes;
   max_frame_size_bytes_ = kInitialAvgAndMaxFrameSizeBytes;
   var_frame_size_bytes2_ = 100;
+  avg_frame_size_median_bytes_.Reset();
   max_frame_size_bytes_percentile_.Reset();
   frame_sizes_in_percentile_filter_ = std::queue<int64_t>();
   last_update_time_ = absl::nullopt;
@@ -160,12 +163,15 @@
   max_frame_size_bytes_ =
       std::max<double>(kPsi * max_frame_size_bytes_, frame_size.bytes());
 
-  // Maybe update percentile estimate of max frame size.
+  // Maybe update percentile estimates of frame sizes.
+  if (config_.avg_frame_size_median) {
+    avg_frame_size_median_bytes_.Insert(frame_size.bytes());
+  }
   if (config_.MaxFrameSizePercentileEnabled()) {
     frame_sizes_in_percentile_filter_.push(frame_size.bytes());
     if (frame_sizes_in_percentile_filter_.size() >
-        static_cast<size_t>(config_.max_frame_size_window.value_or(
-            kDefaultMaxFrameSizeWindow))) {
+        static_cast<size_t>(
+            config_.frame_size_window.value_or(kDefaultFrameSizeWindow))) {
       max_frame_size_bytes_percentile_.Erase(
           frame_sizes_in_percentile_filter_.front());
       frame_sizes_in_percentile_filter_.pop();
@@ -188,11 +194,25 @@
       frame_delay.ms() -
       kalman_filter_.GetFrameDelayVariationEstimateTotal(delta_frame_bytes);
 
-  // Outlier rejection.
+  // Outlier rejection: these conditions depend on filtered versions of the
+  // delay and frame size _means_, respectively, together with a configurable
+  // number of standard deviations. If a sample is large with respect to the
+  // corresponding mean and dispersion (defined by the number of
+  // standard deviations and the sample standard deviation), it is deemed an
+  // outlier. This "empirical rule" is further described in
+  // https://en.wikipedia.org/wiki/68-95-99.7_rule. Note that neither of the
+  // estimated means are true sample means, which implies that they are possibly
+  // not normally distributed. Hence, this rejection method is just a heuristic.
   double num_stddev_delay_outlier = GetNumStddevDelayOutlier();
+  // Delay outlier rejection is two-sided.
   bool abs_delay_is_not_outlier =
       fabs(delay_deviation_ms) <
       num_stddev_delay_outlier * sqrt(var_noise_ms2_);
+  // The reasoning above means, in particular, that we should use the sample
+  // mean-style `avg_frame_size_bytes_` estimate, as opposed to the
+  // median-filtered version, even if configured to use latter for the
+  // calculation in `CalculateEstimate()`.
+  // Size outlier rejection is one-sided.
   bool size_is_positive_outlier =
       frame_size.bytes() >
       avg_frame_size_bytes_ +
@@ -251,9 +271,8 @@
 double JitterEstimator::GetMaxFrameSizeEstimateBytes() const {
   if (config_.MaxFrameSizePercentileEnabled()) {
     RTC_DCHECK_GT(frame_sizes_in_percentile_filter_.size(), 1u);
-    RTC_DCHECK_LE(
-        frame_sizes_in_percentile_filter_.size(),
-        config_.max_frame_size_window.value_or(kDefaultMaxFrameSizeWindow));
+    RTC_DCHECK_LE(frame_sizes_in_percentile_filter_.size(),
+                  config_.frame_size_window.value_or(kDefaultFrameSizeWindow));
     return max_frame_size_bytes_percentile_.GetPercentileValue();
   }
   return max_frame_size_bytes_;
@@ -332,8 +351,14 @@
 
 // Calculates the current jitter estimate from the filtered estimates.
 TimeDelta JitterEstimator::CalculateEstimate() {
+  // Using median- and percentile-filtered versions of the frame sizes may be
+  // more robust than using sample mean-style estimates.
+  double filtered_avg_frame_size_bytes =
+      config_.avg_frame_size_median
+          ? avg_frame_size_median_bytes_.GetFilteredValue()
+          : avg_frame_size_bytes_;
   double worst_case_frame_size_deviation_bytes =
-      GetMaxFrameSizeEstimateBytes() - avg_frame_size_bytes_;
+      GetMaxFrameSizeEstimateBytes() - filtered_avg_frame_size_bytes;
   double ret_ms = kalman_filter_.GetFrameDelayVariationEstimateSizeBased(
                       worst_case_frame_size_deviation_bytes) +
                   NoiseThreshold();
diff --git a/modules/video_coding/timing/jitter_estimator.h b/modules/video_coding/timing/jitter_estimator.h
index ae6b155..bbb148d 100644
--- a/modules/video_coding/timing/jitter_estimator.h
+++ b/modules/video_coding/timing/jitter_estimator.h
@@ -24,6 +24,7 @@
 #include "modules/video_coding/timing/frame_delay_variation_kalman_filter.h"
 #include "modules/video_coding/timing/rtt_filter.h"
 #include "rtc_base/experiments/struct_parameters_parser.h"
+#include "rtc_base/numerics/moving_median_filter.h"
 #include "rtc_base/numerics/percentile_filter.h"
 #include "rtc_base/rolling_accumulator.h"
 
@@ -45,45 +46,52 @@
     }
 
     std::unique_ptr<StructParametersParser> Parser() {
+      // clang-format off
       return StructParametersParser::Create(
+          "avg_frame_size_median", &avg_frame_size_median,
           "max_frame_size_percentile", &max_frame_size_percentile,
-          "max_frame_size_window", &max_frame_size_window,
+          "frame_size_window", &frame_size_window,
           "num_stddev_delay_outlier", &num_stddev_delay_outlier,
           "num_stddev_size_outlier", &num_stddev_size_outlier,
           "congestion_rejection_factor", &congestion_rejection_factor);
+      // clang-format on
     }
 
     bool MaxFrameSizePercentileEnabled() const {
       return max_frame_size_percentile.has_value();
     }
 
+    // If set, the "avg" frame size is calculated as the median over a window
+    // of recent frame sizes.
+    bool avg_frame_size_median = false;
+
     // If set, the "max" frame size is calculated as this percentile over a
     // window of recent frame sizes.
-    absl::optional<double> max_frame_size_percentile;
+    absl::optional<double> max_frame_size_percentile = absl::nullopt;
 
-    // The length of the percentile filter's window, in number of frames.
-    absl::optional<int> max_frame_size_window;
+    // The length of the percentile filters' window, in number of frames.
+    absl::optional<int> frame_size_window = absl::nullopt;
 
     // A (relative) frame delay variation sample is an outlier if its absolute
     // deviation from the Kalman filter model falls outside this number of
     // sample standard deviations.
     //
     // Increasing this value rejects fewer samples.
-    absl::optional<double> num_stddev_delay_outlier;
+    absl::optional<double> num_stddev_delay_outlier = absl::nullopt;
 
     // An (absolute) frame size sample is an outlier if its positive deviation
     // from the estimated average frame size falls outside this number of sample
     // standard deviations.
     //
     // Increasing this value rejects fewer samples.
-    absl::optional<double> num_stddev_size_outlier;
+    absl::optional<double> num_stddev_size_outlier = absl::nullopt;
 
     // A (relative) frame size variation sample is deemed "congested", and is
     // thus rejected, if its value is less than this factor times the estimated
     // max frame size.
     //
     // Decreasing this value rejects fewer samples.
-    absl::optional<double> congestion_rejection_factor;
+    absl::optional<double> congestion_rejection_factor = absl::nullopt;
   };
 
   JitterEstimator(Clock* clock, const FieldTrialsView& field_trials);
@@ -167,6 +175,9 @@
   // when api/units have sufficient precision.
   double max_frame_size_bytes_;
   // Percentile frame sized received (over a window). Only used if configured.
+  MovingMedianFilter<int64_t> avg_frame_size_median_bytes_;
+  // TODO(webrtc:14151): Make `MovingMedianFilter` take a percentile value and
+  // switch `max_frame_size_bytes_percentile_` over to that class.
   PercentileFilter<int64_t> max_frame_size_bytes_percentile_;
   std::queue<int64_t> frame_sizes_in_percentile_filter_;
   // TODO(bugs.webrtc.org/14381): Update `startup_frame_size_sum_bytes_` to
diff --git a/modules/video_coding/timing/jitter_estimator_unittest.cc b/modules/video_coding/timing/jitter_estimator_unittest.cc
index 4da6bf4..f53208c 100644
--- a/modules/video_coding/timing/jitter_estimator_unittest.cc
+++ b/modules/video_coding/timing/jitter_estimator_unittest.cc
@@ -164,8 +164,9 @@
 
 TEST_F(JitterEstimatorTest, EmptyFieldTrialsParsesToUnsetConfig) {
   JitterEstimator::Config config = estimator_.GetConfigForTest();
+  EXPECT_FALSE(config.avg_frame_size_median);
   EXPECT_FALSE(config.max_frame_size_percentile.has_value());
-  EXPECT_FALSE(config.max_frame_size_window.has_value());
+  EXPECT_FALSE(config.frame_size_window.has_value());
   EXPECT_FALSE(config.num_stddev_delay_outlier.has_value());
   EXPECT_FALSE(config.num_stddev_size_outlier.has_value());
   EXPECT_FALSE(config.congestion_rejection_factor.has_value());
@@ -176,8 +177,9 @@
   FieldTrialsOverriddenJitterEstimatorTest()
       : JitterEstimatorTest(
             "WebRTC-JitterEstimatorConfig/"
+            "avg_frame_size_median:true,"
             "max_frame_size_percentile:0.9,"
-            "max_frame_size_window:30,"
+            "frame_size_window:30,"
             "num_stddev_delay_outlier:2,"
             "num_stddev_size_outlier:3.1,"
             "congestion_rejection_factor:-1.55/") {}
@@ -186,8 +188,9 @@
 
 TEST_F(FieldTrialsOverriddenJitterEstimatorTest, FieldTrialsParsesCorrectly) {
   JitterEstimator::Config config = estimator_.GetConfigForTest();
+  EXPECT_TRUE(config.avg_frame_size_median);
   EXPECT_EQ(*config.max_frame_size_percentile, 0.9);
-  EXPECT_EQ(*config.max_frame_size_window, 30);
+  EXPECT_EQ(*config.frame_size_window, 30);
   EXPECT_EQ(*config.num_stddev_delay_outlier, 2.0);
   EXPECT_EQ(*config.num_stddev_size_outlier, 3.1);
   EXPECT_EQ(*config.congestion_rejection_factor, -1.55);