Split SpeechLevelEstimator into interface and implementation

Bug: webrtc:42232605
Change-Id: I2112dccdadd163e62fa55614c2c23347a6fcd6d6
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/422061
Reviewed-by: Lionel Koenig <lionelk@webrtc.org>
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#46114}
diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index 83f8889..5bcc7cd 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -12,6 +12,8 @@
   sources = [
     "speech_level_estimator.cc",
     "speech_level_estimator.h",
+    "speech_level_estimator_impl.cc",
+    "speech_level_estimator_impl.h",
   ]
 
   visibility = [
@@ -214,8 +216,6 @@
     "../../../api:array_view",
     "../../../api/audio:audio_processing",
     "../../../rtc_base:checks",
-    "../../../rtc_base:checks",
-    "../../../rtc_base:gtest_prod",
     "../../../rtc_base:gtest_prod",
     "../../../rtc_base:logging",
     "../../../rtc_base:safe_minmax",
diff --git a/modules/audio_processing/agc2/speech_level_estimator.cc b/modules/audio_processing/agc2/speech_level_estimator.cc
index 702f1fa..1e3cc02 100644
--- a/modules/audio_processing/agc2/speech_level_estimator.cc
+++ b/modules/audio_processing/agc2/speech_level_estimator.cc
@@ -10,162 +10,19 @@
 
 #include "modules/audio_processing/agc2/speech_level_estimator.h"
 
+#include <memory>
+
 #include "api/audio/audio_processing.h"
-#include "modules/audio_processing/agc2/agc2_common.h"
-#include "modules/audio_processing/logging/apm_data_dumper.h"
-#include "rtc_base/checks.h"
-#include "rtc_base/numerics/safe_minmax.h"
+#include "modules/audio_processing/agc2/speech_level_estimator_impl.h"
 
 namespace webrtc {
-namespace {
 
-float ClampLevelEstimateDbfs(float level_estimate_dbfs) {
-  return SafeClamp<float>(level_estimate_dbfs, -90.0f, 30.0f);
-}
-
-// Returns the initial speech level estimate needed to apply the initial gain.
-float GetInitialSpeechLevelEstimateDbfs(
-    const AudioProcessing::Config::GainController2::AdaptiveDigital& config) {
-  return ClampLevelEstimateDbfs(-kSaturationProtectorInitialHeadroomDb -
-                                config.initial_gain_db - config.headroom_db);
-}
-
-}  // namespace
-
-bool SpeechLevelEstimator::LevelEstimatorState::operator==(
-    const SpeechLevelEstimator::LevelEstimatorState& b) const {
-  return time_to_confidence_ms == b.time_to_confidence_ms &&
-         level_dbfs.numerator == b.level_dbfs.numerator &&
-         level_dbfs.denominator == b.level_dbfs.denominator;
-}
-
-float SpeechLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
-  RTC_DCHECK_NE(denominator, 0.f);
-  return numerator / denominator;
-}
-
-SpeechLevelEstimator::SpeechLevelEstimator(
+std::unique_ptr<SpeechLevelEstimator> SpeechLevelEstimator::Create(
     ApmDataDumper* apm_data_dumper,
     const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
-    int adjacent_speech_frames_threshold)
-    : apm_data_dumper_(apm_data_dumper),
-      initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)),
-      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
-      level_dbfs_(initial_speech_level_dbfs_),
-      // TODO(bugs.webrtc.org/7494): Remove init below when AGC2 input volume
-      // controller temporal dependency removed.
-      is_confident_(false) {
-  RTC_DCHECK(apm_data_dumper_);
-  RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1);
-  Reset();
-}
-
-void SpeechLevelEstimator::Update(float rms_dbfs,
-                                  float speech_probability) {
-  RTC_DCHECK_GT(rms_dbfs, -150.0f);
-  RTC_DCHECK_LT(rms_dbfs, 50.0f);
-  RTC_DCHECK_GE(speech_probability, 0.0f);
-  RTC_DCHECK_LE(speech_probability, 1.0f);
-  if (speech_probability < kVadConfidenceThreshold) {
-    // Not a speech frame.
-    if (adjacent_speech_frames_threshold_ > 1) {
-      // When two or more adjacent speech frames are required in order to update
-      // the state, we need to decide whether to discard or confirm the updates
-      // based on the speech sequence length.
-      if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
-        // First non-speech frame after a long enough sequence of speech frames.
-        // Update the reliable state.
-        reliable_state_ = preliminary_state_;
-      } else if (num_adjacent_speech_frames_ > 0) {
-        // First non-speech frame after a too short sequence of speech frames.
-        // Reset to the last reliable state.
-        preliminary_state_ = reliable_state_;
-      }
-    }
-    num_adjacent_speech_frames_ = 0;
-  } else {
-    // Speech frame observed.
-    num_adjacent_speech_frames_++;
-
-    // Update preliminary level estimate.
-    RTC_DCHECK_GE(preliminary_state_.time_to_confidence_ms, 0);
-    const bool buffer_is_full = preliminary_state_.time_to_confidence_ms == 0;
-    if (!buffer_is_full) {
-      preliminary_state_.time_to_confidence_ms -= kFrameDurationMs;
-    }
-    // Weighted average of levels with speech probability as weight.
-    RTC_DCHECK_GT(speech_probability, 0.0f);
-    const float leak_factor = buffer_is_full ? kLevelEstimatorLeakFactor : 1.0f;
-    preliminary_state_.level_dbfs.numerator =
-        preliminary_state_.level_dbfs.numerator * leak_factor +
-        rms_dbfs * speech_probability;
-    preliminary_state_.level_dbfs.denominator =
-        preliminary_state_.level_dbfs.denominator * leak_factor +
-        speech_probability;
-
-    const float level_dbfs = preliminary_state_.level_dbfs.GetRatio();
-
-    if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
-      // `preliminary_state_` is now reliable. Update the last level estimation.
-      level_dbfs_ = ClampLevelEstimateDbfs(level_dbfs);
-    }
-  }
-  UpdateIsConfident();
-  DumpDebugData();
-}
-
-void SpeechLevelEstimator::UpdateIsConfident() {
-  if (adjacent_speech_frames_threshold_ == 1) {
-    // Ignore `reliable_state_` when a single frame is enough to update the
-    // level estimate (because it is not used).
-    is_confident_ = preliminary_state_.time_to_confidence_ms == 0;
-    return;
-  }
-  // Once confident, it remains confident.
-  RTC_DCHECK(reliable_state_.time_to_confidence_ms != 0 ||
-             preliminary_state_.time_to_confidence_ms == 0);
-  // During the first long enough speech sequence, `reliable_state_` must be
-  // ignored since `preliminary_state_` is used.
-  is_confident_ =
-      reliable_state_.time_to_confidence_ms == 0 ||
-      (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ &&
-       preliminary_state_.time_to_confidence_ms == 0);
-}
-
-void SpeechLevelEstimator::Reset() {
-  ResetLevelEstimatorState(preliminary_state_);
-  ResetLevelEstimatorState(reliable_state_);
-  level_dbfs_ = initial_speech_level_dbfs_;
-  num_adjacent_speech_frames_ = 0;
-}
-
-void SpeechLevelEstimator::ResetLevelEstimatorState(
-    LevelEstimatorState& state) const {
-  state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs;
-  state.level_dbfs.numerator = initial_speech_level_dbfs_;
-  state.level_dbfs.denominator = 1.0f;
-}
-
-void SpeechLevelEstimator::DumpDebugData() const {
-  if (!apm_data_dumper_)
-    return;
-  apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", level_dbfs_);
-  apm_data_dumper_->DumpRaw("agc2_speech_level_is_confident", is_confident_);
-  apm_data_dumper_->DumpRaw(
-      "agc2_adaptive_level_estimator_num_adjacent_speech_frames",
-      num_adjacent_speech_frames_);
-  apm_data_dumper_->DumpRaw(
-      "agc2_adaptive_level_estimator_preliminary_level_estimate_num",
-      preliminary_state_.level_dbfs.numerator);
-  apm_data_dumper_->DumpRaw(
-      "agc2_adaptive_level_estimator_preliminary_level_estimate_den",
-      preliminary_state_.level_dbfs.denominator);
-  apm_data_dumper_->DumpRaw(
-      "agc2_adaptive_level_estimator_preliminary_time_to_confidence_ms",
-      preliminary_state_.time_to_confidence_ms);
-  apm_data_dumper_->DumpRaw(
-      "agc2_adaptive_level_estimator_reliable_time_to_confidence_ms",
-      reliable_state_.time_to_confidence_ms);
+    int adjacent_speech_frames_threshold) {
+  return std::make_unique<SpeechLevelEstimatorImpl>(
+      apm_data_dumper, config, adjacent_speech_frames_threshold);
 }
 
 }  // namespace webrtc
diff --git a/modules/audio_processing/agc2/speech_level_estimator.h b/modules/audio_processing/agc2/speech_level_estimator.h
index 514b21c..50c7c1e 100644
--- a/modules/audio_processing/agc2/speech_level_estimator.h
+++ b/modules/audio_processing/agc2/speech_level_estimator.h
@@ -11,7 +11,7 @@
 #ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
 #define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
 
-#include <type_traits>
+#include <memory>
 
 #include "api/audio/audio_processing.h"
 
@@ -19,58 +19,23 @@
 class ApmDataDumper;
 
 // Active speech level estimator based on the analysis of the following
-// framewise properties: RMS level (dBFS), peak level (dBFS), speech
-// probability.
+// framewise properties: RMS level (dBFS), speech probability.
 class SpeechLevelEstimator {
  public:
-  SpeechLevelEstimator(
+  virtual ~SpeechLevelEstimator() = default;
+  // Updates the level estimate from one frame's RMS level and speech probability.
+  virtual void Update(float rms_dbfs, float speech_probability) = 0;
+  // Returns the estimated speech plus noise level.
+  virtual float GetLevelDbfs() const = 0;
+  // Returns true if the estimator is confident on its current estimate.
+  virtual bool IsConfident() const = 0;
+  // Restarts the estimation from the initial speech level.
+  virtual void Reset() = 0;
+
+  static std::unique_ptr<SpeechLevelEstimator> Create(
       ApmDataDumper* apm_data_dumper,
       const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
       int adjacent_speech_frames_threshold);
-  SpeechLevelEstimator(const SpeechLevelEstimator&) = delete;
-  SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete;
-
-  // Updates the level estimation.
-  void Update(float rms_dbfs, float speech_probability);
-  // Returns the estimated speech plus noise level.
-  float level_dbfs() const { return level_dbfs_; }
-  // Returns true if the estimator is confident on its current estimate.
-  bool is_confident() const { return is_confident_; }
-
-  void Reset();
-
- private:
-  // Part of the level estimator state used for check-pointing and restore ops.
-  struct LevelEstimatorState {
-    bool operator==(const LevelEstimatorState& s) const;
-    inline bool operator!=(const LevelEstimatorState& s) const {
-      return !(*this == s);
-    }
-    // TODO(bugs.webrtc.org/7494): Remove `time_to_confidence_ms` if redundant.
-    int time_to_confidence_ms;
-    struct Ratio {
-      float numerator;
-      float denominator;
-      float GetRatio() const;
-    } level_dbfs;
-  };
-  static_assert(std::is_trivially_copyable<LevelEstimatorState>::value, "");
-
-  void UpdateIsConfident();
-
-  void ResetLevelEstimatorState(LevelEstimatorState& state) const;
-
-  void DumpDebugData() const;
-
-  ApmDataDumper* const apm_data_dumper_;
-
-  const float initial_speech_level_dbfs_;
-  const int adjacent_speech_frames_threshold_;
-  LevelEstimatorState preliminary_state_;
-  LevelEstimatorState reliable_state_;
-  float level_dbfs_;
-  bool is_confident_;
-  int num_adjacent_speech_frames_;
 };
 
 }  // namespace webrtc
diff --git a/modules/audio_processing/agc2/speech_level_estimator_impl.cc b/modules/audio_processing/agc2/speech_level_estimator_impl.cc
new file mode 100644
index 0000000..a802111
--- /dev/null
+++ b/modules/audio_processing/agc2/speech_level_estimator_impl.cc
@@ -0,0 +1,164 @@
+/*
+ *  Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/speech_level_estimator_impl.h"
+
+#include "api/audio/audio_processing.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+float ClampLevelEstimateDbfs(float level_estimate_dbfs) {
+  return SafeClamp<float>(level_estimate_dbfs, -90.0f, 30.0f);
+}
+
+// Returns the initial speech level estimate needed to apply the initial gain.
+float GetInitialSpeechLevelEstimateDbfs(
+    const AudioProcessing::Config::GainController2::AdaptiveDigital& config) {
+  return ClampLevelEstimateDbfs(-kSaturationProtectorInitialHeadroomDb -
+                                config.initial_gain_db - config.headroom_db);
+}
+
+}  // namespace
+
+float SpeechLevelEstimatorImpl::LevelEstimatorState::Ratio::GetRatio() const {
+  RTC_DCHECK_NE(denominator, 0.f);
+  return numerator / denominator;
+}
+
+SpeechLevelEstimatorImpl::SpeechLevelEstimatorImpl(
+    ApmDataDumper* apm_data_dumper,
+    const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+    int adjacent_speech_frames_threshold)
+    : apm_data_dumper_(apm_data_dumper),
+      initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)),
+      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
+      level_dbfs_(initial_speech_level_dbfs_),
+      // TODO(bugs.webrtc.org/7494): Remove init below when AGC2 input volume
+      // controller temporal dependency removed.
+      is_confident_(false) {
+  RTC_DCHECK(apm_data_dumper_);
+  RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1);
+  Reset();
+}
+
+void SpeechLevelEstimatorImpl::Update(float rms_dbfs,
+                                      float speech_probability) {
+  RTC_DCHECK_GT(rms_dbfs, -150.0f);
+  RTC_DCHECK_LT(rms_dbfs, 50.0f);
+  RTC_DCHECK_GE(speech_probability, 0.0f);
+  RTC_DCHECK_LE(speech_probability, 1.0f);
+  if (speech_probability < kVadConfidenceThreshold) {
+    // Not a speech frame.
+    if (adjacent_speech_frames_threshold_ > 1) {
+      // When two or more adjacent speech frames are required in order to update
+      // the state, we need to decide whether to discard or confirm the updates
+      // based on the speech sequence length.
+      if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
+        // First non-speech frame after a long enough sequence of speech frames.
+        // Update the reliable state.
+        reliable_state_ = preliminary_state_;
+      } else if (num_adjacent_speech_frames_ > 0) {
+        // First non-speech frame after a too short sequence of speech frames.
+        // Reset to the last reliable state.
+        preliminary_state_ = reliable_state_;
+      }
+    }
+    num_adjacent_speech_frames_ = 0;
+  } else {
+    // Speech frame observed.
+    num_adjacent_speech_frames_++;
+
+    // Update preliminary level estimate.
+    RTC_DCHECK_GE(preliminary_state_.time_to_confidence_ms, 0);
+    const bool buffer_is_full = preliminary_state_.time_to_confidence_ms == 0;
+    if (!buffer_is_full) {
+      preliminary_state_.time_to_confidence_ms -= kFrameDurationMs;
+    }
+    // Weighted average of levels with speech probability as weight.
+    RTC_DCHECK_GT(speech_probability, 0.0f);
+    const float leak_factor = buffer_is_full ? kLevelEstimatorLeakFactor : 1.0f;
+    preliminary_state_.level_dbfs.numerator =
+        preliminary_state_.level_dbfs.numerator * leak_factor +
+        rms_dbfs * speech_probability;
+    preliminary_state_.level_dbfs.denominator =
+        preliminary_state_.level_dbfs.denominator * leak_factor +
+        speech_probability;
+
+    const float level_dbfs = preliminary_state_.level_dbfs.GetRatio();
+
+    if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
+      // `preliminary_state_` is now reliable. Update the last level estimation.
+      level_dbfs_ = ClampLevelEstimateDbfs(level_dbfs);
+    }
+  }
+  UpdateIsConfident();
+  DumpDebugData();
+}
+
+void SpeechLevelEstimatorImpl::UpdateIsConfident() {
+  if (adjacent_speech_frames_threshold_ == 1) {
+    // Ignore `reliable_state_` when a single frame is enough to update the
+    // level estimate (because it is not used).
+    is_confident_ = preliminary_state_.time_to_confidence_ms == 0;
+    return;
+  }
+  // Once confident, it remains confident.
+  RTC_DCHECK(reliable_state_.time_to_confidence_ms != 0 ||
+             preliminary_state_.time_to_confidence_ms == 0);
+  // During the first long enough speech sequence, `reliable_state_` must be
+  // ignored since `preliminary_state_` is used.
+  is_confident_ =
+      reliable_state_.time_to_confidence_ms == 0 ||
+      (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ &&
+       preliminary_state_.time_to_confidence_ms == 0);
+}
+
+void SpeechLevelEstimatorImpl::Reset() {
+  ResetLevelEstimatorState(preliminary_state_);
+  ResetLevelEstimatorState(reliable_state_);
+  level_dbfs_ = initial_speech_level_dbfs_;
+  num_adjacent_speech_frames_ = 0;
+}
+
+void SpeechLevelEstimatorImpl::ResetLevelEstimatorState(
+    LevelEstimatorState& state) const {
+  state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs;
+  state.level_dbfs.numerator = initial_speech_level_dbfs_;
+  state.level_dbfs.denominator = 1.0f;
+}
+
+void SpeechLevelEstimatorImpl::DumpDebugData() const {
+  if (!apm_data_dumper_)
+    return;
+  apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", level_dbfs_);
+  apm_data_dumper_->DumpRaw("agc2_speech_level_is_confident", is_confident_);
+  apm_data_dumper_->DumpRaw(
+      "agc2_adaptive_level_estimator_num_adjacent_speech_frames",
+      num_adjacent_speech_frames_);
+  apm_data_dumper_->DumpRaw(
+      "agc2_adaptive_level_estimator_preliminary_level_estimate_num",
+      preliminary_state_.level_dbfs.numerator);
+  apm_data_dumper_->DumpRaw(
+      "agc2_adaptive_level_estimator_preliminary_level_estimate_den",
+      preliminary_state_.level_dbfs.denominator);
+  apm_data_dumper_->DumpRaw(
+      "agc2_adaptive_level_estimator_preliminary_time_to_confidence_ms",
+      preliminary_state_.time_to_confidence_ms);
+  apm_data_dumper_->DumpRaw(
+      "agc2_adaptive_level_estimator_reliable_time_to_confidence_ms",
+      reliable_state_.time_to_confidence_ms);
+}
+
+}  // namespace webrtc
diff --git a/modules/audio_processing/agc2/speech_level_estimator_impl.h b/modules/audio_processing/agc2/speech_level_estimator_impl.h
new file mode 100644
index 0000000..68c62d0
--- /dev/null
+++ b/modules/audio_processing/agc2/speech_level_estimator_impl.h
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_IMPL_H_
+
+#include <type_traits>
+
+#include "api/audio/audio_processing.h"
+#include "modules/audio_processing/agc2/speech_level_estimator.h"
+
+namespace webrtc {
+class ApmDataDumper;
+
+class SpeechLevelEstimatorImpl : public SpeechLevelEstimator {
+ public:
+  SpeechLevelEstimatorImpl(
+      ApmDataDumper* apm_data_dumper,
+      const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+      int adjacent_speech_frames_threshold);
+  SpeechLevelEstimatorImpl(const SpeechLevelEstimatorImpl&) = delete;
+  SpeechLevelEstimatorImpl& operator=(const SpeechLevelEstimatorImpl&) = delete;
+
+  // Updates the level estimate from one frame's RMS level and speech probability.
+  void Update(float rms_dbfs, float speech_probability) override;
+  // Returns the estimated speech plus noise level.
+  float GetLevelDbfs() const override { return level_dbfs_; }
+  // Returns true if the estimator is confident on its current estimate.
+  bool IsConfident() const override { return is_confident_; }
+  // Resets both states, the level estimate and the adjacent speech frame count.
+  void Reset() override;
+
+ private:
+  // Part of the level estimator state used for check-pointing and restore ops.
+  struct LevelEstimatorState {
+    // TODO(bugs.webrtc.org/7494): Remove `time_to_confidence_ms` if redundant.
+    int time_to_confidence_ms;
+    struct Ratio {
+      float numerator;
+      float denominator;
+      float GetRatio() const;
+    } level_dbfs;
+  };
+  static_assert(std::is_trivially_copyable<LevelEstimatorState>::value, "");
+  // Recomputes `is_confident_` from the preliminary and reliable states.
+  void UpdateIsConfident();
+  // Re-initializes `state` using the initial speech level.
+  void ResetLevelEstimatorState(LevelEstimatorState& state) const;
+  // Dumps the estimator state through `apm_data_dumper_`, if set.
+  void DumpDebugData() const;
+
+  ApmDataDumper* const apm_data_dumper_;
+
+  const float initial_speech_level_dbfs_;
+  const int adjacent_speech_frames_threshold_;
+  LevelEstimatorState preliminary_state_;
+  LevelEstimatorState reliable_state_;
+  float level_dbfs_;
+  bool is_confident_;
+  int num_adjacent_speech_frames_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_IMPL_H_
diff --git a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc
index eb466d1..f9387f2c 100644
--- a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc
+++ b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc
@@ -8,12 +8,11 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "modules/audio_processing/agc2/speech_level_estimator.h"
-
 #include <memory>
 
 #include "api/audio/audio_processing.h"
 #include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/speech_level_estimator_impl.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #include "rtc_base/checks.h"
 #include "test/gtest.h"
@@ -36,7 +35,7 @@
 void RunOnConstantLevel(int num_iterations,
                         float rms_dbfs,
                         float speech_probability,
-                        SpeechLevelEstimator& level_estimator) {
+                        SpeechLevelEstimatorImpl& level_estimator) {
   for (int i = 0; i < num_iterations; ++i) {
     level_estimator.Update(rms_dbfs, speech_probability);
   }
@@ -50,11 +49,11 @@
 struct TestLevelEstimator {
   explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
       : data_dumper(0),
-        estimator(std::make_unique<SpeechLevelEstimator>(
+        estimator(std::make_unique<SpeechLevelEstimatorImpl>(
             &data_dumper,
             AdaptiveDigitalConfig{},
             adjacent_speech_frames_threshold)),
-        initial_speech_level_dbfs(estimator->level_dbfs()),
+        initial_speech_level_dbfs(estimator->GetLevelDbfs()),
         level_rms_dbfs(initial_speech_level_dbfs / 2.0f),
         level_peak_dbfs(initial_speech_level_dbfs / 3.0f) {
     RTC_DCHECK_LT(level_rms_dbfs, level_peak_dbfs);
@@ -64,7 +63,7 @@
            "level is wide enough for the tests";
   }
   ApmDataDumper data_dumper;
-  std::unique_ptr<SpeechLevelEstimator> estimator;
+  std::unique_ptr<SpeechLevelEstimatorImpl> estimator;
   const float initial_speech_level_dbfs;
   const float level_rms_dbfs;
   const float level_peak_dbfs;
@@ -76,10 +75,10 @@
   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
                      level_estimator.level_rms_dbfs, kMaxSpeechProbability,
                      *level_estimator.estimator);
-  const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
+  const float estimated_level_dbfs = level_estimator.estimator->GetLevelDbfs();
   RunOnConstantLevel(/*num_iterations=*/1, level_estimator.level_rms_dbfs,
                      kMaxSpeechProbability, *level_estimator.estimator);
-  EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs,
+  EXPECT_NEAR(level_estimator.estimator->GetLevelDbfs(), estimated_level_dbfs,
               0.1f);
 }
 
@@ -90,7 +89,7 @@
   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
                      level_estimator.level_rms_dbfs, kMaxSpeechProbability,
                      *level_estimator.estimator);
-  EXPECT_FALSE(level_estimator.estimator->is_confident());
+  EXPECT_FALSE(level_estimator.estimator->IsConfident());
 }
 
 // Checks that the level controller becomes confident when enough speech frames
@@ -100,7 +99,7 @@
   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
                      level_estimator.level_rms_dbfs, kMaxSpeechProbability,
                      *level_estimator.estimator);
-  EXPECT_TRUE(level_estimator.estimator->is_confident());
+  EXPECT_TRUE(level_estimator.estimator->IsConfident());
 }
 
 // Checks that the estimated level is not affected by the level of non-speech
@@ -111,13 +110,13 @@
   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
                      level_estimator.level_rms_dbfs, kMaxSpeechProbability,
                      *level_estimator.estimator);
-  const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
+  const float estimated_level_dbfs = level_estimator.estimator->GetLevelDbfs();
   // Simulate full-scale non-speech.
   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
                      /*rms_dbfs=*/0.0f, kNoSpeechProbability,
                      *level_estimator.estimator);
   // No estimated level change is expected.
-  EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
+  EXPECT_FLOAT_EQ(level_estimator.estimator->GetLevelDbfs(),
                   estimated_level_dbfs);
 }
 
@@ -127,7 +126,7 @@
   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
                      level_estimator.level_rms_dbfs, kMaxSpeechProbability,
                      *level_estimator.estimator);
-  EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
+  EXPECT_NEAR(level_estimator.estimator->GetLevelDbfs(),
               level_estimator.level_rms_dbfs,
               kConvergenceSpeedTestsLevelTolerance);
 }
@@ -141,9 +140,9 @@
       /*rms_dbfs=*/level_estimator.initial_speech_level_dbfs,
       kMaxSpeechProbability, *level_estimator.estimator);
   // No estimate change should occur, but confidence is achieved.
-  ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
+  ASSERT_FLOAT_EQ(level_estimator.estimator->GetLevelDbfs(),
                   level_estimator.initial_speech_level_dbfs);
-  ASSERT_TRUE(level_estimator.estimator->is_confident());
+  ASSERT_TRUE(level_estimator.estimator->IsConfident());
   // After confidence.
   constexpr float kConvergenceTimeAfterConfidenceNumFrames = 700;  // 7 seconds.
   static_assert(
@@ -152,7 +151,7 @@
       /*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames,
       level_estimator.level_rms_dbfs, kMaxSpeechProbability,
       *level_estimator.estimator);
-  EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
+  EXPECT_NEAR(level_estimator.estimator->GetLevelDbfs(),
               level_estimator.level_rms_dbfs,
               kConvergenceSpeedTestsLevelTolerance);
 }
@@ -165,28 +164,28 @@
 
 TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) {
   TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
-  const float initial_level = level_estimator.estimator->level_dbfs();
+  const float initial_level = level_estimator.estimator->GetLevelDbfs();
   ASSERT_LT(initial_level, level_estimator.level_peak_dbfs);
   for (int i = 0; i < adjacent_speech_frames_threshold() - 1; ++i) {
     SCOPED_TRACE(i);
     level_estimator.estimator->Update(level_estimator.level_rms_dbfs,
                                       kMaxSpeechProbability);
-    EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs());
+    EXPECT_EQ(initial_level, level_estimator.estimator->GetLevelDbfs());
   }
   level_estimator.estimator->Update(level_estimator.level_rms_dbfs,
                                     kLowSpeechProbability);
-  EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs());
+  EXPECT_EQ(initial_level, level_estimator.estimator->GetLevelDbfs());
 }
 
 TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
   TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
-  const float initial_level = level_estimator.estimator->level_dbfs();
+  const float initial_level = level_estimator.estimator->GetLevelDbfs();
   ASSERT_LT(initial_level, level_estimator.level_peak_dbfs);
   for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
     level_estimator.estimator->Update(level_estimator.level_rms_dbfs,
                                       kMaxSpeechProbability);
   }
-  EXPECT_LT(initial_level, level_estimator.estimator->level_dbfs());
+  EXPECT_LT(initial_level, level_estimator.estimator->GetLevelDbfs());
 }
 
 INSTANTIATE_TEST_SUITE_P(GainController2,
diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc
index 98deda4..ba4b61d 100644
--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc
@@ -117,7 +117,7 @@
   if (config.input_volume_controller.enabled ||
       config.adaptive_digital.enabled) {
     // Create dependencies.
-    speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
+    speech_level_estimator_ = SpeechLevelEstimator::Create(
         &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
     if (use_internal_vad)
       vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
@@ -218,8 +218,8 @@
   if (speech_level_estimator_) {
     speech_level_estimator_->Update(audio_levels.rms_dbfs, speech_probability);
     speech_level =
-        SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
-                    .rms_dbfs = speech_level_estimator_->level_dbfs()};
+        SpeechLevel{.is_confident = speech_level_estimator_->IsConfident(),
+                    .rms_dbfs = speech_level_estimator_->GetLevelDbfs()};
   }
 
   // Update the recommended input volume.