modules/audio_processing/aec3/neural_residual_echo_estimator_impl.h - src - Git at Google

 /*
  *  Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #ifndef MODULES_AUDIO_PROCESSING_AEC3_NEURAL_RESIDUAL_ECHO_ESTIMATOR_IMPL_H_
 #define MODULES_AUDIO_PROCESSING_AEC3_NEURAL_RESIDUAL_ECHO_ESTIMATOR_IMPL_H_

 #include <array>
 #include <memory>
 #include <vector>

 #include "absl/strings/string_view.h"
 #include "api/array_view.h"
 #include "api/audio/neural_residual_echo_estimator.h"
 #include "modules/audio_processing/aec3/aec3_common.h"
 #include "modules/audio_processing/aec3/neural_feature_extractor.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD
 #include "external/webrtc/webrtc/modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h"
 #else
 #include "modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h"
 #endif

 namespace webrtc {

 // Implements the NeuralResidualEchoEstimator's virtual methods to estimate
 // residual echo not fully removed by the linear AEC3 estimator. It uses a
 // provided model to generate an echo residual mask from the linear AEC output
 // and render signal. This mask is then used for estimating the echo residual
 // that the AEC3 suppressor needs for computing the suppression gains.
 class NeuralResidualEchoEstimatorImpl : public NeuralResidualEchoEstimator {
  public:
   enum class ModelInputEnum {
     kModelState = 0,
     kMic = 1,
     kLinearAecOutput = 2,
     kAecRef = 3,
     kNumInputs = 4
   };
   enum class ModelOutputEnum {
     kEchoMask = 0,
     kModelState = 1,
     kNumOutputs = 2
   };

   // Executes a residual echo estimation model on given inputs.
   class ModelRunner {
    public:
     virtual ~ModelRunner() = default;

     virtual int StepSize() const = 0;
     virtual webrtc::ArrayView<float> GetInput(ModelInputEnum input_enum) = 0;
     virtual webrtc::ArrayView<const float> GetOutputEchoMask() = 0;
     virtual const audioproc::ReeModelMetadata& GetMetadata() const = 0;
     virtual bool Invoke() = 0;
   };

   // Initializes an ML-based residual echo estimator from the tflite file path
   // provided. Returns nullptr if any initialization step fails.
   static std::unique_ptr<ModelRunner> LoadTfLiteModel(
       absl::string_view ml_ree_model_path);

   explicit NeuralResidualEchoEstimatorImpl(
       std::unique_ptr<ModelRunner> model_runner_);

   void Estimate(
       webrtc::ArrayView<const float> x,
       webrtc::ArrayView<const std::array<float, kBlockSize>> y,
       webrtc::ArrayView<const std::array<float, kBlockSize>> e,
       webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2,
       webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
       webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
       webrtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2,
       webrtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded)
       override;

  private:
   void DumpInputs();

   // Encapsulates all ML model invocation work.
   const std::unique_ptr<ModelRunner> model_runner_;
   std::unique_ptr<FeatureExtractor> feature_extractor_;

   // Input buffers for translating from the 4 ms FloatS16 block format of AEC3
   // to the model scale and frame size.
   std::vector<float> input_mic_buffer_;
   std::vector<float> input_linear_aec_output_buffer_;
   std::vector<float> input_aec_ref_buffer_;

   // Downsampled model output for what fraction of the power content in the
   // linear AEC output is echo for each bin.
   std::array<float, kFftLengthBy2Plus1> output_mask_;

   static int instance_count_;
   // Pointer to a data dumper that is used for debugging purposes.
   std::unique_ptr<ApmDataDumper> data_dumper_;
 };

 }  // namespace webrtc

 #endif  // MODULES_AUDIO_PROCESSING_AEC3_NEURAL_RESIDUAL_ECHO_ESTIMATOR_IMPL_H_
	/*
	* Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#ifndef MODULES_AUDIO_PROCESSING_AEC3_NEURAL_RESIDUAL_ECHO_ESTIMATOR_IMPL_H_
	#define MODULES_AUDIO_PROCESSING_AEC3_NEURAL_RESIDUAL_ECHO_ESTIMATOR_IMPL_H_

	#include <array>
	#include <memory>
	#include <vector>

	#include "absl/strings/string_view.h"
	#include "api/array_view.h"
	#include "api/audio/neural_residual_echo_estimator.h"
	#include "modules/audio_processing/aec3/aec3_common.h"
	#include "modules/audio_processing/aec3/neural_feature_extractor.h"
	#include "modules/audio_processing/logging/apm_data_dumper.h"
	#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
	#include "external/webrtc/webrtc/modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h"
	#else
	#include "modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h"
	#endif

	namespace webrtc {

	// Implements the NeuralResidualEchoEstimator's virtual methods to estimate
	// residual echo not fully removed by the linear AEC3 estimator. It uses a
	// provided model to generate an echo residual mask from the linear AEC output
	// and render signal. This mask is then used for estimating the echo residual
	// that the AEC3 suppressor needs for computing the suppression gains.
	class NeuralResidualEchoEstimatorImpl : public NeuralResidualEchoEstimator {
	public:
	enum class ModelInputEnum {
	kModelState = 0,
	kMic = 1,
	kLinearAecOutput = 2,
	kAecRef = 3,
	kNumInputs = 4
	};
	enum class ModelOutputEnum {
	kEchoMask = 0,
	kModelState = 1,
	kNumOutputs = 2
	};

	// Executes a residual echo estimation model on given inputs.
	class ModelRunner {
	public:
	virtual ~ModelRunner() = default;

	virtual int StepSize() const = 0;
	virtual webrtc::ArrayView<float> GetInput(ModelInputEnum input_enum) = 0;
	virtual webrtc::ArrayView<const float> GetOutputEchoMask() = 0;
	virtual const audioproc::ReeModelMetadata& GetMetadata() const = 0;
	virtual bool Invoke() = 0;
	};

	// Initializes an ML-based residual echo estimator from the tflite file path
	// provided. Returns nullptr if any initialization step fails.
	static std::unique_ptr<ModelRunner> LoadTfLiteModel(
	absl::string_view ml_ree_model_path);

	explicit NeuralResidualEchoEstimatorImpl(
	std::unique_ptr<ModelRunner> model_runner_);

	void Estimate(
	webrtc::ArrayView<const float> x,
	webrtc::ArrayView<const std::array<float, kBlockSize>> y,
	webrtc::ArrayView<const std::array<float, kBlockSize>> e,
	webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2,
	webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
	webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
	webrtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2,
	webrtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded)
	override;

	private:
	void DumpInputs();

	// Encapsulates all ML model invocation work.
	const std::unique_ptr<ModelRunner> model_runner_;
	std::unique_ptr<FeatureExtractor> feature_extractor_;

	// Input buffers for translating from the 4 ms FloatS16 block format of AEC3
	// to the model scale and frame size.
	std::vector<float> input_mic_buffer_;
	std::vector<float> input_linear_aec_output_buffer_;
	std::vector<float> input_aec_ref_buffer_;

	// Downsampled model output for what fraction of the power content in the
	// linear AEC output is echo for each bin.
	std::array<float, kFftLengthBy2Plus1> output_mask_;

	static int instance_count_;
	// Pointer to a data dumper that is used for debugging purposes.
	std::unique_ptr<ApmDataDumper> data_dumper_;
	};

	} // namespace webrtc

	#endif // MODULES_AUDIO_PROCESSING_AEC3_NEURAL_RESIDUAL_ECHO_ESTIMATOR_IMPL_H_