modules/audio_processing/transient/transient_suppressor.h - src/ - Git at Google

 /*
  *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
 #define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_

 #include <cstddef>

 namespace webrtc {

 // Detects transients in an audio stream and suppress them using a simple
 // restoration algorithm that attenuates unexpected spikes in the spectrum.
 class TransientSuppressor {
  public:
   // Type of VAD used by the caller to compute the `voice_probability` argument
   // `Suppress()`.
   enum class VadMode {
     // By default, `TransientSuppressor` assumes that `voice_probability` is
     // computed by `AgcManagerDirect`.
     kDefault = 0,
     // Use this mode when `TransientSuppressor` must assume that
     // `voice_probability` is computed by the RNN VAD.
     kRnnVad,
     // Use this mode to let `TransientSuppressor::Suppressor()` ignore
     // `voice_probability` and behave as if voice information is unavailable
     // (regardless of the passed value).
     kNoVad,
   };

   virtual ~TransientSuppressor() {}

   virtual void Initialize(int sample_rate_hz,
                           int detector_rate_hz,
                           int num_channels) = 0;

   // Processes a `data` chunk, and returns it with keystrokes suppressed from
   // it. The float format is assumed to be int16 ranged. If there are more than
   // one channel, the chunks are concatenated one after the other in `data`.
   // `data_length` must be equal to `data_length_`.
   // `num_channels` must be equal to `num_channels_`.
   // A sub-band, ideally the higher, can be used as `detection_data`. If it is
   // NULL, `data` is used for the detection too. The `detection_data` is always
   // assumed mono.
   // If a reference signal (e.g. keyboard microphone) is available, it can be
   // passed in as `reference_data`. It is assumed mono and must have the same
   // length as `data`. NULL is accepted if unavailable.
   // This suppressor performs better if voice information is available.
   // `voice_probability` is the probability of voice being present in this chunk
   // of audio. If voice information is not available, `voice_probability` must
   // always be set to 1.
   // `key_pressed` determines if a key was pressed on this audio chunk.
   // Returns a delayed version of `voice_probability` according to the
   // algorithmic delay introduced by this method. In this way, the modified
   // `data` and the returned voice probability will be temporally aligned.
   virtual float Suppress(float* data,
                          size_t data_length,
                          int num_channels,
                          const float* detection_data,
                          size_t detection_length,
                          const float* reference_data,
                          size_t reference_length,
                          float voice_probability,
                          bool key_pressed) = 0;
 };

 }  // namespace webrtc

 #endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
	/*
	* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
	#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_

	#include <cstddef>

	namespace webrtc {

	// Detects transients in an audio stream and suppress them using a simple
	// restoration algorithm that attenuates unexpected spikes in the spectrum.
	class TransientSuppressor {
	public:
	// Type of VAD used by the caller to compute the `voice_probability` argument
	// `Suppress()`.
	enum class VadMode {
	// By default, `TransientSuppressor` assumes that `voice_probability` is
	// computed by `AgcManagerDirect`.
	kDefault = 0,
	// Use this mode when `TransientSuppressor` must assume that
	// `voice_probability` is computed by the RNN VAD.
	kRnnVad,
	// Use this mode to let `TransientSuppressor::Suppressor()` ignore
	// `voice_probability` and behave as if voice information is unavailable
	// (regardless of the passed value).
	kNoVad,
	};

	virtual ~TransientSuppressor() {}

	virtual void Initialize(int sample_rate_hz,
	int detector_rate_hz,
	int num_channels) = 0;

	// Processes a `data` chunk, and returns it with keystrokes suppressed from
	// it. The float format is assumed to be int16 ranged. If there are more than
	// one channel, the chunks are concatenated one after the other in `data`.
	// `data_length` must be equal to `data_length_`.
	// `num_channels` must be equal to `num_channels_`.
	// A sub-band, ideally the higher, can be used as `detection_data`. If it is
	// NULL, `data` is used for the detection too. The `detection_data` is always
	// assumed mono.
	// If a reference signal (e.g. keyboard microphone) is available, it can be
	// passed in as `reference_data`. It is assumed mono and must have the same
	// length as `data`. NULL is accepted if unavailable.
	// This suppressor performs better if voice information is available.
	// `voice_probability` is the probability of voice being present in this chunk
	// of audio. If voice information is not available, `voice_probability` must
	// always be set to 1.
	// `key_pressed` determines if a key was pressed on this audio chunk.
	// Returns a delayed version of `voice_probability` according to the
	// algorithmic delay introduced by this method. In this way, the modified
	// `data` and the returned voice probability will be temporally aligned.
	virtual float Suppress(float* data,
	size_t data_length,
	int num_channels,
	const float* detection_data,
	size_t detection_length,
	const float* reference_data,
	size_t reference_length,
	float voice_probability,
	bool key_pressed) = 0;
	};

	} // namespace webrtc

	#endif // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_