| /* |
| * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_ |
| #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_ |
| |
| #include "webrtc/common_audio/lapped_transform.h" |
| #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" |
| #include "webrtc/modules/audio_processing/include/audio_processing.h" |
| |
| namespace webrtc { |
| |
| // Enhances sound sources coming directly in front of a uniform linear array |
| // and suppresses sound sources coming from all other directions. Operates on |
| // multichannel signals and produces single-channel output. |
| // |
| // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear |
| // Beamforming Postprocessor" by Bastiaan Kleijn. |
| // |
| // TODO: Target angle assumed to be 0. Parameterize target angle. |
| class Beamformer : public LappedTransform::Callback { |
| public: |
| // At the moment it only accepts uniform linear microphone arrays. Using the |
| // first microphone as a reference position [0, 0, 0] is a natural choice. |
| explicit Beamformer(const std::vector<Point>& array_geometry); |
| virtual ~Beamformer() {}; |
| |
| // Sample rate corresponds to the lower band. |
| // Needs to be called before the Beamformer can be used. |
| virtual void Initialize(int chunk_size_ms, int sample_rate_hz); |
| |
| // Process one time-domain chunk of audio. The audio can be separated into |
| // two signals by frequency, with the higher half passed in as the second |
| // parameter. Use NULL for |high_pass_split_input| if you only have one |
| // audio signal. The number of frames and channels must correspond to the |
| // ctor parameters. The same signal can be passed in as |input| and |output|. |
| virtual void ProcessChunk(const float* const* input, |
| const float* const* high_pass_split_input, |
| int num_input_channels, |
| int num_frames_per_band, |
| float* const* output, |
| float* const* high_pass_split_output); |
| // After processing each block |is_target_present_| is set to true if the |
| // target signal es present and to false otherwise. This methods can be called |
| // to know if the data is target signal or interference and process it |
| // accordingly. |
| virtual bool is_target_present() { return is_target_present_; } |
| |
| protected: |
| // Process one frequency-domain block of audio. This is where the fun |
| // happens. Implements LappedTransform::Callback. |
| void ProcessAudioBlock(const complex<float>* const* input, |
| int num_input_channels, |
| int num_freq_bins, |
| int num_output_channels, |
| complex<float>* const* output) override; |
| |
| private: |
| typedef Matrix<float> MatrixF; |
| typedef ComplexMatrix<float> ComplexMatrixF; |
| typedef complex<float> complex_f; |
| |
| void InitDelaySumMasks(); |
| void InitTargetCovMats(); // TODO: Make this depend on target angle. |
| void InitInterfCovMats(); |
| |
| // An implementation of equation 18, which calculates postfilter masks that, |
| // when applied, minimize the mean-square error of our estimation of the |
| // desired signal. A sub-task is to calculate lambda, which is solved via |
| // equation 13. |
| float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, |
| float rpsiw, |
| float ratio_rxiw_rxim, |
| float rmxi_r, |
| float mask_threshold); |
| |
| // Prevents the postfilter masks from degenerating too quickly (a cause of |
| // musical noise). |
| void ApplyMaskSmoothing(); |
| |
| // The postfilter masks are unreliable at low frequencies. Calculates a better |
| // mask by averaging mid-low frequency values. |
| void ApplyLowFrequencyCorrection(); |
| |
| // Postfilter masks are also unreliable at high frequencies. Average mid-high |
| // frequency masks to calculate a single mask per block which can be applied |
| // in the time-domain. Further, we average these block-masks over a chunk, |
| // resulting in one postfilter mask per audio chunk. This allows us to skip |
| // both transforming and blocking the high-frequency signal. |
| void ApplyHighFrequencyCorrection(); |
| |
| // Applies both sets of masks to |input| and store in |output|. |
| void ApplyMasks(const complex_f* const* input, complex_f* const* output); |
| |
| float MicSpacingFromGeometry(const std::vector<Point>& array_geometry); |
| void EstimateTargetPresence(); |
| |
| static const int kFftSize = 256; |
| static const int kNumFreqBins = kFftSize / 2 + 1; |
| |
| // Deals with the fft transform and blocking. |
| int chunk_length_; |
| rtc::scoped_ptr<LappedTransform> lapped_transform_; |
| float window_[kFftSize]; |
| |
| // Parameters exposed to the user. |
| const int num_input_channels_; |
| int sample_rate_hz_; |
| const float mic_spacing_; |
| |
| // Calculated based on user-input and constants in the .cc file. |
| int low_average_start_bin_; |
| int low_average_end_bin_; |
| int high_average_start_bin_; |
| int high_average_end_bin_; |
| |
| // Old masks are saved for smoothing. Matrix of size 1 x |kNumFreqBins|. |
| float postfilter_mask_[kNumFreqBins]; |
| float new_mask_[kNumFreqBins]; |
| |
| // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. |
| ComplexMatrixF delay_sum_masks_[kNumFreqBins]; |
| ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; |
| |
| // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x |
| // |num_input_channels_|. |
| ComplexMatrixF target_cov_mats_[kNumFreqBins]; |
| |
| // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x |
| // |num_input_channels_|. |
| ComplexMatrixF interf_cov_mats_[kNumFreqBins]; |
| ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins]; |
| |
| // Of length |kNumFreqBins|. |
| float mask_thresholds_[kNumFreqBins]; |
| float wave_numbers_[kNumFreqBins]; |
| |
| // Preallocated for ProcessAudioBlock() |
| // Of length |kNumFreqBins|. |
| float rxiws_[kNumFreqBins]; |
| float rpsiws_[kNumFreqBins]; |
| float reflected_rpsiws_[kNumFreqBins]; |
| |
| // The microphone normalization factor. |
| ComplexMatrixF eig_m_; |
| |
| // For processing the high-frequency input signal. |
| float high_pass_postfilter_mask_; |
| |
| // True when the target signal is present. |
| bool is_target_present_; |
| // Number of blocks after which the data is considered interference if the |
| // mask does not pass |kMaskSignalThreshold|. |
| int hold_target_blocks_; |
| // Number of blocks since the last mask that passed |kMaskSignalThreshold|. |
| int interference_blocks_count_; |
| }; |
| |
| } // namespace webrtc |
| |
| #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_ |