Pull the PostFilter out into its own NonlinearBeamformer API
This is done to avoid having a nonlinear component in the AEC path.
The linear delay-and-sum now runs before the AEC, and the postfilter runs after it.
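
Roughly, the new call pattern on the capture path is (a sketch only; the buffer
and variable names around the beamformer calls are illustrative, not the exact
APM code):

  // One postfilter output channel, as used by APM.
  NonlinearBeamformer bf(array_geometry, 1u /* num_postfilter_channels */);
  bf.Initialize(kChunkSizeMs, split_rate);

  // Per 10 ms capture chunk:
  bf.AnalyzeChunk(*capture->split_data_f());  // linear part, before the AEC
  capture->set_num_channels(1);               // keep only the leftmost channel
  // ... AEC / AECM process the single remaining channel here ...
  bf.PostFilter(capture->split_data_f());     // nonlinear mask, after the AEC
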
This change landed originally at: https://codereview.webrtc.org/1982183002/
R=peah@webrtc.org
TBR=henrik.lundin@webrtc.org
Review URL: https://codereview.webrtc.org/2110593003 .
Cr-Commit-Position: refs/heads/master@{#13371}
diff --git a/webrtc/common_audio/lapped_transform.cc b/webrtc/common_audio/lapped_transform.cc
index 5ab1db1..006bda0 100644
--- a/webrtc/common_audio/lapped_transform.cc
+++ b/webrtc/common_audio/lapped_transform.cc
@@ -83,7 +83,7 @@
cplx_post_(num_out_channels,
cplx_length_,
RealFourier::kFftBufferAlignment) {
- RTC_CHECK(num_in_channels_ > 0 && num_out_channels_ > 0);
+ RTC_CHECK(num_in_channels_ > 0);
RTC_CHECK_GT(block_length_, 0u);
RTC_CHECK_GT(chunk_length_, 0u);
RTC_CHECK(block_processor_);
diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn
index 21b7ced..9c1674a 100644
--- a/webrtc/modules/audio_processing/BUILD.gn
+++ b/webrtc/modules/audio_processing/BUILD.gn
@@ -55,7 +55,6 @@
"audio_processing_impl.h",
"beamformer/array_util.cc",
"beamformer/array_util.h",
- "beamformer/beamformer.h",
"beamformer/complex_matrix.h",
"beamformer/covariance_matrix_generator.cc",
"beamformer/covariance_matrix_generator.h",
diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi
index 90cf055..0a2f413 100644
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@@ -66,7 +66,6 @@
'audio_processing_impl.h',
'beamformer/array_util.cc',
'beamformer/array_util.h',
- 'beamformer/beamformer.h',
'beamformer/complex_matrix.h',
'beamformer/covariance_matrix_generator.cc',
'beamformer/covariance_matrix_generator.h',
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index afeebba..2a2e54d 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -128,10 +128,10 @@
};
struct AudioProcessingImpl::ApmPrivateSubmodules {
- explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)
+ explicit ApmPrivateSubmodules(NonlinearBeamformer* beamformer)
: beamformer(beamformer) {}
// Accessed internally from capture or during initialization
- std::unique_ptr<Beamformer<float>> beamformer;
+ std::unique_ptr<NonlinearBeamformer> beamformer;
std::unique_ptr<AgcManagerDirect> agc_manager;
std::unique_ptr<LevelController> level_controller;
};
@@ -146,7 +146,7 @@
}
AudioProcessing* AudioProcessing::Create(const Config& config,
- Beamformer<float>* beamformer) {
+ NonlinearBeamformer* beamformer) {
AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer);
if (apm->Initialize() != kNoError) {
delete apm;
@@ -160,7 +160,7 @@
: AudioProcessingImpl(config, nullptr) {}
AudioProcessingImpl::AudioProcessingImpl(const Config& config,
- Beamformer<float>* beamformer)
+ NonlinearBeamformer* beamformer)
: public_submodules_(new ApmPublicSubmodules()),
private_submodules_(new ApmPrivateSubmodules(beamformer)),
constants_(config.Get<ExperimentalAgc>().startup_min_volume,
@@ -699,8 +699,8 @@
}
if (capture_nonlocked_.beamformer_enabled) {
- private_submodules_->beamformer->ProcessChunk(*ca->split_data_f(),
- ca->split_data_f());
+ private_submodules_->beamformer->AnalyzeChunk(*ca->split_data_f());
+ // Discards all channels but the leftmost one.
ca->set_num_channels(1);
}
@@ -746,6 +746,10 @@
RETURN_ON_ERR(public_submodules_->echo_control_mobile->ProcessCaptureAudio(
ca, stream_delay_ms()));
+ if (capture_nonlocked_.beamformer_enabled) {
+ private_submodules_->beamformer->PostFilter(ca->split_data_f());
+ }
+
public_submodules_->voice_detection->ProcessCaptureAudio(ca);
if (constants_.use_experimental_agc &&
@@ -1223,7 +1227,7 @@
if (capture_nonlocked_.beamformer_enabled) {
if (!private_submodules_->beamformer) {
private_submodules_->beamformer.reset(new NonlinearBeamformer(
- capture_.array_geometry, capture_.target_direction));
+ capture_.array_geometry, 1u, capture_.target_direction));
}
private_submodules_->beamformer->Initialize(kChunkSizeMs,
capture_nonlocked_.split_rate);
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index a79d028..4b9011d 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -36,8 +36,7 @@
class AgcManagerDirect;
class AudioConverter;
-template<typename T>
-class Beamformer;
+class NonlinearBeamformer;
class AudioProcessingImpl : public AudioProcessing {
public:
@@ -45,7 +44,7 @@
// Acquires both the render and capture locks.
explicit AudioProcessingImpl(const Config& config);
// AudioProcessingImpl takes ownership of beamformer.
- AudioProcessingImpl(const Config& config, Beamformer<float>* beamformer);
+ AudioProcessingImpl(const Config& config, NonlinearBeamformer* beamformer);
virtual ~AudioProcessingImpl();
int Initialize() override;
int Initialize(int input_sample_rate_hz,
diff --git a/webrtc/modules/audio_processing/audio_processing_unittest.cc b/webrtc/modules/audio_processing/audio_processing_unittest.cc
index e5ab3da..23705e7 100644
--- a/webrtc/modules/audio_processing/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/audio_processing_unittest.cc
@@ -1284,7 +1284,7 @@
geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
config.Set<Beamforming>(new Beamforming(true, geometry));
testing::NiceMock<MockNonlinearBeamformer>* beamformer =
- new testing::NiceMock<MockNonlinearBeamformer>(geometry);
+ new testing::NiceMock<MockNonlinearBeamformer>(geometry, 1u);
std::unique_ptr<AudioProcessing> apm(
AudioProcessing::Create(config, beamformer));
EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.h b/webrtc/modules/audio_processing/beamformer/beamformer.h
deleted file mode 100644
index 6a9ff45..0000000
--- a/webrtc/modules/audio_processing/beamformer/beamformer.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
-
-#include "webrtc/common_audio/channel_buffer.h"
-#include "webrtc/modules/audio_processing/beamformer/array_util.h"
-
-namespace webrtc {
-
-template<typename T>
-class Beamformer {
- public:
- virtual ~Beamformer() {}
-
- // Process one time-domain chunk of audio. The audio is expected to be split
- // into frequency bands inside the ChannelBuffer. The number of frames and
- // channels must correspond to the constructor parameters. The same
- // ChannelBuffer can be passed in as |input| and |output|.
- virtual void ProcessChunk(const ChannelBuffer<T>& input,
- ChannelBuffer<T>* output) = 0;
-
- // Sample rate corresponds to the lower band.
- // Needs to be called before the the Beamformer can be used.
- virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;
-
- // Aim the beamformer at a point in space.
- virtual void AimAt(const SphericalPointf& spherical_point) = 0;
-
- // Indicates whether a given point is inside of the beam.
- virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }
-
- // Returns true if the current data contains the target signal.
- // Which signals are considered "targets" is implementation dependent.
- virtual bool is_target_present() = 0;
-};
-
-} // namespace webrtc
-
-#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
diff --git a/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h b/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h
index e2b4417..e0a1c6f 100644
--- a/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h
+++ b/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h
@@ -20,12 +20,13 @@
class MockNonlinearBeamformer : public NonlinearBeamformer {
public:
- explicit MockNonlinearBeamformer(const std::vector<Point>& array_geometry)
- : NonlinearBeamformer(array_geometry) {}
+ MockNonlinearBeamformer(const std::vector<Point>& array_geometry,
+ size_t num_postfilter_channels)
+ : NonlinearBeamformer(array_geometry, num_postfilter_channels) {}
MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz));
- MOCK_METHOD2(ProcessChunk, void(const ChannelBuffer<float>& input,
- ChannelBuffer<float>* output));
+ MOCK_METHOD1(AnalyzeChunk, void(const ChannelBuffer<float>& data));
+ MOCK_METHOD1(PostFilter, void(ChannelBuffer<float>* data));
MOCK_METHOD1(IsInBeam, bool(const SphericalPointf& spherical_point));
MOCK_METHOD0(is_target_present, bool());
};
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
index f5bdd6a..5412fb5 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
@@ -122,18 +122,6 @@
return static_cast<size_t>(std::floor(x + 0.5f));
}
-// Calculates the sum of absolute values of a complex matrix.
-float SumAbs(const ComplexMatrix<float>& mat) {
- float sum_abs = 0.f;
- const complex<float>* const* mat_els = mat.elements();
- for (size_t i = 0; i < mat.num_rows(); ++i) {
- for (size_t j = 0; j < mat.num_columns(); ++j) {
- sum_abs += std::abs(mat_els[i][j]);
- }
- }
- return sum_abs;
-}
-
// Calculates the sum of squares of a complex matrix.
float SumSquares(const ComplexMatrix<float>& mat) {
float sum_squares = 0.f;
@@ -183,10 +171,46 @@
// static
const size_t NonlinearBeamformer::kNumFreqBins;
+PostFilterTransform::PostFilterTransform(size_t num_channels,
+ size_t chunk_length,
+ float* window,
+ size_t fft_size)
+ : transform_(num_channels,
+ num_channels,
+ chunk_length,
+ window,
+ fft_size,
+ fft_size / 2,
+ this),
+ num_freq_bins_(fft_size / 2 + 1) {}
+
+void PostFilterTransform::ProcessChunk(float* const* data, float* final_mask) {
+ final_mask_ = final_mask;
+ transform_.ProcessChunk(data, data);
+}
+
+void PostFilterTransform::ProcessAudioBlock(const complex<float>* const* input,
+ size_t num_input_channels,
+ size_t num_freq_bins,
+ size_t num_output_channels,
+ complex<float>* const* output) {
+ RTC_DCHECK_EQ(num_freq_bins_, num_freq_bins);
+ RTC_DCHECK_EQ(num_input_channels, num_output_channels);
+
+ for (size_t ch = 0; ch < num_input_channels; ++ch) {
+ for (size_t f_ix = 0; f_ix < num_freq_bins_; ++f_ix) {
+ output[ch][f_ix] =
+ kCompensationGain * final_mask_[f_ix] * input[ch][f_ix];
+ }
+ }
+}
+
NonlinearBeamformer::NonlinearBeamformer(
const std::vector<Point>& array_geometry,
+ size_t num_postfilter_channels,
SphericalPointf target_direction)
: num_input_channels_(array_geometry.size()),
+ num_postfilter_channels_(num_postfilter_channels),
array_geometry_(GetCenteredArray(array_geometry)),
array_normal_(GetArrayNormalIfExists(array_geometry)),
min_mic_spacing_(GetMinimumSpacing(array_geometry)),
@@ -208,18 +232,21 @@
hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
interference_blocks_count_ = hold_target_blocks_;
- lapped_transform_.reset(new LappedTransform(num_input_channels_,
- 1,
- chunk_length_,
- window_,
- kFftSize,
- kFftSize / 2,
- this));
+ process_transform_.reset(new LappedTransform(num_input_channels_,
+ 0u,
+ chunk_length_,
+ window_,
+ kFftSize,
+ kFftSize / 2,
+ this));
+ postfilter_transform_.reset(new PostFilterTransform(
+ num_postfilter_channels_, chunk_length_, window_, kFftSize));
+ const float wave_number_step =
+ (2.f * M_PI * sample_rate_hz_) / (kFftSize * kSpeedOfSoundMeterSeconds);
for (size_t i = 0; i < kNumFreqBins; ++i) {
time_smooth_mask_[i] = 1.f;
final_mask_[i] = 1.f;
- float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
- wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
+ wave_numbers_[i] = i * wave_number_step;
}
InitLowFrequencyCorrectionRanges();
@@ -306,9 +333,6 @@
complex_f norm_factor = sqrt(
ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
- normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
- normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
- normalized_delay_sum_masks_[f_ix]));
}
}
@@ -366,30 +390,49 @@
}
}
-void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
- ChannelBuffer<float>* output) {
- RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
- RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
+void NonlinearBeamformer::AnalyzeChunk(const ChannelBuffer<float>& data) {
+ RTC_DCHECK_EQ(data.num_channels(), num_input_channels_);
+ RTC_DCHECK_EQ(data.num_frames_per_band(), chunk_length_);
- float old_high_pass_mask = high_pass_postfilter_mask_;
- lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
- // Ramp up/down for smoothing. 1 mask per 10ms results in audible
- // discontinuities.
+ old_high_pass_mask_ = high_pass_postfilter_mask_;
+ process_transform_->ProcessChunk(data.channels(0), nullptr);
+}
+
+void NonlinearBeamformer::PostFilter(ChannelBuffer<float>* data) {
+ RTC_DCHECK_EQ(data->num_frames_per_band(), chunk_length_);
+ // TODO(aluebs): Change to RTC_CHECK_EQ once the ChannelBuffer is updated.
+ RTC_DCHECK_GE(data->num_channels(), num_postfilter_channels_);
+
+ postfilter_transform_->ProcessChunk(data->channels(0), final_mask_);
+
+ // Ramp up/down for smoothing is needed in order to avoid discontinuities in
+ // the transitions between 10 ms frames.
const float ramp_increment =
- (high_pass_postfilter_mask_ - old_high_pass_mask) /
- input.num_frames_per_band();
- // Apply the smoothed high-pass mask to the first channel of each band.
- // This can be done because the effect of the linear beamformer is negligible
- // compared to the post-filter.
- for (size_t i = 1; i < input.num_bands(); ++i) {
- float smoothed_mask = old_high_pass_mask;
- for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
+ (high_pass_postfilter_mask_ - old_high_pass_mask_) /
+ data->num_frames_per_band();
+ for (size_t i = 1; i < data->num_bands(); ++i) {
+ float smoothed_mask = old_high_pass_mask_;
+ for (size_t j = 0; j < data->num_frames_per_band(); ++j) {
smoothed_mask += ramp_increment;
- output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
+ for (size_t k = 0; k < num_postfilter_channels_; ++k) {
+ data->channels(i)[k][j] *= smoothed_mask;
+ }
}
}
}
+void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
+ ChannelBuffer<float>* output) {
+ RTC_DCHECK_GT(output->num_channels(), 0u);
+ RTC_DCHECK_EQ(output->num_frames_per_band(), input.num_frames_per_band());
+ AnalyzeChunk(input);
+ for (size_t i = 0u; i < input.num_bands(); ++i) {
+ std::memcpy(output->channels(i)[0], input.channels(i)[0],
+ sizeof(input.channels(0)[0][0]) * input.num_frames_per_band());
+ }
+ PostFilter(output);
+}
+
void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
target_angle_radians_ = target_direction.azimuth();
InitHighFrequencyCorrectionRanges();
@@ -414,7 +457,7 @@
complex_f* const* output) {
RTC_CHECK_EQ(kNumFreqBins, num_freq_bins);
RTC_CHECK_EQ(num_input_channels_, num_input_channels);
- RTC_CHECK_EQ(1u, num_output_channels);
+ RTC_CHECK_EQ(0u, num_output_channels);
// Calculating the post-filter masks. Note that we need two for each
// frequency bin to account for the positive and negative interferer
@@ -456,7 +499,6 @@
ApplyLowFrequencyCorrection();
ApplyHighFrequencyCorrection();
ApplyMaskFrequencySmoothing();
- ApplyMasks(input, output);
}
float NonlinearBeamformer::CalculatePostfilterMask(
@@ -484,22 +526,6 @@
return numerator / denominator;
}
-void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
- complex_f* const* output) {
- complex_f* output_channel = output[0];
- for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
- output_channel[f_ix] = complex_f(0.f, 0.f);
-
- const complex_f* delay_sum_mask_els =
- normalized_delay_sum_masks_[f_ix].elements()[0];
- for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
- output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
- }
-
- output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
- }
-}
-
// Smooth new_mask_ into time_smooth_mask_.
void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h
index b8953b0..10ef6e5 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h
@@ -21,48 +21,76 @@
#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/common_audio/channel_buffer.h"
-#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
+#include "webrtc/modules/audio_processing/beamformer/array_util.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
namespace webrtc {
+class PostFilterTransform : public LappedTransform::Callback {
+ public:
+ PostFilterTransform(size_t num_channels,
+ size_t chunk_length,
+ float* window,
+ size_t fft_size);
+
+ void ProcessChunk(float* const* data, float* final_mask);
+
+ protected:
+ void ProcessAudioBlock(const complex<float>* const* input,
+ size_t num_input_channels,
+ size_t num_freq_bins,
+ size_t num_output_channels,
+ complex<float>* const* output) override;
+
+ private:
+ LappedTransform transform_;
+ const size_t num_freq_bins_;
+ float* final_mask_;
+};
+
// Enhances sound sources coming directly in front of a uniform linear array
// and suppresses sound sources coming from all other directions. Operates on
// multichannel signals and produces single-channel output.
//
// The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear
// Beamforming Postprocessor" by Bastiaan Kleijn.
-class NonlinearBeamformer
- : public Beamformer<float>,
- public LappedTransform::Callback {
+class NonlinearBeamformer : public LappedTransform::Callback {
public:
static const float kHalfBeamWidthRadians;
explicit NonlinearBeamformer(
const std::vector<Point>& array_geometry,
+ size_t num_postfilter_channels = 1u,
SphericalPointf target_direction =
SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));
// Sample rate corresponds to the lower band.
// Needs to be called before the NonlinearBeamformer can be used.
- void Initialize(int chunk_size_ms, int sample_rate_hz) override;
+ virtual void Initialize(int chunk_size_ms, int sample_rate_hz);
- // Process one time-domain chunk of audio. The audio is expected to be split
+ // Analyzes one time-domain chunk of audio. The audio is expected to be split
// into frequency bands inside the ChannelBuffer. The number of frames and
- // channels must correspond to the constructor parameters. The same
- // ChannelBuffer can be passed in as |input| and |output|.
- void ProcessChunk(const ChannelBuffer<float>& input,
- ChannelBuffer<float>* output) override;
+ // channels must correspond to the constructor parameters.
+ virtual void AnalyzeChunk(const ChannelBuffer<float>& data);
- void AimAt(const SphericalPointf& target_direction) override;
+ // Applies the postfilter mask to one chunk of audio. The audio is expected to
+ // be split into frequency bands inside the ChannelBuffer. The number of
+ // frames and channels must correspond to the constructor parameters.
+ virtual void PostFilter(ChannelBuffer<float>* data);
- bool IsInBeam(const SphericalPointf& spherical_point) override;
+ // TODO(aluebs): Remove once the dependencies have moved to the new API.
+ virtual void ProcessChunk(const ChannelBuffer<float>& input,
+ ChannelBuffer<float>* output);
+
+ virtual void AimAt(const SphericalPointf& target_direction);
+
+ virtual bool IsInBeam(const SphericalPointf& spherical_point);
// After processing each block |is_target_present_| is set to true if the
// target signal is present and to false otherwise. This method can be called
// to know if the data is target signal or interference and process it
// accordingly.
- bool is_target_present() override { return is_target_present_; }
+ virtual bool is_target_present() { return is_target_present_; }
protected:
// Process one frequency-domain block of audio. This is where the fun
@@ -116,8 +144,8 @@
// Compute the means needed for the above frequency correction.
float MaskRangeMean(size_t start_bin, size_t end_bin);
- // Applies both sets of masks to |input| and store in |output|.
- void ApplyMasks(const complex_f* const* input, complex_f* const* output);
+ // Applies the post-filter mask to |input| and stores the result in |output|.
+ void ApplyPostFilter(const complex_f* input, complex_f* output);
void EstimateTargetPresence();
@@ -126,11 +154,13 @@
// Deals with the fft transform and blocking.
size_t chunk_length_;
- std::unique_ptr<LappedTransform> lapped_transform_;
+ std::unique_ptr<LappedTransform> process_transform_;
+ std::unique_ptr<PostFilterTransform> postfilter_transform_;
float window_[kFftSize];
// Parameters exposed to the user.
const size_t num_input_channels_;
+ const size_t num_postfilter_channels_;
int sample_rate_hz_;
const std::vector<Point> array_geometry_;
@@ -161,7 +191,6 @@
// Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
ComplexMatrixF delay_sum_masks_[kNumFreqBins];
- ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
// Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
@@ -186,6 +215,7 @@
// For processing the high-frequency input signal.
float high_pass_postfilter_mask_;
+ float old_high_pass_mask_;
// True when the target signal is present.
bool is_target_present_;
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc
index d187552..233d406 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc
@@ -43,14 +43,14 @@
google::ParseCommandLineFlags(&argc, &argv, true);
WavReader in_file(FLAGS_i);
- WavWriter out_file(FLAGS_o, in_file.sample_rate(), 1);
+ WavWriter out_file(FLAGS_o, in_file.sample_rate(), in_file.num_channels());
const size_t num_mics = in_file.num_channels();
const std::vector<Point> array_geometry =
ParseArrayGeometry(FLAGS_mic_positions, num_mics);
RTC_CHECK_EQ(array_geometry.size(), num_mics);
- NonlinearBeamformer bf(array_geometry);
+ NonlinearBeamformer bf(array_geometry, array_geometry.size());
bf.Initialize(kChunkSizeMs, in_file.sample_rate());
printf("Input file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n",
@@ -58,24 +58,22 @@
printf("Output file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n",
FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate());
- ChannelBuffer<float> in_buf(
+ ChannelBuffer<float> buf(
rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond),
in_file.num_channels());
- ChannelBuffer<float> out_buf(
- rtc::CheckedDivExact(out_file.sample_rate(), kChunksPerSecond),
- out_file.num_channels());
- std::vector<float> interleaved(in_buf.size());
+ std::vector<float> interleaved(buf.size());
while (in_file.ReadSamples(interleaved.size(),
&interleaved[0]) == interleaved.size()) {
FloatS16ToFloat(&interleaved[0], interleaved.size(), &interleaved[0]);
- Deinterleave(&interleaved[0], in_buf.num_frames(),
- in_buf.num_channels(), in_buf.channels());
+ Deinterleave(&interleaved[0], buf.num_frames(),
+ buf.num_channels(), buf.channels());
- bf.ProcessChunk(in_buf, &out_buf);
+ bf.AnalyzeChunk(buf);
+ bf.PostFilter(&buf);
- Interleave(out_buf.channels(), out_buf.num_frames(),
- out_buf.num_channels(), &interleaved[0]);
+ Interleave(buf.channels(), buf.num_frames(),
+ buf.num_channels(), &interleaved[0]);
FloatToFloatS16(&interleaved[0], interleaved.size(), &interleaved[0]);
out_file.WriteSamples(&interleaved[0], interleaved.size());
}
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc
index fbf0ec0..1ad3ed6 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc
@@ -57,14 +57,14 @@
void ProcessOneFrame(int sample_rate_hz,
AudioBuffer* capture_audio_buffer,
- Beamformer<float>* beamformer) {
+ NonlinearBeamformer* beamformer) {
if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
capture_audio_buffer->SplitIntoFrequencyBands();
}
- beamformer->ProcessChunk(*capture_audio_buffer->split_data_f(),
- capture_audio_buffer->split_data_f());
+ beamformer->AnalyzeChunk(*capture_audio_buffer->split_data_f());
capture_audio_buffer->set_num_channels(1);
+ beamformer->PostFilter(capture_audio_buffer->split_data_f());
if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
capture_audio_buffer->MergeFrequencyBands();
@@ -81,7 +81,7 @@
const std::vector<Point>& array_geometry,
const SphericalPointf& target_direction,
rtc::ArrayView<const float> output_reference) {
- NonlinearBeamformer beamformer(array_geometry, target_direction);
+ NonlinearBeamformer beamformer(array_geometry, 1u, target_direction);
beamformer.Initialize(AudioProcessing::kChunkSizeMs,
BeamformerSampleRate(sample_rate_hz));
@@ -159,7 +159,7 @@
std::vector<Point> array_geometry;
array_geometry.push_back(Point(-0.025f, 0.f, 0.f));
array_geometry.push_back(Point(0.025f, 0.f, 0.f));
- NonlinearBeamformer bf(array_geometry);
+ NonlinearBeamformer bf(array_geometry, 1u);
bf.Initialize(kChunkSizeMs, kSampleRateHz);
// The default constructor parameter sets the target angle to PI / 2.
Verify(&bf, static_cast<float>(M_PI) / 2.f);
@@ -176,7 +176,7 @@
array_geometry.push_back(Point(-0.1f, 0.f, 0.f));
array_geometry.push_back(Point(0.f, 0.f, 0.f));
array_geometry.push_back(Point(0.2f, 0.f, 0.f));
- NonlinearBeamformer bf(array_geometry);
+ NonlinearBeamformer bf(array_geometry, 1u);
bf.Initialize(kChunkSizeMs, kSampleRateHz);
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
@@ -197,7 +197,7 @@
array_geometry.push_back(Point(0.2f, 0.f, 0.f));
array_geometry.push_back(Point(0.1f, 0.f, 0.2f));
array_geometry.push_back(Point(0.f, 0.f, -0.1f));
- NonlinearBeamformer bf(array_geometry);
+ NonlinearBeamformer bf(array_geometry, 1u);
bf.Initialize(kChunkSizeMs, kSampleRateHz);
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
@@ -216,7 +216,7 @@
array_geometry.push_back(Point(0.f, 0.f, 0.f));
array_geometry.push_back(Point(0.2f, 0.f, 0.f));
array_geometry.push_back(Point(0.f, 0.1f, -0.2f));
- NonlinearBeamformer bf(array_geometry);
+ NonlinearBeamformer bf(array_geometry, 1u);
bf.Initialize(kChunkSizeMs, kSampleRateHz);
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
@@ -235,7 +235,7 @@
array_geometry.push_back(Point(0.1f, 0.f, 0.f));
array_geometry.push_back(Point(0.f, 0.2f, 0.f));
array_geometry.push_back(Point(0.f, 0.f, 0.3f));
- NonlinearBeamformer bf(array_geometry);
+ NonlinearBeamformer bf(array_geometry, 1u);
bf.Initialize(kChunkSizeMs, kSampleRateHz);
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
@@ -262,8 +262,8 @@
TEST(BeamformerBitExactnessTest,
Stereo16kHz_ArrayGeometry1_TargetDirection1) {
- const float kOutputReference[] = {0.000064f, 0.000211f, 0.000075f,
- 0.000064f, 0.000211f, 0.000075f};
+ const float kOutputReference[] = {-0.000077f, -0.000147f, -0.000138f,
+ -0.000077f, -0.000147f, -0.000138f};
RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(1),
TargetDirection1, kOutputReference);
@@ -271,8 +271,8 @@
TEST(BeamformerBitExactnessTest,
Stereo32kHz_ArrayGeometry1_TargetDirection1) {
- const float kOutputReference[] = {0.000183f, 0.000183f, 0.000183f,
- 0.000183f, 0.000183f, 0.000183f};
+ const float kOutputReference[] = {-0.000061f, -0.000061f, -0.000061f,
+ -0.000061f, -0.000061f, -0.000061f};
RunBitExactnessTest(AudioProcessing::kSampleRate32kHz, CreateArrayGeometry(1),
TargetDirection1, kOutputReference);
@@ -280,8 +280,8 @@
TEST(BeamformerBitExactnessTest,
Stereo48kHz_ArrayGeometry1_TargetDirection1) {
- const float kOutputReference[] = {0.000155f, 0.000152f, 0.000159f,
- 0.000155f, 0.000152f, 0.000159f};
+ const float kOutputReference[] = {0.000450f, 0.000436f, 0.000433f,
+ 0.000450f, 0.000436f, 0.000433f};
RunBitExactnessTest(AudioProcessing::kSampleRate48kHz, CreateArrayGeometry(1),
TargetDirection1, kOutputReference);
@@ -300,8 +300,8 @@
TEST(BeamformerBitExactnessTest,
Stereo16kHz_ArrayGeometry1_TargetDirection2) {
- const float kOutputReference[] = {0.001144f, -0.001026f, 0.001074f,
- 0.001144f, -0.001026f, 0.001074f};
+ const float kOutputReference[] = {0.000221f, -0.000249f, 0.000140f,
+ 0.000221f, -0.000249f, 0.000140f};
RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(1),
TargetDirection2, kOutputReference);
@@ -309,8 +309,8 @@
TEST(BeamformerBitExactnessTest,
Stereo32kHz_ArrayGeometry1_TargetDirection2) {
- const float kOutputReference[] = {0.000732f, -0.000397f, 0.000610f,
- 0.000732f, -0.000397f, 0.000610f};
+ const float kOutputReference[] = {0.000763f, -0.000336f, 0.000549f,
+ 0.000763f, -0.000336f, 0.000549f};
RunBitExactnessTest(AudioProcessing::kSampleRate32kHz, CreateArrayGeometry(1),
TargetDirection2, kOutputReference);
@@ -318,8 +318,8 @@
TEST(BeamformerBitExactnessTest,
Stereo48kHz_ArrayGeometry1_TargetDirection2) {
- const float kOutputReference[] = {0.000106f, -0.000464f, 0.000188f,
- 0.000106f, -0.000464f, 0.000188f};
+ const float kOutputReference[] = {-0.000004f, -0.000494f, 0.000255f,
+ -0.000004f, -0.000494f, 0.000255f};
RunBitExactnessTest(AudioProcessing::kSampleRate48kHz, CreateArrayGeometry(1),
TargetDirection2, kOutputReference);
@@ -327,8 +327,8 @@
TEST(BeamformerBitExactnessTest,
Stereo8kHz_ArrayGeometry2_TargetDirection2) {
- const float kOutputReference[] = {-0.000649f, 0.000576f, -0.000148f,
- -0.000649f, 0.000576f, -0.000148f};
+ const float kOutputReference[] = {-0.000914f, 0.002170f, -0.002382f,
+ -0.000914f, 0.002170f, -0.002382f};
RunBitExactnessTest(AudioProcessing::kSampleRate8kHz, CreateArrayGeometry(2),
TargetDirection2, kOutputReference);
@@ -336,8 +336,8 @@
TEST(BeamformerBitExactnessTest,
Stereo16kHz_ArrayGeometry2_TargetDirection2) {
- const float kOutputReference[] = {0.000808f, -0.000695f, 0.000739f,
- 0.000808f, -0.000695f, 0.000739f};
+ const float kOutputReference[] = {0.000179f, -0.000179f, 0.000081f,
+ 0.000179f, -0.000179f, 0.000081f};
RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(2),
TargetDirection2, kOutputReference);
@@ -345,8 +345,8 @@
TEST(BeamformerBitExactnessTest,
Stereo32kHz_ArrayGeometry2_TargetDirection2) {
- const float kOutputReference[] = {0.000580f, -0.000183f, 0.000458f,
- 0.000580f, -0.000183f, 0.000458f};
+ const float kOutputReference[] = {0.000549f, -0.000214f, 0.000366f,
+ 0.000549f, -0.000214f, 0.000366f};
RunBitExactnessTest(AudioProcessing::kSampleRate32kHz, CreateArrayGeometry(2),
TargetDirection2, kOutputReference);
@@ -354,8 +354,8 @@
TEST(BeamformerBitExactnessTest,
Stereo48kHz_ArrayGeometry2_TargetDirection2) {
- const float kOutputReference[] = {0.000075f, -0.000288f, 0.000156f,
- 0.000075f, -0.000288f, 0.000156f};
+ const float kOutputReference[] = {0.000019f, -0.000310f, 0.000182f,
+ 0.000019f, -0.000310f, 0.000182f};
RunBitExactnessTest(AudioProcessing::kSampleRate48kHz, CreateArrayGeometry(2),
TargetDirection2, kOutputReference);
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index d25c252..06bfc9b 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -31,8 +31,7 @@
class AudioFrame;
-template<typename T>
-class Beamformer;
+class NonlinearBeamformer;
class StreamConfig;
class ProcessingConfig;
@@ -275,7 +274,7 @@
static AudioProcessing* Create(const Config& config);
// Only for testing.
static AudioProcessing* Create(const Config& config,
- Beamformer<float>* beamformer);
+ NonlinearBeamformer* beamformer);
virtual ~AudioProcessing() {}
// Initializes internal states, while retaining all user settings. This