Integrate Intelligibility with APM
- Integrates the intelligibility enhancer into audio_processing.
- Allows modification of the reverse stream when intelligibility is enabled.
- Makes intelligibility available in audioproc_float test.
- Adds reverse stream processing to audioproc_float.
- (removed) Makes intelligibility toggleable in real time in voe_cmd_test.
- Cleans up intelligibility construction, parameters, constants, and dead code.
TBR=pbos@webrtc.org
Review URL: https://codereview.webrtc.org/1234463003
Cr-Commit-Position: refs/heads/master@{#9713}
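
A minimal sketch of the new render-side flow (hypothetical client code, not
part of this CL; assumes 16 kHz mono, deinterleaved float buffers src and
dest, and omits error checking):

    webrtc::Config config;
    config.Set<webrtc::Intelligibility>(new webrtc::Intelligibility(true));
    rtc::scoped_ptr<webrtc::AudioProcessing> apm(
        webrtc::AudioProcessing::Create(config));
    webrtc::StreamConfig reverse_config(16000, 1);
    // With intelligibility enabled, ProcessReverseStream may modify the
    // render audio written to dest.
    apm->ProcessReverseStream(src, reverse_config, reverse_config, dest);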
diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index 9073ad7..6f73262 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -403,21 +403,37 @@
}
}
-void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
- assert(proc_num_frames_ == output_num_frames_);
- assert(num_channels_ == num_input_channels_);
- assert(frame->num_channels_ == num_channels_);
- assert(frame->samples_per_channel_ == proc_num_frames_);
+void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) {
frame->vad_activity_ = activity_;
-
if (!data_changed) {
return;
}
- Interleave(data_->ibuf()->channels(),
- proc_num_frames_,
- num_channels_,
- frame->data_);
+ assert(frame->num_channels_ == num_channels_ || num_channels_ == 1);
+ assert(frame->samples_per_channel_ == output_num_frames_);
+
+ // Resample if necessary.
+ IFChannelBuffer* data_ptr = data_.get();
+ if (proc_num_frames_ != output_num_frames_) {
+ if (!output_buffer_) {
+ output_buffer_.reset(
+ new IFChannelBuffer(output_num_frames_, num_channels_));
+ }
+ for (int i = 0; i < num_channels_; ++i) {
+ output_resamplers_[i]->Resample(
+ data_->fbuf()->channels()[i], proc_num_frames_,
+ output_buffer_->fbuf()->channels()[i], output_num_frames_);
+ }
+ data_ptr = output_buffer_.get();
+ }
+
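+  // Interleave directly when the channel counts match; otherwise duplicate
+  // the single processed channel across all channels of |frame|.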
+ if (frame->num_channels_ == num_channels_) {
+    Interleave(data_ptr->ibuf()->channels(), output_num_frames_,
+               num_channels_, frame->data_);
+ } else {
+    UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0],
+                           output_num_frames_, frame->num_channels_,
+                           frame->data_);
+ }
}
void AudioBuffer::CopyLowPassToReference() {
diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h
index 6750af0..aeb303b 100644
--- a/webrtc/modules/audio_processing/audio_buffer.h
+++ b/webrtc/modules/audio_processing/audio_buffer.h
@@ -109,7 +109,7 @@
void DeinterleaveFrom(AudioFrame* audioFrame);
// If |data_changed| is false, only the non-audio data members will be copied
// to |frame|.
- void InterleaveTo(AudioFrame* frame, bool data_changed) const;
+ void InterleaveTo(AudioFrame* frame, bool data_changed);
// Use for float deinterleaved data.
void CopyFrom(const float* const* data, const StreamConfig& stream_config);
@@ -152,6 +152,7 @@
rtc::scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_;
rtc::scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_;
rtc::scoped_ptr<IFChannelBuffer> input_buffer_;
+ rtc::scoped_ptr<IFChannelBuffer> output_buffer_;
rtc::scoped_ptr<ChannelBuffer<float> > process_buffer_;
ScopedVector<PushSincResampler> input_resamplers_;
ScopedVector<PushSincResampler> output_resamplers_;
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 81d6c70..c9e4ddc 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -15,8 +15,9 @@
#include "webrtc/base/checks.h"
#include "webrtc/base/platform_file.h"
-#include "webrtc/common_audio/include/audio_util.h"
+#include "webrtc/common_audio/audio_converter.h"
#include "webrtc/common_audio/channel_buffer.h"
+#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
extern "C" {
#include "webrtc/modules/audio_processing/aec/aec_core.h"
@@ -29,6 +30,7 @@
#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
+#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#include "webrtc/modules/audio_processing/level_estimator_impl.h"
#include "webrtc/modules/audio_processing/noise_suppression_impl.h"
#include "webrtc/modules/audio_processing/processing_component.h"
@@ -184,6 +186,7 @@
#endif
api_format_({{{kSampleRate16kHz, 1, false},
{kSampleRate16kHz, 1, false},
+ {kSampleRate16kHz, 1, false},
{kSampleRate16kHz, 1, false}}}),
fwd_proc_format_(kSampleRate16kHz),
rev_proc_format_(kSampleRate16kHz, 1),
@@ -210,7 +213,8 @@
#endif
beamformer_enabled_(config.Get<Beamforming>().enabled),
beamformer_(beamformer),
- array_geometry_(config.Get<Beamforming>().array_geometry) {
+ array_geometry_(config.Get<Beamforming>().array_geometry),
+ intelligibility_enabled_(config.Get<Intelligibility>().enabled) {
echo_cancellation_ = new EchoCancellationImpl(this, crit_);
component_list_.push_back(echo_cancellation_);
@@ -282,11 +286,17 @@
ChannelLayout output_layout,
ChannelLayout reverse_layout) {
const ProcessingConfig processing_config = {
- {{input_sample_rate_hz, ChannelsFromLayout(input_layout),
+ {{input_sample_rate_hz,
+ ChannelsFromLayout(input_layout),
LayoutHasKeyboard(input_layout)},
- {output_sample_rate_hz, ChannelsFromLayout(output_layout),
+ {output_sample_rate_hz,
+ ChannelsFromLayout(output_layout),
LayoutHasKeyboard(output_layout)},
- {reverse_sample_rate_hz, ChannelsFromLayout(reverse_layout),
+ {reverse_sample_rate_hz,
+ ChannelsFromLayout(reverse_layout),
+ LayoutHasKeyboard(reverse_layout)},
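+      // The layout-based API has no separate reverse output format, so the
+      // reverse input format is duplicated for the reverse output stream.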
+ {reverse_sample_rate_hz,
+ ChannelsFromLayout(reverse_layout),
LayoutHasKeyboard(reverse_layout)}}};
return Initialize(processing_config);
@@ -301,14 +311,28 @@
const int fwd_audio_buffer_channels =
beamformer_enabled_ ? api_format_.input_stream().num_channels()
: api_format_.output_stream().num_channels();
- if (api_format_.reverse_stream().num_channels() > 0) {
+ const int rev_audio_buffer_out_num_frames =
+ api_format_.reverse_output_stream().num_frames() == 0
+ ? rev_proc_format_.num_frames()
+ : api_format_.reverse_output_stream().num_frames();
+ if (api_format_.reverse_input_stream().num_channels() > 0) {
render_audio_.reset(new AudioBuffer(
- api_format_.reverse_stream().num_frames(),
- api_format_.reverse_stream().num_channels(),
+ api_format_.reverse_input_stream().num_frames(),
+ api_format_.reverse_input_stream().num_channels(),
rev_proc_format_.num_frames(), rev_proc_format_.num_channels(),
- rev_proc_format_.num_frames()));
+ rev_audio_buffer_out_num_frames));
+ if (rev_conversion_needed()) {
+ render_converter_ = AudioConverter::Create(
+ api_format_.reverse_input_stream().num_channels(),
+ api_format_.reverse_input_stream().num_frames(),
+ api_format_.reverse_output_stream().num_channels(),
+ api_format_.reverse_output_stream().num_frames());
+ } else {
+ render_converter_.reset(nullptr);
+ }
} else {
render_audio_.reset(nullptr);
+ render_converter_.reset(nullptr);
}
capture_audio_.reset(new AudioBuffer(
api_format_.input_stream().num_frames(),
@@ -329,6 +353,8 @@
InitializeBeamformer();
+ InitializeIntelligibility();
+
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
int err = WriteInitMessage();
@@ -396,7 +422,8 @@
// ...the forward stream is at 8 kHz.
rev_proc_rate = kSampleRate8kHz;
} else {
- if (api_format_.reverse_stream().sample_rate_hz() == kSampleRate32kHz) {
+ if (api_format_.reverse_input_stream().sample_rate_hz() ==
+ kSampleRate32kHz) {
// ...or the input is at 32 kHz, in which case we use the splitting
// filter rather than the resampler.
rev_proc_rate = kSampleRate32kHz;
@@ -624,6 +651,7 @@
MaybeUpdateHistograms();
AudioBuffer* ca = capture_audio_.get(); // For brevity.
+
if (use_new_agc_ && gain_control_->is_enabled()) {
agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(),
fwd_proc_format_.num_frames());
@@ -634,6 +662,11 @@
ca->SplitIntoFrequencyBands();
}
+ if (intelligibility_enabled_) {
+ intelligibility_enhancer_->AnalyzeCaptureAudio(
+ ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels());
+ }
+
if (beamformer_enabled_) {
beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f());
ca->set_num_channels(1);
@@ -684,50 +717,81 @@
int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
int samples_per_channel,
- int sample_rate_hz,
+ int rev_sample_rate_hz,
ChannelLayout layout) {
const StreamConfig reverse_config = {
- sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
+ rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
};
if (samples_per_channel != reverse_config.num_frames()) {
return kBadDataLengthError;
}
- return AnalyzeReverseStream(data, reverse_config);
+ return AnalyzeReverseStream(data, reverse_config, reverse_config);
+}
+
+int AudioProcessingImpl::ProcessReverseStream(
+ const float* const* src,
+ const StreamConfig& reverse_input_config,
+ const StreamConfig& reverse_output_config,
+ float* const* dest) {
+ RETURN_ON_ERR(
+ AnalyzeReverseStream(src, reverse_input_config, reverse_output_config));
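+  // Copy the result to |dest|: use the enhancer output when the render
+  // stream was processed, convert when only a format change is needed, and
+  // otherwise pass the input through unchanged.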
+ if (is_rev_processed()) {
+ render_audio_->CopyTo(api_format_.reverse_output_stream(), dest);
+ } else if (rev_conversion_needed()) {
+ render_converter_->Convert(src, reverse_input_config.num_samples(), dest,
+ reverse_output_config.num_samples());
+ } else {
+ CopyAudioIfNeeded(src, reverse_input_config.num_frames(),
+ reverse_input_config.num_channels(), dest);
+ }
+
+ return kNoError;
}
int AudioProcessingImpl::AnalyzeReverseStream(
- const float* const* data,
- const StreamConfig& reverse_config) {
+ const float* const* src,
+ const StreamConfig& reverse_input_config,
+ const StreamConfig& reverse_output_config) {
CriticalSectionScoped crit_scoped(crit_);
- if (data == NULL) {
+ if (src == NULL) {
return kNullPointerError;
}
- if (reverse_config.num_channels() <= 0) {
+ if (reverse_input_config.num_channels() <= 0) {
return kBadNumberChannelsError;
}
ProcessingConfig processing_config = api_format_;
- processing_config.reverse_stream() = reverse_config;
+ processing_config.reverse_input_stream() = reverse_input_config;
+ processing_config.reverse_output_stream() = reverse_output_config;
RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
- assert(reverse_config.num_frames() ==
- api_format_.reverse_stream().num_frames());
+ assert(reverse_input_config.num_frames() ==
+ api_format_.reverse_input_stream().num_frames());
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
const size_t channel_size =
- sizeof(float) * api_format_.reverse_stream().num_frames();
- for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i)
- msg->add_channel(data[i], channel_size);
+ sizeof(float) * api_format_.reverse_input_stream().num_frames();
+ for (int i = 0; i < api_format_.reverse_input_stream().num_channels(); ++i)
+ msg->add_channel(src[i], channel_size);
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
- render_audio_->CopyFrom(data, api_format_.reverse_stream());
- return AnalyzeReverseStreamLocked();
+ render_audio_->CopyFrom(src, api_format_.reverse_input_stream());
+ return ProcessReverseStreamLocked();
+}
+
+int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
+ RETURN_ON_ERR(AnalyzeReverseStream(frame));
+ if (is_rev_processed()) {
+ render_audio_->InterleaveTo(frame, true);
+ }
+
+ return kNoError;
}
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
@@ -752,12 +816,18 @@
}
ProcessingConfig processing_config = api_format_;
- processing_config.reverse_stream().set_sample_rate_hz(frame->sample_rate_hz_);
- processing_config.reverse_stream().set_num_channels(frame->num_channels_);
+ processing_config.reverse_input_stream().set_sample_rate_hz(
+ frame->sample_rate_hz_);
+ processing_config.reverse_input_stream().set_num_channels(
+ frame->num_channels_);
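+  // The AudioFrame interface carries no separate reverse output format, so
+  // the output stream mirrors the input stream.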
+ processing_config.reverse_output_stream().set_sample_rate_hz(
+ frame->sample_rate_hz_);
+ processing_config.reverse_output_stream().set_num_channels(
+ frame->num_channels_);
RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
if (frame->samples_per_channel_ !=
- api_format_.reverse_stream().num_frames()) {
+ api_format_.reverse_input_stream().num_frames()) {
return kBadDataLengthError;
}
@@ -771,23 +841,32 @@
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
-
render_audio_->DeinterleaveFrom(frame);
- return AnalyzeReverseStreamLocked();
+ return ProcessReverseStreamLocked();
}
-int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
+int AudioProcessingImpl::ProcessReverseStreamLocked() {
AudioBuffer* ra = render_audio_.get(); // For brevity.
if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) {
ra->SplitIntoFrequencyBands();
}
+ if (intelligibility_enabled_) {
+ intelligibility_enhancer_->ProcessRenderAudio(
+ ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels());
+ }
+
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
if (!use_new_agc_) {
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
}
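+  // The render stream was modified in the split-band domain, so merge the
+  // bands back before the audio is copied out to the caller.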
+ if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz &&
+ is_rev_processed()) {
+ ra->MergeFrequencyBands();
+ }
+
return kNoError;
}
@@ -1004,6 +1083,15 @@
return false;
}
+bool AudioProcessingImpl::is_rev_processed() const {
+ return intelligibility_enabled_ && intelligibility_enhancer_->active();
+}
+
+bool AudioProcessingImpl::rev_conversion_needed() const {
+ return (api_format_.reverse_input_stream() !=
+ api_format_.reverse_output_stream());
+}
+
void AudioProcessingImpl::InitializeExperimentalAgc() {
if (use_new_agc_) {
if (!agc_manager_.get()) {
@@ -1036,6 +1124,16 @@
}
}
+void AudioProcessingImpl::InitializeIntelligibility() {
+ if (intelligibility_enabled_) {
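+    // The enhancer processes the lower band of the split signal, so it is
+    // configured at |split_rate_| rather than the API sample rate.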
+ IntelligibilityEnhancer::Config config;
+ config.sample_rate_hz = split_rate_;
+ config.num_capture_channels = capture_audio_->num_channels();
+ config.num_render_channels = render_audio_->num_channels();
+ intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config));
+ }
+}
+
void AudioProcessingImpl::MaybeUpdateHistograms() {
static const int kMinDiffDelayMs = 60;
@@ -1134,9 +1232,12 @@
msg->set_sample_rate(api_format_.input_stream().sample_rate_hz());
msg->set_num_input_channels(api_format_.input_stream().num_channels());
msg->set_num_output_channels(api_format_.output_stream().num_channels());
- msg->set_num_reverse_channels(api_format_.reverse_stream().num_channels());
- msg->set_reverse_sample_rate(api_format_.reverse_stream().sample_rate_hz());
+ msg->set_num_reverse_channels(
+ api_format_.reverse_input_stream().num_channels());
+ msg->set_reverse_sample_rate(
+ api_format_.reverse_input_stream().sample_rate_hz());
msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz());
+ // TODO(ekmeyerson): Add reverse output fields to event_msg_.
int err = WriteMessageToDebugFile();
if (err != kNoError) {
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index a44b5a8..a08f7b3 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -23,6 +23,7 @@
class AgcManagerDirect;
class AudioBuffer;
+class AudioConverter;
template<typename T>
class Beamformer;
@@ -39,6 +40,7 @@
class ProcessingComponent;
class TransientSuppressor;
class VoiceDetectionImpl;
+class IntelligibilityEnhancer;
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
namespace audioproc {
@@ -89,12 +91,15 @@
const StreamConfig& output_config,
float* const* dest) override;
int AnalyzeReverseStream(AudioFrame* frame) override;
+ int ProcessReverseStream(AudioFrame* frame) override;
int AnalyzeReverseStream(const float* const* data,
int samples_per_channel,
int sample_rate_hz,
ChannelLayout layout) override;
- int AnalyzeReverseStream(const float* const* data,
- const StreamConfig& reverse_config) override;
+ int ProcessReverseStream(const float* const* src,
+ const StreamConfig& reverse_input_config,
+ const StreamConfig& reverse_output_config,
+ float* const* dest) override;
int set_stream_delay_ms(int delay) override;
int stream_delay_ms() const override;
bool was_stream_delay_set() const override;
@@ -124,16 +129,23 @@
EXCLUSIVE_LOCKS_REQUIRED(crit_);
int MaybeInitializeLocked(const ProcessingConfig& config)
EXCLUSIVE_LOCKS_REQUIRED(crit_);
+ // TODO(ekm): Remove once all clients updated to new interface.
+ int AnalyzeReverseStream(const float* const* src,
+ const StreamConfig& input_config,
+ const StreamConfig& output_config);
int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
- int AnalyzeReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+ int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
bool is_data_processed() const;
bool output_copy_needed(bool is_data_processed) const;
bool synthesis_needed(bool is_data_processed) const;
bool analysis_needed(bool is_data_processed) const;
+ bool is_rev_processed() const;
+ bool rev_conversion_needed() const;
void InitializeExperimentalAgc() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void InitializeTransient() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void InitializeBeamformer() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+ void InitializeIntelligibility() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_);
EchoCancellationImpl* echo_cancellation_;
@@ -149,6 +161,7 @@
CriticalSectionWrapper* crit_;
rtc::scoped_ptr<AudioBuffer> render_audio_;
rtc::scoped_ptr<AudioBuffer> capture_audio_;
+ rtc::scoped_ptr<AudioConverter> render_converter_;
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// TODO(andrew): make this more graceful. Ideally we would split this stuff
// out into a separate class with an "enabled" and "disabled" implementation.
@@ -191,6 +204,9 @@
const bool beamformer_enabled_;
rtc::scoped_ptr<Beamformer<float>> beamformer_;
const std::vector<Point> array_geometry_;
+
+ bool intelligibility_enabled_;
+ rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer_;
};
} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index 0854844..fd91bfa 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -116,6 +116,18 @@
const std::vector<Point> array_geometry;
};
+// Use to enable the intelligibility enhancer in audio processing. Must be
+// provided through the constructor. It will have no impact if used with
+// AudioProcessing::SetExtraOptions().
+//
+// Note: If enabled and the reverse stream has more than one output channel,
+// the reverse stream will become an upmixed mono signal.
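+//
+// Usage sketch (hypothetical, error checking omitted):
+//   Config config;
+//   config.Set<Intelligibility>(new Intelligibility(true));
+//   rtc::scoped_ptr<AudioProcessing> apm(AudioProcessing::Create(config));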
+struct Intelligibility {
+ Intelligibility() : enabled(false) {}
+ explicit Intelligibility(bool enabled) : enabled(enabled) {}
+ bool enabled;
+};
+
static const int kAudioProcMaxNativeSampleRateHz = 32000;
// The Audio Processing Module (APM) provides a collection of voice processing
@@ -333,21 +345,28 @@
// |input_sample_rate_hz()|
//
// TODO(ajm): add const to input; requires an implementation fix.
+ // DEPRECATED: Use |ProcessReverseStream| instead.
+ // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|.
virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
+ // Same as |AnalyzeReverseStream|, but may modify |frame| if intelligibility
+ // is enabled.
+ virtual int ProcessReverseStream(AudioFrame* frame) = 0;
+
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of |data| points to a channel buffer, arranged according to |layout|.
- //
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int AnalyzeReverseStream(const float* const* data,
int samples_per_channel,
- int sample_rate_hz,
+ int rev_sample_rate_hz,
ChannelLayout layout) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// |data| points to a channel buffer, arranged according to |reverse_config|.
- virtual int AnalyzeReverseStream(const float* const* data,
- const StreamConfig& reverse_config) = 0;
+ virtual int ProcessReverseStream(const float* const* src,
+ const StreamConfig& reverse_input_config,
+ const StreamConfig& reverse_output_config,
+ float* const* dest) = 0;
// This must be called if and only if echo processing is enabled.
//
@@ -492,6 +511,7 @@
bool has_keyboard() const { return has_keyboard_; }
int num_frames() const { return num_frames_; }
+ int num_samples() const { return num_channels_ * num_frames_; }
bool operator==(const StreamConfig& other) const {
return sample_rate_hz_ == other.sample_rate_hz_ &&
@@ -517,7 +537,8 @@
enum StreamName {
kInputStream,
kOutputStream,
- kReverseStream,
+ kReverseInputStream,
+ kReverseOutputStream,
kNumStreamNames,
};
@@ -527,13 +548,21 @@
const StreamConfig& output_stream() const {
return streams[StreamName::kOutputStream];
}
- const StreamConfig& reverse_stream() const {
- return streams[StreamName::kReverseStream];
+ const StreamConfig& reverse_input_stream() const {
+ return streams[StreamName::kReverseInputStream];
+ }
+ const StreamConfig& reverse_output_stream() const {
+ return streams[StreamName::kReverseOutputStream];
}
StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
- StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; }
+ StreamConfig& reverse_input_stream() {
+ return streams[StreamName::kReverseInputStream];
+ }
+ StreamConfig& reverse_output_stream() {
+ return streams[StreamName::kReverseOutputStream];
+ }
bool operator==(const ProcessingConfig& other) const {
for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
diff --git a/webrtc/modules/audio_processing/include/mock_audio_processing.h b/webrtc/modules/audio_processing/include/mock_audio_processing.h
index 8007a86..f0d9c32 100644
--- a/webrtc/modules/audio_processing/include/mock_audio_processing.h
+++ b/webrtc/modules/audio_processing/include/mock_audio_processing.h
@@ -227,11 +227,15 @@
float* const* dest));
MOCK_METHOD1(AnalyzeReverseStream,
int(AudioFrame* frame));
+ MOCK_METHOD1(ProcessReverseStream, int(AudioFrame* frame));
MOCK_METHOD4(AnalyzeReverseStream,
int(const float* const* data, int frames, int sample_rate_hz,
ChannelLayout input_layout));
- MOCK_METHOD2(AnalyzeReverseStream,
- int(const float* const* data, const StreamConfig& reverse_config));
+ MOCK_METHOD4(ProcessReverseStream,
+ int(const float* const* src,
+ const StreamConfig& input_config,
+ const StreamConfig& output_config,
+ float* const* dest));
MOCK_METHOD1(set_stream_delay_ms,
int(int delay));
MOCK_CONST_METHOD0(stream_delay_ms,
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
index dbb7e63..8eccde4 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@@ -19,18 +19,18 @@
#include <math.h>
#include <stdlib.h>
-
#include <algorithm>
#include <numeric>
#include "webrtc/base/checks.h"
-#include "webrtc/common_audio/vad/include/webrtc_vad.h"
+#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/window_generator.h"
namespace webrtc {
namespace {
+const int kErbResolution = 2;
const int kWindowSizeMs = 2;
const int kChunkSizeMs = 10; // Size provided by APM.
const float kClipFreq = 200.0f;
@@ -64,124 +64,93 @@
}
}
-IntelligibilityEnhancer::IntelligibilityEnhancer(int erb_resolution,
- int sample_rate_hz,
- int channels,
- int cv_type,
- float cv_alpha,
- int cv_win,
- int analysis_rate,
- int variance_rate,
- float gain_limit)
+IntelligibilityEnhancer::IntelligibilityEnhancer()
+ : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {
+}
+
+IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
: freqs_(RealFourier::ComplexLength(
- RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
+ RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),
window_size_(1 << RealFourier::FftOrder(freqs_)),
- chunk_length_(sample_rate_hz * kChunkSizeMs / 1000),
- bank_size_(GetBankSize(sample_rate_hz, erb_resolution)),
- sample_rate_hz_(sample_rate_hz),
- erb_resolution_(erb_resolution),
- channels_(channels),
- analysis_rate_(analysis_rate),
- variance_rate_(variance_rate),
+ chunk_length_(config.sample_rate_hz * kChunkSizeMs / 1000),
+ bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),
+ sample_rate_hz_(config.sample_rate_hz),
+ erb_resolution_(kErbResolution),
+ num_capture_channels_(config.num_capture_channels),
+ num_render_channels_(config.num_render_channels),
+ analysis_rate_(config.analysis_rate),
+ active_(true),
clear_variance_(freqs_,
- static_cast<VarianceType>(cv_type),
- cv_win,
- cv_alpha),
- noise_variance_(freqs_, VarianceType::kStepInfinite, 475, 0.01f),
+ config.var_type,
+ config.var_window_size,
+ config.var_decay_rate),
+ noise_variance_(freqs_,
+ config.var_type,
+ config.var_window_size,
+ config.var_decay_rate),
filtered_clear_var_(new float[bank_size_]),
filtered_noise_var_(new float[bank_size_]),
filter_bank_(bank_size_),
center_freqs_(new float[bank_size_]),
rho_(new float[bank_size_]),
gains_eq_(new float[bank_size_]),
- gain_applier_(freqs_, gain_limit),
- temp_out_buffer_(nullptr),
- input_audio_(new float* [channels]),
+ gain_applier_(freqs_, config.gain_change_limit),
+ temp_render_out_buffer_(chunk_length_, num_render_channels_),
+ temp_capture_out_buffer_(chunk_length_, num_capture_channels_),
kbd_window_(new float[window_size_]),
render_callback_(this, AudioSource::kRenderStream),
capture_callback_(this, AudioSource::kCaptureStream),
block_count_(0),
- analysis_step_(0),
- vad_high_(WebRtcVad_Create()),
- vad_low_(WebRtcVad_Create()),
- vad_tmp_buffer_(new int16_t[chunk_length_]) {
- DCHECK_LE(kConfigRho, 1.0f);
+ analysis_step_(0) {
+ DCHECK_LE(config.rho, 1.0f);
CreateErbBank();
- WebRtcVad_Init(vad_high_);
- WebRtcVad_set_mode(vad_high_, 0); // High likelihood of speech.
- WebRtcVad_Init(vad_low_);
- WebRtcVad_set_mode(vad_low_, 3); // Low likelihood of speech.
-
- temp_out_buffer_ = static_cast<float**>(
- malloc(sizeof(*temp_out_buffer_) * channels_ +
- sizeof(**temp_out_buffer_) * chunk_length_ * channels_));
- for (int i = 0; i < channels_; ++i) {
- temp_out_buffer_[i] =
- reinterpret_cast<float*>(temp_out_buffer_ + channels_) +
- chunk_length_ * i;
- }
-
// Assumes all rho equal.
for (int i = 0; i < bank_size_; ++i) {
- rho_[i] = kConfigRho * kConfigRho;
+ rho_[i] = config.rho * config.rho;
}
float freqs_khz = kClipFreq / 1000.0f;
int erb_index = static_cast<int>(ceilf(
11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));
- start_freq_ = std::max(1, erb_index * erb_resolution);
+  start_freq_ = std::max(1, erb_index * erb_resolution_);
WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,
kbd_window_.get());
render_mangler_.reset(new LappedTransform(
- channels_, channels_, chunk_length_, kbd_window_.get(), window_size_,
- window_size_ / 2, &render_callback_));
+ num_render_channels_, num_render_channels_, chunk_length_,
+ kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));
capture_mangler_.reset(new LappedTransform(
- channels_, channels_, chunk_length_, kbd_window_.get(), window_size_,
- window_size_ / 2, &capture_callback_));
+ num_capture_channels_, num_capture_channels_, chunk_length_,
+ kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_));
}
-IntelligibilityEnhancer::~IntelligibilityEnhancer() {
- WebRtcVad_Free(vad_low_);
- WebRtcVad_Free(vad_high_);
- free(temp_out_buffer_);
-}
+void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
+ int sample_rate_hz,
+ int num_channels) {
+ CHECK_EQ(sample_rate_hz_, sample_rate_hz);
+ CHECK_EQ(num_render_channels_, num_channels);
-void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio) {
- for (int i = 0; i < chunk_length_; ++i) {
- vad_tmp_buffer_[i] = (int16_t)audio[0][i];
+ if (active_) {
+ render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels());
}
- has_voice_low_ = WebRtcVad_Process(vad_low_, sample_rate_hz_,
- vad_tmp_buffer_.get(), chunk_length_) == 1;
- // Process and enhance chunk of |audio|
- render_mangler_->ProcessChunk(audio, temp_out_buffer_);
-
- for (int i = 0; i < channels_; ++i) {
- memcpy(audio[i], temp_out_buffer_[i],
- chunk_length_ * sizeof(**temp_out_buffer_));
- }
-}
-
-void IntelligibilityEnhancer::ProcessCaptureAudio(float* const* audio) {
- for (int i = 0; i < chunk_length_; ++i) {
- vad_tmp_buffer_[i] = (int16_t)audio[0][i];
- }
- // TODO(bercic): The VAD was always detecting voice in the noise stream,
- // no matter what the aggressiveness, so it was temporarily disabled here.
-
- #if 0
- if (WebRtcVad_Process(vad_high_, sample_rate_hz_, vad_tmp_buffer_.get(),
- chunk_length_) == 1) {
- printf("capture HAS speech\n");
- return;
+ if (active_) {
+ for (int i = 0; i < num_render_channels_; ++i) {
+ memcpy(audio[i], temp_render_out_buffer_.channels()[i],
+ chunk_length_ * sizeof(**audio));
}
- printf("capture NO speech\n");
- #endif
+ }
+}
- capture_mangler_->ProcessChunk(audio, temp_out_buffer_);
+void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio,
+ int sample_rate_hz,
+ int num_channels) {
+ CHECK_EQ(sample_rate_hz_, sample_rate_hz);
+ CHECK_EQ(num_capture_channels_, num_channels);
+
+ capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels());
}
void IntelligibilityEnhancer::DispatchAudio(
@@ -206,28 +175,21 @@
return;
}
- // For now, always assumes enhancement is necessary.
- // TODO(ekmeyerson): Change to only enhance if necessary,
- // based on experiments with different cutoffs.
- if (has_voice_low_ || true) {
+ // TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary.
+ if (true) {
clear_variance_.Step(in_block, false);
- const float power_target = std::accumulate(
- clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.0f);
-
if (block_count_ % analysis_rate_ == analysis_rate_ - 1) {
+ const float power_target = std::accumulate(
+ clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.f);
AnalyzeClearBlock(power_target);
++analysis_step_;
- if (analysis_step_ == variance_rate_) {
- analysis_step_ = 0;
- clear_variance_.Clear();
- noise_variance_.Clear();
- }
}
++block_count_;
}
- /* efidata(n,:) = sqrt(b(n)) * fidata(n,:) */
- gain_applier_.Apply(in_block, out_block);
+ if (active_) {
+ gain_applier_.Apply(in_block, out_block);
+ }
}
void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) {
@@ -406,4 +368,8 @@
return ret;
}
+bool IntelligibilityEnhancer::active() const {
+ return active_;
+}
+
} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
index 7f18be8..1a2ef23 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
@@ -20,11 +20,9 @@
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/lapped_transform.h"
+#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
-struct WebRtcVadInst;
-typedef struct WebRtcVadInst VadInst;
-
namespace webrtc {
// Speech intelligibility enhancement module. Reads render and capture
@@ -33,32 +31,45 @@
// Note: assumes speech and noise streams are already separated.
class IntelligibilityEnhancer {
public:
- // Construct a new instance with the given filter bank resolution,
- // sampling rate, number of channels and analysis rates.
- // |analysis_rate| sets the number of input blocks (containing speech!)
- // to elapse before a new gain computation is made. |variance_rate| specifies
- // the number of gain recomputations after which the variances are reset.
- // |cv_*| are parameters for the VarianceArray constructor for the
- // clear speech stream.
- // TODO(bercic): the |cv_*|, |*_rate| and |gain_limit| parameters should
- // probably go away once fine tuning is done. They override the internal
- // constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate).
- IntelligibilityEnhancer(int erb_resolution,
- int sample_rate_hz,
- int channels,
- int cv_type,
- float cv_alpha,
- int cv_win,
- int analysis_rate,
- int variance_rate,
- float gain_limit);
- ~IntelligibilityEnhancer();
+ struct Config {
+ // |var_*| are parameters for the VarianceArray constructor for the
+ // clear speech stream.
+    // TODO(bercic): the |var_*|, |*_rate| and |gain_change_limit| parameters
+    // should probably go away once fine tuning is done.
+ Config()
+ : sample_rate_hz(16000),
+ num_capture_channels(1),
+ num_render_channels(1),
+ var_type(intelligibility::VarianceArray::kStepDecaying),
+ var_decay_rate(0.9f),
+ var_window_size(10),
+ analysis_rate(800),
+ gain_change_limit(0.1f),
+ rho(0.02f) {}
+ int sample_rate_hz;
+ int num_capture_channels;
+ int num_render_channels;
+ intelligibility::VarianceArray::StepType var_type;
+ float var_decay_rate;
+ int var_window_size;
+ int analysis_rate;
+ float gain_change_limit;
+ float rho;
+ };
+
+ explicit IntelligibilityEnhancer(const Config& config);
+ IntelligibilityEnhancer(); // Initialize with default config.
// Reads and processes chunk of noise stream in time domain.
- void ProcessCaptureAudio(float* const* audio);
+ void AnalyzeCaptureAudio(float* const* audio,
+ int sample_rate_hz,
+ int num_channels);
// Reads chunk of speech in time domain and updates with modified signal.
- void ProcessRenderAudio(float* const* audio);
+ void ProcessRenderAudio(float* const* audio,
+ int sample_rate_hz,
+ int num_channels);
+ bool active() const;
private:
enum AudioSource {
@@ -133,9 +144,12 @@
const int bank_size_; // Num ERB filters.
const int sample_rate_hz_;
const int erb_resolution_;
- const int channels_; // Num channels.
+ const int num_capture_channels_;
+ const int num_render_channels_;
const int analysis_rate_; // Num blocks before gains recalculated.
- const int variance_rate_; // Num recalculations before history is cleared.
+
+ const bool active_; // Whether render gains are being updated.
+ // TODO(ekm): Add logic for updating |active_|.
intelligibility::VarianceArray clear_variance_;
intelligibility::VarianceArray noise_variance_;
@@ -149,12 +163,11 @@
rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.
intelligibility::GainApplier gain_applier_;
- // Destination buffer used to reassemble blocked chunks before overwriting
+ // Destination buffers used to reassemble blocked chunks before overwriting
// the original input array with modifications.
- // TODO(ekmeyerson): Switch to using ChannelBuffer.
- float** temp_out_buffer_;
+ ChannelBuffer<float> temp_render_out_buffer_;
+ ChannelBuffer<float> temp_capture_out_buffer_;
- rtc::scoped_ptr<float* []> input_audio_;
rtc::scoped_ptr<float[]> kbd_window_;
TransformCallback render_callback_;
TransformCallback capture_callback_;
@@ -162,14 +175,6 @@
rtc::scoped_ptr<LappedTransform> capture_mangler_;
int block_count_;
int analysis_step_;
-
- // TODO(bercic): Quick stopgap measure for voice detection in the clear
- // and noise streams.
- // Note: VAD currently does not affect anything in IntelligibilityEnhancer.
- VadInst* vad_high_;
- VadInst* vad_low_;
- rtc::scoped_ptr<int16_t[]> vad_tmp_buffer_;
- bool has_voice_low_; // Whether voice detected in speech stream.
};
} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
index 490db2c..cb0085d 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
@@ -19,6 +19,7 @@
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/base/arraysize.h"
+#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
@@ -74,15 +75,9 @@
// Enhancer initialization parameters.
const int kSamples = 2000;
-const int kErbResolution = 2;
const int kSampleRate = 1000;
-const int kFragmentSize = kSampleRate / 100;
const int kNumChannels = 1;
-const float kDecayRate = 0.9f;
-const int kWindowSize = 800;
-const int kAnalyzeRate = 800;
-const int kVarianceRate = 2;
-const float kGainLimit = 0.1f;
+const int kFragmentSize = kSampleRate / 100;
} // namespace
@@ -92,28 +87,20 @@
class IntelligibilityEnhancerTest : public ::testing::Test {
protected:
IntelligibilityEnhancerTest()
- : enh_(kErbResolution,
- kSampleRate,
- kNumChannels,
- VarianceArray::kStepInfinite,
- kDecayRate,
- kWindowSize,
- kAnalyzeRate,
- kVarianceRate,
- kGainLimit),
- clear_data_(kSamples),
- noise_data_(kSamples),
- orig_data_(kSamples) {}
+ : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) {
+ config_.sample_rate_hz = kSampleRate;
+ enh_.reset(new IntelligibilityEnhancer(config_));
+ }
bool CheckUpdate(VarianceArray::StepType step_type) {
- IntelligibilityEnhancer enh(kErbResolution, kSampleRate, kNumChannels,
- step_type, kDecayRate, kWindowSize,
- kAnalyzeRate, kVarianceRate, kGainLimit);
+ config_.sample_rate_hz = kSampleRate;
+ config_.var_type = step_type;
+ enh_.reset(new IntelligibilityEnhancer(config_));
float* clear_cursor = &clear_data_[0];
float* noise_cursor = &noise_data_[0];
for (int i = 0; i < kSamples; i += kFragmentSize) {
- enh.ProcessCaptureAudio(&noise_cursor);
- enh.ProcessRenderAudio(&clear_cursor);
+ enh_->AnalyzeCaptureAudio(&noise_cursor, kSampleRate, kNumChannels);
+ enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels);
clear_cursor += kFragmentSize;
noise_cursor += kFragmentSize;
}
@@ -125,7 +112,8 @@
return false;
}
- IntelligibilityEnhancer enh_;
+ IntelligibilityEnhancer::Config config_;
+ rtc::scoped_ptr<IntelligibilityEnhancer> enh_;
vector<float> clear_data_;
vector<float> noise_data_;
vector<float> orig_data_;
@@ -161,12 +149,12 @@
// Tests ERB bank creation, comparing against matlab output.
TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {
- ASSERT_EQ(static_cast<int>(arraysize(kTestCenterFreqs)), enh_.bank_size_);
- for (int i = 0; i < enh_.bank_size_; ++i) {
- EXPECT_NEAR(kTestCenterFreqs[i], enh_.center_freqs_[i], kMaxTestError);
- ASSERT_EQ(static_cast<int>(arraysize(kTestFilterBank[0])), enh_.freqs_);
- for (int j = 0; j < enh_.freqs_; ++j) {
- EXPECT_NEAR(kTestFilterBank[i][j], enh_.filter_bank_[i][j],
+ ASSERT_EQ(static_cast<int>(arraysize(kTestCenterFreqs)), enh_->bank_size_);
+ for (int i = 0; i < enh_->bank_size_; ++i) {
+ EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError);
+ ASSERT_EQ(static_cast<int>(arraysize(kTestFilterBank[0])), enh_->freqs_);
+ for (int j = 0; j < enh_->freqs_; ++j) {
+ EXPECT_NEAR(kTestFilterBank[i][j], enh_->filter_bank_[i][j],
kMaxTestError);
}
}
@@ -175,29 +163,29 @@
// Tests analytic solution for optimal gains, comparing
// against matlab output.
TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) {
- ASSERT_EQ(kTestStartFreq, enh_.start_freq_);
- vector<float> sols(enh_.bank_size_);
+ ASSERT_EQ(kTestStartFreq, enh_->start_freq_);
+ vector<float> sols(enh_->bank_size_);
float lambda = -0.001f;
- for (int i = 0; i < enh_.bank_size_; i++) {
- enh_.filtered_clear_var_[i] = 0.0f;
- enh_.filtered_noise_var_[i] = 0.0f;
- enh_.rho_[i] = 0.02f;
+ for (int i = 0; i < enh_->bank_size_; i++) {
+ enh_->filtered_clear_var_[i] = 0.0f;
+ enh_->filtered_noise_var_[i] = 0.0f;
+ enh_->rho_[i] = 0.02f;
}
- enh_.SolveForGainsGivenLambda(lambda, enh_.start_freq_, &sols[0]);
- for (int i = 0; i < enh_.bank_size_; i++) {
+ enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, &sols[0]);
+ for (int i = 0; i < enh_->bank_size_; i++) {
EXPECT_NEAR(kTestZeroVar[i], sols[i], kMaxTestError);
}
- for (int i = 0; i < enh_.bank_size_; i++) {
- enh_.filtered_clear_var_[i] = static_cast<float>(i + 1);
- enh_.filtered_noise_var_[i] = static_cast<float>(enh_.bank_size_ - i);
+ for (int i = 0; i < enh_->bank_size_; i++) {
+ enh_->filtered_clear_var_[i] = static_cast<float>(i + 1);
+ enh_->filtered_noise_var_[i] = static_cast<float>(enh_->bank_size_ - i);
}
- enh_.SolveForGainsGivenLambda(lambda, enh_.start_freq_, &sols[0]);
- for (int i = 0; i < enh_.bank_size_; i++) {
+ enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, &sols[0]);
+ for (int i = 0; i < enh_->bank_size_; i++) {
EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
}
lambda = -1.0;
- enh_.SolveForGainsGivenLambda(lambda, enh_.start_freq_, &sols[0]);
- for (int i = 0; i < enh_.bank_size_; i++) {
+ enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, &sols[0]);
+ for (int i = 0; i < enh_->bank_size_; i++) {
EXPECT_NEAR(kTestZeroVar[i], sols[i], kMaxTestError);
}
}
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
index 00d9b53..2c2743f 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
@@ -51,33 +51,33 @@
static const int kWindowBlockSize = 10;
-VarianceArray::VarianceArray(int freqs,
+VarianceArray::VarianceArray(int num_freqs,
StepType type,
int window_size,
float decay)
- : running_mean_(new complex<float>[freqs]()),
- running_mean_sq_(new complex<float>[freqs]()),
- sub_running_mean_(new complex<float>[freqs]()),
- sub_running_mean_sq_(new complex<float>[freqs]()),
- variance_(new float[freqs]()),
- conj_sum_(new float[freqs]()),
- freqs_(freqs),
+ : running_mean_(new complex<float>[num_freqs]()),
+ running_mean_sq_(new complex<float>[num_freqs]()),
+ sub_running_mean_(new complex<float>[num_freqs]()),
+ sub_running_mean_sq_(new complex<float>[num_freqs]()),
+ variance_(new float[num_freqs]()),
+ conj_sum_(new float[num_freqs]()),
+ num_freqs_(num_freqs),
window_size_(window_size),
decay_(decay),
history_cursor_(0),
count_(0),
array_mean_(0.0f),
buffer_full_(false) {
- history_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());
- for (int i = 0; i < freqs_; ++i) {
+ history_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
+ for (int i = 0; i < num_freqs_; ++i) {
history_[i].reset(new complex<float>[window_size_]());
}
- subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());
- for (int i = 0; i < freqs_; ++i) {
+ subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
+ for (int i = 0; i < num_freqs_; ++i) {
subhistory_[i].reset(new complex<float>[window_size_]());
}
- subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());
- for (int i = 0; i < freqs_; ++i) {
+ subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
+ for (int i = 0; i < num_freqs_; ++i) {
subhistory_sq_[i].reset(new complex<float>[window_size_]());
}
switch (type) {
@@ -104,7 +104,7 @@
void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) {
array_mean_ = 0.0f;
++count_;
- for (int i = 0; i < freqs_; ++i) {
+ for (int i = 0; i < num_freqs_; ++i) {
complex<float> sample = data[i];
if (!skip_fudge) {
sample = zerofudge(sample);
@@ -132,7 +132,7 @@
void VarianceArray::DecayStep(const complex<float>* data, bool /*dummy*/) {
array_mean_ = 0.0f;
++count_;
- for (int i = 0; i < freqs_; ++i) {
+ for (int i = 0; i < num_freqs_; ++i) {
complex<float> sample = data[i];
sample = zerofudge(sample);
@@ -159,7 +159,7 @@
void VarianceArray::WindowedStep(const complex<float>* data, bool /*dummy*/) {
int num = min(count_ + 1, window_size_);
array_mean_ = 0.0f;
- for (int i = 0; i < freqs_; ++i) {
+ for (int i = 0; i < num_freqs_; ++i) {
complex<float> mean;
float conj_sum = 0.0f;
@@ -192,7 +192,7 @@
// are recomputed from scratch at each of these transitions.
void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {
int blocks = min(window_size_, history_cursor_ + 1);
- for (int i = 0; i < freqs_; ++i) {
+ for (int i = 0; i < num_freqs_; ++i) {
AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);
AddToMean(data[i] * std::conj(data[i]), count_ + 1,
&sub_running_mean_sq_[i]);
@@ -228,7 +228,7 @@
// TODO(ekmeyerson) To mitigate potential divergence, add counter so that
// after every so often sums are computed scratch by summing over all
// elements instead of subtracting oldest and adding newest.
- for (int i = 0; i < freqs_; ++i) {
+ for (int i = 0; i < num_freqs_; ++i) {
sub_running_mean_[i] += data[i];
sub_running_mean_sq_[i] += data[i] * std::conj(data[i]);
}
@@ -239,7 +239,7 @@
if (count_ >= kWindowBlockSize) {
count_ = 0;
- for (int i = 0; i < freqs_; ++i) {
+ for (int i = 0; i < num_freqs_; ++i) {
running_mean_[i] -= subhistory_[i][history_cursor_];
running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_];
@@ -268,10 +268,11 @@
}
void VarianceArray::Clear() {
- memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * freqs_);
- memset(running_mean_sq_.get(), 0, sizeof(*running_mean_sq_.get()) * freqs_);
- memset(variance_.get(), 0, sizeof(*variance_.get()) * freqs_);
- memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * freqs_);
+ memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * num_freqs_);
+ memset(running_mean_sq_.get(), 0,
+ sizeof(*running_mean_sq_.get()) * num_freqs_);
+ memset(variance_.get(), 0, sizeof(*variance_.get()) * num_freqs_);
+ memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * num_freqs_);
history_cursor_ = 0;
count_ = 0;
array_mean_ = 0.0f;
@@ -279,14 +280,14 @@
void VarianceArray::ApplyScale(float scale) {
array_mean_ = 0.0f;
- for (int i = 0; i < freqs_; ++i) {
+ for (int i = 0; i < num_freqs_; ++i) {
variance_[i] *= scale * scale;
array_mean_ += (variance_[i] - array_mean_) / (i + 1);
}
}
GainApplier::GainApplier(int freqs, float change_limit)
- : freqs_(freqs),
+ : num_freqs_(freqs),
change_limit_(change_limit),
target_(new float[freqs]()),
current_(new float[freqs]()) {
@@ -298,7 +299,7 @@
void GainApplier::Apply(const complex<float>* in_block,
complex<float>* out_block) {
- for (int i = 0; i < freqs_; ++i) {
+ for (int i = 0; i < num_freqs_; ++i) {
float factor = sqrtf(fabsf(current_[i]));
if (!std::isnormal(factor)) {
factor = 1.0f;
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
index 5013ef0..fa0e974 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
@@ -119,7 +119,7 @@
rtc::scoped_ptr<float[]> variance_;
rtc::scoped_ptr<float[]> conj_sum_;
- const int freqs_;
+ const int num_freqs_;
const int window_size_;
const float decay_;
int history_cursor_;
@@ -145,7 +145,7 @@
float* target() const { return target_.get(); }
private:
- const int freqs_;
+ const int num_freqs_;
const float change_limit_;
rtc::scoped_ptr<float[]> target_;
rtc::scoped_ptr<float[]> current_;
diff --git a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
index cef41e9..6b6bfa0 100644
--- a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
+++ b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
@@ -31,6 +31,7 @@
#include "webrtc/test/testsupport/fileutils.h"
using std::complex;
+using webrtc::intelligibility::VarianceArray;
namespace webrtc {
namespace {
@@ -40,7 +41,7 @@
}
DEFINE_int32(clear_type,
- webrtc::intelligibility::VarianceArray::kStepInfinite,
+ webrtc::intelligibility::VarianceArray::kStepDecaying,
"Variance algorithm for clear data.");
DEFINE_double(clear_alpha, 0.9, "Variance decay factor for clear data.");
DEFINE_int32(clear_window,
@@ -67,8 +68,6 @@
"Enhanced output. Use '-' to "
"play through aplay immediately.");
-// Constant IntelligibilityEnhancer constructor parameters.
-const int kErbResolution = 2;
const int kNumChannels = 1;
// void function for gtest
@@ -108,11 +107,14 @@
noise_file.ReadSamples(samples, &noise_fpcm[0]);
// Run intelligibility enhancement.
-
- IntelligibilityEnhancer enh(
- kErbResolution, FLAGS_sample_rate, kNumChannels, FLAGS_clear_type,
- static_cast<float>(FLAGS_clear_alpha), FLAGS_clear_window, FLAGS_ana_rate,
- FLAGS_var_rate, FLAGS_gain_limit);
+ IntelligibilityEnhancer::Config config;
+ config.sample_rate_hz = FLAGS_sample_rate;
+ config.var_type = static_cast<VarianceArray::StepType>(FLAGS_clear_type);
+ config.var_decay_rate = static_cast<float>(FLAGS_clear_alpha);
+ config.var_window_size = FLAGS_clear_window;
+ config.analysis_rate = FLAGS_ana_rate;
+ config.gain_change_limit = FLAGS_gain_limit;
+ IntelligibilityEnhancer enh(config);
// Slice the input into smaller chunks, as the APM would do, and feed them
// through the enhancer.
@@ -120,8 +122,8 @@
float* noise_cursor = &noise_fpcm[0];
for (size_t i = 0; i < samples; i += fragment_size) {
- enh.ProcessCaptureAudio(&noise_cursor);
- enh.ProcessRenderAudio(&clear_cursor);
+ enh.AnalyzeCaptureAudio(&noise_cursor, FLAGS_sample_rate, kNumChannels);
+ enh.ProcessRenderAudio(&clear_cursor, FLAGS_sample_rate, kNumChannels);
clear_cursor += fragment_size;
noise_cursor += fragment_size;
}
diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
index 3030182..8384c36 100644
--- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
@@ -40,6 +40,9 @@
namespace webrtc {
namespace {
+// TODO(ekmeyerson): Switch to using StreamConfig and ProcessingConfig where
+// applicable.
+
// TODO(bjornv): This is not feasible until the functionality has been
// re-implemented; see comment at the bottom of this file. For now, the user has
// to hard code the |write_ref_data| value.
@@ -62,6 +65,8 @@
const size_t kProcessSampleRatesSize = sizeof(kProcessSampleRates) /
sizeof(*kProcessSampleRates);
+enum StreamDirection { kForward = 0, kReverse };
+
void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) {
ChannelBuffer<int16_t> cb_int(cb->num_frames(),
cb->num_channels());
@@ -252,13 +257,16 @@
std::string OutputFilePath(std::string name,
int input_rate,
int output_rate,
- int reverse_rate,
+ int reverse_input_rate,
+ int reverse_output_rate,
int num_input_channels,
int num_output_channels,
- int num_reverse_channels) {
+ int num_reverse_input_channels,
+ int num_reverse_output_channels,
+ StreamDirection file_direction) {
std::ostringstream ss;
- ss << name << "_i" << num_input_channels << "_" << input_rate / 1000
- << "_r" << num_reverse_channels << "_" << reverse_rate / 1000 << "_";
+ ss << name << "_i" << num_input_channels << "_" << input_rate / 1000 << "_ir"
+ << num_reverse_input_channels << "_" << reverse_input_rate / 1000 << "_";
if (num_output_channels == 1) {
ss << "mono";
} else if (num_output_channels == 2) {
@@ -266,7 +274,16 @@
} else {
assert(false);
}
- ss << output_rate / 1000 << "_pcm";
+ ss << output_rate / 1000;
+ if (num_reverse_output_channels == 1) {
+ ss << "_rmono";
+ } else if (num_reverse_output_channels == 2) {
+ ss << "_rstereo";
+ } else {
+ assert(false);
+ }
+ ss << reverse_output_rate / 1000;
+ ss << "_d" << file_direction << "_pcm";
std::string filename = ss.str();
if (temp_filenames[filename].empty())
@@ -340,9 +357,9 @@
void Init(int sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
- int num_reverse_channels,
int num_input_channels,
int num_output_channels,
+ int num_reverse_channels,
bool open_output_file);
void Init(AudioProcessing* ap);
void EnableAllComponents();
@@ -458,6 +475,7 @@
ap->Initialize(
{{{frame_->sample_rate_hz_, frame_->num_channels_},
{output_sample_rate_hz_, num_output_channels_},
+ {revframe_->sample_rate_hz_, revframe_->num_channels_},
{revframe_->sample_rate_hz_, revframe_->num_channels_}}}));
}
@@ -496,13 +514,10 @@
if (out_file_) {
ASSERT_EQ(0, fclose(out_file_));
}
- filename = OutputFilePath("out",
- sample_rate_hz,
- output_sample_rate_hz,
- reverse_sample_rate_hz,
- num_input_channels,
- num_output_channels,
- num_reverse_channels);
+ filename = OutputFilePath(
+ "out", sample_rate_hz, output_sample_rate_hz, reverse_sample_rate_hz,
+ reverse_sample_rate_hz, num_input_channels, num_output_channels,
+ num_reverse_channels, num_reverse_channels, kForward);
out_file_ = fopen(filename.c_str(), "wb");
ASSERT_TRUE(out_file_ != NULL) << "Could not open file " <<
filename << "\n";
@@ -819,13 +834,16 @@
int num_rev_channels,
AudioProcessing::Error expected_return) {
const ProcessingConfig processing_config = {
- {{ frame_->sample_rate_hz_, apm_->num_input_channels() },
- { output_sample_rate_hz_, apm_->num_output_channels() },
- { frame_->sample_rate_hz_, num_rev_channels }}};
+ {{frame_->sample_rate_hz_, apm_->num_input_channels()},
+ {output_sample_rate_hz_, apm_->num_output_channels()},
+ {frame_->sample_rate_hz_, num_rev_channels},
+ {frame_->sample_rate_hz_, num_rev_channels}}};
- EXPECT_EQ(expected_return,
- apm_->AnalyzeReverseStream(float_cb_->channels(),
- processing_config.reverse_stream()));
+ EXPECT_EQ(
+ expected_return,
+ apm_->ProcessReverseStream(
+ float_cb_->channels(), processing_config.reverse_input_stream(),
+ processing_config.reverse_output_stream(), float_cb_->channels()));
}
TEST_F(ApmTest, ChannelsInt16Interface) {
@@ -1531,6 +1549,8 @@
for (int j = 0; j < 1000; j++) {
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
+ EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(frame_));
+ EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
}
}
}
@@ -1555,6 +1575,19 @@
for (size_t i = 0; i < kSamples; ++i) {
EXPECT_EQ(src[i], dest[i]);
}
+
+ // Same for ProcessReverseStream.
+ float rev_dest[kSamples] = {};
+ auto rev_dest_channels = &rev_dest[0];
+
+ StreamConfig input_stream = {sample_rate, 1};
+ StreamConfig output_stream = {sample_rate, 1};
+ EXPECT_NOERR(apm_->ProcessReverseStream(&src_channels, input_stream,
+ output_stream, &rev_dest_channels));
+
+ for (size_t i = 0; i < kSamples; ++i) {
+ EXPECT_EQ(src[i], rev_dest[i]);
+ }
}
TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
@@ -2299,15 +2332,18 @@
// Due to the resampling distortion, we don't expect identical results, but
// enforce SNR thresholds which vary depending on the format. 0 is a special
// case SNR which corresponds to inf, or zero error.
-typedef std::tr1::tuple<int, int, int, double> AudioProcessingTestData;
+typedef std::tr1::tuple<int, int, int, int, double, double>
+ AudioProcessingTestData;
class AudioProcessingTest
: public testing::TestWithParam<AudioProcessingTestData> {
public:
AudioProcessingTest()
: input_rate_(std::tr1::get<0>(GetParam())),
output_rate_(std::tr1::get<1>(GetParam())),
- reverse_rate_(std::tr1::get<2>(GetParam())),
- expected_snr_(std::tr1::get<3>(GetParam())) {}
+ reverse_input_rate_(std::tr1::get<2>(GetParam())),
+ reverse_output_rate_(std::tr1::get<3>(GetParam())),
+ expected_snr_(std::tr1::get<4>(GetParam())),
+ expected_reverse_snr_(std::tr1::get<5>(GetParam())) {}
virtual ~AudioProcessingTest() {}
@@ -2323,13 +2359,9 @@
for (size_t j = 0; j < kNumChannelsSize; ++j) {
for (size_t k = 0; k < kNumChannelsSize; ++k) {
// The reference files always have matching input and output channels.
- ProcessFormat(kNativeRates[i],
- kNativeRates[i],
- kNativeRates[i],
- kNumChannels[j],
- kNumChannels[j],
- kNumChannels[k],
- "ref");
+ ProcessFormat(kNativeRates[i], kNativeRates[i], kNativeRates[i],
+ kNativeRates[i], kNumChannels[j], kNumChannels[j],
+ kNumChannels[k], kNumChannels[k], "ref");
}
}
}
@@ -2338,59 +2370,75 @@
static void TearDownTestCase() {
ClearTempFiles();
}
+
// Runs a process pass on files with the given parameters and dumps the output
- // to a file specified with |output_file_prefix|.
+ // to a file specified with |output_file_prefix|. Both forward and reverse
+ // output streams are dumped.
static void ProcessFormat(int input_rate,
int output_rate,
- int reverse_rate,
+ int reverse_input_rate,
+ int reverse_output_rate,
int num_input_channels,
int num_output_channels,
- int num_reverse_channels,
+ int num_reverse_input_channels,
+ int num_reverse_output_channels,
std::string output_file_prefix) {
Config config;
config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));
EnableAllAPComponents(ap.get());
- ap->Initialize({{{input_rate, num_input_channels},
- {output_rate, num_output_channels},
- {reverse_rate, num_reverse_channels}}});
- FILE* far_file = fopen(ResourceFilePath("far", reverse_rate).c_str(), "rb");
+ ProcessingConfig processing_config = {
+ {{input_rate, num_input_channels},
+ {output_rate, num_output_channels},
+ {reverse_input_rate, num_reverse_input_channels},
+ {reverse_output_rate, num_reverse_output_channels}}};
+ ap->Initialize(processing_config);
+
+ FILE* far_file =
+ fopen(ResourceFilePath("far", reverse_input_rate).c_str(), "rb");
FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb");
- FILE* out_file = fopen(OutputFilePath(output_file_prefix,
- input_rate,
- output_rate,
- reverse_rate,
- num_input_channels,
- num_output_channels,
- num_reverse_channels).c_str(), "wb");
+ FILE* out_file =
+ fopen(OutputFilePath(output_file_prefix, input_rate, output_rate,
+ reverse_input_rate, reverse_output_rate,
+ num_input_channels, num_output_channels,
+ num_reverse_input_channels,
+ num_reverse_output_channels, kForward).c_str(),
+ "wb");
+ FILE* rev_out_file =
+ fopen(OutputFilePath(output_file_prefix, input_rate, output_rate,
+ reverse_input_rate, reverse_output_rate,
+ num_input_channels, num_output_channels,
+ num_reverse_input_channels,
+ num_reverse_output_channels, kReverse).c_str(),
+ "wb");
ASSERT_TRUE(far_file != NULL);
ASSERT_TRUE(near_file != NULL);
ASSERT_TRUE(out_file != NULL);
+ ASSERT_TRUE(rev_out_file != NULL);
ChannelBuffer<float> fwd_cb(SamplesFromRate(input_rate),
num_input_channels);
- ChannelBuffer<float> rev_cb(SamplesFromRate(reverse_rate),
- num_reverse_channels);
+ ChannelBuffer<float> rev_cb(SamplesFromRate(reverse_input_rate),
+ num_reverse_input_channels);
ChannelBuffer<float> out_cb(SamplesFromRate(output_rate),
num_output_channels);
+ ChannelBuffer<float> rev_out_cb(SamplesFromRate(reverse_output_rate),
+ num_reverse_output_channels);
// Temporary buffers.
const int max_length =
- 2 * std::max(out_cb.num_frames(),
- std::max(fwd_cb.num_frames(),
- rev_cb.num_frames()));
+ 2 * std::max(std::max(out_cb.num_frames(), rev_out_cb.num_frames()),
+ std::max(fwd_cb.num_frames(), rev_cb.num_frames()));
rtc::scoped_ptr<float[]> float_data(new float[max_length]);
rtc::scoped_ptr<int16_t[]> int_data(new int16_t[max_length]);
int analog_level = 127;
while (ReadChunk(far_file, int_data.get(), float_data.get(), &rev_cb) &&
ReadChunk(near_file, int_data.get(), float_data.get(), &fwd_cb)) {
- EXPECT_NOERR(ap->AnalyzeReverseStream(
- rev_cb.channels(),
- rev_cb.num_frames(),
- reverse_rate,
- LayoutFromChannels(num_reverse_channels)));
+ EXPECT_NOERR(ap->ProcessReverseStream(
+ rev_cb.channels(), processing_config.reverse_input_stream(),
+ processing_config.reverse_output_stream(), rev_out_cb.channels()));
EXPECT_NOERR(ap->set_stream_delay_ms(0));
ap->echo_cancellation()->set_stream_drift_samples(0);
@@ -2405,274 +2453,293 @@
LayoutFromChannels(num_output_channels),
out_cb.channels()));
- Interleave(out_cb.channels(),
- out_cb.num_frames(),
- out_cb.num_channels(),
+ // Dump forward output to file.
+ Interleave(out_cb.channels(), out_cb.num_frames(), out_cb.num_channels(),
float_data.get());
- // Dump output to file.
int out_length = out_cb.num_channels() * out_cb.num_frames();
+
ASSERT_EQ(static_cast<size_t>(out_length),
fwrite(float_data.get(), sizeof(float_data[0]),
out_length, out_file));
+ // Dump reverse output to file.
+ Interleave(rev_out_cb.channels(), rev_out_cb.num_frames(),
+ rev_out_cb.num_channels(), float_data.get());
+ int rev_out_length = rev_out_cb.num_channels() * rev_out_cb.num_frames();
+
+ ASSERT_EQ(static_cast<size_t>(rev_out_length),
+ fwrite(float_data.get(), sizeof(float_data[0]), rev_out_length,
+ rev_out_file));
+
analog_level = ap->gain_control()->stream_analog_level();
}
fclose(far_file);
fclose(near_file);
fclose(out_file);
+ fclose(rev_out_file);
}
protected:
int input_rate_;
int output_rate_;
- int reverse_rate_;
+ int reverse_input_rate_;
+ int reverse_output_rate_;
double expected_snr_;
+ double expected_reverse_snr_;
};
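
The kForward/kReverse tags passed to OutputFilePath() and branched on in the
verification loop below come from a StreamDirection enum introduced elsewhere
in this CL. Its assumed shape (kForward must be the zero value, since the
loop tests the enum directly) is:

    // Assumed definition, declared in the shared test utilities rather than
    // in this excerpt; kForward = 0 makes |file_direction ? a : b| select
    // the forward-path value |b| for kForward.
    enum StreamDirection { kForward = 0, kReverse };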
TEST_P(AudioProcessingTest, Formats) {
struct ChannelFormat {
int num_input;
int num_output;
- int num_reverse;
+ int num_reverse_input;
+ int num_reverse_output;
};
ChannelFormat cf[] = {
- {1, 1, 1},
- {1, 1, 2},
- {2, 1, 1},
- {2, 1, 2},
- {2, 2, 1},
- {2, 2, 2},
+ {1, 1, 1, 1},
+ {1, 1, 2, 1},
+ {2, 1, 1, 1},
+ {2, 1, 2, 1},
+ {2, 2, 1, 1},
+ {2, 2, 2, 2},
};
size_t channel_format_size = sizeof(cf) / sizeof(*cf);
for (size_t i = 0; i < channel_format_size; ++i) {
- ProcessFormat(input_rate_,
- output_rate_,
- reverse_rate_,
- cf[i].num_input,
- cf[i].num_output,
- cf[i].num_reverse,
- "out");
- int min_ref_rate = std::min(input_rate_, output_rate_);
- int ref_rate;
+ ProcessFormat(input_rate_, output_rate_, reverse_input_rate_,
+ reverse_output_rate_, cf[i].num_input, cf[i].num_output,
+ cf[i].num_reverse_input, cf[i].num_reverse_output, "out");
- if (min_ref_rate > 32000) {
- ref_rate = 48000;
- } else if (min_ref_rate > 16000) {
- ref_rate = 32000;
- } else if (min_ref_rate > 8000) {
- ref_rate = 16000;
- } else {
- ref_rate = 8000;
- }
+    // Verify output for both directions.
+    for (StreamDirection file_direction : {kForward, kReverse}) {
+ const int in_rate = file_direction ? reverse_input_rate_ : input_rate_;
+ const int out_rate = file_direction ? reverse_output_rate_ : output_rate_;
+ const int out_num =
+ file_direction ? cf[i].num_reverse_output : cf[i].num_output;
+ const double expected_snr =
+ file_direction ? expected_reverse_snr_ : expected_snr_;
+
+ const int min_ref_rate = std::min(in_rate, out_rate);
+ int ref_rate;
+
+ if (min_ref_rate > 32000) {
+ ref_rate = 48000;
+ } else if (min_ref_rate > 16000) {
+ ref_rate = 32000;
+ } else if (min_ref_rate > 8000) {
+ ref_rate = 16000;
+ } else {
+ ref_rate = 8000;
+ }
#ifdef WEBRTC_AUDIOPROC_FIXED_PROFILE
- ref_rate = std::min(ref_rate, 16000);
+ if (file_direction == kForward) {
+ ref_rate = std::min(ref_rate, 16000);
+ }
#endif
+ FILE* out_file = fopen(
+ OutputFilePath("out", input_rate_, output_rate_, reverse_input_rate_,
+ reverse_output_rate_, cf[i].num_input,
+ cf[i].num_output, cf[i].num_reverse_input,
+ cf[i].num_reverse_output, file_direction).c_str(),
+ "rb");
+ // The reference files always have matching input and output channels.
+ FILE* ref_file = fopen(
+ OutputFilePath("ref", ref_rate, ref_rate, ref_rate, ref_rate,
+ cf[i].num_output, cf[i].num_output,
+ cf[i].num_reverse_output, cf[i].num_reverse_output,
+ file_direction).c_str(),
+ "rb");
+ ASSERT_TRUE(out_file != NULL);
+ ASSERT_TRUE(ref_file != NULL);
- FILE* out_file = fopen(OutputFilePath("out",
- input_rate_,
- output_rate_,
- reverse_rate_,
- cf[i].num_input,
- cf[i].num_output,
- cf[i].num_reverse).c_str(), "rb");
- // The reference files always have matching input and output channels.
- FILE* ref_file = fopen(OutputFilePath("ref",
- ref_rate,
- ref_rate,
- ref_rate,
- cf[i].num_output,
- cf[i].num_output,
- cf[i].num_reverse).c_str(), "rb");
- ASSERT_TRUE(out_file != NULL);
- ASSERT_TRUE(ref_file != NULL);
+ const int ref_length = SamplesFromRate(ref_rate) * out_num;
+ const int out_length = SamplesFromRate(out_rate) * out_num;
+ // Data from the reference file.
+ rtc::scoped_ptr<float[]> ref_data(new float[ref_length]);
+ // Data from the output file.
+ rtc::scoped_ptr<float[]> out_data(new float[out_length]);
+ // Data from the resampled output, in case the reference and output rates
+ // don't match.
+ rtc::scoped_ptr<float[]> cmp_data(new float[ref_length]);
- const int ref_length = SamplesFromRate(ref_rate) * cf[i].num_output;
- const int out_length = SamplesFromRate(output_rate_) * cf[i].num_output;
- // Data from the reference file.
- rtc::scoped_ptr<float[]> ref_data(new float[ref_length]);
- // Data from the output file.
- rtc::scoped_ptr<float[]> out_data(new float[out_length]);
- // Data from the resampled output, in case the reference and output rates
- // don't match.
- rtc::scoped_ptr<float[]> cmp_data(new float[ref_length]);
+ PushResampler<float> resampler;
+ resampler.InitializeIfNeeded(out_rate, ref_rate, out_num);
- PushResampler<float> resampler;
- resampler.InitializeIfNeeded(output_rate_, ref_rate, cf[i].num_output);
+ // Compute the resampling delay of the output relative to the reference,
+ // to find the region over which we should search for the best SNR.
+ float expected_delay_sec = 0;
+ if (in_rate != ref_rate) {
+ // Input resampling delay.
+ expected_delay_sec +=
+ PushSincResampler::AlgorithmicDelaySeconds(in_rate);
+ }
+ if (out_rate != ref_rate) {
+ // Output resampling delay.
+ expected_delay_sec +=
+ PushSincResampler::AlgorithmicDelaySeconds(ref_rate);
+ // Delay of converting the output back to its processing rate for
+ // testing.
+ expected_delay_sec +=
+ PushSincResampler::AlgorithmicDelaySeconds(out_rate);
+ }
+ int expected_delay =
+ floor(expected_delay_sec * ref_rate + 0.5f) * out_num;
- // Compute the resampling delay of the output relative to the reference,
- // to find the region over which we should search for the best SNR.
- float expected_delay_sec = 0;
- if (input_rate_ != ref_rate) {
- // Input resampling delay.
- expected_delay_sec +=
- PushSincResampler::AlgorithmicDelaySeconds(input_rate_);
- }
- if (output_rate_ != ref_rate) {
- // Output resampling delay.
- expected_delay_sec +=
- PushSincResampler::AlgorithmicDelaySeconds(ref_rate);
- // Delay of converting the output back to its processing rate for testing.
- expected_delay_sec +=
- PushSincResampler::AlgorithmicDelaySeconds(output_rate_);
- }
- int expected_delay = floor(expected_delay_sec * ref_rate + 0.5f) *
- cf[i].num_output;
+ double variance = 0;
+ double sq_error = 0;
+ while (fread(out_data.get(), sizeof(out_data[0]), out_length, out_file) &&
+ fread(ref_data.get(), sizeof(ref_data[0]), ref_length, ref_file)) {
+ float* out_ptr = out_data.get();
+ if (out_rate != ref_rate) {
+ // Resample the output back to its internal processing rate if
+          // necessary.
+ ASSERT_EQ(ref_length, resampler.Resample(out_ptr, out_length,
+ cmp_data.get(), ref_length));
+ out_ptr = cmp_data.get();
+ }
- double variance = 0;
- double sq_error = 0;
- while (fread(out_data.get(), sizeof(out_data[0]), out_length, out_file) &&
- fread(ref_data.get(), sizeof(ref_data[0]), ref_length, ref_file)) {
- float* out_ptr = out_data.get();
- if (output_rate_ != ref_rate) {
- // Resample the output back to its internal processing rate if necssary.
- ASSERT_EQ(ref_length, resampler.Resample(out_ptr,
- out_length,
- cmp_data.get(),
- ref_length));
- out_ptr = cmp_data.get();
+ // Update the |sq_error| and |variance| accumulators with the highest
+ // SNR of reference vs output.
+ UpdateBestSNR(ref_data.get(), out_ptr, ref_length, expected_delay,
+ &variance, &sq_error);
}
- // Update the |sq_error| and |variance| accumulators with the highest SNR
- // of reference vs output.
- UpdateBestSNR(ref_data.get(),
- out_ptr,
- ref_length,
- expected_delay,
- &variance,
- &sq_error);
- }
+ std::cout << "(" << input_rate_ << ", " << output_rate_ << ", "
+ << reverse_input_rate_ << ", " << reverse_output_rate_ << ", "
+ << cf[i].num_input << ", " << cf[i].num_output << ", "
+ << cf[i].num_reverse_input << ", " << cf[i].num_reverse_output
+ << ", " << file_direction << "): ";
+ if (sq_error > 0) {
+ double snr = 10 * log10(variance / sq_error);
+ EXPECT_GE(snr, expected_snr);
+ EXPECT_NE(0, expected_snr);
+ std::cout << "SNR=" << snr << " dB" << std::endl;
+ } else {
+ EXPECT_EQ(expected_snr, 0);
+ std::cout << "SNR="
+ << "inf dB" << std::endl;
+ }
- std::cout << "(" << input_rate_ << ", "
- << output_rate_ << ", "
- << reverse_rate_ << ", "
- << cf[i].num_input << ", "
- << cf[i].num_output << ", "
- << cf[i].num_reverse << "): ";
- if (sq_error > 0) {
- double snr = 10 * log10(variance / sq_error);
- EXPECT_GE(snr, expected_snr_);
- EXPECT_NE(0, expected_snr_);
- std::cout << "SNR=" << snr << " dB" << std::endl;
- } else {
- EXPECT_EQ(expected_snr_, 0);
- std::cout << "SNR=" << "inf dB" << std::endl;
+ fclose(out_file);
+ fclose(ref_file);
}
-
- fclose(out_file);
- fclose(ref_file);
}
}
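
To make the verification arithmetic above concrete, here is one worked case.
The 16/r algorithmic-delay figure is an assumption about PushSincResampler
(half of a 32-tap sinc kernel), not something this CL states:

    // Worked example: in_rate = 32000, out_rate = 48000, out_num = 2.
    // min_ref_rate = min(32000, 48000) = 32000 > 16000, so ref_rate = 32000.
    // Only the output leg resamples (in_rate == ref_rate), giving
    //   expected_delay_sec ~= 16.0 / 32000 + 16.0 / 48000 ~= 0.000833 s
    //   expected_delay      = floor(0.000833 * 32000 + 0.5) * 2 = 54 samples.
    // The SNR gate itself is exact:
    //   snr = 10 * log10(variance / sq_error)
    // e.g. variance = 1.0, sq_error = 1e-4 -> snr = 40 dB, which passes a
    // 40 dB floor and fails anything stricter.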
#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
INSTANTIATE_TEST_CASE_P(
- CommonFormats, AudioProcessingTest, testing::Values(
- std::tr1::make_tuple(48000, 48000, 48000, 0),
- std::tr1::make_tuple(48000, 48000, 32000, 40),
- std::tr1::make_tuple(48000, 48000, 16000, 40),
- std::tr1::make_tuple(48000, 44100, 48000, 20),
- std::tr1::make_tuple(48000, 44100, 32000, 20),
- std::tr1::make_tuple(48000, 44100, 16000, 20),
- std::tr1::make_tuple(48000, 32000, 48000, 30),
- std::tr1::make_tuple(48000, 32000, 32000, 30),
- std::tr1::make_tuple(48000, 32000, 16000, 30),
- std::tr1::make_tuple(48000, 16000, 48000, 25),
- std::tr1::make_tuple(48000, 16000, 32000, 25),
- std::tr1::make_tuple(48000, 16000, 16000, 25),
+ CommonFormats,
+ AudioProcessingTest,
+ testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 0, 0),
+ std::tr1::make_tuple(48000, 48000, 32000, 48000, 40, 30),
+ std::tr1::make_tuple(48000, 48000, 16000, 48000, 40, 20),
+ std::tr1::make_tuple(48000, 44100, 48000, 44100, 20, 20),
+ std::tr1::make_tuple(48000, 44100, 32000, 44100, 20, 15),
+ std::tr1::make_tuple(48000, 44100, 16000, 44100, 20, 15),
+ std::tr1::make_tuple(48000, 32000, 48000, 32000, 30, 35),
+ std::tr1::make_tuple(48000, 32000, 32000, 32000, 30, 0),
+ std::tr1::make_tuple(48000, 32000, 16000, 32000, 30, 20),
+ std::tr1::make_tuple(48000, 16000, 48000, 16000, 25, 20),
+ std::tr1::make_tuple(48000, 16000, 32000, 16000, 25, 20),
+ std::tr1::make_tuple(48000, 16000, 16000, 16000, 25, 0),
- std::tr1::make_tuple(44100, 48000, 48000, 30),
- std::tr1::make_tuple(44100, 48000, 32000, 30),
- std::tr1::make_tuple(44100, 48000, 16000, 30),
- std::tr1::make_tuple(44100, 44100, 48000, 20),
- std::tr1::make_tuple(44100, 44100, 32000, 20),
- std::tr1::make_tuple(44100, 44100, 16000, 20),
- std::tr1::make_tuple(44100, 32000, 48000, 30),
- std::tr1::make_tuple(44100, 32000, 32000, 30),
- std::tr1::make_tuple(44100, 32000, 16000, 30),
- std::tr1::make_tuple(44100, 16000, 48000, 25),
- std::tr1::make_tuple(44100, 16000, 32000, 25),
- std::tr1::make_tuple(44100, 16000, 16000, 25),
+ std::tr1::make_tuple(44100, 48000, 48000, 48000, 30, 0),
+ std::tr1::make_tuple(44100, 48000, 32000, 48000, 30, 30),
+ std::tr1::make_tuple(44100, 48000, 16000, 48000, 30, 20),
+ std::tr1::make_tuple(44100, 44100, 48000, 44100, 20, 20),
+ std::tr1::make_tuple(44100, 44100, 32000, 44100, 20, 15),
+ std::tr1::make_tuple(44100, 44100, 16000, 44100, 20, 15),
+ std::tr1::make_tuple(44100, 32000, 48000, 32000, 30, 35),
+ std::tr1::make_tuple(44100, 32000, 32000, 32000, 30, 0),
+ std::tr1::make_tuple(44100, 32000, 16000, 32000, 30, 20),
+ std::tr1::make_tuple(44100, 16000, 48000, 16000, 25, 20),
+ std::tr1::make_tuple(44100, 16000, 32000, 16000, 25, 20),
+ std::tr1::make_tuple(44100, 16000, 16000, 16000, 25, 0),
- std::tr1::make_tuple(32000, 48000, 48000, 30),
- std::tr1::make_tuple(32000, 48000, 32000, 35),
- std::tr1::make_tuple(32000, 48000, 16000, 30),
- std::tr1::make_tuple(32000, 44100, 48000, 20),
- std::tr1::make_tuple(32000, 44100, 32000, 20),
- std::tr1::make_tuple(32000, 44100, 16000, 20),
- std::tr1::make_tuple(32000, 32000, 48000, 40),
- std::tr1::make_tuple(32000, 32000, 32000, 0),
- std::tr1::make_tuple(32000, 32000, 16000, 40),
- std::tr1::make_tuple(32000, 16000, 48000, 25),
- std::tr1::make_tuple(32000, 16000, 32000, 25),
- std::tr1::make_tuple(32000, 16000, 16000, 25),
+ std::tr1::make_tuple(32000, 48000, 48000, 48000, 30, 0),
+ std::tr1::make_tuple(32000, 48000, 32000, 48000, 35, 30),
+ std::tr1::make_tuple(32000, 48000, 16000, 48000, 30, 20),
+ std::tr1::make_tuple(32000, 44100, 48000, 44100, 20, 20),
+ std::tr1::make_tuple(32000, 44100, 32000, 44100, 20, 15),
+ std::tr1::make_tuple(32000, 44100, 16000, 44100, 20, 15),
+ std::tr1::make_tuple(32000, 32000, 48000, 32000, 40, 35),
+ std::tr1::make_tuple(32000, 32000, 32000, 32000, 0, 0),
+ std::tr1::make_tuple(32000, 32000, 16000, 32000, 40, 20),
+ std::tr1::make_tuple(32000, 16000, 48000, 16000, 25, 20),
+ std::tr1::make_tuple(32000, 16000, 32000, 16000, 25, 20),
+ std::tr1::make_tuple(32000, 16000, 16000, 16000, 25, 0),
- std::tr1::make_tuple(16000, 48000, 48000, 25),
- std::tr1::make_tuple(16000, 48000, 32000, 25),
- std::tr1::make_tuple(16000, 48000, 16000, 25),
- std::tr1::make_tuple(16000, 44100, 48000, 15),
- std::tr1::make_tuple(16000, 44100, 32000, 15),
- std::tr1::make_tuple(16000, 44100, 16000, 15),
- std::tr1::make_tuple(16000, 32000, 48000, 25),
- std::tr1::make_tuple(16000, 32000, 32000, 25),
- std::tr1::make_tuple(16000, 32000, 16000, 25),
- std::tr1::make_tuple(16000, 16000, 48000, 40),
- std::tr1::make_tuple(16000, 16000, 32000, 50),
- std::tr1::make_tuple(16000, 16000, 16000, 0)));
+ std::tr1::make_tuple(16000, 48000, 48000, 48000, 25, 0),
+ std::tr1::make_tuple(16000, 48000, 32000, 48000, 25, 30),
+ std::tr1::make_tuple(16000, 48000, 16000, 48000, 25, 20),
+ std::tr1::make_tuple(16000, 44100, 48000, 44100, 15, 20),
+ std::tr1::make_tuple(16000, 44100, 32000, 44100, 15, 15),
+ std::tr1::make_tuple(16000, 44100, 16000, 44100, 15, 15),
+ std::tr1::make_tuple(16000, 32000, 48000, 32000, 25, 35),
+ std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
+ std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
+ std::tr1::make_tuple(16000, 16000, 48000, 16000, 40, 20),
+ std::tr1::make_tuple(16000, 16000, 32000, 16000, 50, 20),
+ std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
INSTANTIATE_TEST_CASE_P(
- CommonFormats, AudioProcessingTest, testing::Values(
- std::tr1::make_tuple(48000, 48000, 48000, 20),
- std::tr1::make_tuple(48000, 48000, 32000, 20),
- std::tr1::make_tuple(48000, 48000, 16000, 20),
- std::tr1::make_tuple(48000, 44100, 48000, 15),
- std::tr1::make_tuple(48000, 44100, 32000, 15),
- std::tr1::make_tuple(48000, 44100, 16000, 15),
- std::tr1::make_tuple(48000, 32000, 48000, 20),
- std::tr1::make_tuple(48000, 32000, 32000, 20),
- std::tr1::make_tuple(48000, 32000, 16000, 20),
- std::tr1::make_tuple(48000, 16000, 48000, 20),
- std::tr1::make_tuple(48000, 16000, 32000, 20),
- std::tr1::make_tuple(48000, 16000, 16000, 20),
+ CommonFormats,
+ AudioProcessingTest,
+ testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 20, 0),
+ std::tr1::make_tuple(48000, 48000, 32000, 48000, 20, 30),
+ std::tr1::make_tuple(48000, 48000, 16000, 48000, 20, 20),
+ std::tr1::make_tuple(48000, 44100, 48000, 44100, 15, 20),
+ std::tr1::make_tuple(48000, 44100, 32000, 44100, 15, 15),
+ std::tr1::make_tuple(48000, 44100, 16000, 44100, 15, 15),
+ std::tr1::make_tuple(48000, 32000, 48000, 32000, 20, 35),
+ std::tr1::make_tuple(48000, 32000, 32000, 32000, 20, 0),
+ std::tr1::make_tuple(48000, 32000, 16000, 32000, 20, 20),
+ std::tr1::make_tuple(48000, 16000, 48000, 16000, 20, 20),
+ std::tr1::make_tuple(48000, 16000, 32000, 16000, 20, 20),
+ std::tr1::make_tuple(48000, 16000, 16000, 16000, 20, 0),
- std::tr1::make_tuple(44100, 48000, 48000, 20),
- std::tr1::make_tuple(44100, 48000, 32000, 20),
- std::tr1::make_tuple(44100, 48000, 16000, 20),
- std::tr1::make_tuple(44100, 44100, 48000, 15),
- std::tr1::make_tuple(44100, 44100, 32000, 15),
- std::tr1::make_tuple(44100, 44100, 16000, 15),
- std::tr1::make_tuple(44100, 32000, 48000, 20),
- std::tr1::make_tuple(44100, 32000, 32000, 20),
- std::tr1::make_tuple(44100, 32000, 16000, 20),
- std::tr1::make_tuple(44100, 16000, 48000, 20),
- std::tr1::make_tuple(44100, 16000, 32000, 20),
- std::tr1::make_tuple(44100, 16000, 16000, 20),
+ std::tr1::make_tuple(44100, 48000, 48000, 48000, 20, 0),
+ std::tr1::make_tuple(44100, 48000, 32000, 48000, 20, 30),
+ std::tr1::make_tuple(44100, 48000, 16000, 48000, 20, 20),
+ std::tr1::make_tuple(44100, 44100, 48000, 44100, 15, 20),
+ std::tr1::make_tuple(44100, 44100, 32000, 44100, 15, 15),
+ std::tr1::make_tuple(44100, 44100, 16000, 44100, 15, 15),
+ std::tr1::make_tuple(44100, 32000, 48000, 32000, 20, 35),
+ std::tr1::make_tuple(44100, 32000, 32000, 32000, 20, 0),
+ std::tr1::make_tuple(44100, 32000, 16000, 32000, 20, 20),
+ std::tr1::make_tuple(44100, 16000, 48000, 16000, 20, 20),
+ std::tr1::make_tuple(44100, 16000, 32000, 16000, 20, 20),
+ std::tr1::make_tuple(44100, 16000, 16000, 16000, 20, 0),
- std::tr1::make_tuple(32000, 48000, 48000, 20),
- std::tr1::make_tuple(32000, 48000, 32000, 20),
- std::tr1::make_tuple(32000, 48000, 16000, 20),
- std::tr1::make_tuple(32000, 44100, 48000, 15),
- std::tr1::make_tuple(32000, 44100, 32000, 15),
- std::tr1::make_tuple(32000, 44100, 16000, 15),
- std::tr1::make_tuple(32000, 32000, 48000, 20),
- std::tr1::make_tuple(32000, 32000, 32000, 20),
- std::tr1::make_tuple(32000, 32000, 16000, 20),
- std::tr1::make_tuple(32000, 16000, 48000, 20),
- std::tr1::make_tuple(32000, 16000, 32000, 20),
- std::tr1::make_tuple(32000, 16000, 16000, 20),
+ std::tr1::make_tuple(32000, 48000, 48000, 48000, 20, 0),
+ std::tr1::make_tuple(32000, 48000, 32000, 48000, 20, 30),
+ std::tr1::make_tuple(32000, 48000, 16000, 48000, 20, 20),
+ std::tr1::make_tuple(32000, 44100, 48000, 44100, 15, 20),
+ std::tr1::make_tuple(32000, 44100, 32000, 44100, 15, 15),
+ std::tr1::make_tuple(32000, 44100, 16000, 44100, 15, 15),
+ std::tr1::make_tuple(32000, 32000, 48000, 32000, 20, 35),
+ std::tr1::make_tuple(32000, 32000, 32000, 32000, 20, 0),
+ std::tr1::make_tuple(32000, 32000, 16000, 32000, 20, 20),
+ std::tr1::make_tuple(32000, 16000, 48000, 16000, 20, 20),
+ std::tr1::make_tuple(32000, 16000, 32000, 16000, 20, 20),
+ std::tr1::make_tuple(32000, 16000, 16000, 16000, 20, 0),
- std::tr1::make_tuple(16000, 48000, 48000, 25),
- std::tr1::make_tuple(16000, 48000, 32000, 25),
- std::tr1::make_tuple(16000, 48000, 16000, 25),
- std::tr1::make_tuple(16000, 44100, 48000, 15),
- std::tr1::make_tuple(16000, 44100, 32000, 15),
- std::tr1::make_tuple(16000, 44100, 16000, 15),
- std::tr1::make_tuple(16000, 32000, 48000, 25),
- std::tr1::make_tuple(16000, 32000, 32000, 25),
- std::tr1::make_tuple(16000, 32000, 16000, 25),
- std::tr1::make_tuple(16000, 16000, 48000, 35),
- std::tr1::make_tuple(16000, 16000, 32000, 40),
- std::tr1::make_tuple(16000, 16000, 16000, 0)));
+ std::tr1::make_tuple(16000, 48000, 48000, 48000, 25, 0),
+ std::tr1::make_tuple(16000, 48000, 32000, 48000, 25, 30),
+ std::tr1::make_tuple(16000, 48000, 16000, 48000, 25, 20),
+ std::tr1::make_tuple(16000, 44100, 48000, 44100, 15, 20),
+ std::tr1::make_tuple(16000, 44100, 32000, 44100, 15, 15),
+ std::tr1::make_tuple(16000, 44100, 16000, 44100, 15, 15),
+ std::tr1::make_tuple(16000, 32000, 48000, 32000, 25, 35),
+ std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
+ std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
+ std::tr1::make_tuple(16000, 16000, 48000, 16000, 35, 20),
+ std::tr1::make_tuple(16000, 16000, 32000, 16000, 40, 20),
+ std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
#endif
// TODO(henrike): re-implement functionality lost when removing the old main
diff --git a/webrtc/modules/audio_processing/test/audioproc_float.cc b/webrtc/modules/audio_processing/test/audioproc_float.cc
index d2983b2..f22c41e 100644
--- a/webrtc/modules/audio_processing/test/audioproc_float.cc
+++ b/webrtc/modules/audio_processing/test/audioproc_float.cc
@@ -25,7 +25,11 @@
DEFINE_string(dump, "", "The name of the debug dump file to read from.");
DEFINE_string(i, "", "The name of the input file to read from.");
+DEFINE_string(i_rev, "", "The name of the reverse input file to read from.");
DEFINE_string(o, "out.wav", "Name of the output file to write to.");
+DEFINE_string(o_rev,
+ "out_rev.wav",
+ "Name of the reverse output file to write to.");
DEFINE_int32(out_channels, 0, "Number of output channels. Defaults to input.");
DEFINE_int32(out_sample_rate, 0,
"Output sample rate in Hz. Defaults to input.");
@@ -40,6 +44,7 @@
DEFINE_bool(ns, false, "Enable noise suppression.");
DEFINE_bool(ts, false, "Enable transient suppression.");
DEFINE_bool(bf, false, "Enable beamforming.");
+DEFINE_bool(ie, false, "Enable intelligibility enhancer.");
DEFINE_bool(all, false, "Enable all components.");
DEFINE_int32(ns_level, -1, "Noise suppression level [0 - 3].");
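
Putting the new flags together, a reverse-stream run of the tool looks
roughly like this (file names are illustrative; the binary name matches the
audioproc_float target this file builds into):

    # Forward (capture) processing plus reverse (render) processing, with
    # the intelligibility enhancer enabled.
    audioproc_float -i near.wav -o out.wav \
                    -i_rev far.wav -o_rev out_rev.wav -ie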
@@ -85,6 +90,7 @@
Config config;
config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all));
+ config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all));
if (FLAGS_bf || FLAGS_all) {
const size_t num_mics = in_file.num_channels();
@@ -102,6 +108,7 @@
fprintf(stderr, "-aec requires a -dump file.\n");
return -1;
}
+ bool process_reverse = !FLAGS_i_rev.empty();
CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all));
CHECK_EQ(kNoErr, ap->gain_control()->set_mode(GainControl::kFixedDigital));
CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all));
@@ -124,6 +131,33 @@
std::vector<float> in_interleaved(in_buf.size());
std::vector<float> out_interleaved(out_buf.size());
+
+ rtc::scoped_ptr<WavReader> in_rev_file;
+ rtc::scoped_ptr<WavWriter> out_rev_file;
+ rtc::scoped_ptr<ChannelBuffer<float>> in_rev_buf;
+ rtc::scoped_ptr<ChannelBuffer<float>> out_rev_buf;
+ std::vector<float> in_rev_interleaved;
+ std::vector<float> out_rev_interleaved;
+ if (process_reverse) {
+ in_rev_file.reset(new WavReader(FLAGS_i_rev));
+ out_rev_file.reset(new WavWriter(FLAGS_o_rev, in_rev_file->sample_rate(),
+ in_rev_file->num_channels()));
+ printf("In rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n",
+ FLAGS_i_rev.c_str(), in_rev_file->num_channels(),
+ in_rev_file->sample_rate());
+ printf("Out rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n",
+ FLAGS_o_rev.c_str(), out_rev_file->num_channels(),
+ out_rev_file->sample_rate());
+ in_rev_buf.reset(new ChannelBuffer<float>(
+ rtc::CheckedDivExact(in_rev_file->sample_rate(), kChunksPerSecond),
+ in_rev_file->num_channels()));
+ in_rev_interleaved.resize(in_rev_buf->size());
+ out_rev_buf.reset(new ChannelBuffer<float>(
+ rtc::CheckedDivExact(out_rev_file->sample_rate(), kChunksPerSecond),
+ out_rev_file->num_channels()));
+ out_rev_interleaved.resize(out_rev_buf->size());
+ }
+
TickTime processing_start_time;
TickInterval accumulated_time;
int num_chunks = 0;
@@ -134,6 +168,12 @@
const StreamConfig output_config = {
out_file.sample_rate(), out_buf.num_channels(),
};
+  // Only dereference the reverse files when they were opened above;
+  // |process_reverse| is false when no -i_rev file is given.
+  const StreamConfig reverse_input_config = {
+      process_reverse ? in_rev_file->sample_rate() : 0,
+      process_reverse ? in_rev_file->num_channels() : 0,
+  };
+  const StreamConfig reverse_output_config = {
+      process_reverse ? out_rev_file->sample_rate() : 0,
+      process_reverse ? out_rev_file->num_channels() : 0,
+  };
while (in_file.ReadSamples(in_interleaved.size(),
&in_interleaved[0]) == in_interleaved.size()) {
// Have logs display the file time rather than wallclock time.
@@ -142,12 +182,25 @@
&in_interleaved[0]);
Deinterleave(&in_interleaved[0], in_buf.num_frames(),
in_buf.num_channels(), in_buf.channels());
+ if (process_reverse) {
+ in_rev_file->ReadSamples(in_rev_interleaved.size(),
+ in_rev_interleaved.data());
+ FloatS16ToFloat(in_rev_interleaved.data(), in_rev_interleaved.size(),
+ in_rev_interleaved.data());
+ Deinterleave(in_rev_interleaved.data(), in_rev_buf->num_frames(),
+ in_rev_buf->num_channels(), in_rev_buf->channels());
+ }
if (FLAGS_perf) {
processing_start_time = TickTime::Now();
}
CHECK_EQ(kNoErr, ap->ProcessStream(in_buf.channels(), input_config,
output_config, out_buf.channels()));
+ if (process_reverse) {
+ CHECK_EQ(kNoErr, ap->ProcessReverseStream(
+ in_rev_buf->channels(), reverse_input_config,
+ reverse_output_config, out_rev_buf->channels()));
+ }
if (FLAGS_perf) {
accumulated_time += TickTime::Now() - processing_start_time;
}
@@ -157,6 +210,14 @@
FloatToFloatS16(&out_interleaved[0], out_interleaved.size(),
&out_interleaved[0]);
out_file.WriteSamples(&out_interleaved[0], out_interleaved.size());
+ if (process_reverse) {
+ Interleave(out_rev_buf->channels(), out_rev_buf->num_frames(),
+ out_rev_buf->num_channels(), out_rev_interleaved.data());
+ FloatToFloatS16(out_rev_interleaved.data(), out_rev_interleaved.size(),
+ out_rev_interleaved.data());
+ out_rev_file->WriteSamples(out_rev_interleaved.data(),
+ out_rev_interleaved.size());
+ }
num_chunks++;
}
if (FLAGS_perf) {