|  | /* | 
|  | *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | 
|  | * | 
|  | *  Use of this source code is governed by a BSD-style license | 
|  | *  that can be found in the LICENSE file in the root of the source | 
|  | *  tree. An additional intellectual property rights grant can be found | 
|  | *  in the file PATENTS.  All contributing project authors may | 
|  | *  be found in the AUTHORS file in the root of the source tree. | 
|  | */ | 
|  |  | 
|  | #include "webrtc/modules/audio_processing/transient/transient_suppressor.h" | 
|  |  | 
|  | #include <stdlib.h> | 
|  | #include <stdio.h> | 
|  | #include <string> | 
|  |  | 
|  | #include "gflags/gflags.h" | 
|  | #include "testing/gtest/include/gtest/gtest.h" | 
|  | #include "webrtc/base/scoped_ptr.h" | 
|  | #include "webrtc/common_audio/include/audio_util.h" | 
|  | #include "webrtc/modules/audio_processing/agc/agc.h" | 
|  | #include "webrtc/modules/interface/module_common_types.h" | 
|  | #include "webrtc/test/testsupport/fileutils.h" | 
|  | #include "webrtc/typedefs.h" | 
|  |  | 
|  | DEFINE_string(in_file_name, "", "PCM file that contains the signal."); | 
|  | DEFINE_string(detection_file_name, | 
|  | "", | 
|  | "PCM file that contains the detection signal."); | 
|  | DEFINE_string(reference_file_name, | 
|  | "", | 
|  | "PCM file that contains the reference signal."); | 
|  |  | 
// Flag validator: accepts |value| only when it is strictly greater than zero.
// On rejection a message naming the offending flag (|flagname|) is printed to
// stdout. Returns true iff the value is valid.
static bool ValidatePositiveInt(const char* flagname, int32_t value) {
  const bool is_positive = value > 0;
  if (!is_positive)
    printf("%s must be a positive integer.\n", flagname);
  return is_positive;
}
|  | DEFINE_int32(chunk_size_ms, | 
|  | 10, | 
|  | "Time between each chunk of samples in milliseconds."); | 
|  | static const bool chunk_size_ms_dummy = | 
|  | google::RegisterFlagValidator(&FLAGS_chunk_size_ms, &ValidatePositiveInt); | 
|  |  | 
|  | DEFINE_int32(sample_rate_hz, | 
|  | 16000, | 
|  | "Sampling frequency of the signal in Hertz."); | 
|  | static const bool sample_rate_hz_dummy = | 
|  | google::RegisterFlagValidator(&FLAGS_sample_rate_hz, &ValidatePositiveInt); | 
|  | DEFINE_int32(detection_rate_hz, | 
|  | 0, | 
|  | "Sampling frequency of the detection signal in Hertz."); | 
|  |  | 
|  | DEFINE_int32(num_channels, 1, "Number of channels."); | 
|  | static const bool num_channels_dummy = | 
|  | google::RegisterFlagValidator(&FLAGS_num_channels, &ValidatePositiveInt); | 
|  |  | 
|  | namespace webrtc { | 
|  |  | 
// Usage message shown by gflags. It only mentions flags that this tool
// actually defines and names the file the result is written to.
const char kUsage[] =
    "\nDetects and suppresses transients from file.\n\n"
    "This application loads the signal from the in_file_name with a specific\n"
    "num_channels and sample_rate_hz, the detection signal from the\n"
    "detection_file_name with a specific detection_rate_hz (sample_rate_hz\n"
    "is used when detection_rate_hz is 0), and the reference signal from the\n"
    "reference_file_name with sample_rate_hz. The detection and reference\n"
    "files are optional. The signals are divided into chunk_size_ms blocks,\n"
    "the voice probability of each block is computed and passed to the\n"
    "transient suppressor together with the block. The suppressed signal is\n"
    "written to the file \"suppressed_keystrokes.pcm\" in the test output\n"
    "path.\n\n";
|  |  | 
|  | // Read next buffers from the test files (signed 16-bit host-endian PCM | 
|  | // format). audio_buffer has int16 samples, detection_buffer has float samples | 
|  | // with range [-32768,32767], and reference_buffer has float samples with range | 
|  | // [-1,1]. Return true iff all the buffers were filled completely. | 
|  | bool ReadBuffers(FILE* in_file, | 
|  | size_t audio_buffer_size, | 
|  | int num_channels, | 
|  | int16_t* audio_buffer, | 
|  | FILE* detection_file, | 
|  | size_t detection_buffer_size, | 
|  | float* detection_buffer, | 
|  | FILE* reference_file, | 
|  | float* reference_buffer) { | 
|  | rtc::scoped_ptr<int16_t[]> tmpbuf; | 
|  | int16_t* read_ptr = audio_buffer; | 
|  | if (num_channels > 1) { | 
|  | tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]); | 
|  | read_ptr = tmpbuf.get(); | 
|  | } | 
|  | if (fread(read_ptr, | 
|  | sizeof(*read_ptr), | 
|  | num_channels * audio_buffer_size, | 
|  | in_file) != num_channels * audio_buffer_size) { | 
|  | return false; | 
|  | } | 
|  | // De-interleave. | 
|  | if (num_channels > 1) { | 
|  | for (int i = 0; i < num_channels; ++i) { | 
|  | for (size_t j = 0; j < audio_buffer_size; ++j) { | 
|  | audio_buffer[i * audio_buffer_size + j] = | 
|  | read_ptr[i + j * num_channels]; | 
|  | } | 
|  | } | 
|  | } | 
|  | if (detection_file) { | 
|  | rtc::scoped_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]); | 
|  | if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size, | 
|  | detection_file) != detection_buffer_size) | 
|  | return false; | 
|  | for (size_t i = 0; i < detection_buffer_size; ++i) | 
|  | detection_buffer[i] = ibuf[i]; | 
|  | } | 
|  | if (reference_file) { | 
|  | rtc::scoped_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]); | 
|  | if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file) | 
|  | != audio_buffer_size) | 
|  | return false; | 
|  | S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer); | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | // Write a number of samples to an open signed 16-bit host-endian PCM file. | 
|  | static void WritePCM(FILE* f, | 
|  | size_t num_samples, | 
|  | int num_channels, | 
|  | const float* buffer) { | 
|  | rtc::scoped_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]); | 
|  | // Interleave. | 
|  | for (int i = 0; i < num_channels; ++i) { | 
|  | for (size_t j = 0; j < num_samples; ++j) { | 
|  | ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]); | 
|  | } | 
|  | } | 
|  | fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f); | 
|  | } | 
|  |  | 
|  | // This application tests the transient suppression by providing a processed | 
|  | // PCM file, which has to be listened to in order to evaluate the | 
|  | // performance. | 
|  | // It gets an audio file, and its voice gain information, and the suppressor | 
|  | // process it giving the output file "suppressed_keystrokes.pcm". | 
|  | void void_main() { | 
|  | // TODO(aluebs): Remove all FileWrappers. | 
|  | // Prepare the input file. | 
|  | FILE* in_file = fopen(FLAGS_in_file_name.c_str(), "rb"); | 
|  | ASSERT_TRUE(in_file != NULL); | 
|  |  | 
|  | // Prepare the detection file. | 
|  | FILE* detection_file = NULL; | 
|  | if (FLAGS_detection_file_name != "") { | 
|  | detection_file = fopen(FLAGS_detection_file_name.c_str(), "rb"); | 
|  | } | 
|  |  | 
|  | // Prepare the reference file. | 
|  | FILE* reference_file = NULL; | 
|  | if (FLAGS_reference_file_name != "") { | 
|  | reference_file = fopen(FLAGS_reference_file_name.c_str(), "rb"); | 
|  | } | 
|  |  | 
|  | // Prepare the output file. | 
|  | std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm"; | 
|  | FILE* out_file = fopen(out_file_name.c_str(), "wb"); | 
|  | ASSERT_TRUE(out_file != NULL); | 
|  |  | 
|  | int detection_rate_hz = FLAGS_detection_rate_hz; | 
|  | if (detection_rate_hz == 0) { | 
|  | detection_rate_hz = FLAGS_sample_rate_hz; | 
|  | } | 
|  |  | 
|  | Agc agc; | 
|  |  | 
|  | TransientSuppressor suppressor; | 
|  | suppressor.Initialize( | 
|  | FLAGS_sample_rate_hz, detection_rate_hz, FLAGS_num_channels); | 
|  |  | 
|  | const size_t audio_buffer_size = | 
|  | FLAGS_chunk_size_ms * FLAGS_sample_rate_hz / 1000; | 
|  | const size_t detection_buffer_size = | 
|  | FLAGS_chunk_size_ms * detection_rate_hz / 1000; | 
|  |  | 
|  | // int16 and float variants of the same data. | 
|  | rtc::scoped_ptr<int16_t[]> audio_buffer_i( | 
|  | new int16_t[FLAGS_num_channels * audio_buffer_size]); | 
|  | rtc::scoped_ptr<float[]> audio_buffer_f( | 
|  | new float[FLAGS_num_channels * audio_buffer_size]); | 
|  |  | 
|  | rtc::scoped_ptr<float[]> detection_buffer, reference_buffer; | 
|  |  | 
|  | if (detection_file) | 
|  | detection_buffer.reset(new float[detection_buffer_size]); | 
|  | if (reference_file) | 
|  | reference_buffer.reset(new float[audio_buffer_size]); | 
|  |  | 
|  | while (ReadBuffers(in_file, | 
|  | audio_buffer_size, | 
|  | FLAGS_num_channels, | 
|  | audio_buffer_i.get(), | 
|  | detection_file, | 
|  | detection_buffer_size, | 
|  | detection_buffer.get(), | 
|  | reference_file, | 
|  | reference_buffer.get())) { | 
|  | ASSERT_EQ(0, | 
|  | agc.Process(audio_buffer_i.get(), | 
|  | static_cast<int>(audio_buffer_size), | 
|  | FLAGS_sample_rate_hz)) | 
|  | << "The AGC could not process the frame"; | 
|  |  | 
|  | for (size_t i = 0; i < FLAGS_num_channels * audio_buffer_size; ++i) { | 
|  | audio_buffer_f[i] = audio_buffer_i[i]; | 
|  | } | 
|  |  | 
|  | ASSERT_EQ(0, | 
|  | suppressor.Suppress(audio_buffer_f.get(), | 
|  | audio_buffer_size, | 
|  | FLAGS_num_channels, | 
|  | detection_buffer.get(), | 
|  | detection_buffer_size, | 
|  | reference_buffer.get(), | 
|  | audio_buffer_size, | 
|  | agc.voice_probability(), | 
|  | true)) | 
|  | << "The transient suppressor could not suppress the frame"; | 
|  |  | 
|  | // Write result to out file. | 
|  | WritePCM( | 
|  | out_file, audio_buffer_size, FLAGS_num_channels, audio_buffer_f.get()); | 
|  | } | 
|  |  | 
|  | fclose(in_file); | 
|  | if (detection_file) { | 
|  | fclose(detection_file); | 
|  | } | 
|  | if (reference_file) { | 
|  | fclose(reference_file); | 
|  | } | 
|  | fclose(out_file); | 
|  | } | 
|  |  | 
|  | }  // namespace webrtc | 
|  |  | 
|  | int main(int argc, char* argv[]) { | 
|  | google::SetUsageMessage(webrtc::kUsage); | 
|  | google::ParseCommandLineFlags(&argc, &argv, true); | 
|  | webrtc::void_main(); | 
|  | return 0; | 
|  | } |