| /* |
| * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "modules/audio_processing/test/conversational_speech/simulator.h" |
| |
| #include <math.h> |
| |
| #include <algorithm> |
| #include <memory> |
| #include <set> |
| #include <utility> |
| #include <vector> |
| |
| #include "api/array_view.h" |
| #include "common_audio/include/audio_util.h" |
| #include "common_audio/wav_file.h" |
| #include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" |
| #include "rtc_base/logging.h" |
| #include "rtc_base/numerics/safe_conversions.h" |
| #include "test/testsupport/file_utils.h" |
| |
| namespace webrtc { |
| namespace test { |
| namespace { |
| |
| using conversational_speech::MultiEndCall; |
| using conversational_speech::SpeakerOutputFilePaths; |
| using conversational_speech::WavReaderInterface; |
| |
| // Combines output path and speaker names to define the output file paths for |
| // the near-end and far=end audio tracks. |
| std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> |
| InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names, |
| const std::string& output_path) { |
| // Create map. |
| auto speaker_output_file_paths_map = |
| std::make_unique<std::map<std::string, SpeakerOutputFilePaths>>(); |
| |
| // Add near-end and far-end output paths into the map. |
| for (const auto& speaker_name : speaker_names) { |
| const std::string near_end_path = |
| test::JoinFilename(output_path, "s_" + speaker_name + "-near_end.wav"); |
| RTC_LOG(LS_VERBOSE) << "The near-end audio track will be created in " |
| << near_end_path << "."; |
| |
| const std::string far_end_path = |
| test::JoinFilename(output_path, "s_" + speaker_name + "-far_end.wav"); |
| RTC_LOG(LS_VERBOSE) << "The far-end audio track will be created in " |
| << far_end_path << "."; |
| |
| // Add to map. |
| speaker_output_file_paths_map->emplace( |
| std::piecewise_construct, std::forward_as_tuple(speaker_name), |
| std::forward_as_tuple(near_end_path, far_end_path)); |
| } |
| |
| return speaker_output_file_paths_map; |
| } |
| |
| // Class that provides one WavWriter for the near-end and one for the far-end |
| // output track of a speaker. |
| class SpeakerWavWriters { |
| public: |
| SpeakerWavWriters(const SpeakerOutputFilePaths& output_file_paths, |
| int sample_rate) |
| : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u), |
| far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {} |
| WavWriter* near_end_wav_writer() { return &near_end_wav_writer_; } |
| WavWriter* far_end_wav_writer() { return &far_end_wav_writer_; } |
| |
| private: |
| WavWriter near_end_wav_writer_; |
| WavWriter far_end_wav_writer_; |
| }; |
| |
| // Initializes one WavWriter instance for each speaker and both the near-end and |
| // far-end output tracks. |
| std::unique_ptr<std::map<std::string, SpeakerWavWriters>> |
| InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>& |
| speaker_output_file_paths, |
| int sample_rate) { |
| // Create map. |
| auto speaker_wav_writers_map = |
| std::make_unique<std::map<std::string, SpeakerWavWriters>>(); |
| |
| // Add SpeakerWavWriters instance into the map. |
| for (auto it = speaker_output_file_paths.begin(); |
| it != speaker_output_file_paths.end(); ++it) { |
| speaker_wav_writers_map->emplace( |
| std::piecewise_construct, std::forward_as_tuple(it->first), |
| std::forward_as_tuple(it->second, sample_rate)); |
| } |
| |
| return speaker_wav_writers_map; |
| } |
| |
| // Reads all the samples for each audio track. |
| std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks( |
| const std::map<std::string, std::unique_ptr<WavReaderInterface>>& |
| audiotrack_readers) { |
| // Create map. |
| auto audiotracks_map = |
| std::make_unique<std::map<std::string, std::vector<int16_t>>>(); |
| |
| // Add audio track vectors. |
| for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end(); |
| ++it) { |
| // Add map entry. |
| audiotracks_map->emplace(std::piecewise_construct, |
| std::forward_as_tuple(it->first), |
| std::forward_as_tuple(it->second->NumSamples())); |
| |
| // Read samples. |
| it->second->ReadInt16Samples(audiotracks_map->at(it->first)); |
| } |
| |
| return audiotracks_map; |
| } |
| |
| // Writes all the values in `source_samples` via `wav_writer`. If the number of |
| // previously written samples in `wav_writer` is less than `interval_begin`, it |
| // adds zeros as left padding. The padding corresponds to intervals during which |
| // a speaker is not active. |
| void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples, |
| size_t interval_begin, |
| WavWriter* wav_writer) { |
| // Add left padding. |
| RTC_CHECK(wav_writer); |
| RTC_CHECK_GE(interval_begin, wav_writer->num_samples()); |
| size_t padding_size = interval_begin - wav_writer->num_samples(); |
| if (padding_size != 0) { |
| const std::vector<int16_t> padding(padding_size, 0); |
| wav_writer->WriteSamples(padding.data(), padding_size); |
| } |
| |
| // Write source samples. |
| wav_writer->WriteSamples(source_samples.data(), source_samples.size()); |
| } |
| |
| // Appends zeros via `wav_writer`. The number of zeros is always non-negative |
| // and equal to the difference between the previously written samples and |
| // `pad_samples`. |
| void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) { |
| RTC_CHECK(wav_writer); |
| RTC_CHECK_GE(pad_samples, wav_writer->num_samples()); |
| size_t padding_size = pad_samples - wav_writer->num_samples(); |
| if (padding_size != 0) { |
| const std::vector<int16_t> padding(padding_size, 0); |
| wav_writer->WriteSamples(padding.data(), padding_size); |
| } |
| } |
| |
| void ScaleSignal(rtc::ArrayView<const int16_t> source_samples, |
| int gain, |
| rtc::ArrayView<int16_t> output_samples) { |
| const float gain_linear = DbToRatio(gain); |
| RTC_DCHECK_EQ(source_samples.size(), output_samples.size()); |
| std::transform(source_samples.begin(), source_samples.end(), |
| output_samples.begin(), [gain_linear](int16_t x) -> int16_t { |
| return rtc::saturated_cast<int16_t>(x * gain_linear); |
| }); |
| } |
| |
| } // namespace |
| |
| namespace conversational_speech { |
| |
| std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate( |
| const MultiEndCall& multiend_call, |
| const std::string& output_path) { |
| // Set output file paths and initialize wav writers. |
| const auto& speaker_names = multiend_call.speaker_names(); |
| auto speaker_output_file_paths = |
| InitSpeakerOutputFilePaths(speaker_names, output_path); |
| auto speakers_wav_writers = InitSpeakersWavWriters( |
| *speaker_output_file_paths, multiend_call.sample_rate()); |
| |
| // Preload all the input audio tracks. |
| const auto& audiotrack_readers = multiend_call.audiotrack_readers(); |
| auto audiotracks = PreloadAudioTracks(audiotrack_readers); |
| |
| // TODO(alessiob): When speaker_names.size() == 2, near-end and far-end |
| // across the 2 speakers are symmetric; hence, the code below could be |
| // replaced by only creating the near-end or the far-end. However, this would |
| // require to split the unit tests and document the behavior in README.md. |
| // In practice, it should not be an issue since the files are not expected to |
| // be signinificant. |
| |
| // Write near-end and far-end output tracks. |
| for (const auto& speaking_turn : multiend_call.speaking_turns()) { |
| const std::string& active_speaker_name = speaking_turn.speaker_name; |
| const auto source_audiotrack = |
| audiotracks->at(speaking_turn.audiotrack_file_name); |
| std::vector<int16_t> scaled_audiotrack(source_audiotrack.size()); |
| ScaleSignal(source_audiotrack, speaking_turn.gain, scaled_audiotrack); |
| |
| // Write active speaker's chunk to active speaker's near-end. |
| PadLeftWriteChunk( |
| scaled_audiotrack, speaking_turn.begin, |
| speakers_wav_writers->at(active_speaker_name).near_end_wav_writer()); |
| |
| // Write active speaker's chunk to other participants' far-ends. |
| for (const std::string& speaker_name : speaker_names) { |
| if (speaker_name == active_speaker_name) |
| continue; |
| PadLeftWriteChunk( |
| scaled_audiotrack, speaking_turn.begin, |
| speakers_wav_writers->at(speaker_name).far_end_wav_writer()); |
| } |
| } |
| |
| // Finalize all the output tracks with right padding. |
| // This is required to make all the output tracks duration equal. |
| size_t duration_samples = multiend_call.total_duration_samples(); |
| for (const std::string& speaker_name : speaker_names) { |
| PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(), |
| duration_samples); |
| PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(), |
| duration_samples); |
| } |
| |
| return speaker_output_file_paths; |
| } |
| |
| } // namespace conversational_speech |
| } // namespace test |
| } // namespace webrtc |