APM-QA Test data generation: environmental noise looped.
SignalProcessingUtils.MixSignals() now supports different noise padding options.
This CL also adds more unit tests for SignalProcessingUtils.MixSignals().
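Minimal usage sketch (illustrative only; assumes |signal| and |noise| are pydub
AudioSegment instances and a hypothetical 10 dB target SNR):

  mix = signal_processing.SignalProcessingUtils.MixSignals(
      signal, noise, target_snr=10.0,
      pad_noise=signal_processing.SignalProcessingUtils.MixPadding.LOOP)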
Bug: webrtc:7494
Change-Id: Id62fe9998e512c275cb6399e0aedf11f23a9f36e
Reviewed-on: https://webrtc-review.googlesource.com/5780
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20122}
diff --git a/modules/audio_processing/test/py_quality_assessment/README.md b/modules/audio_processing/test/py_quality_assessment/README.md
index 64f4f0a..97dabd9 100644
--- a/modules/audio_processing/test/py_quality_assessment/README.md
+++ b/modules/audio_processing/test/py_quality_assessment/README.md
@@ -10,7 +10,7 @@
## Dependencies
- OS: Linux
- Python 2.7
- - Python libraries: numpy, scipy, pydub (0.17.0+), pandas (0.20.1+)
+ - Python libraries: enum34, numpy, scipy, pydub (0.17.0+), pandas (0.20.1+)
- It is recommended that a dedicated Python environment is used
- install `virtualenv`
- `$ sudo apt-get install python-virtualenv`
diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py
index 5beb3fb..5591a28 100644
--- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py
+++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py
@@ -10,6 +10,7 @@
"""
import array
+import enum
import logging
import os
import sys
@@ -29,6 +30,7 @@
try:
import scipy.signal
+ import scipy.fftpack
except ImportError:
logging.critical('Cannot import the third-party Python package scipy')
sys.exit(1)
@@ -40,6 +42,12 @@
"""Collection of signal processing utilities.
"""
+ @enum.unique
+ class MixPadding(enum.Enum):
+ NO_PADDING = 0
+ ZERO_PADDING = 1
+ LOOP = 2
+
def __init__(self):
pass
@@ -156,6 +164,14 @@
return np.array(signal.get_array_of_samples(), np.int16)
@classmethod
+  def Fft(cls, signal, normalize=True):
+    """Computes the one-sided FFT of |signal|, optionally normalizing the
+    samples before transforming."""
+    x = cls.AudioSegmentToRawData(signal).astype(np.float32)
+    if normalize:
+      x /= max(abs(np.max(x)), 1.0)
+    y = scipy.fftpack.fft(x)
+    return y[:len(y) // 2]
+
+ @classmethod
def DetectHardClipping(cls, signal, threshold=2):
"""Detects hard clipping.
@@ -272,18 +288,24 @@
})
@classmethod
- def MixSignals(cls, signal, noise, target_snr=0.0, bln_pad_shortest=False):
- """Mixes two signals with a target SNR.
+ def MixSignals(cls, signal, noise, target_snr=0.0,
+ pad_noise=MixPadding.NO_PADDING):
+ """Mixes |signal| and |noise| with a target SNR.
- Mix two signals with a desired SNR by scaling noise (noise).
+ Mix |signal| and |noise| with a desired SNR by scaling |noise|.
If the target SNR is +/- infinite, a copy of signal/noise is returned.
+ If |signal| is shorter than |noise|, the length of the mix equals that of
+ |signal|. Otherwise, the mix length depends on whether padding is applied.
+    When padding is not applied, that is, when |pad_noise| is set to NO_PADDING
+    (the default), the mix length equals that of |noise| - i.e., |signal| is
+    truncated. Otherwise, |noise| is extended and the resulting mix has the
+    same length as |signal|.
Args:
signal: AudioSegment instance (signal).
noise: AudioSegment instance (noise).
target_snr: float, numpy.Inf or -numpy.Inf (dB).
- bln_pad_shortest: if True, it pads the shortest signal with silence at the
- end.
+ pad_noise: SignalProcessingUtils.MixPadding, default: NO_PADDING.
Returns:
An AudioSegment instance.
@@ -310,28 +332,23 @@
raise exceptions.SignalProcessingException(
'cannot mix a signal with -Inf power')
- # Pad signal (if necessary). If noise is the shortest, the AudioSegment
- # overlay() method implictly pads noise. Hence, the only case to handle
- # is signal shorter than noise and bln_pad_shortest True.
- if bln_pad_shortest:
- signal_duration = len(signal)
- noise_duration = len(noise)
- logging.warning('mix signals with padding')
- logging.warning(' signal: %d ms', signal_duration)
- logging.warning(' noise: %d ms', noise_duration)
- padding_duration = noise_duration - signal_duration
- if padding_duration > 0: # That is signal_duration < noise_duration.
- logging.debug(' padding: %d ms', padding_duration)
- padding = pydub.AudioSegment.silent(
- duration=padding_duration,
- frame_rate=signal.frame_rate)
- logging.debug(' signal (pre): %d ms', len(signal))
- signal = signal + padding
- logging.debug(' signal (post): %d ms', len(signal))
-
- # Update power.
- signal_power = float(signal.dBFS)
-
- # Mix signals using the target SNR.
+ # Mix.
gain_db = signal_power - noise_power - target_snr
- return cls.Normalize(signal.overlay(noise.apply_gain(gain_db)))
+ signal_duration = len(signal)
+ noise_duration = len(noise)
+ if signal_duration <= noise_duration:
+      # Ignore |pad_noise|: |noise| is truncated if longer than |signal| and
+      # the mix has the same length as |signal|.
+ return signal.overlay(noise.apply_gain(gain_db))
+ elif pad_noise == cls.MixPadding.NO_PADDING:
+ # |signal| is longer than |noise|, but no padding is applied to |noise|.
+ # Truncate |signal|.
+ return noise.overlay(signal, gain_during_overlay=gain_db)
+ elif pad_noise == cls.MixPadding.ZERO_PADDING:
+      # |signal| is longer than |noise|: overlay() implicitly pads |noise|
+      # with silence, which is equivalent to zero padding.
+ return signal.overlay(noise.apply_gain(gain_db))
+ elif pad_noise == cls.MixPadding.LOOP:
+ # |signal| is longer than |noise|, extend |noise| by looping.
+ return signal.overlay(noise.apply_gain(gain_db), loop=True)
+ else:
+ raise exceptions.SignalProcessingException('invalid padding type')
diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
index 3edd538..30ada41 100644
--- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
+++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
@@ -75,3 +75,112 @@
with self.assertRaises(exceptions.SignalProcessingException):
_ = signal_processing.SignalProcessingUtils.MixSignals(
silence, signal, 0.0)
+
+ def testMixSignalNoiseDifferentLengths(self):
+ # Test signals.
+ shorter = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+ pydub.AudioSegment.silent(duration=1000, frame_rate=8000))
+ longer = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+ pydub.AudioSegment.silent(duration=2000, frame_rate=8000))
+
+ # When the signal is shorter than the noise, the mix length always equals
+ # that of the signal regardless of whether padding is applied.
+ # No noise padding, length of signal less than that of noise.
+ mix = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=shorter,
+ noise=longer,
+ pad_noise=signal_processing.SignalProcessingUtils.MixPadding.NO_PADDING)
+ self.assertEqual(len(shorter), len(mix))
+ # With noise padding, length of signal less than that of noise.
+ mix = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=shorter,
+ noise=longer,
+ pad_noise=signal_processing.SignalProcessingUtils.MixPadding.ZERO_PADDING)
+ self.assertEqual(len(shorter), len(mix))
+
+ # When the signal is longer than the noise, the mix length depends on
+ # whether padding is applied.
+ # No noise padding, length of signal greater than that of noise.
+ mix = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=longer,
+ noise=shorter,
+ pad_noise=signal_processing.SignalProcessingUtils.MixPadding.NO_PADDING)
+ self.assertEqual(len(shorter), len(mix))
+ # With noise padding, length of signal greater than that of noise.
+ mix = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=longer,
+ noise=shorter,
+ pad_noise=signal_processing.SignalProcessingUtils.MixPadding.ZERO_PADDING)
+ self.assertEqual(len(longer), len(mix))
+
+ def testMixSignalNoisePaddingTypes(self):
+ # Test signals.
+ shorter = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+ pydub.AudioSegment.silent(duration=1000, frame_rate=8000))
+ longer = signal_processing.SignalProcessingUtils.GeneratePureTone(
+ pydub.AudioSegment.silent(duration=2000, frame_rate=8000), 440.0)
+
+ # Zero padding: expect pure tone only in 1-2s.
+ mix_zero_pad = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=longer,
+ noise=shorter,
+ target_snr=-6,
+ pad_noise=signal_processing.SignalProcessingUtils.MixPadding.ZERO_PADDING)
+
+ # Loop: expect pure tone plus noise in 1-2s.
+ mix_loop = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=longer,
+ noise=shorter,
+ target_snr=-6,
+ pad_noise=signal_processing.SignalProcessingUtils.MixPadding.LOOP)
+
+ def Energy(signal):
+ samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData(
+ signal).astype(np.float32)
+ return np.sum(samples * samples)
+
+ e_mix_zero_pad = Energy(mix_zero_pad[-1000:])
+ e_mix_loop = Energy(mix_loop[-1000:])
+ self.assertLess(0, e_mix_zero_pad)
+ self.assertLess(e_mix_zero_pad, e_mix_loop)
+
+ def testMixSignalSnr(self):
+ # Test signals.
+ tone_low = signal_processing.SignalProcessingUtils.GeneratePureTone(
+ pydub.AudioSegment.silent(duration=64, frame_rate=8000), 250.0)
+ tone_high = signal_processing.SignalProcessingUtils.GeneratePureTone(
+ pydub.AudioSegment.silent(duration=64, frame_rate=8000), 3000.0)
+
+ def ToneAmplitudes(mix):
+ """Returns the amplitude of the coefficients #16 and #192, which
+ correspond to the tones at 250 and 3k Hz respectively."""
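+      # 64 ms at 8000 Hz gives 512 samples, hence the FFT bin resolution is
+      # 8000 / 512 = 15.625 Hz: 250 / 15.625 = 16 and 3000 / 15.625 = 192.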
+ mix_fft = np.absolute(signal_processing.SignalProcessingUtils.Fft(mix))
+ return mix_fft[16], mix_fft[192]
+
+ mix = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=tone_low,
+ noise=tone_high,
+ target_snr=-6)
+ ampl_low, ampl_high = ToneAmplitudes(mix)
+ self.assertLess(ampl_low, ampl_high)
+
+ mix = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=tone_high,
+ noise=tone_low,
+ target_snr=-6)
+ ampl_low, ampl_high = ToneAmplitudes(mix)
+ self.assertLess(ampl_high, ampl_low)
+
+ mix = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=tone_low,
+ noise=tone_high,
+ target_snr=6)
+ ampl_low, ampl_high = ToneAmplitudes(mix)
+ self.assertLess(ampl_high, ampl_low)
+
+ mix = signal_processing.SignalProcessingUtils.MixSignals(
+ signal=tone_high,
+ noise=tone_low,
+ target_snr=6)
+ ampl_low, ampl_high = ToneAmplitudes(mix)
+ self.assertLess(ampl_low, ampl_high)
diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
index 4153f73..8da17a399 100644
--- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
+++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
@@ -394,7 +394,8 @@
if not os.path.exists(noisy_signal_filepath):
# Create noisy signal.
noisy_signal = signal_processing.SignalProcessingUtils.MixSignals(
- input_signal, noise_signal, snr)
+ input_signal, noise_signal, snr,
+ pad_noise=signal_processing.SignalProcessingUtils.MixPadding.LOOP)
# Save.
signal_processing.SignalProcessingUtils.SaveWav(
@@ -489,7 +490,7 @@
if not os.path.exists(noisy_signal_filepath):
# Create noisy signal.
noisy_signal = signal_processing.SignalProcessingUtils.MixSignals(
- input_signal, noise_signal, snr, bln_pad_shortest=True)
+ input_signal, noise_signal, snr)
# Save.
signal_processing.SignalProcessingUtils.SaveWav(