#if !defined(__has_feature) || !__has_feature(objc_arc)
#error "This file requires ARC support."
#import <AVFoundation/AVFoundation.h>
#import <Foundation/Foundation.h>
#include "webrtc/modules/audio_device/ios/audio_device_ios.h"
#include "webrtc/base/atomicops.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/criticalsection.h"
#include "webrtc/base/logging.h"
#include "webrtc/base/thread_annotations.h"
#include "webrtc/modules/audio_device/fine_audio_buffer.h"
#include "webrtc/modules/utility/include/helpers_ios.h"
namespace webrtc {
// Global lock that protects |g_audio_session_users|.
static rtc::GlobalLockPod g_lock;
// Counts the number of users (= instances of this object) that need an active
// audio session. This variable is used to ensure that we only activate an
// audio session for the first user and deactivate it for the last.
// The counter is a file-level static so that the value is shared by all
// instances instead of being tracked per instance.
static int g_audio_session_users GUARDED_BY(g_lock) = 0;
// Starts an info-level log statement prefixed with the class name. The caller
// streams the method name (and any arguments) after the macro.
#define LOGI() LOG(LS_INFO) << "AudioDeviceIOS::"
// Evaluates |error| exactly once as an OSStatus. If the status is non-zero,
// logs |message| followed by the status code at error level and returns false
// from the enclosing function. Only usable in functions whose return type
// accepts `false`.
#define LOG_AND_RETURN_IF_ERROR(error, message) \
do { \
OSStatus err = error; \
if (err) { \
LOG(LS_ERROR) << message << ": " << err; \
return false; \
} \
} while (0)
// Same as LOG_AND_RETURN_IF_ERROR but only logs; execution continues in the
// enclosing function regardless of the status.
#define LOG_IF_ERROR(error, message) \
do { \
OSStatus err = error; \
if (err) { \
LOG(LS_ERROR) << message << ": " << err; \
} \
} while (0)
// Preferred hardware sample rate (unit is in Hertz). The client sample rate
// will be set to this value as well to avoid resampling in the audio unit's
// format converter. Note that some devices, e.g. BT headsets, only support
// 8000Hz as native sample rate.
const double kPreferredSampleRate = 48000.0;
// Use a hardware I/O buffer size (unit is in seconds) that matches the 10ms
// size used by WebRTC. The exact actual size will differ between devices.
// Example: using 48kHz on iPhone 6 results in a native buffer size of
// ~10.6667ms or 512 audio frames per buffer. The FineAudioBuffer instance will
// take care of any buffering required to convert between native buffers and
// buffers used by WebRTC. It is beneficial for the performance if the native
// size is as close to 10ms as possible since it results in a "clean" callback
// sequence without bursts of callbacks back to back.
const double kPreferredIOBufferDuration = 0.01;
// Try to use mono to save resources. Also avoids channel format conversion
// in the I/O audio unit. Initial tests have shown that it is possible to use
// mono natively for built-in microphones and for BT headsets but not for
// wired headsets. Wired headsets only support stereo as native channel format
// but it is a low cost operation to do a format conversion to mono in the
// audio unit. Hence, we will not hit a RTC_CHECK in
// VerifyAudioParametersForActiveAudioSession() for a mismatch between the
// preferred number of channels and the actual number of channels.
const int kPreferredNumberOfChannels = 1;
// Number of bytes per audio sample for 16-bit signed integer representation.
const UInt32 kBytesPerSample = 2;
// Hardcoded delay estimates based on real measurements. They are reported
// unchanged by PlayoutDelay() and RecordingDelay().
// TODO(henrika): these values are not used in combination with built-in AEC.
// Can most likely be removed.
const UInt16 kFixedPlayoutDelayEstimate = 30;
const UInt16 kFixedRecordDelayEstimate = 30;
// Calls to AudioUnitInitialize() can fail if called back-to-back on different
// ADM instances. A fall-back solution is to allow multiple sequential calls
// with a small delay between each. This constant sets the max number of
// allowed initialization attempts.
const int kMaxNumberOfAudioUnitInitializeAttempts = 5;
// Makes ios::CheckAndLogError callable without qualification in this file.
using ios::CheckAndLogError;
// Verifies that the current audio session supports input audio and that the
// required category and mode are enabled.
// Returns false, after logging the reason, when no input path is available,
// when the category is not AVAudioSessionCategoryPlayAndRecord, or when the
// mode is not AVAudioSessionModeVoiceChat. Returns true otherwise.
// NOTE(review): the closing braces and the LOG(LS_ERROR) head of the category
// message are not present in this view of the file -- confirm against the
// complete source.
static bool VerifyAudioSession(AVAudioSession* session) {
LOG(LS_INFO) << "VerifyAudioSession";
// Ensure that the device currently supports audio input.
if (!session.isInputAvailable) {
LOG(LS_ERROR) << "No audio input path is available!";
return false;
// Ensure that the required category and mode are actually activated.
// String contents (not pointers) are compared via isEqualToString:.
if (![session.category isEqualToString:AVAudioSessionCategoryPlayAndRecord]) {
<< "Failed to set category to AVAudioSessionCategoryPlayAndRecord";
return false;
if (![session.mode isEqualToString:AVAudioSessionModeVoiceChat]) {
LOG(LS_ERROR) << "Failed to set mode to AVAudioSessionModeVoiceChat";
return false;
return true;
// Activates an audio session suitable for full duplex VoIP sessions when
// |activate| is true. Also sets the preferred sample rate and IO buffer
// duration. Deactivates an active audio session if |activate| is set to false.
// Returns true on success. On the activation path, only a failing
// setActive:YES call or a failing VerifyAudioSession() produces a false
// return; the preference setters are checked through RTC_DCHECK only.
// NOTE(review): several message sends below are cut off in this view (the
// selector tails and closing braces are missing) -- confirm against the
// complete source before editing.
static bool ActivateAudioSession(AVAudioSession* session, bool activate)
LOG(LS_INFO) << "ActivateAudioSession(" << activate << ")";
@autoreleasepool {
NSError* error = nil;
BOOL success = NO;
if (!activate) {
// Deactivate the audio session using an extra option and then return.
// AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation is used to
// ensure that other audio sessions that were interrupted by our session
// can return to their active state. It is recommended for VoIP apps to
// use this option.
success = [session
return CheckAndLogError(success, error);
// Go ahead and activate our own audio session since |activate| is true.
// Use a category which supports simultaneous recording and playback.
// By default, using this category implies that our app’s audio is
// nonmixable, hence activating the session will interrupt any other
// audio sessions which are also nonmixable.
// NOTE(review): this compares NSString pointers with !=, whereas
// VerifyAudioSession() compares contents with isEqualToString:. A pointer
// mismatch only causes a redundant setCategory call, but confirm intent.
if (session.category != AVAudioSessionCategoryPlayAndRecord) {
error = nil;
success = [session setCategory:AVAudioSessionCategoryPlayAndRecord
RTC_DCHECK(CheckAndLogError(success, error));
// Specify mode for two-way voice communication (e.g. VoIP).
// NOTE(review): pointer comparison again; see the category check above.
if (session.mode != AVAudioSessionModeVoiceChat) {
error = nil;
success = [session setMode:AVAudioSessionModeVoiceChat error:&error];
RTC_DCHECK(CheckAndLogError(success, error));
// Set the session's sample rate or the hardware sample rate.
// It is essential that we use the same sample rate as stream format
// to ensure that the I/O unit does not have to do sample rate conversion.
error = nil;
success =
[session setPreferredSampleRate:kPreferredSampleRate error:&error];
RTC_DCHECK(CheckAndLogError(success, error));
// Set the preferred audio I/O buffer duration, in seconds.
error = nil;
success = [session setPreferredIOBufferDuration:kPreferredIOBufferDuration
RTC_DCHECK(CheckAndLogError(success, error));
// Activate the audio session. Activation can fail if another active audio
// session (e.g. phone call) has higher priority than ours.
error = nil;
success = [session setActive:YES error:&error];
if (!CheckAndLogError(success, error)) {
return false;
// Ensure that the active audio session has the correct category and mode.
if (!VerifyAudioSession(session)) {
LOG(LS_ERROR) << "Failed to verify audio session category and mode";
return false;
// Try to set the preferred number of hardware audio channels. These calls
// must be done after setting the audio session’s category and mode and
// activating the session.
// We try to use mono in both directions to save resources and format
// conversions in the audio unit. Some devices only support stereo;
// e.g. wired headset on iPhone 6.
// TODO(henrika): add support for stereo if needed.
error = nil;
success =
[session setPreferredInputNumberOfChannels:kPreferredNumberOfChannels
RTC_DCHECK(CheckAndLogError(success, error));
error = nil;
success =
[session setPreferredOutputNumberOfChannels:kPreferredNumberOfChannels
RTC_DCHECK(CheckAndLogError(success, error));
return true;
// An application can create more than one ADM and start audio streaming
// for all of them. It is essential that we only activate the app's audio
// session once (for the first one) and deactivate it once (for the last).
// This overload takes |g_lock|, and when the user count is zero it activates
// the shared AVAudioSession through ActivateAudioSession(session, true).
// Returns false if that activation fails, true otherwise.
// NOTE(review): no statement that increments |g_audio_session_users| is
// visible in this view -- confirm against the complete source.
static bool ActivateAudioSession() {
LOGI() << "ActivateAudioSession";
rtc::GlobalLockScope ls(&g_lock);
if (g_audio_session_users == 0) {
// The system provides an audio session object upon launch of an
// application. However, we must initialize the session in order to
// handle interruptions. Implicit initialization occurs when obtaining
// a reference to the AVAudioSession object.
AVAudioSession* session = [AVAudioSession sharedInstance];
// Try to activate the audio session and ask for a set of preferred audio
// parameters.
if (!ActivateAudioSession(session, true)) {
LOG(LS_ERROR) << "Failed to activate the audio session";
return false;
LOG(LS_INFO) << "The audio session is now activated";
LOG(LS_INFO) << "Number of audio session users: " << g_audio_session_users;
return true;
// If more than one object is using the audio session, ensure that only the
// last object deactivates. Apple recommends: "activate your audio session
// only as needed and deactivate it when you are not using audio".
// Takes |g_lock|, and when exactly one user remains it deactivates the shared
// AVAudioSession through ActivateAudioSession(session, false).
// Returns false if that deactivation fails, true otherwise.
// NOTE(review): no statement that decrements |g_audio_session_users| is
// visible in this view -- confirm against the complete source.
static bool DeactivateAudioSession() {
LOGI() << "DeactivateAudioSession";
rtc::GlobalLockScope ls(&g_lock);
if (g_audio_session_users == 1) {
AVAudioSession* session = [AVAudioSession sharedInstance];
if (!ActivateAudioSession(session, false)) {
LOG(LS_ERROR) << "Failed to deactivate the audio session";
return false;
LOG(LS_INFO) << "Our audio session is now deactivated";
LOG(LS_INFO) << "Number of audio session users: " << g_audio_session_users;
return true;
#if !defined(NDEBUG)
// Helper method for printing out an AudioStreamBasicDescription structure.
// Every field of |absd| is written to the info log on its own line.
static void LogABSD(AudioStreamBasicDescription absd) {
// Convert the 32-bit format ID into a printable, NUL-terminated string of
// four characters. The value is byte-swapped to big-endian first so that the
// bytes are copied in most-significant-first order.
char formatIDString[5];
UInt32 formatID = CFSwapInt32HostToBig(absd.mFormatID);
bcopy(&formatID, formatIDString, 4);
formatIDString[4] = '\0';
LOG(LS_INFO) << " sample rate: " << absd.mSampleRate;
LOG(LS_INFO) << " format ID: " << formatIDString;
// The format flags are a bit mask, hence printed in hexadecimal.
LOG(LS_INFO) << " format flags: " << std::hex << absd.mFormatFlags;
LOG(LS_INFO) << " bytes per packet: " << absd.mBytesPerPacket;
LOG(LS_INFO) << " frames per packet: " << absd.mFramesPerPacket;
LOG(LS_INFO) << " bytes per frame: " << absd.mBytesPerFrame;
// Note: the label says "per packet" but the field logged is per frame.
LOG(LS_INFO) << " channels per packet: " << absd.mChannelsPerFrame;
LOG(LS_INFO) << " bits per channel: " << absd.mBitsPerChannel;
LOG(LS_INFO) << " reserved: " << absd.mReserved;
// Helper method that logs essential device information strings: system name,
// system version, device type and device name, as reported by the ios::
// helper functions.
static void LogDeviceInfo() {
LOG(LS_INFO) << "LogDeviceInfo";
// The autorelease pool bounds the lifetime of any temporary Objective-C
// objects created while the strings are fetched.
@autoreleasepool {
LOG(LS_INFO) << " system name: " << ios::GetSystemName();
LOG(LS_INFO) << " system version: " << ios::GetSystemVersion();
LOG(LS_INFO) << " device type: " << ios::GetDeviceType();
LOG(LS_INFO) << " device name: " << ios::GetDeviceName();
#endif // !defined(NDEBUG)
// Constructor: null-initializes the members listed below and logs the thread
// on which the object is created.
// NOTE(review): the `AudioDeviceIOS::AudioDeviceIOS()` head and any further
// member initializers are not visible in this view -- confirm against the
// complete source.
: audio_device_buffer_(nullptr),
route_change_observer_(nullptr) {
LOGI() << "ctor" << ios::GetCurrentThreadDescription();
// Destructor: logs the thread on which the object is destroyed.
// NOTE(review): any cleanup performed here is not visible in this view.
AudioDeviceIOS::~AudioDeviceIOS() {
LOGI() << "~dtor" << ios::GetCurrentThreadDescription();
// Stores |audioBuffer| as the audio device buffer used by this instance.
// The pointer is kept as-is; no copy is made here.
void AudioDeviceIOS::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) {
LOGI() << "AttachAudioBuffer";
audio_device_buffer_ = audioBuffer;
// Initializes the object: stores the preferred audio parameters for both
// directions and marks the instance as initialized. Calling it again on an
// initialized instance is a no-op. Always returns 0 in the visible code.
// NOTE(review): the body and #endif of the NDEBUG-conditional section below
// are not visible in this view -- confirm against the complete source.
int32_t AudioDeviceIOS::Init() {
LOGI() << "Init";
if (initialized_) {
return 0;
#if !defined(NDEBUG)
// Store the preferred sample rate and preferred number of channels already
// here. They have not been set and confirmed yet since ActivateAudioSession()
// is not called until audio is about to start. However, it makes sense to
// store the parameters now and then verify at a later stage.
playout_parameters_.reset(kPreferredSampleRate, kPreferredNumberOfChannels);
record_parameters_.reset(kPreferredSampleRate, kPreferredNumberOfChannels);
// Ensure that the audio device buffer (ADB) knows about the internal audio
// parameters. Note that, even if we are unable to get a mono audio session,
// we will always tell the I/O audio unit to do a channel format conversion
// to guarantee mono on the "input side" of the audio unit.
initialized_ = true;
return 0;
// Marks the instance as uninitialized. Does nothing when the instance was not
// initialized. Always returns 0 in the visible code.
int32_t AudioDeviceIOS::Terminate() {
LOGI() << "Terminate";
if (!initialized_) {
return 0;
initialized_ = false;
// The user counter is shared by all instances, so it is read under |g_lock|.
rtc::GlobalLockScope ls(&g_lock);
if (g_audio_session_users != 0) {
LOG(LS_WARNING) << "Object is destructed with an active audio session";
// The counter must never become negative.
RTC_DCHECK_GE(g_audio_session_users, 0);
return 0;
// Prepares the playout side. The shared setup in InitPlayOrRecord() is only
// run when recording has not already been initialized.
// Returns 0 on success and -1 if InitPlayOrRecord() fails.
int32_t AudioDeviceIOS::InitPlayout() {
LOGI() << "InitPlayout";
if (!rec_is_initialized_) {
if (!InitPlayOrRecord()) {
LOG_F(LS_ERROR) << "InitPlayOrRecord failed for InitPlayout!";
return -1;
play_is_initialized_ = true;
return 0;
// Prepares the recording side. The shared setup in InitPlayOrRecord() is only
// run when playout has not already been initialized.
// Returns 0 on success and -1 if InitPlayOrRecord() fails.
int32_t AudioDeviceIOS::InitRecording() {
LOGI() << "InitRecording";
if (!play_is_initialized_) {
if (!InitPlayOrRecord()) {
LOG_F(LS_ERROR) << "InitPlayOrRecord failed for InitRecording!";
return -1;
rec_is_initialized_ = true;
return 0;
// Starts playout. StartPlayout() and StartRecording() both start the same
// audio unit (|vpio_unit_|), so it is only started here when recording is not
// already active. Returns 0 on success and -1 if the audio unit fails to
// start.
int32_t AudioDeviceIOS::StartPlayout() {
LOGI() << "StartPlayout";
if (!recording_) {
OSStatus result = AudioOutputUnitStart(vpio_unit_);
if (result != noErr) {
LOG_F(LS_ERROR) << "AudioOutputUnitStart failed for StartPlayout: "
<< result;
return -1;
LOG(LS_INFO) << "Voice-Processing I/O audio unit is now started";
// Publish the playing state with release semantics.
rtc::AtomicOps::ReleaseStore(&playing_, 1);
return 0;
// Stops playout. Does nothing when playout is not initialized or not active.
// Always returns 0 in the visible code.
// NOTE(review): the statements guarded by `if (!recording_)` and its closing
// brace are not visible in this view -- confirm against the complete source.
int32_t AudioDeviceIOS::StopPlayout() {
LOGI() << "StopPlayout";
if (!play_is_initialized_ || !playing_) {
return 0;
if (!recording_) {
play_is_initialized_ = false;
// Clear the playing state with release semantics.
rtc::AtomicOps::ReleaseStore(&playing_, 0);
return 0;
// Starts recording. StartRecording() and StartPlayout() both start the same
// audio unit (|vpio_unit_|), so it is only started here when playout is not
// already active. Returns 0 on success and -1 if the audio unit fails to
// start.
int32_t AudioDeviceIOS::StartRecording() {
LOGI() << "StartRecording";
if (!playing_) {
OSStatus result = AudioOutputUnitStart(vpio_unit_);
if (result != noErr) {
LOG_F(LS_ERROR) << "AudioOutputUnitStart failed for StartRecording: "
<< result;
return -1;
LOG(LS_INFO) << "Voice-Processing I/O audio unit is now started";
// Publish the recording state with release semantics; the input callback
// reads it with AcquireLoad.
rtc::AtomicOps::ReleaseStore(&recording_, 1);
return 0;
// Stops recording. Does nothing when recording is not initialized or not
// active. Always returns 0 in the visible code.
// NOTE(review): the statements guarded by `if (!playing_)` and its closing
// brace are not visible in this view -- confirm against the complete source.
int32_t AudioDeviceIOS::StopRecording() {
LOGI() << "StopRecording";
if (!rec_is_initialized_ || !recording_) {
return 0;
if (!playing_) {
rec_is_initialized_ = false;
// Clear the recording state with release semantics.
rtc::AtomicOps::ReleaseStore(&recording_, 0);
return 0;
// Change the default receiver playout route to speaker.
// When |enable| is true the DefaultToSpeaker category option is set on the
// shared audio session, otherwise it is cleared. The category is (re)set to
// AVAudioSessionCategoryPlayAndRecord in both cases.
// Returns 0 when no NSError was produced and -1 otherwise.
// NOTE(review): the tail of the setCategory message send is not visible in
// this view -- confirm against the complete source.
int32_t AudioDeviceIOS::SetLoudspeakerStatus(bool enable) {
LOGI() << "SetLoudspeakerStatus(" << enable << ")";
AVAudioSession* session = [AVAudioSession sharedInstance];
NSString* category = session.category;
AVAudioSessionCategoryOptions options = session.categoryOptions;
// Respect old category options if category is
// AVAudioSessionCategoryPlayAndRecord. Otherwise reset it since old options
// might not be valid for this category.
if ([category isEqualToString:AVAudioSessionCategoryPlayAndRecord]) {
if (enable) {
options |= AVAudioSessionCategoryOptionDefaultToSpeaker;
} else {
options &= ~AVAudioSessionCategoryOptionDefaultToSpeaker;
} else {
// NOTE(review): this branch sets DefaultToSpeaker without consulting
// |enable| -- confirm that this is intended.
options = AVAudioSessionCategoryOptionDefaultToSpeaker;
NSError* error = nil;
BOOL success = [session setCategory:AVAudioSessionCategoryPlayAndRecord
ios::CheckAndLogError(success, error);
// NOTE(review): the result is derived from |error| rather than |success|.
// Cocoa convention is to test the BOOL return value, since |error| is only
// guaranteed to be meaningful on failure.
return (error == nil) ? 0 : -1;
// Reports in |enabled| whether the DefaultToSpeaker category option is
// currently set on the shared audio session. Always returns 0.
int32_t AudioDeviceIOS::GetLoudspeakerStatus(bool& enabled) const {
LOGI() << "GetLoudspeakerStatus";
AVAudioSession* session = [AVAudioSession sharedInstance];
AVAudioSessionCategoryOptions options = session.categoryOptions;
// A non-zero masked value converts to true.
enabled = options & AVAudioSessionCategoryOptionDefaultToSpeaker;
return 0;
// Writes the fixed playout delay estimate (kFixedPlayoutDelayEstimate) to
// |delayMS|. No measurement is performed. Always returns 0.
int32_t AudioDeviceIOS::PlayoutDelay(uint16_t& delayMS) const {
delayMS = kFixedPlayoutDelayEstimate;
return 0;
// Writes the fixed recording delay estimate (kFixedRecordDelayEstimate) to
// |delayMS|. No measurement is performed. Always returns 0.
int32_t AudioDeviceIOS::RecordingDelay(uint16_t& delayMS) const {
delayMS = kFixedRecordDelayEstimate;
return 0;
// Copies the current playout audio parameters into |*params|. Always returns
// 0. |params| is dereferenced without a null check, so it must be non-null.
int AudioDeviceIOS::GetPlayoutAudioParameters(AudioParameters* params) const {
LOGI() << "GetPlayoutAudioParameters";
*params = playout_parameters_;
return 0;
// Copies the current recording audio parameters into |*params|. Always
// returns 0. |params| is dereferenced without a null check, so it must be
// non-null.
int AudioDeviceIOS::GetRecordAudioParameters(AudioParameters* params) const {
LOGI() << "GetRecordAudioParameters";
*params = record_parameters_;
return 0;
// Propagates the current audio format to the attached audio device buffer.
// NOTE(review): the calls that update the ADB are not visible in this view --
// confirm against the complete source.
void AudioDeviceIOS::UpdateAudioDeviceBuffer() {
LOGI() << "UpdateAudioDevicebuffer";
// AttachAudioBuffer() is called at construction by the main class but check
// just in case.
RTC_DCHECK(audio_device_buffer_) << "AttachAudioBuffer must be called first";
// Inform the audio device buffer (ADB) about the new audio format.
// Registers block-based observers on the default NSNotificationCenter for
// AVAudioSessionInterruptionNotification and
// AVAudioSessionRouteChangeNotification. Both observers are delivered on the
// main operation queue. The returned observer tokens are retained and stored
// in |audio_interruption_observer_| and |route_change_observer_| so that
// UnregisterNotificationObservers() can remove them later.
// NOTE(review): many statements in this function are cut off in this view
// (initializers, `break` statements, macro heads, closing braces and the
// tails of the addObserverForName sends) -- confirm against the complete
// source before editing.
void AudioDeviceIOS::RegisterNotificationObservers() {
LOGI() << "RegisterNotificationObservers";
// This code block will be called when AVAudioSessionInterruptionNotification
// is observed.
void (^interrupt_block)(NSNotification*) = ^(NSNotification* notification) {
NSNumber* type_number =
AVAudioSessionInterruptionType type =
LOG(LS_INFO) << "Audio session interruption:";
switch (type) {
case AVAudioSessionInterruptionTypeBegan:
// The system has deactivated our audio session.
// Stop the active audio unit.
LOG(LS_INFO) << " Began => stopping the audio unit";
"Failed to stop the the Voice-Processing I/O unit");
case AVAudioSessionInterruptionTypeEnded:
// The interruption has ended. Restart the audio session and start the
// initialized audio unit again.
LOG(LS_INFO) << " Ended => restarting audio session and audio unit";
NSError* error = nil;
BOOL success = NO;
AVAudioSession* session = [AVAudioSession sharedInstance];
success = [session setActive:YES error:&error];
// The audio unit is only restarted when the session was reactivated.
if (CheckAndLogError(success, error)) {
"Failed to start the the Voice-Processing I/O unit");
// This code block will be called when AVAudioSessionRouteChangeNotification
// is observed.
void (^route_change_block)(NSNotification*) =
^(NSNotification* notification) {
// Get reason for current route change.
NSNumber* reason_number =
AVAudioSessionRouteChangeReason reason =
// Cleared below for reasons that should not trigger an audio restart.
bool valid_route_change = true;
LOG(LS_INFO) << "Route change:";
switch (reason) {
case AVAudioSessionRouteChangeReasonUnknown:
LOG(LS_INFO) << " ReasonUnknown";
case AVAudioSessionRouteChangeReasonNewDeviceAvailable:
LOG(LS_INFO) << " NewDeviceAvailable";
case AVAudioSessionRouteChangeReasonOldDeviceUnavailable:
LOG(LS_INFO) << " OldDeviceUnavailable";
case AVAudioSessionRouteChangeReasonCategoryChange:
LOG(LS_INFO) << " CategoryChange";
LOG(LS_INFO) << " New category: " << ios::GetAudioSessionCategory();
// Don't see this as route change since it can be triggered in
// combination with session interruptions as well.
valid_route_change = false;
case AVAudioSessionRouteChangeReasonOverride:
LOG(LS_INFO) << " Override";
case AVAudioSessionRouteChangeReasonWakeFromSleep:
LOG(LS_INFO) << " WakeFromSleep";
case AVAudioSessionRouteChangeReasonNoSuitableRouteForCategory:
LOG(LS_INFO) << " NoSuitableRouteForCategory";
case AVAudioSessionRouteChangeReasonRouteConfigurationChange:
// Ignore this type of route change since we are focusing
// on detecting headset changes.
LOG(LS_INFO) << " RouteConfigurationChange";
valid_route_change = false;
if (valid_route_change) {
// Log previous route configuration.
AVAudioSessionRouteDescription* prev_route =
LOG(LS_INFO) << "Previous route:";
LOG(LS_INFO) << ios::StdStringFromNSString(
[NSString stringWithFormat:@"%@", prev_route]);
// Only restart audio for a valid route change and if the
// session sample rate has changed.
AVAudioSession* session = [AVAudioSession sharedInstance];
const double session_sample_rate = session.sampleRate;
LOG(LS_INFO) << "session sample rate: " << session_sample_rate;
if (playout_parameters_.sample_rate() != session_sample_rate) {
if (!RestartAudioUnitWithNewFormat(session_sample_rate)) {
LOG(LS_ERROR) << "Audio restart failed";
// Get the default notification center of the current process.
NSNotificationCenter* center = [NSNotificationCenter defaultCenter];
// Add AVAudioSessionInterruptionNotification observer.
id interruption_observer =
[center addObserverForName:AVAudioSessionInterruptionNotification
queue:[NSOperationQueue mainQueue]
// Add AVAudioSessionRouteChangeNotification observer.
id route_change_observer =
[center addObserverForName:AVAudioSessionRouteChangeNotification
queue:[NSOperationQueue mainQueue]
// Increment refcount on observers using ARC bridge. Instance variable is a
// void* instead of an id because header is included in other pure C++
// files.
// The retained references are balanced by the __bridge_transfer casts in
// UnregisterNotificationObservers().
audio_interruption_observer_ = (__bridge_retained void*)interruption_observer;
route_change_observer_ = (__bridge_retained void*)route_change_observer;
// Removes the interruption and route-change observers from the default
// notification center, if they are registered, and resets the stored
// pointers to null. Safe to call when no observer is registered.
void AudioDeviceIOS::UnregisterNotificationObservers() {
LOGI() << "UnregisterNotificationObservers";
// Transfer ownership of observer back to ARC, which will deallocate the
// observer once it exits this scope.
NSNotificationCenter* center = [NSNotificationCenter defaultCenter];
if (audio_interruption_observer_ != nullptr) {
// __bridge_transfer balances the __bridge_retained cast made at
// registration time.
id observer = (__bridge_transfer id)audio_interruption_observer_;
[center removeObserver:observer];
audio_interruption_observer_ = nullptr;
if (route_change_observer_ != nullptr) {
id observer = (__bridge_transfer id)route_change_observer_;
[center removeObserver:observer];
route_change_observer_ = nullptr;
// Reads the actual parameters of the active audio session, updates the
// playout and record audio parameters accordingly, and (re)allocates the
// buffers used for playout caching and for rendering recorded audio.
// NOTE(review): the tails of the reset() calls, of the FineAudioBuffer
// construction and of the |required_playout_buffer_size| initializer are not
// visible in this view -- confirm against the complete source.
void AudioDeviceIOS::SetupAudioBuffersForActiveAudioSession() {
LOGI() << "SetupAudioBuffersForActiveAudioSession";
// Verify the current values once the audio session has been activated.
AVAudioSession* session = [AVAudioSession sharedInstance];
LOG(LS_INFO) << " sample rate: " << session.sampleRate;
LOG(LS_INFO) << " IO buffer duration: " << session.IOBufferDuration;
LOG(LS_INFO) << " output channels: " << session.outputNumberOfChannels;
LOG(LS_INFO) << " input channels: " << session.inputNumberOfChannels;
LOG(LS_INFO) << " output latency: " << session.outputLatency;
LOG(LS_INFO) << " input latency: " << session.inputLatency;
// Log a warning message for the case when we are unable to set the preferred
// hardware sample rate but continue and use the non-ideal sample rate after
// reinitializing the audio parameters. Most BT headsets only support 8kHz or
// 16kHz.
if (session.sampleRate != kPreferredSampleRate) {
LOG(LS_WARNING) << "Unable to set the preferred sample rate";
// At this stage, we also know the exact IO buffer duration and can add
// that info to the existing audio parameters where it is converted into
// number of audio frames.
// Example: IO buffer size = 0.008 seconds <=> 128 audio frames at 16kHz.
// Hence, 128 is the size we expect to see in upcoming render callbacks.
playout_parameters_.reset(session.sampleRate, playout_parameters_.channels(),
record_parameters_.reset(session.sampleRate, record_parameters_.channels(),
LOG(LS_INFO) << " frames per I/O buffer: "
<< playout_parameters_.frames_per_buffer();
LOG(LS_INFO) << " bytes per I/O buffer: "
<< playout_parameters_.GetBytesPerBuffer();
// Update the ADB parameters since the sample rate might have changed.
// Create a modified audio buffer class which allows us to ask for,
// or deliver, any number of samples (and not only multiple of 10ms) to match
// the native audio unit buffer size.
fine_audio_buffer_.reset(new FineAudioBuffer(
audio_device_buffer_, playout_parameters_.GetBytesPerBuffer(),
// The extra/temporary playoutbuffer must be of this size to avoid
// unnecessary memcpy while caching data between successive callbacks.
const int required_playout_buffer_size =
LOG(LS_INFO) << " required playout buffer size: "
<< required_playout_buffer_size;
playout_audio_buffer_.reset(new SInt8[required_playout_buffer_size]);
// Allocate AudioBuffers to be used as storage for the received audio.
// The AudioBufferList structure works as a placeholder for the
// AudioBuffer structure, which holds a pointer to the actual data buffer
// in |record_audio_buffer_|. Recorded audio will be rendered into this memory
// at each input callback when calling AudioUnitRender().
const int data_byte_size = record_parameters_.GetBytesPerBuffer();
record_audio_buffer_.reset(new SInt8[data_byte_size]);
// A single buffer is used; it points at the storage allocated above, which
// remains owned by |record_audio_buffer_|.
audio_record_buffer_list_.mNumberBuffers = 1;
AudioBuffer* audio_buffer = &audio_record_buffer_list_.mBuffers[0];
audio_buffer->mNumberChannels = record_parameters_.channels();
audio_buffer->mDataByteSize = data_byte_size;
audio_buffer->mData = record_audio_buffer_.get();
// Creates, configures and initializes the Voice-Processing I/O audio unit
// stored in |vpio_unit_|:
//  1. creates the unit instance,
//  2. enables input on bus 1 and output on bus 0,
//  3. sets the same 16-bit mono linear PCM application format in both
//     directions and stores it in |application_format_|,
//  4. installs the render (playout) and input (recording) callbacks,
//  5. initializes the unit, retrying a limited number of times on failure.
// Returns true on success and false on any failure that has a visible
// `return false`.
// NOTE(review): closing braces, several LOG(LS_ERROR) heads, the property ID
// of the input-callback call and some argument tails are not visible in this
// view -- confirm against the complete source before editing.
bool AudioDeviceIOS::SetupAndInitializeVoiceProcessingAudioUnit() {
LOGI() << "SetupAndInitializeVoiceProcessingAudioUnit";
RTC_DCHECK(!vpio_unit_) << "VoiceProcessingIO audio unit already exists";
// Create an audio component description to identify the Voice-Processing
// I/O audio unit.
AudioComponentDescription vpio_unit_description;
vpio_unit_description.componentType = kAudioUnitType_Output;
vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
vpio_unit_description.componentFlags = 0;
vpio_unit_description.componentFlagsMask = 0;
// Obtain an audio unit instance given the description.
AudioComponent found_vpio_unit_ref =
AudioComponentFindNext(nullptr, &vpio_unit_description);
// Create a Voice-Processing IO audio unit.
OSStatus result = noErr;
result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
if (result != noErr) {
// Reset the handle so that later null checks see that no unit exists.
vpio_unit_ = nullptr;
LOG(LS_ERROR) << "AudioComponentInstanceNew failed: " << result;
return false;
// A VP I/O unit's bus 1 connects to input hardware (microphone). Enable
// input on the input scope of the input element.
AudioUnitElement input_bus = 1;
UInt32 enable_input = 1;
result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
kAudioUnitScope_Input, input_bus, &enable_input,
if (result != noErr) {
LOG(LS_ERROR) << "Failed to enable input on input scope of input element: "
<< result;
return false;
// A VP I/O unit's bus 0 connects to output hardware (speaker). Enable
// output on the output scope of the output element.
AudioUnitElement output_bus = 0;
UInt32 enable_output = 1;
result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
kAudioUnitScope_Output, output_bus,
&enable_output, sizeof(enable_output));
if (result != noErr) {
<< "Failed to enable output on output scope of output element: "
<< result;
return false;
// Set the application formats for input and output:
// - use same format in both directions
// - avoid resampling in the I/O unit by using the hardware sample rate
// - linear PCM => noncompressed audio data format with one frame per packet
// - no need to specify interleaving since only mono is supported
AudioStreamBasicDescription application_format = {0};
UInt32 size = sizeof(application_format);
// The byte counts below equal kBytesPerSample, which is only correct for
// one channel per frame.
RTC_DCHECK_EQ(1, kPreferredNumberOfChannels);
application_format.mSampleRate = playout_parameters_.sample_rate();
application_format.mFormatID = kAudioFormatLinearPCM;
application_format.mFormatFlags =
kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
application_format.mBytesPerPacket = kBytesPerSample;
application_format.mFramesPerPacket = 1; // uncompressed
application_format.mBytesPerFrame = kBytesPerSample;
application_format.mChannelsPerFrame = kPreferredNumberOfChannels;
application_format.mBitsPerChannel = 8 * kBytesPerSample;
// Store the new format.
application_format_ = application_format;
#if !defined(NDEBUG)
// Set the application format on the output scope of the input element/bus.
result = AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Output, input_bus,
&application_format, size);
if (result != noErr) {
<< "Failed to set application format on output scope of input bus: "
<< result;
return false;
// Set the application format on the input scope of the output element/bus.
result = AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Input, output_bus,
&application_format, size);
if (result != noErr) {
<< "Failed to set application format on input scope of output bus: "
<< result;
return false;
// Specify the callback function that provides audio samples to the audio
// unit.
AURenderCallbackStruct render_callback;
render_callback.inputProc = GetPlayoutData;
// |this| is handed back to the callback as its reference-context argument.
render_callback.inputProcRefCon = this;
result = AudioUnitSetProperty(
vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
output_bus, &render_callback, sizeof(render_callback));
if (result != noErr) {
LOG(LS_ERROR) << "Failed to specify the render callback on the output bus: "
<< result;
return false;
// Disable AU buffer allocation for the recorder, we allocate our own.
// TODO(henrika): not sure that it actually saves resource to make this call.
UInt32 flag = 0;
result = AudioUnitSetProperty(
vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
kAudioUnitScope_Output, input_bus, &flag, sizeof(flag));
if (result != noErr) {
LOG(LS_ERROR) << "Failed to disable buffer allocation on the input bus: "
<< result;
// Specify the callback to be called by the I/O thread to us when input audio
// is available. The recorded samples can then be obtained by calling the
// AudioUnitRender() method.
AURenderCallbackStruct input_callback;
input_callback.inputProc = RecordedDataIsAvailable;
input_callback.inputProcRefCon = this;
result = AudioUnitSetProperty(vpio_unit_,
kAudioUnitScope_Global, input_bus,
&input_callback, sizeof(input_callback));
if (result != noErr) {
LOG(LS_ERROR) << "Failed to specify the input callback on the input bus: "
<< result;
// Initialize the Voice-Processing I/O unit instance.
// Calls to AudioUnitInitialize() can fail if called back-to-back on
// different ADM instances. The error message in this case is -66635 which is
// undocumented. Tests have shown that calling AudioUnitInitialize a second
// time, after a short sleep, avoids this issue.
// See webrtc:5166 for details.
// NOTE(review): the statement that increments |failed_initalize_attempts|
// is not visible in this view -- confirm against the complete source.
int failed_initalize_attempts = 0;
result = AudioUnitInitialize(vpio_unit_);
while (result != noErr) {
LOG(LS_ERROR) << "Failed to initialize the Voice-Processing I/O unit: "
<< result;
if (failed_initalize_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
// Max number of initialization attempts exceeded, hence abort.
LOG(LS_WARNING) << "Too many initialization attempts";
return false;
LOG(LS_INFO) << "pause 100ms and try audio unit initialization again...";
[NSThread sleepForTimeInterval:0.1f];
result = AudioUnitInitialize(vpio_unit_);
LOG(LS_INFO) << "Voice-Processing I/O unit is now initialized";
return true;
// Restarts the Voice-Processing I/O unit so that it uses a new sample rate:
// stop, uninitialize, update the stream format on both buses, initialize and
// start again. Returns true when the end of the function is reached.
// NOTE(review): the heads of the four error-checking statements (only their
// message strings are visible) and the buffer re-allocation call are not
// present in this view -- confirm against the complete source.
// NOTE(review): |sample_rate| is a float while the caller passes a double
// session sample rate, so the value is narrowed on the call -- confirm that
// this is acceptable.
bool AudioDeviceIOS::RestartAudioUnitWithNewFormat(float sample_rate) {
LOGI() << "RestartAudioUnitWithNewFormat(sample_rate=" << sample_rate << ")";
// Stop the active audio unit.
"Failed to stop the the Voice-Processing I/O unit");
// The stream format is about to be changed and it requires that we first
// uninitialize it to deallocate its resources.
"Failed to uninitialize the the Voice-Processing I/O unit");
// Allocate new buffers given the new stream format.
// Update the existing application format using the new sample rate.
// The rate is read back from |playout_parameters_| rather than taken from
// |sample_rate| directly; presumably the parameters were refreshed by the
// buffer setup step above -- TODO confirm.
application_format_.mSampleRate = playout_parameters_.sample_rate();
UInt32 size = sizeof(application_format_);
// Bus 1 (input element), output scope; then bus 0 (output element), input
// scope, matching the initial setup of the unit.
// NOTE(review): the OSStatus results of these two calls are discarded.
AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Output, 1, &application_format_, size);
AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Input, 0, &application_format_, size);
// Prepare the audio unit to render audio again.
"Failed to initialize the Voice-Processing I/O unit");
// Start rendering audio using the new format.
"Failed to start the Voice-Processing I/O unit");
return true;
// Shared initialization used by both InitPlayout() and InitRecording():
// activates the audio session, verifies its category and mode, and creates
// and initializes the Voice-Processing I/O unit.
// Returns false if any of these steps fails, true otherwise.
// NOTE(review): the calls belonging to the "start observing", "ensure that we
// got what we asked for" and "reduce usage count" comments are not visible in
// this view -- confirm against the complete source.
bool AudioDeviceIOS::InitPlayOrRecord() {
LOGI() << "InitPlayOrRecord";
// Activate the audio session if not already activated.
if (!ActivateAudioSession()) {
return false;
// Ensure that the active audio session has the correct category and mode.
AVAudioSession* session = [AVAudioSession sharedInstance];
if (!VerifyAudioSession(session)) {
LOG(LS_ERROR) << "Failed to verify audio session category and mode";
return false;
// Start observing audio session interruptions and route changes.
// Ensure that we got what we asked for in our active audio session.
// Create, setup and initialize a new Voice-Processing I/O unit.
if (!SetupAndInitializeVoiceProcessingAudioUnit()) {
// Reduce usage count for the audio session and possibly deactivate it if
// this object is the only user.
return false;
return true;
// Shared teardown for playout and recording: stops and uninitializes the
// Voice-Processing I/O unit when one exists. Failures are logged and do not
// abort the teardown.
// NOTE(review): the calls belonging to the "remove observers" and
// "deactivate as last action" comments, and the disposal of the unit, are not
// visible in this view -- confirm against the complete source.
void AudioDeviceIOS::ShutdownPlayOrRecord() {
LOGI() << "ShutdownPlayOrRecord";
// Close and delete the voice-processing I/O unit.
OSStatus result = -1;
if (nullptr != vpio_unit_) {
result = AudioOutputUnitStop(vpio_unit_);
if (result != noErr) {
LOG_F(LS_ERROR) << "AudioOutputUnitStop failed: " << result;
result = AudioUnitUninitialize(vpio_unit_);
if (result != noErr) {
LOG_F(LS_ERROR) << "AudioUnitUninitialize failed: " << result;
// Remove audio session notification observers.
// All I/O should be stopped or paused prior to deactivating the audio
// session, hence we deactivate as last action.
// Releases the Voice-Processing I/O unit instance held in |vpio_unit_| by
// calling AudioComponentInstanceDispose(). A non-noErr status is logged, and
// |vpio_unit_| is reset to nullptr afterwards.
// NOTE(review): the nullptr comparison below presumably makes this a no-op
// when no unit exists -- confirm.
void AudioDeviceIOS::DisposeAudioUnit() {
if (nullptr == vpio_unit_)
OSStatus result = AudioComponentInstanceDispose(vpio_unit_);
if (result != noErr) {
LOG(LS_ERROR) << "AudioComponentInstanceDispose failed:" << result;
// Clear the handle so that later null checks see that no unit is held.
vpio_unit_ = nullptr;
// Callback entry point for recorded (input) audio. The parameter list matches
// Core Audio's AURenderCallback signature.
//
// |in_ref_con| carries the AudioDeviceIOS instance; the call is forwarded to
// its OnRecordedDataIsAvailable() member and that method's status is returned.
// |in_bus_number| is expected to be 1 (checked in debug builds). |io_data| is
// not forwarded to the member function.
OSStatus AudioDeviceIOS::RecordedDataIsAvailable(
void* in_ref_con,
AudioUnitRenderActionFlags* io_action_flags,
const AudioTimeStamp* in_time_stamp,
UInt32 in_bus_number,
UInt32 in_number_frames,
AudioBufferList* io_data) {
RTC_DCHECK_EQ(1u, in_bus_number);
!io_data); // no buffer should be allocated for input at this stage
// Recover the instance pointer that was passed as the opaque context value.
AudioDeviceIOS* audio_device_ios = static_cast<AudioDeviceIOS*>(in_ref_con);
return audio_device_ios->OnRecordedDataIsAvailable(
io_action_flags, in_time_stamp, in_bus_number, in_number_frames);
// Pulls newly recorded audio out of the Voice-Processing I/O unit and hands it
// to |fine_audio_buffer_|.
//
// Parameters are the values received by the input callback and are passed on
// unchanged to AudioUnitRender().
//
// Returns noErr immediately when |recording_| is not set, the status from
// AudioUnitRender() when rendering fails, and noErr after the recorded data
// has been delivered.
OSStatus AudioDeviceIOS::OnRecordedDataIsAvailable(
AudioUnitRenderActionFlags* io_action_flags,
const AudioTimeStamp* in_time_stamp,
UInt32 in_bus_number,
UInt32 in_number_frames) {
OSStatus result = noErr;
// Simply return if recording is not enabled.
if (!rtc::AtomicOps::AcquireLoad(&recording_))
return result;
if (in_number_frames != record_parameters_.frames_per_buffer()) {
// We have seen short bursts (1-2 frames) where |in_number_frames| changes.
// Add a log to keep track of longer sequences if that should ever happen.
LOG(LS_WARNING) << "in_number_frames (" << in_number_frames
<< ") != " << record_parameters_.frames_per_buffer();
// Obtain the recorded audio samples by initiating a rendering cycle.
// Since it happens on the input bus, the local |io_data| points at the
// preallocated audio buffer list that the audio unit renders into.
// TODO(henrika): should error handling be improved?
AudioBufferList* io_data = &audio_record_buffer_list_;
result = AudioUnitRender(vpio_unit_, io_action_flags, in_time_stamp,
in_bus_number, in_number_frames, io_data);
if (result != noErr) {
// Name the call that actually failed so that logs point at the right API.
LOG_F(LS_ERROR) << "AudioUnitRender failed: " << result;
return result;
// Get a pointer to the recorded audio and send it to the WebRTC ADB.
// Use the FineAudioBuffer instance to convert between native buffer size
// and the 10ms buffer size used by WebRTC.
const UInt32 data_size_in_bytes = io_data->mBuffers[0].mDataByteSize;
// The rendered byte count must correspond to exactly |in_number_frames|
// samples of kBytesPerSample bytes each.
RTC_CHECK_EQ(data_size_in_bytes / kBytesPerSample, in_number_frames);
SInt8* data = static_cast<SInt8*>(io_data->mBuffers[0].mData);
fine_audio_buffer_->DeliverRecordedData(data, data_size_in_bytes,
return noErr;
// Callback entry point for playout (output) audio. The parameter list matches
// Core Audio's AURenderCallback signature.
//
// |in_ref_con| carries the AudioDeviceIOS instance; the call is forwarded to
// its OnGetPlayoutData() member and that method's status is returned.
// |in_bus_number| is expected to be 0 (checked in debug builds).
OSStatus AudioDeviceIOS::GetPlayoutData(
void* in_ref_con,
AudioUnitRenderActionFlags* io_action_flags,
const AudioTimeStamp* in_time_stamp,
UInt32 in_bus_number,
UInt32 in_number_frames,
AudioBufferList* io_data) {
RTC_DCHECK_EQ(0u, in_bus_number);
// Recover the instance pointer that was passed as the opaque context value.
AudioDeviceIOS* audio_device_ios = static_cast<AudioDeviceIOS*>(in_ref_con);
return audio_device_ios->OnGetPlayoutData(io_action_flags, in_number_frames,
// Fills the first buffer of |io_data| with audio to be played out.
//
// |io_action_flags| gets kAudioUnitRenderAction_OutputIsSilence OR-ed in when
// playout is not active. |in_number_frames| must equal the byte size of
// |io_data->mBuffers[0]| divided by kBytesPerSample. |io_data| is expected to
// hold exactly one single-channel buffer (checked in debug builds).
//
// Always returns noErr: either after zero-filling the destination (when
// |playing_| is not set) or after copying from |playout_audio_buffer_|.
OSStatus AudioDeviceIOS::OnGetPlayoutData(
AudioUnitRenderActionFlags* io_action_flags,
UInt32 in_number_frames,
AudioBufferList* io_data) {
// Verify 16-bit, noninterleaved mono PCM signal format.
RTC_DCHECK_EQ(1u, io_data->mNumberBuffers);
RTC_DCHECK_EQ(1u, io_data->mBuffers[0].mNumberChannels);
// Get pointer to internal audio buffer to which new audio data shall be
// written.
const UInt32 dataSizeInBytes = io_data->mBuffers[0].mDataByteSize;
RTC_CHECK_EQ(dataSizeInBytes / kBytesPerSample, in_number_frames);
SInt8* destination = static_cast<SInt8*>(io_data->mBuffers[0].mData);
// Produce silence and give audio unit a hint about it if playout is not
// activated.
if (!rtc::AtomicOps::AcquireLoad(&playing_)) {
*io_action_flags |= kAudioUnitRenderAction_OutputIsSilence;
memset(destination, 0, dataSizeInBytes);
return noErr;
// Read decoded 16-bit PCM samples from WebRTC (using a size that matches
// the native I/O audio unit) to a preallocated intermediate buffer and
// copy the result to the audio buffer in the |io_data| destination.
// NOTE(review): the step that fills |playout_audio_buffer_| before the copy is
// not visible here -- confirm that it runs ahead of the memcpy.
SInt8* source = playout_audio_buffer_.get();
memcpy(destination, source, dataSizeInBytes);
return noErr;
} // namespace webrtc