Added an "interleaved_" flag to webrtc::AudioFrame.
Also did some formatting refactoring on the AudioFrame class; the reformatted code has no functional changes.
BUG=
TEST=compile
R=andrew@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/2969004
git-svn-id: http://webrtc.googlecode.com/svn/trunk/webrtc@5032 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/modules/interface/module_common_types.h b/modules/interface/module_common_types.h
index bc32b22..0d5e44b 100644
--- a/modules/interface/module_common_types.h
+++ b/modules/interface/module_common_types.h
@@ -729,88 +729,79 @@
* - The +operator assume that you would never add exactly opposite frames when
* deciding the resulting state. To do this use the -operator.
*/
-class AudioFrame
-{
+class AudioFrame {
public:
- // Stereo, 32 kHz, 60 ms (2 * 32 * 60)
- static const int kMaxDataSizeSamples = 3840;
+ // Stereo, 32 kHz, 60 ms (2 * 32 * 60)
+ static const int kMaxDataSizeSamples = 3840;
- enum VADActivity
- {
- kVadActive = 0,
- kVadPassive = 1,
- kVadUnknown = 2
- };
- enum SpeechType
- {
- kNormalSpeech = 0,
- kPLC = 1,
- kCNG = 2,
- kPLCCNG = 3,
- kUndefined = 4
- };
+ enum VADActivity {
+ kVadActive = 0,
+ kVadPassive = 1,
+ kVadUnknown = 2
+ };
+ enum SpeechType {
+ kNormalSpeech = 0,
+ kPLC = 1,
+ kCNG = 2,
+ kPLCCNG = 3,
+ kUndefined = 4
+ };
- AudioFrame();
- virtual ~AudioFrame();
+ AudioFrame();
+ virtual ~AudioFrame() {}
- void UpdateFrame(
- int id,
- uint32_t timestamp,
- const int16_t* data,
- int samples_per_channel,
- int sample_rate_hz,
- SpeechType speech_type,
- VADActivity vad_activity,
- int num_channels = 1,
- uint32_t energy = -1);
+  // |interleaved_| is assumed to be unchanged with this UpdateFrame() method.
+ void UpdateFrame(
+ int id,
+ uint32_t timestamp,
+ const int16_t* data,
+ int samples_per_channel,
+ int sample_rate_hz,
+ SpeechType speech_type,
+ VADActivity vad_activity,
+ int num_channels = 1,
+ uint32_t energy = -1);
- AudioFrame& Append(const AudioFrame& rhs);
+ AudioFrame& Append(const AudioFrame& rhs);
- void CopyFrom(const AudioFrame& src);
+ void CopyFrom(const AudioFrame& src);
- void Mute();
+ void Mute();
- AudioFrame& operator>>=(const int rhs);
- AudioFrame& operator+=(const AudioFrame& rhs);
- AudioFrame& operator-=(const AudioFrame& rhs);
+ AudioFrame& operator>>=(const int rhs);
+ AudioFrame& operator+=(const AudioFrame& rhs);
+ AudioFrame& operator-=(const AudioFrame& rhs);
- int id_;
- uint32_t timestamp_;
- int16_t data_[kMaxDataSizeSamples];
- int samples_per_channel_;
- int sample_rate_hz_;
- int num_channels_;
- SpeechType speech_type_;
- VADActivity vad_activity_;
- uint32_t energy_;
+ int id_;
+ uint32_t timestamp_;
+ int16_t data_[kMaxDataSizeSamples];
+ int samples_per_channel_;
+ int sample_rate_hz_;
+ int num_channels_;
+ SpeechType speech_type_;
+ VADActivity vad_activity_;
+ uint32_t energy_;
+ bool interleaved_;
private:
- DISALLOW_COPY_AND_ASSIGN(AudioFrame);
+ DISALLOW_COPY_AND_ASSIGN(AudioFrame);
};
inline
AudioFrame::AudioFrame()
- :
- id_(-1),
- timestamp_(0),
- data_(),
- samples_per_channel_(0),
- sample_rate_hz_(0),
- num_channels_(1),
- speech_type_(kUndefined),
- vad_activity_(kVadUnknown),
- energy_(0xffffffff)
-{
-}
+ : id_(-1),
+ timestamp_(0),
+ data_(),
+ samples_per_channel_(0),
+ sample_rate_hz_(0),
+ num_channels_(1),
+ speech_type_(kUndefined),
+ vad_activity_(kVadUnknown),
+ energy_(0xffffffff),
+ interleaved_(true) {}
inline
-AudioFrame::~AudioFrame()
-{
-}
-
-inline
-void
-AudioFrame::UpdateFrame(
+void AudioFrame::UpdateFrame(
int id,
uint32_t timestamp,
const int16_t* data,
@@ -819,229 +810,169 @@
SpeechType speech_type,
VADActivity vad_activity,
int num_channels,
- uint32_t energy)
-{
- id_ = id;
- timestamp_ = timestamp;
- samples_per_channel_ = samples_per_channel;
- sample_rate_hz_ = sample_rate_hz;
- speech_type_ = speech_type;
- vad_activity_ = vad_activity;
- num_channels_ = num_channels;
- energy_ = energy;
+ uint32_t energy) {
+ id_ = id;
+ timestamp_ = timestamp;
+ samples_per_channel_ = samples_per_channel;
+ sample_rate_hz_ = sample_rate_hz;
+ speech_type_ = speech_type;
+ vad_activity_ = vad_activity;
+ num_channels_ = num_channels;
+ energy_ = energy;
- const int length = samples_per_channel * num_channels;
- assert(length <= kMaxDataSizeSamples && length >= 0);
- if(data != NULL)
- {
- memcpy(data_, data, sizeof(int16_t) * length);
- }
- else
- {
- memset(data_, 0, sizeof(int16_t) * length);
- }
+ const int length = samples_per_channel * num_channels;
+ assert(length <= kMaxDataSizeSamples && length >= 0);
+ if(data != NULL) {
+ memcpy(data_, data, sizeof(int16_t) * length);
+ } else {
+ memset(data_, 0, sizeof(int16_t) * length);
+ }
}
-inline void AudioFrame::CopyFrom(const AudioFrame& src)
-{
- if(this == &src)
- {
- return;
- }
- id_ = src.id_;
- timestamp_ = src.timestamp_;
- samples_per_channel_ = src.samples_per_channel_;
- sample_rate_hz_ = src.sample_rate_hz_;
- speech_type_ = src.speech_type_;
- vad_activity_ = src.vad_activity_;
- num_channels_ = src.num_channels_;
- energy_ = src.energy_;
+inline void AudioFrame::CopyFrom(const AudioFrame& src) {
+ if(this == &src)
+ return;
- const int length = samples_per_channel_ * num_channels_;
- assert(length <= kMaxDataSizeSamples && length >= 0);
- memcpy(data_, src.data_, sizeof(int16_t) * length);
+ id_ = src.id_;
+ timestamp_ = src.timestamp_;
+ samples_per_channel_ = src.samples_per_channel_;
+ sample_rate_hz_ = src.sample_rate_hz_;
+ speech_type_ = src.speech_type_;
+ vad_activity_ = src.vad_activity_;
+ num_channels_ = src.num_channels_;
+ energy_ = src.energy_;
+ interleaved_ = src.interleaved_;
+
+ const int length = samples_per_channel_ * num_channels_;
+ assert(length <= kMaxDataSizeSamples && length >= 0);
+ memcpy(data_, src.data_, sizeof(int16_t) * length);
}
inline
-void
-AudioFrame::Mute()
-{
+void AudioFrame::Mute() {
memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
}
inline
-AudioFrame&
-AudioFrame::operator>>=(const int rhs)
-{
- assert((num_channels_ > 0) && (num_channels_ < 3));
- if((num_channels_ > 2) ||
- (num_channels_ < 1))
- {
- return *this;
- }
- for(int i = 0; i < samples_per_channel_ * num_channels_; i++)
- {
- data_[i] = static_cast<int16_t>(data_[i] >> rhs);
- }
+AudioFrame& AudioFrame::operator>>=(const int rhs) {
+ assert((num_channels_ > 0) && (num_channels_ < 3));
+ if((num_channels_ > 2) || (num_channels_ < 1))
return *this;
+
+ for(int i = 0; i < samples_per_channel_ * num_channels_; i++) {
+ data_[i] = static_cast<int16_t>(data_[i] >> rhs);
+ }
+ return *this;
}
inline
-AudioFrame&
-AudioFrame::Append(const AudioFrame& rhs)
-{
- // Sanity check
- assert((num_channels_ > 0) && (num_channels_ < 3));
- if((num_channels_ > 2) ||
- (num_channels_ < 1))
- {
- return *this;
- }
- if(num_channels_ != rhs.num_channels_)
- {
- return *this;
- }
- if((vad_activity_ == kVadActive) ||
- rhs.vad_activity_ == kVadActive)
- {
- vad_activity_ = kVadActive;
- }
- else if((vad_activity_ == kVadUnknown) ||
- rhs.vad_activity_ == kVadUnknown)
- {
- vad_activity_ = kVadUnknown;
- }
- if(speech_type_ != rhs.speech_type_)
- {
- speech_type_ = kUndefined;
- }
-
- int offset = samples_per_channel_ * num_channels_;
- for(int i = 0;
- i < rhs.samples_per_channel_ * rhs.num_channels_;
- i++)
- {
- data_[offset+i] = rhs.data_[i];
- }
- samples_per_channel_ += rhs.samples_per_channel_;
+AudioFrame& AudioFrame::Append(const AudioFrame& rhs) {
+ // Sanity check
+ assert((num_channels_ > 0) && (num_channels_ < 3));
+ assert(interleaved_ == rhs.interleaved_);
+ if((num_channels_ > 2) || (num_channels_ < 1))
return *this;
-}
-
-// merge vectors
-inline
-AudioFrame&
-AudioFrame::operator+=(const AudioFrame& rhs)
-{
- // Sanity check
- assert((num_channels_ > 0) && (num_channels_ < 3));
- if((num_channels_ > 2) ||
- (num_channels_ < 1))
- {
- return *this;
- }
- if(num_channels_ != rhs.num_channels_)
- {
- return *this;
- }
- bool noPrevData = false;
- if(samples_per_channel_ != rhs.samples_per_channel_)
- {
- if(samples_per_channel_ == 0)
- {
- // special case we have no data to start with
- samples_per_channel_ = rhs.samples_per_channel_;
- noPrevData = true;
- } else
- {
- return *this;
- }
- }
-
- if((vad_activity_ == kVadActive) ||
- rhs.vad_activity_ == kVadActive)
- {
- vad_activity_ = kVadActive;
- }
- else if((vad_activity_ == kVadUnknown) ||
- rhs.vad_activity_ == kVadUnknown)
- {
- vad_activity_ = kVadUnknown;
- }
-
- if(speech_type_ != rhs.speech_type_)
- {
- speech_type_ = kUndefined;
- }
-
- if(noPrevData)
- {
- memcpy(data_, rhs.data_,
- sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
- } else
- {
- // IMPROVEMENT this can be done very fast in assembly
- for(int i = 0; i < samples_per_channel_ * num_channels_; i++)
- {
- int32_t wrapGuard = static_cast<int32_t>(data_[i]) +
- static_cast<int32_t>(rhs.data_[i]);
- if(wrapGuard < -32768)
- {
- data_[i] = -32768;
- }else if(wrapGuard > 32767)
- {
- data_[i] = 32767;
- }else
- {
- data_[i] = (int16_t)wrapGuard;
- }
- }
- }
- energy_ = 0xffffffff;
+ if(num_channels_ != rhs.num_channels_)
return *this;
+
+ if((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
+ vad_activity_ = kVadActive;
+ } else if(vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
+ vad_activity_ = kVadUnknown;
+ }
+ if(speech_type_ != rhs.speech_type_) {
+ speech_type_ = kUndefined;
+ }
+
+ int offset = samples_per_channel_ * num_channels_;
+ for(int i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) {
+ data_[offset+i] = rhs.data_[i];
+ }
+ samples_per_channel_ += rhs.samples_per_channel_;
+ return *this;
}
inline
-AudioFrame&
-AudioFrame::operator-=(const AudioFrame& rhs)
-{
- // Sanity check
- assert((num_channels_ > 0) && (num_channels_ < 3));
- if((num_channels_ > 2)||
- (num_channels_ < 1))
- {
- return *this;
+AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
+ // Sanity check
+ assert((num_channels_ > 0) && (num_channels_ < 3));
+ assert(interleaved_ == rhs.interleaved_);
+ if((num_channels_ > 2) || (num_channels_ < 1))
+ return *this;
+ if(num_channels_ != rhs.num_channels_)
+ return *this;
+
+ bool noPrevData = false;
+ if(samples_per_channel_ != rhs.samples_per_channel_) {
+ if(samples_per_channel_ == 0) {
+ // special case we have no data to start with
+ samples_per_channel_ = rhs.samples_per_channel_;
+ noPrevData = true;
+ } else {
+ return *this;
}
- if((samples_per_channel_ != rhs.samples_per_channel_) ||
- (num_channels_ != rhs.num_channels_))
- {
- return *this;
- }
- if((vad_activity_ != kVadPassive) ||
- rhs.vad_activity_ != kVadPassive)
- {
- vad_activity_ = kVadUnknown;
- }
+ }
+
+ if((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
+ vad_activity_ = kVadActive;
+ } else if(vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
+ vad_activity_ = kVadUnknown;
+ }
+
+ if(speech_type_ != rhs.speech_type_)
speech_type_ = kUndefined;
- for(int i = 0; i < samples_per_channel_ * num_channels_; i++)
- {
- int32_t wrapGuard = static_cast<int32_t>(data_[i]) -
- static_cast<int32_t>(rhs.data_[i]);
- if(wrapGuard < -32768)
- {
- data_[i] = -32768;
- }
- else if(wrapGuard > 32767)
- {
- data_[i] = 32767;
- }
- else
- {
- data_[i] = (int16_t)wrapGuard;
- }
+ if(noPrevData) {
+ memcpy(data_, rhs.data_,
+ sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
+ } else {
+ // IMPROVEMENT this can be done very fast in assembly
+ for(int i = 0; i < samples_per_channel_ * num_channels_; i++) {
+ int32_t wrapGuard = static_cast<int32_t>(data_[i]) +
+ static_cast<int32_t>(rhs.data_[i]);
+ if(wrapGuard < -32768) {
+ data_[i] = -32768;
+ } else if(wrapGuard > 32767) {
+ data_[i] = 32767;
+ } else {
+ data_[i] = (int16_t)wrapGuard;
+ }
}
- energy_ = 0xffffffff;
+ }
+ energy_ = 0xffffffff;
+ return *this;
+}
+
+inline
+AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) {
+ // Sanity check
+ assert((num_channels_ > 0) && (num_channels_ < 3));
+ assert(interleaved_ == rhs.interleaved_);
+ if((num_channels_ > 2)|| (num_channels_ < 1))
return *this;
+
+ if((samples_per_channel_ != rhs.samples_per_channel_) ||
+ (num_channels_ != rhs.num_channels_)) {
+ return *this;
+ }
+ if((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) {
+ vad_activity_ = kVadUnknown;
+ }
+ speech_type_ = kUndefined;
+
+ for(int i = 0; i < samples_per_channel_ * num_channels_; i++) {
+ int32_t wrapGuard = static_cast<int32_t>(data_[i]) -
+ static_cast<int32_t>(rhs.data_[i]);
+ if(wrapGuard < -32768) {
+ data_[i] = -32768;
+ } else if(wrapGuard > 32767) {
+ data_[i] = 32767;
+ } else {
+ data_[i] = (int16_t)wrapGuard;
+ }
+ }
+ energy_ = 0xffffffff;
+ return *this;
}
inline bool IsNewerSequenceNumber(uint16_t sequence_number,