Update multiplex encoder to support attaching augmenting data to the video frame

The multiplex encoder now supports attaching user-defined data to the video
frame. The data is sent with the video frame and is thus guaranteed to stay
synchronized with it. This is useful when the data and the video frame need
to be synchronized, e.g. when sending 3D object or camera tracking
information along with the video stream.

Multiplex encoding with data is implemented in a modular way: a new
VideoFrameBuffer subtype, AugmentedVideoFrameBuffer, holds the video frame
buffer together with the augmenting data, and MultiplexEncoderAdapter encodes
both the frame and the data.
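
A minimal sender-side sketch (assuming an existing
rtc::scoped_refptr<VideoFrameBuffer> yuv_buffer plus an RTP timestamp and
render time for the frame; the unit test below shows the full setup):

    // Attach 16 bytes of application data (e.g. a serialized camera pose).
    std::unique_ptr<uint8_t[]> data(new uint8_t[16]);
    for (int i = 0; i < 16; ++i)
      data[i] = static_cast<uint8_t>(i);
    rtc::scoped_refptr<webrtc::AugmentedVideoFrameBuffer> augmented_buffer(
        new rtc::RefCountedObject<webrtc::AugmentedVideoFrameBuffer>(
            yuv_buffer, std::move(data), /*augmenting_data_size=*/16));
    // The frame is then passed to the encoder like any other VideoFrame.
    webrtc::VideoFrame frame(augmented_buffer, rtp_timestamp, render_time_ms,
                             webrtc::kVideoRotation_0);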

Change-Id: I23263f70d111f6f1783c070edec70bd11ebb9868
Bug: webrtc:9632
Reviewed-on: https://webrtc-review.googlesource.com/92642
Commit-Queue: Tarek Hefny <tarekh@google.com>
Reviewed-by: Niklas Enbom <niklas.enbom@webrtc.org>
Reviewed-by: Emircan Uysaler <emircan@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24297}
diff --git a/media/engine/multiplexcodecfactory.cc b/media/engine/multiplexcodecfactory.cc
index 23d34f9..236a2e8 100644
--- a/media/engine/multiplexcodecfactory.cc
+++ b/media/engine/multiplexcodecfactory.cc
@@ -33,8 +33,10 @@
 constexpr const char* kMultiplexAssociatedCodecName = cricket::kVp9CodecName;
 
 MultiplexEncoderFactory::MultiplexEncoderFactory(
-    std::unique_ptr<VideoEncoderFactory> factory)
-    : factory_(std::move(factory)) {}
+    std::unique_ptr<VideoEncoderFactory> factory,
+    bool supports_augmenting_data)
+    : factory_(std::move(factory)),
+      supports_augmenting_data_(supports_augmenting_data) {}
 
 std::vector<SdpVideoFormat> MultiplexEncoderFactory::GetSupportedFormats()
     const {
@@ -72,13 +74,15 @@
   }
   SdpVideoFormat associated_format = format;
   associated_format.name = it->second;
-  return std::unique_ptr<VideoEncoder>(
-      new MultiplexEncoderAdapter(factory_.get(), associated_format));
+  return std::unique_ptr<VideoEncoder>(new MultiplexEncoderAdapter(
+      factory_.get(), associated_format, supports_augmenting_data_));
 }
 
 MultiplexDecoderFactory::MultiplexDecoderFactory(
-    std::unique_ptr<VideoDecoderFactory> factory)
-    : factory_(std::move(factory)) {}
+    std::unique_ptr<VideoDecoderFactory> factory,
+    bool supports_augmenting_data)
+    : factory_(std::move(factory)),
+      supports_augmenting_data_(supports_augmenting_data) {}
 
 std::vector<SdpVideoFormat> MultiplexDecoderFactory::GetSupportedFormats()
     const {
@@ -107,8 +111,8 @@
   }
   SdpVideoFormat associated_format = format;
   associated_format.name = it->second;
-  return std::unique_ptr<VideoDecoder>(
-      new MultiplexDecoderAdapter(factory_.get(), associated_format));
+  return std::unique_ptr<VideoDecoder>(new MultiplexDecoderAdapter(
+      factory_.get(), associated_format, supports_augmenting_data_));
 }
 
 }  // namespace webrtc
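
A rough sketch of wiring the new flag through the factories (assuming the
builtin codec factories from api/video_codecs/ as the wrapped factories):

    // When the flag is true, every frame handed to the encoder must carry an
    // AugmentedVideoFrameBuffer, and the decoder expects augmenting data in
    // every encoded frame.
    auto encoder_factory = absl::make_unique<webrtc::MultiplexEncoderFactory>(
        webrtc::CreateBuiltinVideoEncoderFactory(),
        /*supports_augmenting_data=*/true);
    auto decoder_factory = absl::make_unique<webrtc::MultiplexDecoderFactory>(
        webrtc::CreateBuiltinVideoDecoderFactory(),
        /*supports_augmenting_data=*/true);
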
diff --git a/media/engine/multiplexcodecfactory.h b/media/engine/multiplexcodecfactory.h
index c622af5..030904f 100644
--- a/media/engine/multiplexcodecfactory.h
+++ b/media/engine/multiplexcodecfactory.h
@@ -21,8 +21,12 @@
 
 class MultiplexEncoderFactory : public VideoEncoderFactory {
  public:
-  explicit MultiplexEncoderFactory(
-      std::unique_ptr<VideoEncoderFactory> factory);
+  // supports_augmenting_data specifies whether the encoder supports
+  // augmenting data. If set, the encoder expects video frame buffers of type
+  // AugmentedVideoFrameBuffer and encodes the attached frame and data
+  // together; if not set, any frame buffer type can be passed in.
+  MultiplexEncoderFactory(std::unique_ptr<VideoEncoderFactory> factory,
+                          bool supports_augmenting_data = false);
 
   std::vector<SdpVideoFormat> GetSupportedFormats() const override;
   CodecInfo QueryVideoEncoder(const SdpVideoFormat& format) const override;
@@ -31,12 +35,17 @@
 
  private:
   std::unique_ptr<VideoEncoderFactory> factory_;
+  const bool supports_augmenting_data_;
 };
 
 class MultiplexDecoderFactory : public VideoDecoderFactory {
  public:
-  explicit MultiplexDecoderFactory(
-      std::unique_ptr<VideoDecoderFactory> factory);
+  // supports_augmenting_data specifies whether the decoder supports
+  // augmenting data. If set, the decoder expects each encoded video frame to
+  // contain augmenting data, i.e. the sender must use a MultiplexEncoder
+  // with supports_augmenting_data set.
+  MultiplexDecoderFactory(std::unique_ptr<VideoDecoderFactory> factory,
+                          bool supports_augmenting_data = false);
 
   std::vector<SdpVideoFormat> GetSupportedFormats() const override;
   std::unique_ptr<VideoDecoder> CreateVideoDecoder(
@@ -44,6 +53,7 @@
 
  private:
   std::unique_ptr<VideoDecoderFactory> factory_;
+  const bool supports_augmenting_data_;
 };
 
 }  // namespace webrtc
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 10fac98..a81dd7c 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -353,6 +353,8 @@
 
 rtc_static_library("webrtc_multiplex") {
   sources = [
+    "codecs/multiplex/augmented_video_frame_buffer.cc",
+    "codecs/multiplex/include/augmented_video_frame_buffer.h",
     "codecs/multiplex/include/multiplex_decoder_adapter.h",
     "codecs/multiplex/include/multiplex_encoded_image_packer.h",
     "codecs/multiplex/include/multiplex_encoder_adapter.h",
diff --git a/modules/video_coding/codecs/multiplex/augmented_video_frame_buffer.cc b/modules/video_coding/codecs/multiplex/augmented_video_frame_buffer.cc
new file mode 100644
index 0000000..b61a4a6
--- /dev/null
+++ b/modules/video_coding/codecs/multiplex/augmented_video_frame_buffer.cc
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h"
+#include "api/video/video_frame_buffer.h"
+
+namespace webrtc {
+
+AugmentedVideoFrameBuffer::AugmentedVideoFrameBuffer(
+    const rtc::scoped_refptr<VideoFrameBuffer>& video_frame_buffer,
+    std::unique_ptr<uint8_t[]> augmenting_data,
+    uint16_t augmenting_data_size)
+    : augmenting_data_size_(augmenting_data_size),
+      augmenting_data_(std::move(augmenting_data)),
+      video_frame_buffer_(video_frame_buffer) {}
+
+rtc::scoped_refptr<VideoFrameBuffer>
+AugmentedVideoFrameBuffer::GetVideoFrameBuffer() const {
+  return video_frame_buffer_;
+}
+
+uint8_t* AugmentedVideoFrameBuffer::GetAugmentingData() const {
+  return augmenting_data_.get();
+}
+
+uint16_t AugmentedVideoFrameBuffer::GetAugmentingDataSize() const {
+  return augmenting_data_size_;
+}
+
+VideoFrameBuffer::Type AugmentedVideoFrameBuffer::type() const {
+  return video_frame_buffer_->type();
+}
+
+int AugmentedVideoFrameBuffer::width() const {
+  return video_frame_buffer_->width();
+}
+
+int AugmentedVideoFrameBuffer::height() const {
+  return video_frame_buffer_->height();
+}
+
+rtc::scoped_refptr<I420BufferInterface> AugmentedVideoFrameBuffer::ToI420() {
+  return video_frame_buffer_->ToI420();
+}
+}  // namespace webrtc
diff --git a/modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h b/modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h
new file mode 100644
index 0000000..96496ed
--- /dev/null
+++ b/modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h
@@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_AUGMENTED_VIDEO_FRAME_BUFFER_H_
+#define MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_AUGMENTED_VIDEO_FRAME_BUFFER_H_
+
+#include <memory>
+
+#include "api/video/video_frame_buffer.h"
+
+namespace webrtc {
+class AugmentedVideoFrameBuffer : public VideoFrameBuffer {
+ public:
+  AugmentedVideoFrameBuffer(
+      const rtc::scoped_refptr<VideoFrameBuffer>& video_frame_buffer,
+      std::unique_ptr<uint8_t[]> augmenting_data,
+      uint16_t augmenting_data_size);
+
+  // Retrieves the underlying VideoFrameBuffer without the augmenting data.
+  rtc::scoped_refptr<VideoFrameBuffer> GetVideoFrameBuffer() const;
+
+  // Gets a pointer to the augmenting data; ownership stays with this buffer.
+  uint8_t* GetAugmentingData() const;
+
+  // Gets the size of the augmenting data.
+  uint16_t GetAugmentingDataSize() const;
+
+  // Returns the type of the underlying VideoFrameBuffer.
+  Type type() const final;
+
+  // Returns the width of the underlying VideoFrameBuffer.
+  int width() const final;
+
+  // Returns the height of the underlying VideoFrameBuffer.
+  int height() const final;
+
+  // Gets the I420 buffer from the underlying frame buffer.
+  rtc::scoped_refptr<I420BufferInterface> ToI420() final;
+
+ private:
+  uint16_t augmenting_data_size_;
+  std::unique_ptr<uint8_t[]> augmenting_data_;
+  rtc::scoped_refptr<webrtc::VideoFrameBuffer> video_frame_buffer_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_AUGMENTED_VIDEO_FRAME_BUFFER_H_
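
Receive-side sketch for recovering the data (mirrors the CheckData helper in
the unit test below). The static_cast is only safe when the decoder was
created with supports_augmenting_data, since type() forwards the underlying
buffer's type and there is no checked downcast:

    void OnDecodedFrame(const webrtc::VideoFrame& frame) {
      auto* augmented = static_cast<webrtc::AugmentedVideoFrameBuffer*>(
          frame.video_frame_buffer().get());
      const uint16_t size = augmented->GetAugmentingDataSize();
      const uint8_t* data = augmented->GetAugmentingData();
      // |data| points at |size| bytes owned by the buffer; copy them out if
      // they must outlive the frame.
    }
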
diff --git a/modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h b/modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h
index 09618a2..b8a90b4 100644
--- a/modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h
+++ b/modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h
@@ -26,7 +26,8 @@
  public:
   // |factory| is not owned and expected to outlive this class' lifetime.
   MultiplexDecoderAdapter(VideoDecoderFactory* factory,
-                          const SdpVideoFormat& associated_format);
+                          const SdpVideoFormat& associated_format,
+                          bool supports_augmenting_data = false);
   virtual ~MultiplexDecoderAdapter();
 
   // Implements VideoDecoder
@@ -52,12 +53,17 @@
   // Holds the decoded image output of a frame.
   struct DecodedImageData;
 
+  // Holds the augmenting data of an image.
+  struct AugmentingData;
+
   void MergeAlphaImages(VideoFrame* decoded_image,
                         const absl::optional<int32_t>& decode_time_ms,
                         const absl::optional<uint8_t>& qp,
                         VideoFrame* multiplex_decoded_image,
                         const absl::optional<int32_t>& multiplex_decode_time_ms,
-                        const absl::optional<uint8_t>& multiplex_qp);
+                        const absl::optional<uint8_t>& multiplex_qp,
+                        std::unique_ptr<uint8_t[]> augmenting_data,
+                        uint16_t augmenting_data_length);
 
   VideoDecoderFactory* const factory_;
   const SdpVideoFormat associated_format_;
@@ -67,6 +73,8 @@
 
   // Holds YUV or AXX decode output of a frame that is identified by timestamp.
   std::map<uint32_t /* timestamp */, DecodedImageData> decoded_data_;
+  std::map<uint32_t /* timestamp */, AugmentingData> decoded_augmenting_data_;
+  const bool supports_augmenting_data_;
 };
 
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/multiplex/include/multiplex_encoded_image_packer.h b/modules/video_coding/codecs/multiplex/include/multiplex_encoded_image_packer.h
index e7f6677..220221a 100644
--- a/modules/video_coding/codecs/multiplex/include/multiplex_encoded_image_packer.h
+++ b/modules/video_coding/codecs/multiplex/include/multiplex_encoded_image_packer.h
@@ -11,6 +11,7 @@
 #ifndef MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_MULTIPLEX_ENCODED_IMAGE_PACKER_H_
 #define MODULES_VIDEO_CODING_CODECS_MULTIPLEX_INCLUDE_MULTIPLEX_ENCODED_IMAGE_PACKER_H_
 
+#include <memory>
 #include <vector>
 
 #include "common_types.h"  // NOLINT(build/include)
@@ -33,9 +34,16 @@
   // The location of the first MultiplexImageComponentHeader in the bitstream,
   // in terms of byte from the beginning of the bitstream.
   uint32_t first_component_header_offset;
+
+  // The location of the augmenting data in the bitstream, in terms of bytes
+  // from the beginning of the bitstream.
+  uint32_t augmenting_data_offset;
+
+  // The size of the augmenting data in the bitstream, in terms of bytes.
+  uint16_t augmenting_data_size;
 };
 const int kMultiplexImageHeaderSize =
-    sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint32_t);
+    sizeof(uint8_t) + 2 * sizeof(uint16_t) + 2 * sizeof(uint32_t);
 
 // Struct describing the individual image component's content.
 struct MultiplexImageComponentHeader {
@@ -81,9 +89,14 @@
 struct MultiplexImage {
   uint16_t image_index;
   uint8_t component_count;
+  uint16_t augmenting_data_size;
+  std::unique_ptr<uint8_t[]> augmenting_data;
   std::vector<MultiplexImageComponent> image_components;
 
-  MultiplexImage(uint16_t picture_index, uint8_t component_count);
+  MultiplexImage(uint16_t picture_index,
+                 uint8_t component_count,
+                 std::unique_ptr<uint8_t[]> augmenting_data,
+                 uint16_t augmenting_data_size);
 };
 
 // A utility class providing conversion between two representations of a
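
For reference, the packed layout that results (all multi-byte fields big
endian; the leading component_count byte is assumed from the unchanged start
of PackHeader, which the hunks below do not show):

    // MultiplexImage bitstream layout, kMultiplexImageHeaderSize = 13 bytes:
    //   [0]      uint8_t   component_count
    //   [1..2]   uint16_t  image_index
    //   [3..4]   uint16_t  augmenting_data_size    (new)
    //   [5..8]   uint32_t  augmenting_data_offset  (new)
    //   [9..12]  uint32_t  first_component_header_offset
    // The component headers follow, then augmenting_data_size bytes of
    // augmenting data at augmenting_data_offset, then the component
    // bitstreams.
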
diff --git a/modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h b/modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h
index 7ce8615..a249323 100644
--- a/modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h
+++ b/modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h
@@ -34,7 +34,8 @@
  public:
   // |factory| is not owned and expected to outlive this class' lifetime.
   MultiplexEncoderAdapter(VideoEncoderFactory* factory,
-                          const SdpVideoFormat& associated_format);
+                          const SdpVideoFormat& associated_format,
+                          bool supports_augmenting_data = false);
   virtual ~MultiplexEncoderAdapter();
 
   // Implements VideoEncoder
@@ -77,6 +78,9 @@
   EncodedImage combined_image_;
 
   rtc::CriticalSection crit_;
+
+  const bool supports_augmented_data_;
+  int augmenting_data_size_ = 0;
 };
 
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc
index dcf99f1..e0d0618 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_decoder_adapter.cc
@@ -15,6 +15,7 @@
 #include "common_video/include/video_frame.h"
 #include "common_video/include/video_frame_buffer.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
+#include "modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h"
 #include "rtc_base/keep_ref_until_done.h"
 #include "rtc_base/logging.h"
 
@@ -79,10 +80,23 @@
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(DecodedImageData);
 };
 
+struct MultiplexDecoderAdapter::AugmentingData {
+  AugmentingData(std::unique_ptr<uint8_t[]> augmenting_data, uint16_t data_size)
+      : data_(std::move(augmenting_data)), size_(data_size) {}
+  std::unique_ptr<uint8_t[]> data_;
+  const uint16_t size_;
+
+ private:
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AugmentingData);
+};
+
 MultiplexDecoderAdapter::MultiplexDecoderAdapter(
     VideoDecoderFactory* factory,
-    const SdpVideoFormat& associated_format)
-    : factory_(factory), associated_format_(associated_format) {}
+    const SdpVideoFormat& associated_format,
+    bool supports_augmenting_data)
+    : factory_(factory),
+      associated_format_(associated_format),
+      supports_augmenting_data_(supports_augmenting_data) {}
 
 MultiplexDecoderAdapter::~MultiplexDecoderAdapter() {
   Release();
@@ -113,8 +127,16 @@
     bool missing_frames,
     const CodecSpecificInfo* codec_specific_info,
     int64_t render_time_ms) {
-  const MultiplexImage& image =
-      MultiplexEncodedImagePacker::Unpack(input_image);
+  MultiplexImage image = MultiplexEncodedImagePacker::Unpack(input_image);
+
+  if (supports_augmenting_data_) {
+    RTC_DCHECK(decoded_augmenting_data_.find(input_image._timeStamp) ==
+               decoded_augmenting_data_.end());
+    decoded_augmenting_data_.emplace(
+        std::piecewise_construct, std::forward_as_tuple(input_image._timeStamp),
+        std::forward_as_tuple(std::move(image.augmenting_data),
+                              image.augmenting_data_size));
+  }
 
   if (image.component_count == 1) {
     RTC_DCHECK(decoded_data_.find(input_image._timeStamp) ==
@@ -157,21 +179,36 @@
                                       absl::optional<uint8_t> qp) {
   const auto& other_decoded_data_it =
       decoded_data_.find(decoded_image->timestamp());
+  const auto& augmenting_data_it =
+      decoded_augmenting_data_.find(decoded_image->timestamp());
   if (other_decoded_data_it != decoded_data_.end()) {
+    uint16_t augmenting_data_size =
+        augmenting_data_it == decoded_augmenting_data_.end()
+            ? 0
+            : augmenting_data_it->second.size_;
+    std::unique_ptr<uint8_t[]> augmenting_data =
+        augmenting_data_size == 0 ? NULL
+                                  : std::move(augmenting_data_it->second.data_);
     auto& other_image_data = other_decoded_data_it->second;
     if (stream_idx == kYUVStream) {
       RTC_DCHECK_EQ(kAXXStream, other_image_data.stream_idx_);
       MergeAlphaImages(decoded_image, decode_time_ms, qp,
                        &other_image_data.decoded_image_,
-                       other_image_data.decode_time_ms_, other_image_data.qp_);
+                       other_image_data.decode_time_ms_, other_image_data.qp_,
+                       std::move(augmenting_data), augmenting_data_size);
     } else {
       RTC_DCHECK_EQ(kYUVStream, other_image_data.stream_idx_);
       RTC_DCHECK_EQ(kAXXStream, stream_idx);
       MergeAlphaImages(&other_image_data.decoded_image_,
                        other_image_data.decode_time_ms_, other_image_data.qp_,
-                       decoded_image, decode_time_ms, qp);
+                       decoded_image, decode_time_ms, qp,
+                       std::move(augmenting_data), augmenting_data_size);
     }
     decoded_data_.erase(decoded_data_.begin(), other_decoded_data_it);
+    if (supports_augmenting_data_) {
+      decoded_augmenting_data_.erase(decoded_augmenting_data_.begin(),
+                                     augmenting_data_it);
+    }
     return;
   }
   RTC_DCHECK(decoded_data_.find(decoded_image->timestamp()) ==
@@ -188,24 +225,31 @@
     const absl::optional<uint8_t>& qp,
     VideoFrame* alpha_decoded_image,
     const absl::optional<int32_t>& alpha_decode_time_ms,
-    const absl::optional<uint8_t>& alpha_qp) {
+    const absl::optional<uint8_t>& alpha_qp,
+    std::unique_ptr<uint8_t[]> augmenting_data,
+    uint16_t augmenting_data_length) {
+  rtc::scoped_refptr<VideoFrameBuffer> merged_buffer;
   if (!alpha_decoded_image->timestamp()) {
-    decoded_complete_callback_->Decoded(*decoded_image, decode_time_ms, qp);
-    return;
+    merged_buffer = decoded_image->video_frame_buffer();
+  } else {
+    rtc::scoped_refptr<webrtc::I420BufferInterface> yuv_buffer =
+        decoded_image->video_frame_buffer()->ToI420();
+    rtc::scoped_refptr<webrtc::I420BufferInterface> alpha_buffer =
+        alpha_decoded_image->video_frame_buffer()->ToI420();
+    RTC_DCHECK_EQ(yuv_buffer->width(), alpha_buffer->width());
+    RTC_DCHECK_EQ(yuv_buffer->height(), alpha_buffer->height());
+    merged_buffer = WrapI420ABuffer(
+        yuv_buffer->width(), yuv_buffer->height(), yuv_buffer->DataY(),
+        yuv_buffer->StrideY(), yuv_buffer->DataU(), yuv_buffer->StrideU(),
+        yuv_buffer->DataV(), yuv_buffer->StrideV(), alpha_buffer->DataY(),
+        alpha_buffer->StrideY(),
+        rtc::Bind(&KeepBufferRefs, yuv_buffer, alpha_buffer));
   }
-
-  rtc::scoped_refptr<webrtc::I420BufferInterface> yuv_buffer =
-      decoded_image->video_frame_buffer()->ToI420();
-  rtc::scoped_refptr<webrtc::I420BufferInterface> alpha_buffer =
-      alpha_decoded_image->video_frame_buffer()->ToI420();
-  RTC_DCHECK_EQ(yuv_buffer->width(), alpha_buffer->width());
-  RTC_DCHECK_EQ(yuv_buffer->height(), alpha_buffer->height());
-  rtc::scoped_refptr<I420ABufferInterface> merged_buffer = WrapI420ABuffer(
-      yuv_buffer->width(), yuv_buffer->height(), yuv_buffer->DataY(),
-      yuv_buffer->StrideY(), yuv_buffer->DataU(), yuv_buffer->StrideU(),
-      yuv_buffer->DataV(), yuv_buffer->StrideV(), alpha_buffer->DataY(),
-      alpha_buffer->StrideY(),
-      rtc::Bind(&KeepBufferRefs, yuv_buffer, alpha_buffer));
+  if (supports_augmenting_data_) {
+    merged_buffer = rtc::scoped_refptr<webrtc::AugmentedVideoFrameBuffer>(
+        new rtc::RefCountedObject<AugmentedVideoFrameBuffer>(
+            merged_buffer, std::move(augmenting_data), augmenting_data_length));
+  }
 
   VideoFrame merged_image(merged_buffer, decoded_image->timestamp(),
                           0 /* render_time_ms */, decoded_image->rotation());
diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc b/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc
index 33f9c4f..63cd6da 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc
@@ -23,6 +23,14 @@
   ByteWriter<uint16_t>::WriteBigEndian(buffer + offset, header.image_index);
   offset += sizeof(uint16_t);
 
+  ByteWriter<uint16_t>::WriteBigEndian(buffer + offset,
+                                       header.augmenting_data_size);
+  offset += sizeof(uint16_t);
+
+  ByteWriter<uint32_t>::WriteBigEndian(buffer + offset,
+                                       header.augmenting_data_offset);
+  offset += sizeof(uint32_t);
+
   ByteWriter<uint32_t>::WriteBigEndian(buffer + offset,
                                        header.first_component_header_offset);
   offset += sizeof(uint32_t);
@@ -40,6 +48,14 @@
   header.image_index = ByteReader<uint16_t>::ReadBigEndian(buffer + offset);
   offset += sizeof(uint16_t);
 
+  header.augmenting_data_size =
+      ByteReader<uint16_t>::ReadBigEndian(buffer + offset);
+  offset += sizeof(uint16_t);
+
+  header.augmenting_data_offset =
+      ByteReader<uint32_t>::ReadBigEndian(buffer + offset);
+  offset += sizeof(uint32_t);
+
   header.first_component_header_offset =
       ByteReader<uint32_t>::ReadBigEndian(buffer + offset);
   offset += sizeof(uint32_t);
@@ -113,8 +129,14 @@
   memcpy(buffer, image.encoded_image._buffer, image.encoded_image._length);
 }
 
-MultiplexImage::MultiplexImage(uint16_t picture_index, uint8_t frame_count)
-    : image_index(picture_index), component_count(frame_count) {}
+MultiplexImage::MultiplexImage(uint16_t picture_index,
+                               uint8_t frame_count,
+                               std::unique_ptr<uint8_t[]> augmenting_data,
+                               uint16_t augmenting_data_size)
+    : image_index(picture_index),
+      component_count(frame_count),
+      augmenting_data_size(augmenting_data_size),
+      augmenting_data(std::move(augmenting_data)) {}
 
 EncodedImage MultiplexEncodedImagePacker::PackAndRelease(
     const MultiplexImage& multiplex_image) {
@@ -125,8 +147,12 @@
   header.image_index = multiplex_image.image_index;
   int header_offset = kMultiplexImageHeaderSize;
   header.first_component_header_offset = header_offset;
-  int bitstream_offset = header_offset + kMultiplexImageComponentHeaderSize *
-                                             header.component_count;
+  header.augmenting_data_offset =
+      header_offset +
+      kMultiplexImageComponentHeaderSize * header.component_count;
+  header.augmenting_data_size = multiplex_image.augmenting_data_size;
+  int bitstream_offset =
+      header.augmenting_data_offset + header.augmenting_data_size;
 
   const std::vector<MultiplexImageComponent>& images =
       multiplex_image.image_components;
@@ -182,6 +208,13 @@
                          kMultiplexImageComponentHeaderSize * (i + 1)));
   }
 
+  // Augmenting Data
+  if (multiplex_image.augmenting_data_size != 0) {
+    memcpy(combined_image._buffer + header.augmenting_data_offset,
+           multiplex_image.augmenting_data.get(),
+           multiplex_image.augmenting_data_size);
+  }
+
   // Bitstreams
   for (size_t i = 0; i < images.size(); i++) {
     PackBitstream(combined_image._buffer + frame_headers[i].bitstream_offset,
@@ -196,7 +229,6 @@
     const EncodedImage& combined_image) {
   const MultiplexImageHeader& header = UnpackHeader(combined_image._buffer);
 
-  MultiplexImage multiplex_image(header.image_index, header.component_count);
   std::vector<MultiplexImageComponentHeader> frame_headers;
   int header_offset = header.first_component_header_offset;
 
@@ -207,6 +239,19 @@
   }
 
   RTC_DCHECK_LE(frame_headers.size(), header.component_count);
+  std::unique_ptr<uint8_t[]> augmenting_data = nullptr;
+  if (header.augmenting_data_size != 0) {
+    augmenting_data =
+        std::unique_ptr<uint8_t[]>(new uint8_t[header.augmenting_data_size]);
+    memcpy(augmenting_data.get(),
+           combined_image._buffer + header.augmenting_data_offset,
+           header.augmenting_data_size);
+  }
+
+  MultiplexImage multiplex_image(header.image_index, header.component_count,
+                                 std::move(augmenting_data),
+                                 header.augmenting_data_size);
+
   for (size_t i = 0; i < frame_headers.size(); i++) {
     MultiplexImageComponent image_component;
     image_component.component_index = frame_headers[i].component_index;
diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
index dcf1b56..6f921e3 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
@@ -16,6 +16,7 @@
 #include "common_video/include/video_frame_buffer.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
 #include "modules/include/module_common_types.h"
+#include "modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h"
 #include "rtc_base/keep_ref_until_done.h"
 #include "rtc_base/logging.h"
 
@@ -47,10 +48,12 @@
 
 MultiplexEncoderAdapter::MultiplexEncoderAdapter(
     VideoEncoderFactory* factory,
-    const SdpVideoFormat& associated_format)
+    const SdpVideoFormat& associated_format,
+    bool supports_augmented_data)
     : factory_(factory),
       associated_format_(associated_format),
-      encoded_complete_callback_(nullptr) {}
+      encoded_complete_callback_(nullptr),
+      supports_augmented_data_(supports_augmented_data) {}
 
 MultiplexEncoderAdapter::~MultiplexEncoderAdapter() {
   Release();
@@ -122,13 +125,30 @@
   }
   const bool has_alpha = input_image.video_frame_buffer()->type() ==
                          VideoFrameBuffer::Type::kI420A;
+  std::unique_ptr<uint8_t[]> augmenting_data = nullptr;
+  uint16_t augmenting_data_length = 0;
+  AugmentedVideoFrameBuffer* augmented_video_frame_buffer = nullptr;
+  if (supports_augmented_data_) {
+    augmented_video_frame_buffer = static_cast<AugmentedVideoFrameBuffer*>(
+        input_image.video_frame_buffer().get());
+    augmenting_data_length =
+        augmented_video_frame_buffer->GetAugmentingDataSize();
+    augmenting_data =
+        std::unique_ptr<uint8_t[]>(new uint8_t[augmenting_data_length]);
+    memcpy(augmenting_data.get(),
+           augmented_video_frame_buffer->GetAugmentingData(),
+           augmenting_data_length);
+    augmenting_data_size_ = augmenting_data_length;
+  }
+
   {
     rtc::CritScope cs(&crit_);
     stashed_images_.emplace(
         std::piecewise_construct,
         std::forward_as_tuple(input_image.timestamp()),
-        std::forward_as_tuple(picture_index_,
-                              has_alpha ? kAlphaCodecStreams : 1));
+        std::forward_as_tuple(
+            picture_index_, has_alpha ? kAlphaCodecStreams : 1,
+            std::move(augmenting_data), augmenting_data_length));
   }
 
   ++picture_index_;
@@ -136,15 +156,18 @@
   // Encode YUV
   int rv = encoders_[kYUVStream]->Encode(input_image, codec_specific_info,
                                          &adjusted_frame_types);
+
   // If we do not receive an alpha frame, we send a single frame for this
   // |picture_index_|. The receiver will receive |frame_count| as 1 which
-  // soecifies this case.
+  // specifies this case.
   if (rv || !has_alpha)
     return rv;
 
   // Encode AXX
   const I420ABufferInterface* yuva_buffer =
-      input_image.video_frame_buffer()->GetI420A();
+      supports_augmented_data_
+          ? augmented_video_frame_buffer->GetVideoFrameBuffer()->GetI420A()
+          : input_image.video_frame_buffer()->GetI420A();
   rtc::scoped_refptr<I420BufferInterface> alpha_buffer =
       WrapI420Buffer(input_image.width(), input_image.height(),
                      yuva_buffer->DataA(), yuva_buffer->StrideA(),
@@ -177,12 +200,16 @@
 int MultiplexEncoderAdapter::SetRateAllocation(
     const VideoBitrateAllocation& bitrate,
     uint32_t framerate) {
+  VideoBitrateAllocation bitrate_allocation(bitrate);
+  bitrate_allocation.SetBitrate(
+      0, 0, bitrate.GetBitrate(0, 0) - augmenting_data_size_);
   for (auto& encoder : encoders_) {
     // TODO(emircan): |framerate| is used to calculate duration in encoder
     // instances. We report the total frame rate to keep real time for now.
     // Remove this after refactoring duration logic.
     const int rv = encoder->SetRateAllocation(
-        bitrate, static_cast<uint32_t>(encoders_.size()) * framerate);
+        bitrate_allocation,
+        static_cast<uint32_t>(encoders_.size()) * framerate);
     if (rv)
       return rv;
   }
diff --git a/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc b/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
index 064e8b3..303914d 100644
--- a/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
+++ b/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
@@ -15,6 +15,7 @@
 #include "common_video/include/video_frame_buffer.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
 #include "media/base/mediaconstants.h"
+#include "modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h"
 #include "modules/video_coding/codecs/multiplex/include/multiplex_decoder_adapter.h"
 #include "modules/video_coding/codecs/multiplex/include/multiplex_encoded_image_packer.h"
 #include "modules/video_coding/codecs/multiplex/include/multiplex_encoder_adapter.h"
@@ -32,21 +33,26 @@
 const VideoCodecType kMultiplexAssociatedCodecType =
     PayloadStringToCodecType(kMultiplexAssociatedCodecName);
 
-class TestMultiplexAdapter : public VideoCodecUnitTest {
+class TestMultiplexAdapter
+    : public VideoCodecUnitTest,
+      public testing::WithParamInterface<bool /* supports_augmenting_data */> {
  public:
   TestMultiplexAdapter()
       : decoder_factory_(new webrtc::MockVideoDecoderFactory),
-        encoder_factory_(new webrtc::MockVideoEncoderFactory) {}
+        encoder_factory_(new webrtc::MockVideoEncoderFactory),
+        supports_augmenting_data_(GetParam()) {}
 
  protected:
   std::unique_ptr<VideoDecoder> CreateDecoder() override {
     return absl::make_unique<MultiplexDecoderAdapter>(
-        decoder_factory_.get(), SdpVideoFormat(kMultiplexAssociatedCodecName));
+        decoder_factory_.get(), SdpVideoFormat(kMultiplexAssociatedCodecName),
+        supports_augmenting_data_);
   }
 
   std::unique_ptr<VideoEncoder> CreateEncoder() override {
     return absl::make_unique<MultiplexEncoderAdapter>(
-        encoder_factory_.get(), SdpVideoFormat(kMultiplexAssociatedCodecName));
+        encoder_factory_.get(), SdpVideoFormat(kMultiplexAssociatedCodecName),
+        supports_augmenting_data_);
   }
 
   void ModifyCodecSettings(VideoCodec* codec_settings) override {
@@ -56,6 +62,23 @@
     codec_settings->codecType = webrtc::kVideoCodecMultiplex;
   }
 
+  std::unique_ptr<VideoFrame> CreateDataAugmentedInputFrame(
+      VideoFrame* video_frame) {
+    rtc::scoped_refptr<VideoFrameBuffer> video_buffer =
+        video_frame->video_frame_buffer();
+    std::unique_ptr<uint8_t[]> data =
+        std::unique_ptr<uint8_t[]>(new uint8_t[16]);
+    for (int i = 0; i < 16; i++) {
+      data[i] = i;
+    }
+    rtc::scoped_refptr<AugmentedVideoFrameBuffer> augmented_video_frame_buffer =
+        new rtc::RefCountedObject<AugmentedVideoFrameBuffer>(
+            video_buffer, std::move(data), 16);
+    return absl::WrapUnique<VideoFrame>(
+        new VideoFrame(augmented_video_frame_buffer, video_frame->timestamp(),
+                       video_frame->render_time_ms(), video_frame->rotation()));
+  }
+
   std::unique_ptr<VideoFrame> CreateI420AInputFrame() {
     VideoFrame* input_frame = NextInputFrame();
     rtc::scoped_refptr<webrtc::I420BufferInterface> yuv_buffer =
@@ -70,14 +93,50 @@
                        345 /* render_time_ms */, kVideoRotation_0));
   }
 
-  std::unique_ptr<VideoFrame> ExtractAXXFrame(const VideoFrame& yuva_frame) {
-    const I420ABufferInterface* yuva_buffer =
-        yuva_frame.video_frame_buffer()->GetI420A();
+  std::unique_ptr<VideoFrame> CreateInputFrame(bool contains_alpha) {
+    std::unique_ptr<VideoFrame> video_frame;
+    if (contains_alpha) {
+      video_frame = CreateI420AInputFrame();
+    } else {
+      VideoFrame* next_frame = NextInputFrame();
+      video_frame = absl::WrapUnique<VideoFrame>(new VideoFrame(
+          next_frame->video_frame_buffer(), next_frame->timestamp(),
+          next_frame->render_time_ms(), next_frame->rotation()));
+    }
+    if (supports_augmenting_data_) {
+      video_frame = CreateDataAugmentedInputFrame(video_frame.get());
+    }
+
+    return video_frame;
+  }
+
+  void CheckData(rtc::scoped_refptr<VideoFrameBuffer> video_frame_buffer) {
+    if (!supports_augmenting_data_) {
+      return;
+    }
+    AugmentedVideoFrameBuffer* augmented_buffer =
+        static_cast<AugmentedVideoFrameBuffer*>(video_frame_buffer.get());
+    EXPECT_EQ(augmented_buffer->GetAugmentingDataSize(), 16);
+    uint8_t* data = augmented_buffer->GetAugmentingData();
+    for (int i = 0; i < 16; i++) {
+      EXPECT_EQ(data[i], i);
+    }
+  }
+
+  std::unique_ptr<VideoFrame> ExtractAXXFrame(const VideoFrame& video_frame) {
+    rtc::scoped_refptr<VideoFrameBuffer> video_frame_buffer =
+        video_frame.video_frame_buffer();
+    if (supports_augmenting_data_) {
+      AugmentedVideoFrameBuffer* augmentedBuffer =
+          static_cast<AugmentedVideoFrameBuffer*>(video_frame_buffer.get());
+      video_frame_buffer = augmentedBuffer->GetVideoFrameBuffer();
+    }
+    const I420ABufferInterface* yuva_buffer = video_frame_buffer->GetI420A();
     rtc::scoped_refptr<I420BufferInterface> axx_buffer = WrapI420Buffer(
         yuva_buffer->width(), yuva_buffer->height(), yuva_buffer->DataA(),
         yuva_buffer->StrideA(), yuva_buffer->DataU(), yuva_buffer->StrideU(),
         yuva_buffer->DataV(), yuva_buffer->StrideV(),
-        rtc::KeepRefUntilDone(yuva_frame.video_frame_buffer()));
+        rtc::KeepRefUntilDone(video_frame_buffer));
     return absl::WrapUnique<VideoFrame>(
         new VideoFrame(axx_buffer, 123 /* RTP timestamp */,
                        345 /* render_time_ms */, kVideoRotation_0));
@@ -106,21 +165,22 @@
 
   const std::unique_ptr<webrtc::MockVideoDecoderFactory> decoder_factory_;
   const std::unique_ptr<webrtc::MockVideoEncoderFactory> encoder_factory_;
+  const bool supports_augmenting_data_;
 };
 
 // TODO(emircan): Currently VideoCodecUnitTest tests do a complete setup
 // step that goes beyond constructing |decoder_|. Simplify these tests to do
 // less.
-TEST_F(TestMultiplexAdapter, ConstructAndDestructDecoder) {
+TEST_P(TestMultiplexAdapter, ConstructAndDestructDecoder) {
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Release());
 }
 
-TEST_F(TestMultiplexAdapter, ConstructAndDestructEncoder) {
+TEST_P(TestMultiplexAdapter, ConstructAndDestructEncoder) {
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release());
 }
 
-TEST_F(TestMultiplexAdapter, EncodeDecodeI420Frame) {
-  VideoFrame* input_frame = NextInputFrame();
+TEST_P(TestMultiplexAdapter, EncodeDecodeI420Frame) {
+  std::unique_ptr<VideoFrame> input_frame = CreateInputFrame(false);
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->Encode(*input_frame, nullptr, nullptr));
   EncodedImage encoded_frame;
@@ -134,11 +194,12 @@
   absl::optional<uint8_t> decoded_qp;
   ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
   ASSERT_TRUE(decoded_frame);
-  EXPECT_GT(I420PSNR(input_frame, decoded_frame.get()), 36);
+  EXPECT_GT(I420PSNR(input_frame.get(), decoded_frame.get()), 36);
+  CheckData(decoded_frame->video_frame_buffer());
 }
 
-TEST_F(TestMultiplexAdapter, EncodeDecodeI420AFrame) {
-  std::unique_ptr<VideoFrame> yuva_frame = CreateI420AInputFrame();
+TEST_P(TestMultiplexAdapter, EncodeDecodeI420AFrame) {
+  std::unique_ptr<VideoFrame> yuva_frame = CreateInputFrame(true);
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->Encode(*yuva_frame, nullptr, nullptr));
   EncodedImage encoded_frame;
@@ -159,10 +220,12 @@
   std::unique_ptr<VideoFrame> output_axx_frame =
       ExtractAXXFrame(*decoded_frame);
   EXPECT_GT(I420PSNR(input_axx_frame.get(), output_axx_frame.get()), 47);
+
+  CheckData(decoded_frame->video_frame_buffer());
 }
 
-TEST_F(TestMultiplexAdapter, CheckSingleFrameEncodedBitstream) {
-  VideoFrame* input_frame = NextInputFrame();
+TEST_P(TestMultiplexAdapter, CheckSingleFrameEncodedBitstream) {
+  std::unique_ptr<VideoFrame> input_frame = CreateInputFrame(false);
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->Encode(*input_frame, nullptr, nullptr));
   EncodedImage encoded_frame;
@@ -181,8 +244,8 @@
   EXPECT_EQ(kVideoFrameKey, component.encoded_image._frameType);
 }
 
-TEST_F(TestMultiplexAdapter, CheckDoubleFramesEncodedBitstream) {
-  std::unique_ptr<VideoFrame> yuva_frame = CreateI420AInputFrame();
+TEST_P(TestMultiplexAdapter, CheckDoubleFramesEncodedBitstream) {
+  std::unique_ptr<VideoFrame> yuva_frame = CreateInputFrame(true);
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->Encode(*yuva_frame, nullptr, nullptr));
   EncodedImage encoded_frame;
@@ -206,8 +269,8 @@
   }
 }
 
-TEST_F(TestMultiplexAdapter, ImageIndexIncreases) {
-  std::unique_ptr<VideoFrame> yuva_frame = CreateI420AInputFrame();
+TEST_P(TestMultiplexAdapter, ImageIndexIncreases) {
+  std::unique_ptr<VideoFrame> yuva_frame = CreateInputFrame(true);
   const size_t expected_num_encoded_frames = 3;
   for (size_t i = 0; i < expected_num_encoded_frames; ++i) {
     EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
@@ -222,4 +285,8 @@
   }
 }
 
+INSTANTIATE_TEST_CASE_P(TestMultiplexAdapter,
+                        TestMultiplexAdapter,
+                        ::testing::Bool());
+
 }  // namespace webrtc