Add owned data buffer to EncodedImage

Bug: webrtc:9378
Change-Id: I6a66b9301cbadf1d6517bf7a96028099970a20a3
Reviewed-on: https://webrtc-review.googlesource.com/c/117964
Commit-Queue: Niels Moller <nisse@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26585}
diff --git a/api/media_transport_interface.cc b/api/media_transport_interface.cc
index 571e82b..63d7ea1 100644
--- a/api/media_transport_interface.cc
+++ b/api/media_transport_interface.cc
@@ -74,33 +74,10 @@
       referenced_frame_ids_(std::move(referenced_frame_ids)) {}
 
 MediaTransportEncodedVideoFrame& MediaTransportEncodedVideoFrame::operator=(
-    const MediaTransportEncodedVideoFrame& o) {
-  payload_type_ = o.payload_type_;
-  encoded_image_ = o.encoded_image_;
-  encoded_data_ = o.encoded_data_;
-  frame_id_ = o.frame_id_;
-  referenced_frame_ids_ = o.referenced_frame_ids_;
-  if (!encoded_data_.empty()) {
-    // We own the underlying data.
-    encoded_image_.set_buffer(encoded_data_.data(), encoded_data_.size());
-  }
-  return *this;
-}
+    const MediaTransportEncodedVideoFrame&) = default;
 
 MediaTransportEncodedVideoFrame& MediaTransportEncodedVideoFrame::operator=(
-    MediaTransportEncodedVideoFrame&& o) {
-  payload_type_ = o.payload_type_;
-  encoded_image_ = o.encoded_image_;
-  encoded_data_ = std::move(o.encoded_data_);
-  frame_id_ = o.frame_id_;
-  referenced_frame_ids_ = std::move(o.referenced_frame_ids_);
-  if (!encoded_data_.empty()) {
-    // We take over ownership of the underlying data.
-    encoded_image_.set_buffer(encoded_data_.data(), encoded_data_.size());
-    o.encoded_image_.set_buffer(nullptr, 0);
-  }
-  return *this;
-}
+    MediaTransportEncodedVideoFrame&&) = default;
 
 MediaTransportEncodedVideoFrame::MediaTransportEncodedVideoFrame(
     const MediaTransportEncodedVideoFrame& o)
@@ -114,14 +91,6 @@
   *this = std::move(o);
 }
 
-void MediaTransportEncodedVideoFrame::Retain() {
-  if (encoded_image_.data() && encoded_data_.empty()) {
-    encoded_data_ = std::vector<uint8_t>(
-        encoded_image_.data(), encoded_image_.data() + encoded_image_.size());
-    encoded_image_.set_buffer(encoded_data_.data(), encoded_image_.size());
-  }
-}
-
 SendDataParams::SendDataParams() = default;
 SendDataParams::SendDataParams(const SendDataParams&) = default;
 
diff --git a/api/media_transport_interface.h b/api/media_transport_interface.h
index 1266000..5cd2923 100644
--- a/api/media_transport_interface.h
+++ b/api/media_transport_interface.h
@@ -204,23 +204,20 @@
     return referenced_frame_ids_;
   }
 
-  // Hack to workaround lack of ownership of the encoded_image_._buffer. If we
+  // Hack to work around lack of ownership of the EncodedImage buffer. If we
   // don't already own the underlying data, make a copy.
-  void Retain();
+  void Retain() { encoded_image_.Retain(); }
 
  private:
   MediaTransportEncodedVideoFrame();
 
   int payload_type_;
 
-  // The buffer is not owned by the encoded image. On the sender it means that
-  // it will need to make a copy using the Retain() method, if it wants to
+  // The buffer is not always owned by the encoded image. On the sender it means
+  // that it will need to make a copy using the Retain() method, if it wants to
   // deliver it asynchronously.
   webrtc::EncodedImage encoded_image_;
 
-  // If non-empty, this is the data for the encoded image.
-  std::vector<uint8_t> encoded_data_;
-
   // Frame id uniquely identifies a frame in a stream. It needs to be unique in
   // a given time window (i.e. technically unique identifier for the lifetime of
   // the connection is not needed, but you need to guarantee that remote side
diff --git a/api/video/encoded_image.cc b/api/video/encoded_image.cc
index b093d1b..f96b9c2 100644
--- a/api/video/encoded_image.cc
+++ b/api/video/encoded_image.cc
@@ -29,10 +29,24 @@
 
 EncodedImage::EncodedImage() : EncodedImage(nullptr, 0, 0) {}
 
+EncodedImage::EncodedImage(EncodedImage&&) = default;
 EncodedImage::EncodedImage(const EncodedImage&) = default;
 
 EncodedImage::EncodedImage(uint8_t* buffer, size_t size, size_t capacity)
-    : buffer_(buffer), size_(size), capacity_(capacity) {}
+    : size_(size), buffer_(buffer), capacity_(capacity) {}
+
+EncodedImage::~EncodedImage() = default;
+
+EncodedImage& EncodedImage::operator=(EncodedImage&&) = default;
+EncodedImage& EncodedImage::operator=(const EncodedImage&) = default;
+
+void EncodedImage::Retain() {
+  if (buffer_) {
+    encoded_data_ = std::vector<uint8_t>(size_);
+    memcpy(encoded_data_.data(), buffer_, size_);
+    buffer_ = nullptr;
+  }
+}
 
 void EncodedImage::SetEncodeTime(int64_t encode_start_ms,
                                  int64_t encode_finish_ms) {
diff --git a/api/video/encoded_image.h b/api/video/encoded_image.h
index fb3288f..363d34c 100644
--- a/api/video/encoded_image.h
+++ b/api/video/encoded_image.h
@@ -12,6 +12,7 @@
 #define API_VIDEO_ENCODED_IMAGE_H_
 
 #include <stdint.h>
+#include <vector>
 
 #include "absl/types/optional.h"
 #include "api/video/color_space.h"
@@ -37,9 +38,17 @@
   static size_t GetBufferPaddingBytes(VideoCodecType codec_type);
 
   EncodedImage();
+  EncodedImage(EncodedImage&&);
+  // Discouraged: potentially expensive.
   EncodedImage(const EncodedImage&);
   EncodedImage(uint8_t* buffer, size_t length, size_t capacity);
 
+  ~EncodedImage();
+
+  EncodedImage& operator=(EncodedImage&&);
+  // Discouraged: potentially expensive.
+  EncodedImage& operator=(const EncodedImage&);
+
   // TODO(nisse): Change style to timestamp(), set_timestamp(), for consistency
   // with the VideoFrame class.
   // Set frame timestamp (90kHz).
@@ -68,19 +77,38 @@
 
   size_t size() const { return size_; }
   void set_size(size_t new_size) {
-    RTC_DCHECK_LE(new_size, capacity_);
+    RTC_DCHECK_LE(new_size, capacity());
     size_ = new_size;
   }
-  size_t capacity() const { return capacity_; }
+  size_t capacity() const { return buffer_ ? capacity_ : encoded_data_.size(); }
 
   void set_buffer(uint8_t* buffer, size_t capacity) {
     buffer_ = buffer;
     capacity_ = capacity;
   }
 
-  // TODO(bugs.webrtc.org/9378): When changed to owning the buffer, data() on a
-  // const object should return a const uint8_t*.
-  uint8_t* data() const { return buffer_; }
+  void Allocate(size_t capacity) {
+    encoded_data_.resize(capacity);
+    buffer_ = nullptr;
+  }
+
+  uint8_t* data() { return buffer_ ? buffer_ : encoded_data_.data(); }
+  const uint8_t* data() const {
+    return buffer_ ? buffer_ : encoded_data_.data();
+  }
+  // TODO(nisse): At some places, code accepts a const ref EncodedImage, but
+  // still writes to it, to clear padding at the end of the encoded data.
+  // Padding is required by ffmpeg; the best way to deal with that is likely to
+  // make this class ensure that buffers always have a few zero padding bytes.
+  uint8_t* mutable_data() const { return const_cast<uint8_t*>(data()); }
+
+  // TODO(bugs.webrtc.org/9378): Delete. Used by code that wants to modify a
+  // buffer corresponding to a const EncodedImage. Requires an un-owned buffer.
+  uint8_t* buffer() const { return buffer_; }
+
+  // Hack to work around lack of ownership of the encoded data. If we don't
+  // already own the underlying data, make an owned copy.
+  void Retain();
 
   uint32_t _encodedWidth = 0;
   uint32_t _encodedHeight = 0;
@@ -111,11 +139,14 @@
   } timing_;
 
  private:
-  // TODO(bugs.webrtc.org/9378): Fix ownership. Currently not owning the data
-  // buffer.
-  uint8_t* buffer_;
+  // TODO(bugs.webrtc.org/9378): We're transitioning to always owning the
+  // encoded data.
+  std::vector<uint8_t> encoded_data_;
   size_t size_;      // Size of encoded frame data.
-  size_t capacity_;  // Allocated size of _buffer.
+  // Non-null when used with an un-owned buffer.
+  uint8_t* buffer_;
+  // Allocated size of buffer_; relevant only if it's non-null.
+  size_t capacity_;
   uint32_t timestamp_rtp_ = 0;
   absl::optional<int> spatial_index_;
   absl::optional<webrtc::ColorSpace> color_space_;
diff --git a/modules/video_coding/codecs/h264/h264_decoder_impl.cc b/modules/video_coding/codecs/h264/h264_decoder_impl.cc
index 9f4c696..73190d1 100644
--- a/modules/video_coding/codecs/h264/h264_decoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_decoder_impl.cc
@@ -259,12 +259,12 @@
   // "If the first 23 bits of the additional bytes are not 0, then damaged MPEG
   // bitstreams could cause overread and segfault." See
   // AV_INPUT_BUFFER_PADDING_SIZE. We'll zero the entire padding just in case.
-  memset(input_image.data() + input_image.size(), 0,
+  memset(input_image.mutable_data() + input_image.size(), 0,
          EncodedImage::GetBufferPaddingBytes(kVideoCodecH264));
 
   AVPacket packet;
   av_init_packet(&packet);
-  packet.data = input_image.data();
+  packet.data = input_image.mutable_data();
   if (input_image.size() >
       static_cast<size_t>(std::numeric_limits<int>::max())) {
     ReportError();
diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc b/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc
index af1c8a6..1afdf28 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoded_image_packer.cc
@@ -221,7 +221,7 @@
   for (size_t i = 0; i < images.size(); i++) {
     PackBitstream(combined_image.data() + frame_headers[i].bitstream_offset,
                   images[i]);
-    delete[] images[i].encoded_image.data();
+    delete[] images[i].encoded_image.buffer();
   }
 
   return combined_image;
@@ -263,7 +263,7 @@
     encoded_image.SetTimestamp(combined_image.Timestamp());
     encoded_image._frameType = frame_headers[i].frame_type;
     encoded_image.set_buffer(
-        combined_image.data() + frame_headers[i].bitstream_offset,
+        combined_image.mutable_data() + frame_headers[i].bitstream_offset,
         static_cast<size_t>(frame_headers[i].bitstream_length));
     const size_t padding =
         EncodedImage::GetBufferPaddingBytes(image_component.codec_type);
diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
index 637bce5..fb588eb 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
@@ -253,8 +253,8 @@
     }
   }
   stashed_images_.clear();
-  if (combined_image_.data()) {
-    delete[] combined_image_.data();
+  if (combined_image_.buffer()) {
+    delete[] combined_image_.buffer();
     combined_image_.set_buffer(nullptr, 0);
   }
   return WEBRTC_VIDEO_CODEC_OK;
@@ -302,8 +302,8 @@
 
       // We have to send out those stashed frames, otherwise the delta frame
       // dependency chain is broken.
-      if (combined_image_.data())
-        delete[] combined_image_.data();
+      if (combined_image_.buffer())
+        delete[] combined_image_.buffer();
       combined_image_ =
           MultiplexEncodedImagePacker::PackAndRelease(iter->second);
 
diff --git a/modules/video_coding/codecs/test/videoprocessor.cc b/modules/video_coding/codecs/test/videoprocessor.cc
index b30c032..c64d290 100644
--- a/modules/video_coding/codecs/test/videoprocessor.cc
+++ b/modules/video_coding/codecs/test/videoprocessor.cc
@@ -587,9 +587,9 @@
   copied_image.set_size(payload_size_bytes);
 
   // Replace previous EncodedImage for this spatial layer.
-  uint8_t* old_data = merged_encoded_frames_.at(spatial_idx).data();
-  if (old_data) {
-    delete[] old_data;
+  uint8_t* old_buffer = merged_encoded_frames_.at(spatial_idx).buffer();
+  if (old_buffer) {
+    delete[] old_buffer;
   }
   merged_encoded_frames_.at(spatial_idx) = copied_image;
 
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
index 5e6a402..42e57a6 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
@@ -888,7 +888,7 @@
               encoded_images_[encoder_idx].capacity()) {
             uint8_t* buffer = new uint8_t[pkt->data.frame.sz + length];
             memcpy(buffer, encoded_images_[encoder_idx].data(), length);
-            delete[] encoded_images_[encoder_idx].data();
+            delete[] encoded_images_[encoder_idx].buffer();
             encoded_images_[encoder_idx].set_buffer(
                 buffer, pkt->data.frame.sz + length);
           }
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 3bf03fa..5feca0b 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -185,8 +185,8 @@
 int VP9EncoderImpl::Release() {
   int ret_val = WEBRTC_VIDEO_CODEC_OK;
 
-  if (encoded_image_.data() != nullptr) {
-    delete[] encoded_image_.data();
+  if (encoded_image_.buffer() != nullptr) {
+    delete[] encoded_image_.buffer();
     encoded_image_.set_buffer(nullptr, 0);
   }
   if (encoder_ != nullptr) {
@@ -1266,7 +1266,7 @@
   }
 
   if (pkt->data.frame.sz > encoded_image_.capacity()) {
-    delete[] encoded_image_.data();
+    delete[] encoded_image_.buffer();
     encoded_image_.set_buffer(new uint8_t[pkt->data.frame.sz],
                               pkt->data.frame.sz);
   }
diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc
index aff9c5a..c18ef13 100644
--- a/modules/video_coding/encoded_frame.cc
+++ b/modules/video_coding/encoded_frame.cc
@@ -31,15 +31,7 @@
 }
 
 VCMEncodedFrame::~VCMEncodedFrame() {
-  Free();
-}
-
-void VCMEncodedFrame::Free() {
   Reset();
-  if (data() != nullptr) {
-    delete[] data();
-    set_buffer(nullptr, 0);
-  }
 }
 
 void VCMEncodedFrame::Reset() {
@@ -156,15 +148,10 @@
 void VCMEncodedFrame::VerifyAndAllocate(size_t minimumSize) {
   size_t old_capacity = capacity();
   if (minimumSize > old_capacity) {
-    // create buffer of sufficient size
-    uint8_t* old_data = data();
-
-    set_buffer(new uint8_t[minimumSize], minimumSize);
-    if (old_data) {
-      // copy old data
-      memcpy(data(), old_data, old_capacity);
-      delete[] old_data;
-    }
+    // TODO(nisse): EncodedImage::Allocate is implemented as
+    // std::vector::resize, which means that the old contents are kept. Find
+    // any code depends on that behavior.
+    Allocate(minimumSize);
   }
 }
 
diff --git a/modules/video_coding/encoded_frame.h b/modules/video_coding/encoded_frame.h
index 2cdcff9..eeaea15 100644
--- a/modules/video_coding/encoded_frame.h
+++ b/modules/video_coding/encoded_frame.h
@@ -28,10 +28,6 @@
 
   ~VCMEncodedFrame();
   /**
-   *   Delete VideoFrame and resets members to zero
-   */
-  void Free();
-  /**
    *   Set render time in milliseconds
    */
   void SetRenderTime(const int64_t renderTimeMs) {
diff --git a/modules/video_coding/frame_object.cc b/modules/video_coding/frame_object.cc
index 1e0b647..c3c9f23 100644
--- a/modules/video_coding/frame_object.cc
+++ b/modules/video_coding/frame_object.cc
@@ -170,15 +170,11 @@
   // Since FFmpeg use an optimized bitstream reader that reads in chunks of
   // 32/64 bits we have to add at least that much padding to the buffer
   // to make sure the decoder doesn't read out of bounds.
-  // NOTE! EncodedImage::_size is the size of the buffer (think capacity of
-  //       an std::vector) and EncodedImage::_length is the actual size of
-  //       the bitstream (think size of an std::vector).
   size_t new_size = frame_size + (codec_type_ == kVideoCodecH264
                                       ? EncodedImage::kBufferPaddingBytesH264
                                       : 0);
   if (capacity() < new_size) {
-    delete[] data();
-    set_buffer(new uint8_t[new_size], new_size);
+    Allocate(new_size);
   }
 
   set_size(frame_size);
diff --git a/modules/video_coding/utility/simulcast_test_fixture_impl.cc b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
index c65941a..5af14cc 100644
--- a/modules/video_coding/utility/simulcast_test_fixture_impl.cc
+++ b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
@@ -82,7 +82,7 @@
     // Only store the base layer.
     if (encoded_image.SpatialIndex().value_or(0) == 0) {
       if (encoded_image._frameType == kVideoFrameKey) {
-        delete[] encoded_key_frame_.data();
+        delete[] encoded_key_frame_.buffer();
         encoded_key_frame_.set_buffer(new uint8_t[encoded_image.capacity()],
                                       encoded_image.capacity());
         encoded_key_frame_.set_size(encoded_image.size());
@@ -91,7 +91,7 @@
         memcpy(encoded_key_frame_.data(), encoded_image.data(),
                encoded_image.size());
       } else {
-        delete[] encoded_frame_.data();
+        delete[] encoded_frame_.buffer();
         encoded_frame_.set_buffer(new uint8_t[encoded_image.capacity()],
                                   encoded_image.capacity());
         encoded_frame_.set_size(encoded_image.size());
@@ -905,7 +905,7 @@
   EXPECT_EQ(0, decoder_->Decode(encoded_frame[2], false, NULL, 0));
 
   for (int i = 0; i < 3; ++i) {
-    delete[] encoded_frame[i].data();
+    delete[] encoded_frame[i].buffer();
   }
 }
 
diff --git a/test/fake_encoder.cc b/test/fake_encoder.cc
index c4cbfe1..85e6151 100644
--- a/test/fake_encoder.cc
+++ b/test/fake_encoder.cc
@@ -313,23 +313,26 @@
     const size_t kSpsNalHeader = 0x67;
     const size_t kPpsNalHeader = 0x68;
     const size_t kIdrNalHeader = 0x65;
-    encoded_image.data()[fragmentation.fragmentationOffset[0]] = kSpsNalHeader;
-    encoded_image.data()[fragmentation.fragmentationOffset[1]] = kPpsNalHeader;
-    encoded_image.data()[fragmentation.fragmentationOffset[2]] = kIdrNalHeader;
+    encoded_image.buffer()[fragmentation.fragmentationOffset[0]] =
+        kSpsNalHeader;
+    encoded_image.buffer()[fragmentation.fragmentationOffset[1]] =
+        kPpsNalHeader;
+    encoded_image.buffer()[fragmentation.fragmentationOffset[2]] =
+        kIdrNalHeader;
   } else {
     const size_t kNumSlices = 1;
     fragmentation.VerifyAndAllocateFragmentationHeader(kNumSlices);
     fragmentation.fragmentationOffset[0] = 0;
     fragmentation.fragmentationLength[0] = encoded_image.size();
     const size_t kNalHeader = 0x41;
-    encoded_image.data()[fragmentation.fragmentationOffset[0]] = kNalHeader;
+    encoded_image.buffer()[fragmentation.fragmentationOffset[0]] = kNalHeader;
   }
   uint8_t value = 0;
   int fragment_counter = 0;
   for (size_t i = 0; i < encoded_image.size(); ++i) {
     if (fragment_counter == fragmentation.fragmentationVectorSize ||
         i != fragmentation.fragmentationOffset[fragment_counter]) {
-      encoded_image.data()[i] = value++;
+      encoded_image.buffer()[i] = value++;
     } else {
       ++fragment_counter;
     }
diff --git a/test/fake_vp8_encoder.cc b/test/fake_vp8_encoder.cc
index b6a89b8..c368eb5 100644
--- a/test/fake_vp8_encoder.cc
+++ b/test/fake_vp8_encoder.cc
@@ -127,7 +127,7 @@
 
   // Write width and height to the payload the same way as the real encoder
   // does.
-  WriteFakeVp8(encoded_image.data(), encoded_image._encodedWidth,
+  WriteFakeVp8(encoded_image.buffer(), encoded_image._encodedWidth,
                encoded_image._encodedHeight,
                encoded_image._frameType == kVideoFrameKey);
   return callback_->OnEncodedImage(encoded_image, &overrided_specific_info,
diff --git a/video/video_receive_stream.cc b/video/video_receive_stream.cc
index 836ad94..4e6444a 100644
--- a/video/video_receive_stream.cc
+++ b/video/video_receive_stream.cc
@@ -124,16 +124,20 @@
  public:
   explicit EncodedFrameForMediaTransport(
       MediaTransportEncodedVideoFrame frame) {
-    // TODO(nisse): This is too ugly. We copy the EncodedImage (a base class of
-    // ours, in several steps), to get all the meta data. But we then need to
-    // reset the buffer and allocate a new copy, since EncodedFrame must own it.
+    // TODO(nisse): This is ugly. We copy the EncodedImage (a base class of
+    // ours, in several steps), to get all the meta data. We should be using
+    // std::move in some way. Then we also need to handle the case of an unowned
+    // buffer, in which case we need to make an owned copy.
     *static_cast<class EncodedImage*>(this) = frame.encoded_image();
-    // Don't use the copied _buffer pointer.
-    set_buffer(nullptr, 0);
 
-    VerifyAndAllocate(frame.encoded_image().size());
-    set_size(frame.encoded_image().size());
-    memcpy(data(), frame.encoded_image().data(), size());
+    if (buffer()) {
+      // Unowned data. Make a copy we own.
+      set_buffer(nullptr, 0);
+
+      VerifyAndAllocate(frame.encoded_image().size());
+      set_size(frame.encoded_image().size());
+      memcpy(data(), frame.encoded_image().data(), size());
+    }
 
     _payloadType = static_cast<uint8_t>(frame.payload_type());