Fix potential unsafe access to VCMTimestampMap::data

The access to |_timestampMap| was guarded by a lock but
not the access to the data pointer stored in |_timestampMap|.
There was a potential race condition if new data was added
in VCMGenericDecoder::Decode() while the data pointer
retrieved from _timestampMap.Pop() was being used in
VCMDecodedFrameCallback::Decoded().

This CL moves the storage of data to within |_timestampMap|,
instead of being a pointer so that it's guarded by the same
lock.

Bug: webrtc:11229
Change-Id: I3f2afb568ed724db5719d508a73de402c4531dec
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/209361
Commit-Queue: Johannes Kron <kron@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33374}
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 0de3b4c..2a50436 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -171,6 +171,7 @@
     "../../api:sequence_checker",
     "../../api/units:data_rate",
     "../../api/units:time_delta",
+    "../../api/units:timestamp",
     "../../api/video:builtin_video_bitrate_allocator_factory",
     "../../api/video:encoded_frame",
     "../../api/video:encoded_image",
diff --git a/modules/video_coding/generic_decoder.cc b/modules/video_coding/generic_decoder.cc
index 3d48a3e..bd13d21 100644
--- a/modules/video_coding/generic_decoder.cc
+++ b/modules/video_coding/generic_decoder.cc
@@ -91,7 +91,7 @@
                        "timestamp", decodedImage.timestamp());
   // TODO(holmer): We should improve this so that we can handle multiple
   // callbacks from one call to Decode().
-  VCMFrameInformation* frameInfo;
+  absl::optional<VCMFrameInformation> frameInfo;
   int timestamp_map_size = 0;
   {
     MutexLock lock(&lock_);
@@ -99,7 +99,7 @@
     timestamp_map_size = _timestampMap.Size();
   }
 
-  if (frameInfo == NULL) {
+  if (!frameInfo) {
     RTC_LOG(LS_WARNING) << "Too many frames backed up in the decoder, dropping "
                            "this one.";
     _receiveCallback->OnDroppedFrames(1);
@@ -196,14 +196,14 @@
 }
 
 void VCMDecodedFrameCallback::Map(uint32_t timestamp,
-                                  VCMFrameInformation* frameInfo) {
+                                  const VCMFrameInformation& frameInfo) {
   MutexLock lock(&lock_);
   _timestampMap.Add(timestamp, frameInfo);
 }
 
 int32_t VCMDecodedFrameCallback::Pop(uint32_t timestamp) {
   MutexLock lock(&lock_);
-  if (_timestampMap.Pop(timestamp) == NULL) {
+  if (_timestampMap.Pop(timestamp) == absl::nullopt) {
     return VCM_GENERAL_ERROR;
   }
   _receiveCallback->OnDroppedFrames(1);
@@ -215,8 +215,6 @@
 
 VCMGenericDecoder::VCMGenericDecoder(VideoDecoder* decoder, bool isExternal)
     : _callback(NULL),
-      _frameInfos(),
-      _nextFrameInfoIdx(0),
       decoder_(decoder),
       _codecType(kVideoCodecGeneric),
       _isExternal(isExternal),
@@ -249,26 +247,25 @@
 int32_t VCMGenericDecoder::Decode(const VCMEncodedFrame& frame, Timestamp now) {
   TRACE_EVENT1("webrtc", "VCMGenericDecoder::Decode", "timestamp",
                frame.Timestamp());
-  _frameInfos[_nextFrameInfoIdx].decodeStart = now;
-  _frameInfos[_nextFrameInfoIdx].renderTimeMs = frame.RenderTimeMs();
-  _frameInfos[_nextFrameInfoIdx].rotation = frame.rotation();
-  _frameInfos[_nextFrameInfoIdx].timing = frame.video_timing();
-  _frameInfos[_nextFrameInfoIdx].ntp_time_ms =
-      frame.EncodedImage().ntp_time_ms_;
-  _frameInfos[_nextFrameInfoIdx].packet_infos = frame.PacketInfos();
+  VCMFrameInformation frame_info;
+  frame_info.decodeStart = now;
+  frame_info.renderTimeMs = frame.RenderTimeMs();
+  frame_info.rotation = frame.rotation();
+  frame_info.timing = frame.video_timing();
+  frame_info.ntp_time_ms = frame.EncodedImage().ntp_time_ms_;
+  frame_info.packet_infos = frame.PacketInfos();
 
   // Set correctly only for key frames. Thus, use latest key frame
   // content type. If the corresponding key frame was lost, decode will fail
   // and content type will be ignored.
   if (frame.FrameType() == VideoFrameType::kVideoFrameKey) {
-    _frameInfos[_nextFrameInfoIdx].content_type = frame.contentType();
+    frame_info.content_type = frame.contentType();
     _last_keyframe_content_type = frame.contentType();
   } else {
-    _frameInfos[_nextFrameInfoIdx].content_type = _last_keyframe_content_type;
+    frame_info.content_type = _last_keyframe_content_type;
   }
-  _callback->Map(frame.Timestamp(), &_frameInfos[_nextFrameInfoIdx]);
+  _callback->Map(frame.Timestamp(), frame_info);
 
-  _nextFrameInfoIdx = (_nextFrameInfoIdx + 1) % kDecoderFrameMemoryLength;
   int32_t ret = decoder_->Decode(frame.EncodedImage(), frame.MissingFrame(),
                                  frame.RenderTimeMs());
   VideoDecoder::DecoderInfo decoder_info = decoder_->GetDecoderInfo();
diff --git a/modules/video_coding/generic_decoder.h b/modules/video_coding/generic_decoder.h
index b595323..2ff6b20 100644
--- a/modules/video_coding/generic_decoder.h
+++ b/modules/video_coding/generic_decoder.h
@@ -29,18 +29,6 @@
 
 enum { kDecoderFrameMemoryLength = 10 };
 
-struct VCMFrameInformation {
-  int64_t renderTimeMs;
-  absl::optional<Timestamp> decodeStart;
-  void* userData;
-  VideoRotation rotation;
-  VideoContentType content_type;
-  EncodedImage::Timing timing;
-  int64_t ntp_time_ms;
-  RtpPacketInfos packet_infos;
-  // ColorSpace is not stored here, as it might be modified by decoders.
-};
-
 class VCMDecodedFrameCallback : public DecodedImageCallback {
  public:
   VCMDecodedFrameCallback(VCMTiming* timing, Clock* clock);
@@ -56,7 +44,7 @@
 
   void OnDecoderImplementationName(const char* implementation_name);
 
-  void Map(uint32_t timestamp, VCMFrameInformation* frameInfo);
+  void Map(uint32_t timestamp, const VCMFrameInformation& frameInfo);
   int32_t Pop(uint32_t timestamp);
 
  private:
@@ -117,7 +105,6 @@
  private:
   VCMDecodedFrameCallback* _callback;
   VCMFrameInformation _frameInfos[kDecoderFrameMemoryLength];
-  uint32_t _nextFrameInfoIdx;
   std::unique_ptr<VideoDecoder> decoder_;
   VideoCodecType _codecType;
   const bool _isExternal;
diff --git a/modules/video_coding/timestamp_map.cc b/modules/video_coding/timestamp_map.cc
index d79075f..7762e36 100644
--- a/modules/video_coding/timestamp_map.cc
+++ b/modules/video_coding/timestamp_map.cc
@@ -24,7 +24,7 @@
 
 VCMTimestampMap::~VCMTimestampMap() {}
 
-void VCMTimestampMap::Add(uint32_t timestamp, VCMFrameInformation* data) {
+void VCMTimestampMap::Add(uint32_t timestamp, const VCMFrameInformation& data) {
   ring_buffer_[next_add_idx_].timestamp = timestamp;
   ring_buffer_[next_add_idx_].data = data;
   next_add_idx_ = (next_add_idx_ + 1) % capacity_;
@@ -35,18 +35,18 @@
   }
 }
 
-VCMFrameInformation* VCMTimestampMap::Pop(uint32_t timestamp) {
+absl::optional<VCMFrameInformation> VCMTimestampMap::Pop(uint32_t timestamp) {
   while (!IsEmpty()) {
     if (ring_buffer_[next_pop_idx_].timestamp == timestamp) {
       // Found start time for this timestamp.
-      VCMFrameInformation* data = ring_buffer_[next_pop_idx_].data;
-      ring_buffer_[next_pop_idx_].data = nullptr;
+      const VCMFrameInformation& data = ring_buffer_[next_pop_idx_].data;
+      ring_buffer_[next_pop_idx_].timestamp = 0;
       next_pop_idx_ = (next_pop_idx_ + 1) % capacity_;
       return data;
     } else if (IsNewerTimestamp(ring_buffer_[next_pop_idx_].timestamp,
                                 timestamp)) {
       // The timestamp we are looking for is not in the list.
-      return nullptr;
+      return absl::nullopt;
     }
 
     // Not in this position, check next (and forget this position).
@@ -54,7 +54,7 @@
   }
 
   // Could not find matching timestamp in list.
-  return nullptr;
+  return absl::nullopt;
 }
 
 bool VCMTimestampMap::IsEmpty() const {
diff --git a/modules/video_coding/timestamp_map.h b/modules/video_coding/timestamp_map.h
index cfa1257..86ce55b 100644
--- a/modules/video_coding/timestamp_map.h
+++ b/modules/video_coding/timestamp_map.h
@@ -13,23 +13,41 @@
 
 #include <memory>
 
+#include "absl/types/optional.h"
+#include "api/rtp_packet_infos.h"
+#include "api/units/timestamp.h"
+#include "api/video/encoded_image.h"
+#include "api/video/video_content_type.h"
+#include "api/video/video_rotation.h"
+#include "api/video/video_timing.h"
+
 namespace webrtc {
 
-struct VCMFrameInformation;
+struct VCMFrameInformation {
+  int64_t renderTimeMs;
+  absl::optional<Timestamp> decodeStart;
+  void* userData;
+  VideoRotation rotation;
+  VideoContentType content_type;
+  EncodedImage::Timing timing;
+  int64_t ntp_time_ms;
+  RtpPacketInfos packet_infos;
+  // ColorSpace is not stored here, as it might be modified by decoders.
+};
 
 class VCMTimestampMap {
  public:
   explicit VCMTimestampMap(size_t capacity);
   ~VCMTimestampMap();
 
-  void Add(uint32_t timestamp, VCMFrameInformation* data);
-  VCMFrameInformation* Pop(uint32_t timestamp);
+  void Add(uint32_t timestamp, const VCMFrameInformation& data);
+  absl::optional<VCMFrameInformation> Pop(uint32_t timestamp);
   size_t Size() const;
 
  private:
   struct TimestampDataTuple {
     uint32_t timestamp;
-    VCMFrameInformation* data;
+    VCMFrameInformation data;
   };
   bool IsEmpty() const;