Pass explicit frame dependency information to RtpPayloadParams

Prior to this CL, RtpPayloadParams had code that assumed
dependency patterns in VP8, in order to write that information
into the [Generic Frame Descriptor] RTP extension.

This CL starts moving that code out of RtpPayloadParams.
Upcoming CLs will migrate additional encoder-wrappers to
the new scheme, then remove the deprecated code.

Bug: webrtc:10249
Change-Id: I5fc84aedf8e11f79d52b989ff8b7ce9568b6cf32
Reviewed-on: https://webrtc-review.googlesource.com/c/119958
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Elad Alon <eladalon@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26438}
diff --git a/api/video_codecs/vp8_temporal_layers.h b/api/video_codecs/vp8_temporal_layers.h
index 70223a4..ecc7e3d 100644
--- a/api/video_codecs/vp8_temporal_layers.h
+++ b/api/video_codecs/vp8_temporal_layers.h
@@ -14,6 +14,8 @@
 #include <memory>
 #include <vector>
 
+#include "rtc_base/checks.h"
+
 namespace webrtc {
 
 // Some notes on the prerequisites of the TemporalLayers interface.
@@ -99,6 +101,38 @@
                 BufferFlags arf,
                 FreezeEntropy);
 
+    enum class Buffer : int { kLast = 0, kGolden = 1, kArf = 2, kCount };
+
+    bool References(Buffer buffer) const {
+      switch (buffer) {
+        case Buffer::kLast:
+          return (last_buffer_flags & kReference) != 0;
+        case Buffer::kGolden:
+          return (golden_buffer_flags & kReference) != 0;
+        case Buffer::kArf:
+          return (arf_buffer_flags & kReference) != 0;
+        case Buffer::kCount:
+          break;
+      }
+      RTC_NOTREACHED();
+      return false;
+    }
+
+    bool Updates(Buffer buffer) const {
+      switch (buffer) {
+        case Buffer::kLast:
+          return (last_buffer_flags & kUpdate) != 0;
+        case Buffer::kGolden:
+          return (golden_buffer_flags & kUpdate) != 0;
+        case Buffer::kArf:
+          return (arf_buffer_flags & kUpdate) != 0;
+        case Buffer::kCount:
+          break;
+      }
+      RTC_NOTREACHED();
+      return false;
+    }
+
     bool drop_frame;
     BufferFlags last_buffer_flags;
     BufferFlags golden_buffer_flags;
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index a0f123c..93ef654 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -11,9 +11,9 @@
 #include "call/rtp_payload_params.h"
 
 #include <stddef.h>
+#include <algorithm>
 
 #include "absl/container/inlined_vector.h"
-#include "absl/types/optional.h"
 #include "absl/types/variant.h"
 #include "api/video/video_timing.h"
 #include "common_types.h"  // NOLINT(build/include)
@@ -21,7 +21,7 @@
 #include "modules/video_coding/codecs/interface/common_constants.h"
 #include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
-#include "modules/video_coding/include/video_codec_interface.h"
+#include "rtc_base/arraysize.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
 #include "rtc_base/random.h"
@@ -132,6 +132,8 @@
   for (auto& spatial_layer : last_shared_frame_id_)
     spatial_layer.fill(-1);
 
+  buffer_id_to_frame_id_.fill(-1);
+
   Random random(rtc::TimeMicros());
   state_.picture_id =
       state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF);
@@ -170,7 +172,8 @@
   SetCodecSpecific(&rtp_video_header, first_frame_in_picture);
 
   if (generic_descriptor_experiment_)
-    SetGeneric(shared_frame_id, is_keyframe, &rtp_video_header);
+    SetGeneric(codec_specific_info, shared_frame_id, is_keyframe,
+               &rtp_video_header);
 
   return rtp_video_header;
 }
@@ -231,19 +234,33 @@
   }
 }
 
-void RtpPayloadParams::SetGeneric(int64_t frame_id,
+void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
+                                  int64_t frame_id,
                                   bool is_keyframe,
                                   RTPVideoHeader* rtp_video_header) {
-  if (rtp_video_header->codec == kVideoCodecVP8) {
-    Vp8ToGeneric(frame_id, is_keyframe, rtp_video_header);
+  switch (rtp_video_header->codec) {
+    case VideoCodecType::kVideoCodecGeneric:
+      // TODO(philipel): Implement generic codec to new generic descriptor.
+      return;
+    case VideoCodecType::kVideoCodecVP8:
+      if (codec_specific_info) {
+        Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
+                     is_keyframe, rtp_video_header);
+      }
+      return;
+    case VideoCodecType::kVideoCodecVP9:
+      // TODO(philipel): Implement VP9 to new generic descriptor.
+      return;
+    case VideoCodecType::kVideoCodecH264:
+      // TODO(philipel): Implement H264 to new generic descriptor.
+    case VideoCodecType::kVideoCodecMultiplex:
+      return;
   }
-
-  // TODO(philipel): Implement VP9 to new generic descriptor.
-  // TODO(philipel): Implement H264 to new generic descriptor.
-  // TODO(philipel): Implement generic codec to new generic descriptor.
+  RTC_NOTREACHED() << "Unsupported codec.";
 }
 
-void RtpPayloadParams::Vp8ToGeneric(int64_t shared_frame_id,
+void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
+                                    int64_t shared_frame_id,
                                     bool is_keyframe,
                                     RTPVideoHeader* rtp_video_header) {
   const auto& vp8_header =
@@ -266,6 +283,28 @@
   generic.spatial_index = spatial_index;
   generic.temporal_index = temporal_index;
 
+  if (vp8_info.useExplicitDependencies) {
+    SetDependenciesVp8New(vp8_info, shared_frame_id, is_keyframe,
+                          vp8_header.layerSync, &generic);
+  } else {
+    SetDependenciesVp8Deprecated(vp8_info, shared_frame_id, is_keyframe,
+                                 spatial_index, temporal_index,
+                                 vp8_header.layerSync, &generic);
+  }
+}
+
+void RtpPayloadParams::SetDependenciesVp8Deprecated(
+    const CodecSpecificInfoVP8& vp8_info,
+    int64_t shared_frame_id,
+    bool is_keyframe,
+    int spatial_index,
+    int temporal_index,
+    bool layer_sync,
+    RTPVideoHeader::GenericDescriptorInfo* generic) {
+  RTC_DCHECK(!vp8_info.useExplicitDependencies);
+  RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
+  new_version_used_ = false;
+
   if (is_keyframe) {
     RTC_DCHECK_EQ(temporal_index, 0);
     last_shared_frame_id_[spatial_index].fill(-1);
@@ -273,7 +312,7 @@
     return;
   }
 
-  if (vp8_header.layerSync) {
+  if (layer_sync) {
     int64_t tl0_frame_id = last_shared_frame_id_[spatial_index][0];
 
     for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
@@ -284,14 +323,14 @@
 
     RTC_DCHECK_GE(tl0_frame_id, 0);
     RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
-    generic.dependencies.push_back(tl0_frame_id);
+    generic->dependencies.push_back(tl0_frame_id);
   } else {
     for (int i = 0; i <= temporal_index; ++i) {
       int64_t frame_id = last_shared_frame_id_[spatial_index][i];
 
       if (frame_id != -1) {
         RTC_DCHECK_LT(frame_id, shared_frame_id);
-        generic.dependencies.push_back(frame_id);
+        generic->dependencies.push_back(frame_id);
       }
     }
   }
@@ -299,4 +338,53 @@
   last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
 }
 
+void RtpPayloadParams::SetDependenciesVp8New(
+    const CodecSpecificInfoVP8& vp8_info,
+    int64_t shared_frame_id,
+    bool is_keyframe,
+    bool layer_sync,
+    RTPVideoHeader::GenericDescriptorInfo* generic) {
+  RTC_DCHECK(vp8_info.useExplicitDependencies);
+  RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
+  new_version_used_ = true;
+
+  if (is_keyframe) {
+    RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
+    buffer_id_to_frame_id_.fill(shared_frame_id);
+    return;
+  }
+
+  constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;
+
+  RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
+  RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
+                arraysize(vp8_info.referencedBuffers));
+
+  for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
+    const size_t referenced_buffer = vp8_info.referencedBuffers[i];
+    RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
+    RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());
+
+    const int64_t dependency_frame_id =
+        buffer_id_to_frame_id_[referenced_buffer];
+    RTC_DCHECK_GE(dependency_frame_id, 0);
+    RTC_DCHECK_LT(dependency_frame_id, shared_frame_id);
+
+    const bool is_new_dependency =
+        std::find(generic->dependencies.begin(), generic->dependencies.end(),
+                  dependency_frame_id) == generic->dependencies.end();
+    if (is_new_dependency) {
+      generic->dependencies.push_back(dependency_frame_id);
+    }
+  }
+
+  RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
+  for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
+    const size_t updated_id = vp8_info.updatedBuffers[i];
+    buffer_id_to_frame_id_[updated_id] = shared_frame_id;
+  }
+
+  RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
+}
+
 }  // namespace webrtc
diff --git a/call/rtp_payload_params.h b/call/rtp_payload_params.h
index b91bfe4..605e294 100644
--- a/call/rtp_payload_params.h
+++ b/call/rtp_payload_params.h
@@ -11,13 +11,14 @@
 #ifndef CALL_RTP_PAYLOAD_PARAMS_H_
 #define CALL_RTP_PAYLOAD_PARAMS_H_
 
-#include <map>
-#include <vector>
+#include <array>
 
+#include "absl/types/optional.h"
 #include "api/video_codecs/video_encoder.h"
 #include "call/rtp_config.h"
 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 #include "modules/rtp_rtcp/source/rtp_video_header.h"
+#include "modules/video_coding/include/video_codec_interface.h"
 
 namespace webrtc {
 
@@ -43,18 +44,53 @@
  private:
   void SetCodecSpecific(RTPVideoHeader* rtp_video_header,
                         bool first_frame_in_picture);
-  void SetGeneric(int64_t frame_id,
+  void SetGeneric(const CodecSpecificInfo* codec_specific_info,
+                  int64_t frame_id,
                   bool is_keyframe,
                   RTPVideoHeader* rtp_video_header);
 
-  void Vp8ToGeneric(int64_t shared_frame_id,
+  void Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
+                    int64_t shared_frame_id,
                     bool is_keyframe,
                     RTPVideoHeader* rtp_video_header);
 
+  // TODO(bugs.webrtc.org/10242): Delete SetDependenciesVp8Deprecated() and move
+  // the logic in SetDependenciesVp8New() into Vp8ToGeneric() once all hardware
+  // wrappers have been updated.
+  void SetDependenciesVp8Deprecated(
+      const CodecSpecificInfoVP8& vp8_info,
+      int64_t shared_frame_id,
+      bool is_keyframe,
+      int spatial_index,
+      int temporal_index,
+      bool layer_sync,
+      RTPVideoHeader::GenericDescriptorInfo* generic);
+  void SetDependenciesVp8New(const CodecSpecificInfoVP8& vp8_info,
+                             int64_t shared_frame_id,
+                             bool is_keyframe,
+                             bool layer_sync,
+                             RTPVideoHeader::GenericDescriptorInfo* generic);
+
+  // TODO(bugs.webrtc.org/10242): Remove once all encoder-wrappers are updated.
   // Holds the last shared frame id for a given (spatial, temporal) layer.
   std::array<std::array<int64_t, RtpGenericFrameDescriptor::kMaxTemporalLayers>,
              RtpGenericFrameDescriptor::kMaxSpatialLayers>
       last_shared_frame_id_;
+
+  // TODO(eladalon): When additional codecs are supported,
+  // set kMaxCodecBuffersCount to the max() of these codecs' buffer count.
+  static constexpr size_t kMaxCodecBuffersCount =
+      CodecSpecificInfoVP8::kBuffersCount;
+
+  // Maps buffer IDs to the frame-ID stored in them.
+  std::array<int64_t, kMaxCodecBuffersCount> buffer_id_to_frame_id_;
+
+  // Until we remove SetDependenciesVp8Deprecated(), we should make sure
+  // that, for a given object, we either always use
+  // SetDependenciesVp8Deprecated(), or always use SetDependenciesVp8New().
+  // TODO(bugs.webrtc.org/10242): Remove.
+  absl::optional<bool> new_version_used_;
+
   const uint32_t ssrc_;
   RtpPayloadState state_;
 
diff --git a/modules/video_coding/codecs/vp8/default_temporal_layers.cc b/modules/video_coding/codecs/vp8/default_temporal_layers.cc
index c5e163f..a372603 100644
--- a/modules/video_coding/codecs/vp8/default_temporal_layers.cc
+++ b/modules/video_coding/codecs/vp8/default_temporal_layers.cc
@@ -20,11 +20,13 @@
 #include "modules/include/module_common_types.h"
 #include "modules/video_coding/codecs/vp8/default_temporal_layers.h"
 #include "modules/video_coding/include/video_codec_interface.h"
+#include "rtc_base/arraysize.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
 #include "system_wrappers/include/field_trial.h"
 
 namespace webrtc {
+using Buffer = Vp8TemporalLayers::FrameConfig::Buffer;
 
 Vp8TemporalLayers::FrameConfig::FrameConfig()
     : FrameConfig(kNone, kNone, kNone, false) {}
@@ -512,6 +514,24 @@
     }
   }
 
+  vp8_info->useExplicitDependencies = true;
+  RTC_DCHECK_EQ(vp8_info->referencedBuffersCount, 0u);
+  RTC_DCHECK_EQ(vp8_info->updatedBuffersCount, 0u);
+
+  for (int i = 0; i < static_cast<int>(Buffer::kCount); ++i) {
+    if (!is_keyframe && frame.frame_config.References(static_cast<Buffer>(i))) {
+      RTC_DCHECK_LT(vp8_info->referencedBuffersCount,
+                    arraysize(CodecSpecificInfoVP8::referencedBuffers));
+      vp8_info->referencedBuffers[vp8_info->referencedBuffersCount++] = i;
+    }
+
+    if (is_keyframe || frame.frame_config.Updates(static_cast<Buffer>(i))) {
+      RTC_DCHECK_LT(vp8_info->updatedBuffersCount,
+                    arraysize(CodecSpecificInfoVP8::updatedBuffers));
+      vp8_info->updatedBuffers[vp8_info->updatedBuffersCount++] = i;
+    }
+  }
+
   if (!frame.expired) {
     for (Vp8BufferReference buffer : kAllBuffers) {
       if (frame.updated_buffer_mask & static_cast<uint8_t>(buffer)) {
diff --git a/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc b/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc
index a7bcbe6..c3291ab 100644
--- a/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc
+++ b/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc
@@ -12,6 +12,7 @@
 
 #include <cstdint>
 
+#include "absl/memory/memory.h"
 #include "api/video/video_bitrate_allocation.h"
 #include "api/video_codecs/video_codec.h"
 #include "common_types.h"  // NOLINT(build/include)
@@ -83,12 +84,24 @@
 
 using BufferFlags = Vp8TemporalLayers::BufferFlags;
 
-TEST(TemporalLayersTest, 2Layers) {
+class TemporalLayersTest : public ::testing::Test {
+ public:
+  ~TemporalLayersTest() override = default;
+
+  CodecSpecificInfoVP8* IgnoredCodecSpecificInfoVp8() {
+    codec_specific_info_ = absl::make_unique<CodecSpecificInfo>();
+    return &codec_specific_info_->codecSpecific.VP8;
+  }
+
+ private:
+  std::unique_ptr<CodecSpecificInfo> codec_specific_info_;
+};
+
+TEST_F(TemporalLayersTest, 2Layers) {
   constexpr int kNumLayers = 2;
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -121,6 +134,8 @@
 
   uint32_t timestamp = 0;
   for (int i = 0; i < 16; ++i) {
+    CodecSpecificInfo info;
+    CodecSpecificInfoVP8& vp8_info = info.codecSpecific.VP8;
     Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
     EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i;
     tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, i == 0, kDefaultQp,
@@ -135,12 +150,11 @@
   }
 }
 
-TEST(TemporalLayersTest, 3Layers) {
+TEST_F(TemporalLayersTest, 3Layers) {
   constexpr int kNumLayers = 3;
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -173,6 +187,8 @@
 
   unsigned int timestamp = 0;
   for (int i = 0; i < 16; ++i) {
+    CodecSpecificInfo info;
+    CodecSpecificInfoVP8& vp8_info = info.codecSpecific.VP8;
     Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
     EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i;
     tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, i == 0, kDefaultQp,
@@ -187,13 +203,12 @@
   }
 }
 
-TEST(TemporalLayersTest, Alternative3Layers) {
+TEST_F(TemporalLayersTest, Alternative3Layers) {
   constexpr int kNumLayers = 3;
   ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/");
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -214,6 +229,8 @@
 
   unsigned int timestamp = 0;
   for (int i = 0; i < 8; ++i) {
+    CodecSpecificInfo info;
+    CodecSpecificInfoVP8& vp8_info = info.codecSpecific.VP8;
     Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
     EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i;
     tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, i == 0, kDefaultQp,
@@ -228,13 +245,12 @@
   }
 }
 
-TEST(TemporalLayersTest, SearchOrder) {
+TEST_F(TemporalLayersTest, SearchOrder) {
   constexpr int kNumLayers = 3;
   ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/");
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -247,19 +263,19 @@
   uint32_t timestamp = 0;
   Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, true, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // TL2 frame. First one only references TL0. Updates altref.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kLast);
   EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kNone);
 
   // TL1 frame. Can only reference TL0. Updated golden.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kLast);
   EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kNone);
 
@@ -267,18 +283,17 @@
   // updated, the next to last was altref.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kGolden);
   EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kAltref);
 }
 
-TEST(TemporalLayersTest, SearchOrderWithDrop) {
+TEST_F(TemporalLayersTest, SearchOrderWithDrop) {
   constexpr int kNumLayers = 3;
   ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/");
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -291,12 +306,12 @@
   uint32_t timestamp = 0;
   Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, true, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // TL2 frame. First one only references TL0. Updates altref.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kLast);
   EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kNone);
 
@@ -308,17 +323,16 @@
   // been populated this cycle. Altref was last to be updated, before that last.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kAltref);
   EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kLast);
 }
 
-TEST(TemporalLayersTest, 4Layers) {
+TEST_F(TemporalLayersTest, 4Layers) {
   constexpr int kNumLayers = 4;
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -350,6 +364,8 @@
 
   uint32_t timestamp = 0;
   for (int i = 0; i < 16; ++i) {
+    CodecSpecificInfo info;
+    CodecSpecificInfoVP8& vp8_info = info.codecSpecific.VP8;
     Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
     EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i;
     tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, i == 0, kDefaultQp,
@@ -364,7 +380,7 @@
   }
 }
 
-TEST(TemporalLayersTest, DoesNotReferenceDroppedFrames) {
+TEST_F(TemporalLayersTest, DoesNotReferenceDroppedFrames) {
   constexpr int kNumLayers = 3;
   // Use a repeating pattern of tl 0, 2, 1, 2.
   // Tl 0, 1, 2 update last, golden, altref respectively.
@@ -372,7 +388,6 @@
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -382,7 +397,7 @@
   uint32_t timestamp = 0;
   Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, true, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // Dropped TL2 frame.
   tl_config = tl.UpdateLayerConfig(++timestamp);
@@ -396,7 +411,7 @@
   // both contain the last keyframe.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference);
   EXPECT_TRUE(tl_config.golden_buffer_flags & BufferFlags::kReference);
   EXPECT_TRUE(tl_config.arf_buffer_flags & BufferFlags::kReference);
@@ -406,23 +421,23 @@
   // TL0 base layer frame, updating and referencing last.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // TL2 frame, updating altref.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // TL1 frame, updating golden.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // TL2 frame. Can still reference all buffer since they have been update this
   // cycle.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference);
   EXPECT_TRUE(tl_config.golden_buffer_flags & BufferFlags::kReference);
   EXPECT_TRUE(tl_config.arf_buffer_flags & BufferFlags::kReference);
@@ -432,7 +447,7 @@
   // TL0 base layer frame, updating and referencing last.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // Dropped TL2 frame.
   tl_config = tl.UpdateLayerConfig(++timestamp);
@@ -446,20 +461,19 @@
   // and cannot be referenced.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference);
   EXPECT_FALSE(tl_config.golden_buffer_flags & BufferFlags::kReference);
   EXPECT_FALSE(tl_config.arf_buffer_flags & BufferFlags::kReference);
 }
 
-TEST(TemporalLayersTest, DoesNotReferenceUnlessGuaranteedToExist) {
+TEST_F(TemporalLayersTest, DoesNotReferenceUnlessGuaranteedToExist) {
   constexpr int kNumLayers = 3;
   // Use a repeating pattern of tl 0, 2, 1, 2.
   // Tl 0, 1 updates last, golden respectively. Altref is always last keyframe.
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -469,24 +483,24 @@
   uint32_t timestamp = 0;
   Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, true, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // Do a full cycle of the pattern.
   for (int i = 0; i < 7; ++i) {
     tl_config = tl.UpdateLayerConfig(++timestamp);
     tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                    &vp8_info);
+                    IgnoredCodecSpecificInfoVp8());
   }
 
   // TL0 base layer frame, starting the cycle over.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // TL2 frame.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // Encoder has a hiccup and builds a queue, so frame encoding is delayed.
   // TL1 frame, updating golden.
@@ -506,13 +520,13 @@
   // buffers are now OK to reference.
   // Enqueued TL1 frame ready.
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   // Enqueued TL2 frame.
   tl.OnEncodeDone(++timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   // Enqueued TL0 frame.
   tl.OnEncodeDone(++timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // TL2 frame, all buffers are now in a known good state, OK to reference.
   tl_config = tl.UpdateLayerConfig(++timestamp + 1);
@@ -521,7 +535,7 @@
   EXPECT_TRUE(tl_config.arf_buffer_flags & BufferFlags::kReference);
 }
 
-TEST(TemporalLayersTest, DoesNotReferenceUnlessGuaranteedToExistLongDelay) {
+TEST_F(TemporalLayersTest, DoesNotReferenceUnlessGuaranteedToExistLongDelay) {
   constexpr int kNumLayers = 3;
   // Use a repeating pattern of tl 0, 2, 1, 2.
   // Tl 0, 1 updates last, golden, altref respectively.
@@ -529,7 +543,6 @@
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -539,24 +552,24 @@
   uint32_t timestamp = 0;
   Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, true, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // Do a full cycle of the pattern.
   for (int i = 0; i < 3; ++i) {
     tl_config = tl.UpdateLayerConfig(++timestamp);
     tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                    &vp8_info);
+                    IgnoredCodecSpecificInfoVp8());
   }
 
   // TL0 base layer frame, starting the cycle over.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // TL2 frame.
   tl_config = tl.UpdateLayerConfig(++timestamp);
   tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // Encoder has a hiccup and builds a queue, so frame encoding is delayed.
   // Encoded, but delayed frames in TL 1, 2.
@@ -571,10 +584,10 @@
 
   // TL1 frame from last cycle is ready.
   tl.OnEncodeDone(timestamp + 1, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
   // TL2 frame from last cycle is ready.
   tl.OnEncodeDone(timestamp + 2, kDefaultBytesPerFrame, false, kDefaultQp,
-                  &vp8_info);
+                  IgnoredCodecSpecificInfoVp8());
 
   // TL2 frame, that should be referencing all buffers, but altref and golden
   // haven not been updated this cycle. (Don't be fooled by the late frames from
@@ -585,12 +598,11 @@
   EXPECT_FALSE(tl_config.arf_buffer_flags & BufferFlags::kReference);
 }
 
-TEST(TemporalLayersTest, KeyFrame) {
+TEST_F(TemporalLayersTest, KeyFrame) {
   constexpr int kNumLayers = 3;
   DefaultTemporalLayers tl(kNumLayers);
   DefaultTemporalLayersChecker checker(kNumLayers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_info;
   tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame,
                                           kDefaultFramerate, kNumLayers),
                     kDefaultFramerate);
@@ -622,7 +634,7 @@
       EXPECT_EQ(expected_flags[j], LibvpxVp8Encoder::EncodeFlags(tl_config))
           << j;
       tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, false, kDefaultQp,
-                      &vp8_info);
+                      IgnoredCodecSpecificInfoVp8());
       EXPECT_TRUE(checker.CheckTemporalConfig(false, tl_config));
       EXPECT_EQ(expected_temporal_idx[j], tl_config.packetizer_temporal_idx);
       EXPECT_EQ(expected_temporal_idx[j], tl_config.encoder_layer_id);
@@ -630,6 +642,8 @@
       timestamp += 3000;
     }
 
+    CodecSpecificInfo info;
+    CodecSpecificInfoVP8& vp8_info = info.codecSpecific.VP8;
     Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp);
     tl.OnEncodeDone(timestamp, kDefaultBytesPerFrame, true, kDefaultQp,
                     &vp8_info);
@@ -640,7 +654,8 @@
   }
 }
 
-class TemporalLayersReferenceTest : public ::testing::TestWithParam<int> {
+class TemporalLayersReferenceTest : public TemporalLayersTest,
+                                    public ::testing::WithParamInterface<int> {
  public:
   TemporalLayersReferenceTest()
       : timestamp_(1),
@@ -706,7 +721,6 @@
   const int num_layers = GetParam();
   DefaultTemporalLayers tl(num_layers);
   Vp8EncoderConfig cfg;
-  CodecSpecificInfoVP8 vp8_specifics;
   tl.OnRatesUpdated(
       GetTemporalLayerRates(kDefaultBytesPerFrame, kDefaultFramerate, 1),
       kDefaultFramerate);
@@ -721,7 +735,7 @@
   for (int i = 0; i < kMaxPatternLength; ++i) {
     Vp8TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp_);
     tl.OnEncodeDone(timestamp_, kDefaultBytesPerFrame, i == 0, kDefaultQp,
-                    &vp8_specifics);
+                    IgnoredCodecSpecificInfoVp8());
     ++timestamp_;
     EXPECT_FALSE(tl_config.drop_frame);
     tl_configs.push_back(tl_config);
diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h
index d6e696d..052963c 100644
--- a/modules/video_coding/include/video_codec_interface.h
+++ b/modules/video_coding/include/video_codec_interface.h
@@ -24,15 +24,33 @@
 
 class RTPFragmentationHeader;  // forward declaration
 
-// Note: if any pointers are added to this struct, it must be fitted
+// Note: If any pointers are added to this struct, it must be fitted
 // with a copy-constructor. See below.
+// Hack alert - the code assumes that this struct is memset when constructed.
 struct CodecSpecificInfoVP8 {
   bool nonReference;
   uint8_t temporalIdx;
   bool layerSync;
   int8_t keyIdx;  // Negative value to skip keyIdx.
-};
 
+  // Used to generate the list of dependency frames.
+  // |referencedBuffers| and |updatedBuffers| contain buffer IDs.
+  // Note that the buffer IDs here have a one-to-one mapping with the actual
+  // codec buffers, but the exact mapping (i.e. whether 0 refers to Last,
+  // to Golden or to Arf) is not pre-determined.
+  // More references may be specified than are strictly necessary, but not less.
+  // TODO(bugs.webrtc.org/10242): Remove |useExplicitDependencies| once all
+  // encoder-wrappers are updated.
+  bool useExplicitDependencies;
+  static constexpr size_t kBuffersCount = 3;
+  size_t referencedBuffers[kBuffersCount];
+  size_t referencedBuffersCount;
+  size_t updatedBuffers[kBuffersCount];
+  size_t updatedBuffersCount;
+};
+static_assert(std::is_pod<CodecSpecificInfoVP8>::value, "");
+
+// Hack alert - the code assumes that this struct is memset when constructed.
 struct CodecSpecificInfoVP9 {
   bool first_frame_in_picture;  // First frame, increment picture_id.
   bool inter_pic_predicted;     // This layer frame is dependent on previously
@@ -60,18 +78,22 @@
 
   bool end_of_picture;
 };
+static_assert(std::is_pod<CodecSpecificInfoVP9>::value, "");
 
+// Hack alert - the code assumes that this struct is memset when constructed.
 struct CodecSpecificInfoH264 {
   H264PacketizationMode packetization_mode;
 };
+static_assert(std::is_pod<CodecSpecificInfoH264>::value, "");
 
 union CodecSpecificInfoUnion {
   CodecSpecificInfoVP8 VP8;
   CodecSpecificInfoVP9 VP9;
   CodecSpecificInfoH264 H264;
 };
+static_assert(std::is_pod<CodecSpecificInfoUnion>::value, "");
 
-// Note: if any pointers are added to this struct or its sub-structs, it
+// Note: If any pointers are added to this struct or its sub-structs, it
 // must be fitted with a copy-constructor. This is because it is copied
 // in the copy-constructor of VCMEncodedFrame.
 struct CodecSpecificInfo {