Convert VP8 descriptor to generic descriptor.

Also adds a running picture id for the old generic format when
kVideoCodecGeneric is used (behind the "WebRTC-GenericPictureId" field trial).

Bug: webrtc:9361
Change-Id: I6f232a2663bb60257c97ed3473eb07044d325b90
Reviewed-on: https://webrtc-review.googlesource.com/94842
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24449}
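
For context, a standalone sketch (not part of this CL; the class and function
names are made up) of the per-layer bookkeeping that
RtpPayloadParams::Vp8ToGeneric() performs in the diff below. The constant
mirrors RtpGenericFrameDescriptor::kMaxTemporalLayers; spatial layers are
omitted because the VP8 conversion always uses spatial index 0.

// Standalone model of the dependency bookkeeping added in this CL.
// Hypothetical names; only the logic mirrors Vp8ToGeneric().
#include <array>
#include <cstdint>
#include <vector>

namespace sketch {

constexpr int kMaxTemporalLayers = 8;  // Mirrors the new constant in this CL.

class Vp8DependencyModel {
 public:
  Vp8DependencyModel() { last_frame_id_.fill(-1); }

  // Returns the frame ids the new frame depends on and records the frame as
  // the latest one of its temporal layer.
  std::vector<int64_t> OnFrame(int64_t frame_id,
                               int temporal_index,
                               bool is_keyframe,
                               bool layer_sync) {
    std::vector<int64_t> deps;
    if (is_keyframe) {
      // A keyframe starts a new prediction chain; forget all previous frames.
      last_frame_id_.fill(-1);
      last_frame_id_[temporal_index] = frame_id;
      return deps;
    }
    if (layer_sync) {
      // A layer-sync frame references only the base layer. Frames older than
      // that TL0 frame can never be referenced again, so drop them. (The real
      // code DCHECKs that a TL0 frame exists at this point.)
      const int64_t tl0_frame_id = last_frame_id_[0];
      for (int i = 1; i < kMaxTemporalLayers; ++i) {
        if (last_frame_id_[i] < tl0_frame_id)
          last_frame_id_[i] = -1;
      }
      deps.push_back(tl0_frame_id);
    } else {
      // Otherwise the frame may reference the latest frame of every temporal
      // layer at or below its own.
      for (int i = 0; i <= temporal_index; ++i) {
        if (last_frame_id_[i] != -1)
          deps.push_back(last_frame_id_[i]);
      }
    }
    last_frame_id_[temporal_index] = frame_id;
    return deps;
  }

 private:
  std::array<int64_t, kMaxTemporalLayers> last_frame_id_;
};

}  // namespace sketch

Feeding this model the 0-2-1-2 pattern from the LayerSync unittest below
reproduces the expected dependency sets, e.g. the TL2 frame with id 3 depends
on {0, 1, 2}.
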
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index d61d40f..608688b 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -13,8 +13,10 @@
 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
 #include "modules/video_coding/include/video_codec_interface.h"
 #include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
 #include "rtc_base/random.h"
 #include "rtc_base/timeutils.h"
+#include "system_wrappers/include/field_trial.h"
 
 namespace webrtc {
 
@@ -107,17 +109,26 @@
 
 RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
                                    const RtpPayloadState* state)
-    : ssrc_(ssrc) {
+    : ssrc_(ssrc),
+      generic_picture_id_experiment_(
+          field_trial::IsEnabled("WebRTC-GenericPictureId")) {
+  for (auto& spatial_layer : last_shared_frame_id_)
+    spatial_layer.fill(-1);
+
   Random random(rtc::TimeMicros());
   state_.picture_id =
       state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF);
   state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>());
 }
+
+RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;
+
 RtpPayloadParams::~RtpPayloadParams() {}
 
 RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
     const EncodedImage& image,
-    const CodecSpecificInfo* codec_specific_info) {
+    const CodecSpecificInfo* codec_specific_info,
+    int64_t shared_frame_id) {
   RTPVideoHeader rtp_video_header;
   if (codec_specific_info) {
     PopulateRtpWithCodecSpecifics(*codec_specific_info, &rtp_video_header);
@@ -128,12 +139,15 @@
 
   SetVideoTiming(image, &rtp_video_header.video_timing);
 
-  // Sets picture id and tl0 pic idx.
+  const bool is_keyframe = image._frameType == kVideoFrameKey;
   const bool first_frame_in_picture =
       (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9)
           ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture
           : true;
-  Set(&rtp_video_header, first_frame_in_picture);
+
+  SetCodecSpecific(&rtp_video_header, first_frame_in_picture);
+  SetGeneric(shared_frame_id, is_keyframe, &rtp_video_header);
+
   return rtp_video_header;
 }
 
@@ -145,8 +159,8 @@
   return state_;
 }
 
-void RtpPayloadParams::Set(RTPVideoHeader* rtp_video_header,
-                           bool first_frame_in_picture) {
+void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
+                                        bool first_frame_in_picture) {
   // Always set picture id. Set tl0_pic_idx iff temporal index is set.
   if (first_frame_in_picture) {
     state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
@@ -179,5 +193,84 @@
       vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
     }
   }
+  // There are currently two generic descriptors in WebRTC. The old descriptor
+  // cannot share a picture id space between simulcast streams, so we use the
+  // per-stream |picture_id| in this case. We let it tag along in |frame_id|
+  // until the old generic format can be removed.
+  // TODO(philipel): Remove this when the new generic format has been fully
+  //                 implemented.
+  if (generic_picture_id_experiment_ &&
+      rtp_video_header->codec == kVideoCodecGeneric) {
+    rtp_video_header->generic.emplace().frame_id = state_.picture_id;
+  }
 }
+
+void RtpPayloadParams::SetGeneric(int64_t frame_id,
+                                  bool is_keyframe,
+                                  RTPVideoHeader* rtp_video_header) {
+  if (rtp_video_header->codec == kVideoCodecVP8) {
+    Vp8ToGeneric(frame_id, is_keyframe, rtp_video_header);
+  }
+
+  // TODO(philipel): Implement VP9 to new generic descriptor.
+  // TODO(philipel): Implement H264 to new generic descriptor.
+  // TODO(philipel): Implement generic codec to new generic descriptor.
+}
+
+void RtpPayloadParams::Vp8ToGeneric(int64_t shared_frame_id,
+                                    bool is_keyframe,
+                                    RTPVideoHeader* rtp_video_header) {
+  const auto& vp8_header =
+      absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
+  const int spatial_index = 0;
+  const int temporal_index =
+      vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0;
+
+  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
+      spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) {
+    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
+                           "used with generic frame descriptor.";
+    return;
+  }
+
+  RTPVideoHeader::GenericDescriptorInfo& generic =
+      rtp_video_header->generic.emplace();
+
+  generic.frame_id = shared_frame_id;
+  generic.spatial_index = spatial_index;
+  generic.temporal_index = temporal_index;
+
+  if (is_keyframe) {
+    RTC_DCHECK_EQ(temporal_index, 0);
+    last_shared_frame_id_[spatial_index].fill(-1);
+    last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
+    return;
+  }
+
+  if (vp8_header.layerSync) {
+    int64_t tl0_frame_id = last_shared_frame_id_[spatial_index][0];
+
+    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
+      if (last_shared_frame_id_[spatial_index][i] < tl0_frame_id) {
+        last_shared_frame_id_[spatial_index][i] = -1;
+      }
+    }
+
+    RTC_DCHECK_GE(tl0_frame_id, 0);
+    RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
+    generic.dependencies.push_back(tl0_frame_id);
+  } else {
+    for (int i = 0; i <= temporal_index; ++i) {
+      int64_t frame_id = last_shared_frame_id_[spatial_index][i];
+
+      if (frame_id != -1) {
+        RTC_DCHECK_LT(frame_id, shared_frame_id);
+        generic.dependencies.push_back(frame_id);
+      }
+    }
+  }
+
+  last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
+}
+
 }  // namespace webrtc
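
Note on the header change this conversion relies on: RTPVideoHeader now keeps
the descriptor fields behind absl::optional<GenericDescriptorInfo> (see the
rtp_video_header.h hunk at the end of this diff), so consumers check the
optional before reading. A minimal sketch of such a consumer, mirroring the
RtpPacketizerGeneric change further down; the surrounding code is assumed:

// Hypothetical consumer; assumes a populated RTPVideoHeader |rtp_video_header|.
absl::optional<uint16_t> picture_id;
if (rtp_video_header.generic) {
  // The old generic packetization carries only the low 15 bits of the frame
  // id, which is why the running |picture_id| is stored in |frame_id| above.
  picture_id.emplace(rtp_video_header.generic->frame_id & 0x7FFF);
}
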
diff --git a/call/rtp_payload_params.h b/call/rtp_payload_params.h
index 0c71a7b..664ac17 100644
--- a/call/rtp_payload_params.h
+++ b/call/rtp_payload_params.h
@@ -17,6 +17,7 @@
 #include "api/video_codecs/video_encoder.h"
 #include "call/rtp_config.h"
 #include "common_types.h"  // NOLINT(build/include)
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 #include "modules/rtp_rtcp/source/rtp_video_header.h"
 
 namespace webrtc {
@@ -29,21 +30,36 @@
 class RtpPayloadParams final {
  public:
   RtpPayloadParams(const uint32_t ssrc, const RtpPayloadState* state);
+  RtpPayloadParams(const RtpPayloadParams& other);
   ~RtpPayloadParams();
 
-  RTPVideoHeader GetRtpVideoHeader(
-      const EncodedImage& image,
-      const CodecSpecificInfo* codec_specific_info);
+  RTPVideoHeader GetRtpVideoHeader(const EncodedImage& image,
+                                   const CodecSpecificInfo* codec_specific_info,
+                                   int64_t shared_frame_id);
 
   uint32_t ssrc() const;
 
   RtpPayloadState state() const;
 
  private:
-  void Set(RTPVideoHeader* rtp_video_header, bool first_frame_in_picture);
+  void SetCodecSpecific(RTPVideoHeader* rtp_video_header,
+                        bool first_frame_in_picture);
+  void SetGeneric(int64_t frame_id,
+                  bool is_keyframe,
+                  RTPVideoHeader* rtp_video_header);
 
+  void Vp8ToGeneric(int64_t shared_frame_id,
+                    bool is_keyframe,
+                    RTPVideoHeader* rtp_video_header);
+
+  // Holds the last shared frame id for a given (spatial, temporal) layer.
+  std::array<std::array<int64_t, RtpGenericFrameDescriptor::kMaxTemporalLayers>,
+             RtpGenericFrameDescriptor::kMaxSpatialLayers>
+      last_shared_frame_id_;
   const uint32_t ssrc_;
   RtpPayloadState state_;
+
+  const bool generic_picture_id_experiment_;
 };
 }  // namespace webrtc
 #endif  // CALL_RTP_PAYLOAD_PARAMS_H_
diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc
index b2339cd..4d0b017 100644
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@@ -9,9 +9,11 @@
  */
 
 #include <memory>
+#include <set>
 
 #include "call/rtp_payload_params.h"
 #include "modules/video_coding/include/video_codec_interface.h"
+#include "test/field_trial.h"
 #include "test/gtest.h"
 
 namespace webrtc {
@@ -23,6 +25,7 @@
 const uint8_t kTemporalIdx = 1;
 const int16_t kInitialPictureId1 = 222;
 const int16_t kInitialTl0PicIdx1 = 99;
+const int64_t kDontCare = 0;
 }  // namespace
 
 TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp8) {
@@ -40,20 +43,28 @@
   memset(&codec_info, 0, sizeof(CodecSpecificInfo));
   codec_info.codecType = kVideoCodecVP8;
   codec_info.codecSpecific.VP8.simulcastIdx = 1;
-  codec_info.codecSpecific.VP8.temporalIdx = kTemporalIdx;
+  codec_info.codecSpecific.VP8.temporalIdx = 0;
   codec_info.codecSpecific.VP8.keyIdx = kNoKeyIdx;
-  codec_info.codecSpecific.VP8.layerSync = true;
+  codec_info.codecSpecific.VP8.layerSync = false;
   codec_info.codecSpecific.VP8.nonReference = true;
 
-  RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  RTPVideoHeader header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
+
+  codec_info.codecType = kVideoCodecVP8;
+  codec_info.codecSpecific.VP8.simulcastIdx = 1;
+  codec_info.codecSpecific.VP8.temporalIdx = 1;
+  codec_info.codecSpecific.VP8.layerSync = true;
+
+  header = params.GetRtpVideoHeader(encoded_image, &codec_info, 1);
 
   EXPECT_EQ(kVideoRotation_90, header.rotation);
   EXPECT_EQ(VideoContentType::SCREENSHARE, header.content_type);
   EXPECT_EQ(1, header.simulcastIdx);
   EXPECT_EQ(kVideoCodecVP8, header.codec);
-  EXPECT_EQ(kPictureId + 1, header.vp8().pictureId);
+  EXPECT_EQ(kPictureId + 2, header.vp8().pictureId);
   EXPECT_EQ(kTemporalIdx, header.vp8().temporalIdx);
-  EXPECT_EQ(kTl0PicIdx, header.vp8().tl0PicIdx);
+  EXPECT_EQ(kTl0PicIdx + 1, header.vp8().tl0PicIdx);
   EXPECT_EQ(kNoKeyIdx, header.vp8().keyIdx);
   EXPECT_TRUE(header.vp8().layerSync);
   EXPECT_TRUE(header.vp8().nonReference);
@@ -78,7 +89,8 @@
   codec_info.codecSpecific.VP9.temporal_idx = 2;
   codec_info.codecSpecific.VP9.end_of_picture = false;
 
-  RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  RTPVideoHeader header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   EXPECT_EQ(kVideoRotation_90, header.rotation);
   EXPECT_EQ(VideoContentType::SCREENSHARE, header.content_type);
@@ -99,7 +111,7 @@
   codec_info.codecSpecific.VP9.spatial_idx += 1;
   codec_info.codecSpecific.VP9.end_of_picture = true;
 
-  header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   EXPECT_EQ(kVideoRotation_90, header.rotation);
   EXPECT_EQ(VideoContentType::SCREENSHARE, header.content_type);
@@ -124,7 +136,8 @@
   codec_info.codecSpecific.H264.packetization_mode =
       H264PacketizationMode::SingleNalUnit;
 
-  RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  RTPVideoHeader header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   EXPECT_EQ(0, header.simulcastIdx);
   EXPECT_EQ(kVideoCodecH264, header.codec);
@@ -144,7 +157,8 @@
   codec_info.codecSpecific.VP8.simulcastIdx = 0;
 
   RtpPayloadParams params(kSsrc1, &state);
-  RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  RTPVideoHeader header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
   EXPECT_EQ(kVideoCodecVP8, header.codec);
   EXPECT_EQ(kInitialPictureId1 + 1, header.vp8().pictureId);
 
@@ -166,7 +180,8 @@
   codec_info.codecSpecific.VP8.temporalIdx = kNoTemporalIdx;
 
   RtpPayloadParams params(kSsrc1, &state);
-  RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  RTPVideoHeader header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
   EXPECT_EQ(kVideoCodecVP8, header.codec);
   EXPECT_EQ(0, header.vp8().pictureId);
 
@@ -189,7 +204,8 @@
   codec_info.codecSpecific.VP8.temporalIdx = 1;
 
   RtpPayloadParams params(kSsrc1, &state);
-  RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  RTPVideoHeader header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   EXPECT_EQ(kVideoCodecVP8, header.codec);
   EXPECT_EQ(kInitialPictureId1 + 1, header.vp8().pictureId);
@@ -198,7 +214,7 @@
   // OnEncodedImage, temporalIdx: 0.
   codec_info.codecSpecific.VP8.temporalIdx = 0;
 
-  header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
   EXPECT_EQ(kVideoCodecVP8, header.codec);
   EXPECT_EQ(kInitialPictureId1 + 2, header.vp8().pictureId);
   EXPECT_EQ(kInitialTl0PicIdx1 + 1, header.vp8().tl0PicIdx);
@@ -223,7 +239,8 @@
   codec_info.codecSpecific.VP9.first_frame_in_picture = true;
 
   RtpPayloadParams params(kSsrc1, &state);
-  RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  RTPVideoHeader header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   EXPECT_EQ(kVideoCodecVP9, header.codec);
   const auto& vp9_header =
@@ -234,7 +251,7 @@
   // OnEncodedImage, temporalIdx: 0.
   codec_info.codecSpecific.VP9.temporal_idx = 0;
 
-  header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   EXPECT_EQ(kVideoCodecVP9, header.codec);
   EXPECT_EQ(kInitialPictureId1 + 2, vp9_header.picture_id);
@@ -243,7 +260,7 @@
   // OnEncodedImage, first_frame_in_picture = false
   codec_info.codecSpecific.VP9.first_frame_in_picture = false;
 
-  header = params.GetRtpVideoHeader(encoded_image, &codec_info);
+  header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   EXPECT_EQ(kVideoCodecVP9, header.codec);
   EXPECT_EQ(kInitialPictureId1 + 2, vp9_header.picture_id);
@@ -253,4 +270,112 @@
   EXPECT_EQ(kInitialPictureId1 + 2, params.state().picture_id);
   EXPECT_EQ(kInitialTl0PicIdx1 + 1, params.state().tl0_pic_idx);
 }
+
+TEST(RtpPayloadParamsTest, PictureIdForOldGenericFormat) {
+  test::ScopedFieldTrials generic_picture_id(
+      "WebRTC-GenericPictureId/Enabled/");
+  RtpPayloadState state{};
+
+  EncodedImage encoded_image;
+  CodecSpecificInfo codec_info{};
+  codec_info.codecType = kVideoCodecGeneric;
+
+  RtpPayloadParams params(kSsrc1, &state);
+  RTPVideoHeader header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
+
+  EXPECT_EQ(kVideoCodecGeneric, header.codec);
+  ASSERT_TRUE(header.generic);
+  EXPECT_EQ(0, header.generic->frame_id);
+
+  header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
+  ASSERT_TRUE(header.generic);
+  EXPECT_EQ(1, header.generic->frame_id);
+}
+
+class RtpPayloadParamsVp8ToGenericTest : public ::testing::Test {
+ public:
+  enum LayerSync { kNoSync, kSync };
+
+  RtpPayloadParamsVp8ToGenericTest() : state_(), params_(123, &state_) {}
+
+  void ConvertAndCheck(int temporal_index,
+                       int64_t shared_frame_id,
+                       FrameType frame_type,
+                       LayerSync layer_sync,
+                       const std::set<int64_t>& expected_deps) {
+    EncodedImage encoded_image;
+    encoded_image._frameType = frame_type;
+
+    CodecSpecificInfo codec_info{};
+    codec_info.codecType = kVideoCodecVP8;
+    codec_info.codecSpecific.VP8.temporalIdx = temporal_index;
+    codec_info.codecSpecific.VP8.layerSync = layer_sync == kSync;
+
+    RTPVideoHeader header =
+        params_.GetRtpVideoHeader(encoded_image, &codec_info, shared_frame_id);
+
+    ASSERT_TRUE(header.generic);
+    EXPECT_TRUE(header.generic->higher_spatial_layers.empty());
+    EXPECT_EQ(header.generic->spatial_index, 0);
+
+    EXPECT_EQ(header.generic->frame_id, shared_frame_id);
+    EXPECT_EQ(header.generic->temporal_index, temporal_index);
+    std::set<int64_t> actual_deps(header.generic->dependencies.begin(),
+                                  header.generic->dependencies.end());
+    EXPECT_EQ(expected_deps, actual_deps);
+  }
+
+ protected:
+  RtpPayloadState state_;
+  RtpPayloadParams params_;
+};
+
+TEST_F(RtpPayloadParamsVp8ToGenericTest, Keyframe) {
+  ConvertAndCheck(0, 0, kVideoFrameKey, kNoSync, {});
+  ConvertAndCheck(0, 1, kVideoFrameDelta, kNoSync, {0});
+  ConvertAndCheck(0, 2, kVideoFrameKey, kNoSync, {});
+}
+
+TEST_F(RtpPayloadParamsVp8ToGenericTest, TooHighTemporalIndex) {
+  ConvertAndCheck(0, 0, kVideoFrameKey, kNoSync, {});
+
+  EncodedImage encoded_image;
+  encoded_image._frameType = kVideoFrameDelta;
+  CodecSpecificInfo codec_info{};
+  codec_info.codecType = kVideoCodecVP8;
+  codec_info.codecSpecific.VP8.temporalIdx =
+      RtpGenericFrameDescriptor::kMaxTemporalLayers;
+  codec_info.codecSpecific.VP8.layerSync = false;
+
+  RTPVideoHeader header =
+      params_.GetRtpVideoHeader(encoded_image, &codec_info, 1);
+  EXPECT_FALSE(header.generic);
+}
+
+TEST_F(RtpPayloadParamsVp8ToGenericTest, LayerSync) {
+  // Temporal layer pattern: 0 2 1 2 0 2 1 2.
+  ConvertAndCheck(0, 0, kVideoFrameKey, kNoSync, {});
+  ConvertAndCheck(2, 1, kVideoFrameDelta, kNoSync, {0});
+  ConvertAndCheck(1, 2, kVideoFrameDelta, kNoSync, {0});
+  ConvertAndCheck(2, 3, kVideoFrameDelta, kNoSync, {0, 1, 2});
+
+  ConvertAndCheck(0, 4, kVideoFrameDelta, kNoSync, {0});
+  ConvertAndCheck(2, 5, kVideoFrameDelta, kNoSync, {2, 3, 4});
+  ConvertAndCheck(1, 6, kVideoFrameDelta, kSync, {4});  // layer sync
+  ConvertAndCheck(2, 7, kVideoFrameDelta, kNoSync, {4, 5, 6});
+}
+
+TEST_F(RtpPayloadParamsVp8ToGenericTest, FrameIdGaps) {
+  // Temporal layer pattern: 0 1 0 1, with gaps in the shared frame ids.
+  ConvertAndCheck(0, 0, kVideoFrameKey, kNoSync, {});
+  ConvertAndCheck(1, 1, kVideoFrameDelta, kNoSync, {0});
+
+  ConvertAndCheck(0, 5, kVideoFrameDelta, kNoSync, {0});
+  ConvertAndCheck(1, 10, kVideoFrameDelta, kNoSync, {1, 5});
+
+  ConvertAndCheck(0, 15, kVideoFrameDelta, kNoSync, {5});
+  ConvertAndCheck(1, 20, kVideoFrameDelta, kNoSync, {10, 15});
+}
+
 }  // namespace webrtc
diff --git a/call/rtp_video_sender.cc b/call/rtp_video_sender.cc
index 23116b2..723632b 100644
--- a/call/rtp_video_sender.cc
+++ b/call/rtp_video_sender.cc
@@ -319,10 +319,11 @@
   if (!active_)
     return Result(Result::ERROR_SEND_FAILED);
 
+  shared_frame_id_++;
   size_t stream_index = GetSimulcastIdx(codec_specific_info).value_or(0);
   RTC_DCHECK_LT(stream_index, rtp_modules_.size());
   RTPVideoHeader rtp_video_header = params_[stream_index].GetRtpVideoHeader(
-      encoded_image, codec_specific_info);
+      encoded_image, codec_specific_info, shared_frame_id_);
 
   uint32_t frame_id;
   if (!rtp_modules_[stream_index]->Sending()) {
diff --git a/modules/rtp_rtcp/source/rtp_format_video_generic.cc b/modules/rtp_rtcp/source/rtp_format_video_generic.cc
index 1a8efc1..081a3bf 100644
--- a/modules/rtp_rtcp/source/rtp_format_video_generic.cc
+++ b/modules/rtp_rtcp/source/rtp_format_video_generic.cc
@@ -25,10 +25,10 @@
     FrameType frame_type,
     size_t max_payload_len,
     size_t last_packet_reduction_len)
-    : picture_id_(
-          rtp_video_header.frame_id != kNoPictureId
-              ? absl::optional<uint16_t>(rtp_video_header.frame_id & 0x7FFF)
-              : absl::nullopt),
+    : picture_id_(rtp_video_header.generic
+                      ? absl::optional<uint16_t>(
+                            rtp_video_header.generic->frame_id & 0x7FFF)
+                      : absl::nullopt),
       payload_data_(nullptr),
       payload_size_(0),
       max_payload_len_(max_payload_len - kGenericHeaderLength -
@@ -167,7 +167,8 @@
       RTC_LOG(LS_WARNING) << "Too short payload for generic header.";
       return false;
     }
-    parsed_payload->video_header().frame_id =
+    parsed_payload->video_header().generic.emplace();
+    parsed_payload->video_header().generic->frame_id =
         ((payload_data[0] & 0x7F) << 8) | payload_data[1];
     payload_data += kExtendedHeaderLength;
     payload_data_length -= kExtendedHeaderLength;
diff --git a/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc b/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc
index 35ffa67..e77dabf 100644
--- a/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc
@@ -195,7 +195,7 @@
   const size_t kPayloadSize = 13;
 
   RTPVideoHeader rtp_video_header;
-  rtp_video_header.frame_id = 37;
+  rtp_video_header.generic.emplace().frame_id = 37;
   RtpPacketizerGeneric packetizer(rtp_video_header, kVideoFrameKey,
                                   kMaxPayloadLen, kLastPacketReductionLen);
   packetizer.SetPayloadData(kTestPayload, kPayloadSize, nullptr);
@@ -216,7 +216,7 @@
   const size_t kPayloadSize = 13;
 
   RTPVideoHeader rtp_video_header;
-  rtp_video_header.frame_id = 0x8137;
+  rtp_video_header.generic.emplace().frame_id = 0x8137;
   RtpPacketizerGeneric packetizer(rtp_video_header, kVideoFrameKey,
                                   kMaxPayloadLen, kLastPacketReductionLen);
   packetizer.SetPayloadData(kTestPayload, kPayloadSize, nullptr);
@@ -255,7 +255,7 @@
   RtpDepacketizer::ParsedPayload parsed_payload;
   depacketizer.Parse(&parsed_payload, payload, kPayloadLen);
 
-  EXPECT_EQ(kNoPictureId, parsed_payload.video_header().frame_id);
+  EXPECT_FALSE(parsed_payload.video_header().generic);
 }
 
 TEST(RtpDepacketizerVideoGeneric, ExtendedHeaderParsesFrameId) {
@@ -266,7 +266,8 @@
   RtpDepacketizer::ParsedPayload parsed_payload;
   depacketizer.Parse(&parsed_payload, payload, kPayloadLen);
 
-  EXPECT_EQ(0x1337, parsed_payload.video_header().frame_id);
+  ASSERT_TRUE(parsed_payload.video_header().generic);
+  EXPECT_EQ(0x1337, parsed_payload.video_header().generic->frame_id);
 }
 
 }  // namespace webrtc
diff --git a/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.cc b/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.cc
index 080cac7..b5b8ce5 100644
--- a/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.cc
+++ b/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.cc
@@ -14,7 +14,9 @@
 
 namespace webrtc {
 
-constexpr size_t RtpGenericFrameDescriptor::kMaxNumFrameDependencies;
+constexpr int RtpGenericFrameDescriptor::kMaxNumFrameDependencies;
+constexpr int RtpGenericFrameDescriptor::kMaxTemporalLayers;
+constexpr int RtpGenericFrameDescriptor::kMaxSpatialLayers;
 
 RtpGenericFrameDescriptor::RtpGenericFrameDescriptor() = default;
 
@@ -25,7 +27,7 @@
 
 void RtpGenericFrameDescriptor::SetTemporalLayer(int temporal_layer) {
   RTC_DCHECK_GE(temporal_layer, 0);
-  RTC_DCHECK_LE(temporal_layer, 7);
+  RTC_DCHECK_LT(temporal_layer, kMaxTemporalLayers);
   temporal_layer_ = temporal_layer;
 }
 
diff --git a/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h b/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h
index 51a9ac0..e4b775e 100644
--- a/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h
+++ b/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h
@@ -20,6 +20,10 @@
 // Data to put on the wire for FrameDescriptor rtp header extension.
 class RtpGenericFrameDescriptor {
  public:
+  static constexpr int kMaxNumFrameDependencies = 8;
+  static constexpr int kMaxTemporalLayers = 8;
+  static constexpr int kMaxSpatialLayers = 8;
+
   RtpGenericFrameDescriptor();
 
   bool FirstPacketInSubFrame() const { return beginning_of_subframe_; }
@@ -51,8 +55,6 @@
   bool AddFrameDependencyDiff(uint16_t fdiff);
 
  private:
-  static constexpr size_t kMaxNumFrameDependencies = 8;
-
   bool beginning_of_subframe_ = false;
   bool end_of_subframe_ = false;
   bool beginning_of_frame_ = false;
diff --git a/modules/rtp_rtcp/source/rtp_video_header.cc b/modules/rtp_rtcp/source/rtp_video_header.cc
index cab0d2e..a3ee8ba 100644
--- a/modules/rtp_rtcp/source/rtp_video_header.cc
+++ b/modules/rtp_rtcp/source/rtp_video_header.cc
@@ -16,4 +16,9 @@
 RTPVideoHeader::RTPVideoHeader(const RTPVideoHeader& other) = default;
 RTPVideoHeader::~RTPVideoHeader() = default;
 
+RTPVideoHeader::GenericDescriptorInfo::GenericDescriptorInfo() = default;
+RTPVideoHeader::GenericDescriptorInfo::GenericDescriptorInfo(
+    const GenericDescriptorInfo& other) = default;
+RTPVideoHeader::GenericDescriptorInfo::~GenericDescriptorInfo() = default;
+
 }  // namespace webrtc
diff --git a/modules/rtp_rtcp/source/rtp_video_header.h b/modules/rtp_rtcp/source/rtp_video_header.h
index edeb48d..74b48ca 100644
--- a/modules/rtp_rtcp/source/rtp_video_header.h
+++ b/modules/rtp_rtcp/source/rtp_video_header.h
@@ -25,6 +25,18 @@
     absl::variant<RTPVideoHeaderVP8, RTPVideoHeaderVP9, RTPVideoHeaderH264>;
 
 struct RTPVideoHeader {
+  struct GenericDescriptorInfo {
+    GenericDescriptorInfo();
+    GenericDescriptorInfo(const GenericDescriptorInfo& other);
+    ~GenericDescriptorInfo();
+
+    int64_t frame_id = 0;
+    int spatial_index = 0;
+    int temporal_index = 0;
+    absl::InlinedVector<int64_t, 5> dependencies;
+    absl::InlinedVector<int, 5> higher_spatial_layers;
+  };
+
   RTPVideoHeader();
   RTPVideoHeader(const RTPVideoHeader& other);
 
@@ -45,12 +57,7 @@
     return absl::get<RTPVideoHeaderVP8>(video_type_header);
   }
 
-  // Information for generic codec descriptor.
-  int64_t frame_id = kNoPictureId;
-  int spatial_index = 0;
-  int temporal_index = 0;
-  absl::InlinedVector<int64_t, 5> dependencies;
-  absl::InlinedVector<int, 5> higher_spatial_layers;
+  absl::optional<GenericDescriptorInfo> generic;
 
   uint16_t width = 0;
   uint16_t height = 0;