Calculate VP9 generic info from vp9 specific info
Encoder wrapper can provide more accurate vp9 generic info, but
each vp9 encoder wrapper would need to fill this structure.
Inserting this code into the call module allows having some generic info for
all vp9 encoder wrappers.
Bug: webrtc:11999
Change-Id: I82490d24454815aa29bbb1c86f351e0b37292d59
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/214491
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33836}
diff --git a/call/BUILD.gn b/call/BUILD.gn
index 0e3e89d..b7cac1c 100644
--- a/call/BUILD.gn
+++ b/call/BUILD.gn
@@ -454,6 +454,7 @@
"../test:audio_codec_mocks",
"../test:direct_transport",
"../test:encoder_settings",
+ "../test:explicit_key_value_config",
"../test:fake_video_codecs",
"../test:field_trial",
"../test:mock_frame_transformer",
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index 18b1138..0bcebb0 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -131,6 +131,9 @@
: ssrc_(ssrc),
generic_picture_id_experiment_(
absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
+ "Enabled")),
+ simulate_generic_vp9_(
+ absl::StartsWith(trials.Lookup("WebRTC-Vp9DependencyDescriptor"),
"Enabled")) {
for (auto& spatial_layer : last_shared_frame_id_)
spatial_layer.fill(-1);
@@ -277,8 +280,13 @@
}
return;
case VideoCodecType::kVideoCodecVP9:
+ if (simulate_generic_vp9_ && codec_specific_info != nullptr) {
+ Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id,
+ *rtp_video_header);
+ }
+ return;
case VideoCodecType::kVideoCodecAV1:
- // TODO(philipel): Implement VP9 and AV1 to generic descriptor.
+ // TODO(philipel): Implement AV1 to generic descriptor.
return;
case VideoCodecType::kVideoCodecH264:
if (codec_specific_info) {
@@ -399,6 +407,111 @@
}
}
+void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
+ int64_t shared_frame_id,
+ RTPVideoHeader& rtp_video_header) {
+ const auto& vp9_header =
+ absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
+ const int num_spatial_layers = vp9_header.num_spatial_layers;
+ const int num_temporal_layers = kMaxTemporalStreams;
+
+ int spatial_index =
+ vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
+ int temporal_index =
+ vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;
+
+ if (spatial_index >= num_spatial_layers ||
+ temporal_index >= num_temporal_layers ||
+ num_spatial_layers > RtpGenericFrameDescriptor::kMaxSpatialLayers) {
+ // Prefer to generate no generic layering than an inconsistent one.
+ return;
+ }
+
+ RTPVideoHeader::GenericDescriptorInfo& result =
+ rtp_video_header.generic.emplace();
+
+ result.frame_id = shared_frame_id;
+ result.spatial_index = spatial_index;
+ result.temporal_index = temporal_index;
+
+ result.decode_target_indications.reserve(num_spatial_layers *
+ num_temporal_layers);
+ for (int sid = 0; sid < num_spatial_layers; ++sid) {
+ for (int tid = 0; tid < num_temporal_layers; ++tid) {
+ DecodeTargetIndication dti;
+ if (sid < spatial_index || tid < temporal_index) {
+ dti = DecodeTargetIndication::kNotPresent;
+ } else if (spatial_index != sid &&
+ vp9_header.non_ref_for_inter_layer_pred) {
+ dti = DecodeTargetIndication::kNotPresent;
+ } else if (sid == spatial_index && tid == temporal_index) {
+ // Assume that if frame is decodable, all of its own layer is decodable.
+ dti = DecodeTargetIndication::kSwitch;
+ } else if (sid == spatial_index && vp9_header.temporal_up_switch) {
+ dti = DecodeTargetIndication::kSwitch;
+ } else if (!vp9_header.inter_pic_predicted) {
+ // Key frame or spatial upswitch
+ dti = DecodeTargetIndication::kSwitch;
+ } else {
+ // Make no other assumptions. That should be safe, though suboptimal.
+ // To provide more accurate dti, encoder wrapper should fill in
+ // CodecSpecificInfo::generic_frame_info
+ dti = DecodeTargetIndication::kRequired;
+ }
+ result.decode_target_indications.push_back(dti);
+ }
+ }
+
+ // Calculate frame dependencies.
+ static constexpr int kPictureDiffLimit = 128;
+ if (last_vp9_frame_id_.empty()) {
+ // Create the array only if it is ever used.
+ last_vp9_frame_id_.resize(kPictureDiffLimit);
+ }
+ if (vp9_header.inter_layer_predicted && spatial_index > 0) {
+ result.dependencies.push_back(
+ last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
+ [spatial_index - 1]);
+ }
+ if (vp9_header.inter_pic_predicted) {
+ for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
+      // picture_id is a 15-bit number that wraps around. Though underflow may
+      // produce a picture_id that exceeds 2^15, it is ok because in this
+      // code block only the last 7 bits of the picture_id are used.
+ uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
+ result.dependencies.push_back(
+ last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
+ }
+ }
+ last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit][spatial_index] =
+ shared_frame_id;
+
+  // Calculate chains, assuming the chain includes all frames with temporal_id = 0.
+ if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
+ // Assume frames without dependencies also reset chains.
+ for (int sid = spatial_index; sid < num_spatial_layers; ++sid) {
+ chain_last_frame_id_[sid] = -1;
+ }
+ }
+ result.chain_diffs.resize(num_spatial_layers);
+ for (int sid = 0; sid < num_spatial_layers; ++sid) {
+ if (chain_last_frame_id_[sid] == -1) {
+ result.chain_diffs[sid] = 0;
+ continue;
+ }
+ result.chain_diffs[sid] = shared_frame_id - chain_last_frame_id_[sid];
+ }
+
+ if (temporal_index == 0) {
+ chain_last_frame_id_[spatial_index] = shared_frame_id;
+ if (!vp9_header.non_ref_for_inter_layer_pred) {
+ for (int sid = spatial_index + 1; sid < num_spatial_layers; ++sid) {
+ chain_last_frame_id_[sid] = shared_frame_id;
+ }
+ }
+ }
+}
+
void RtpPayloadParams::SetDependenciesVp8Deprecated(
const CodecSpecificInfoVP8& vp8_info,
int64_t shared_frame_id,
diff --git a/call/rtp_payload_params.h b/call/rtp_payload_params.h
index ebfdd46..2f37400 100644
--- a/call/rtp_payload_params.h
+++ b/call/rtp_payload_params.h
@@ -12,6 +12,7 @@
#define CALL_RTP_PAYLOAD_PARAMS_H_
#include <array>
+#include <vector>
#include "absl/types/optional.h"
#include "api/transport/webrtc_key_value_config.h"
@@ -61,6 +62,10 @@
bool is_keyframe,
RTPVideoHeader* rtp_video_header);
+ void Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
+ int64_t shared_frame_id,
+ RTPVideoHeader& rtp_video_header);
+
void H264ToGeneric(const CodecSpecificInfoH264& h264_info,
int64_t shared_frame_id,
bool is_keyframe,
@@ -94,6 +99,13 @@
std::array<std::array<int64_t, RtpGenericFrameDescriptor::kMaxTemporalLayers>,
RtpGenericFrameDescriptor::kMaxSpatialLayers>
last_shared_frame_id_;
+  // Circular buffer of frame ids for the last 128 vp9 pictures.
+  // Ids for the `picture_id` are stored at the index `picture_id % 128`.
+ std::vector<std::array<int64_t, RtpGenericFrameDescriptor::kMaxSpatialLayers>>
+ last_vp9_frame_id_;
+  // Last frame id for each chain.
+ std::array<int64_t, RtpGenericFrameDescriptor::kMaxSpatialLayers>
+ chain_last_frame_id_;
// TODO(eladalon): When additional codecs are supported,
// set kMaxCodecBuffersCount to the max() of these codecs' buffer count.
@@ -113,6 +125,7 @@
RtpPayloadState state_;
const bool generic_picture_id_experiment_;
+ const bool simulate_generic_vp9_;
};
} // namespace webrtc
#endif // CALL_RTP_PAYLOAD_PARAMS_H_
diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc
index 56ed2cd..7db38db 100644
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@@ -26,10 +26,12 @@
#include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
#include "modules/video_coding/include/video_codec_interface.h"
+#include "test/explicit_key_value_config.h"
#include "test/field_trial.h"
#include "test/gmock.h"
#include "test/gtest.h"
+using ::testing::Each;
using ::testing::ElementsAre;
using ::testing::IsEmpty;
using ::testing::SizeIs;
@@ -461,6 +463,410 @@
ConvertAndCheck(1, 20, VideoFrameType::kVideoFrameDelta, kNoSync, {10, 15});
}
+class RtpPayloadParamsVp9ToGenericTest : public ::testing::Test {
+ protected:
+ RtpPayloadParamsVp9ToGenericTest()
+ : field_trials_("WebRTC-Vp9DependencyDescriptor/Enabled/") {}
+
+ test::ExplicitKeyValueConfig field_trials_;
+ RtpPayloadState state_;
+};
+
+TEST_F(RtpPayloadParamsVp9ToGenericTest, NoScalability) {
+ RtpPayloadParams params(/*ssrc=*/123, &state_, field_trials_);
+
+ EncodedImage encoded_image;
+ CodecSpecificInfo codec_info;
+ codec_info.codecType = kVideoCodecVP9;
+ codec_info.codecSpecific.VP9.num_spatial_layers = 1;
+ codec_info.codecSpecific.VP9.temporal_idx = kNoTemporalIdx;
+ codec_info.codecSpecific.VP9.first_frame_in_picture = true;
+ codec_info.end_of_picture = true;
+
+ // Key frame.
+ encoded_image._frameType = VideoFrameType::kVideoFrameKey;
+ codec_info.codecSpecific.VP9.inter_pic_predicted = false;
+ codec_info.codecSpecific.VP9.num_ref_pics = 0;
+ RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info,
+ /*shared_frame_id=*/1);
+
+ ASSERT_TRUE(header.generic);
+ EXPECT_EQ(header.generic->spatial_index, 0);
+ EXPECT_EQ(header.generic->temporal_index, 0);
+ EXPECT_EQ(header.generic->frame_id, 1);
+ ASSERT_THAT(header.generic->decode_target_indications, Not(IsEmpty()));
+ EXPECT_EQ(header.generic->decode_target_indications[0],
+ DecodeTargetIndication::kSwitch);
+ EXPECT_THAT(header.generic->dependencies, IsEmpty());
+ EXPECT_THAT(header.generic->chain_diffs, ElementsAre(0));
+
+ // Delta frame.
+ encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
+ codec_info.codecSpecific.VP9.inter_pic_predicted = true;
+ codec_info.codecSpecific.VP9.num_ref_pics = 1;
+ codec_info.codecSpecific.VP9.p_diff[0] = 1;
+ header = params.GetRtpVideoHeader(encoded_image, &codec_info,
+ /*shared_frame_id=*/3);
+
+ ASSERT_TRUE(header.generic);
+ EXPECT_EQ(header.generic->spatial_index, 0);
+ EXPECT_EQ(header.generic->temporal_index, 0);
+ EXPECT_EQ(header.generic->frame_id, 3);
+ ASSERT_THAT(header.generic->decode_target_indications, Not(IsEmpty()));
+ EXPECT_EQ(header.generic->decode_target_indications[0],
+ DecodeTargetIndication::kSwitch);
+ EXPECT_THAT(header.generic->dependencies, ElementsAre(1));
+ // previous frame in the chain was frame#1,
+ EXPECT_THAT(header.generic->chain_diffs, ElementsAre(3 - 1));
+}
+
+TEST_F(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith2Layers) {
+ // Test with 2 temporal layers structure that is not used by webrtc:
+ // 1---3 5
+ // / / / ...
+ // 0---2---4---
+ RtpPayloadParams params(/*ssrc=*/123, &state_, field_trials_);
+
+ EncodedImage image;
+ CodecSpecificInfo info;
+ info.codecType = kVideoCodecVP9;
+ info.codecSpecific.VP9.num_spatial_layers = 1;
+ info.codecSpecific.VP9.first_frame_in_picture = true;
+ info.end_of_picture = true;
+
+ RTPVideoHeader headers[6];
+ // Key frame.
+ image._frameType = VideoFrameType::kVideoFrameKey;
+ info.codecSpecific.VP9.inter_pic_predicted = false;
+ info.codecSpecific.VP9.num_ref_pics = 0;
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 0;
+ headers[0] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/1);
+
+ // Delta frames.
+ info.codecSpecific.VP9.inter_pic_predicted = true;
+ image._frameType = VideoFrameType::kVideoFrameDelta;
+
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 1;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 1;
+ headers[1] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/3);
+
+ info.codecSpecific.VP9.temporal_up_switch = false;
+ info.codecSpecific.VP9.temporal_idx = 0;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 2;
+ headers[2] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/5);
+
+ info.codecSpecific.VP9.temporal_up_switch = false;
+ info.codecSpecific.VP9.temporal_idx = 1;
+ info.codecSpecific.VP9.num_ref_pics = 2;
+ info.codecSpecific.VP9.p_diff[0] = 1;
+ info.codecSpecific.VP9.p_diff[1] = 2;
+ headers[3] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/7);
+
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 0;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 2;
+ headers[4] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/9);
+
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 1;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 1;
+ headers[5] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/11);
+
+ ASSERT_TRUE(headers[0].generic);
+ int num_decode_targets = headers[0].generic->decode_target_indications.size();
+ ASSERT_GE(num_decode_targets, 2);
+
+ for (int frame_idx = 0; frame_idx < 6; ++frame_idx) {
+ const RTPVideoHeader& header = headers[frame_idx];
+ ASSERT_TRUE(header.generic);
+ EXPECT_EQ(header.generic->spatial_index, 0);
+ EXPECT_EQ(header.generic->temporal_index, frame_idx % 2);
+ EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
+ ASSERT_THAT(header.generic->decode_target_indications,
+ SizeIs(num_decode_targets));
+ // Expect only T0 frames are needed for the 1st decode target.
+ if (header.generic->temporal_index == 0) {
+ EXPECT_NE(header.generic->decode_target_indications[0],
+ DecodeTargetIndication::kNotPresent);
+ } else {
+ EXPECT_EQ(header.generic->decode_target_indications[0],
+ DecodeTargetIndication::kNotPresent);
+ }
+ // Expect all frames are needed for the 2nd decode target.
+ EXPECT_NE(header.generic->decode_target_indications[1],
+ DecodeTargetIndication::kNotPresent);
+ }
+
+ // Expect switch at every beginning of the pattern.
+ EXPECT_THAT(headers[0].generic->decode_target_indications,
+ Each(DecodeTargetIndication::kSwitch));
+ EXPECT_THAT(headers[4].generic->decode_target_indications,
+ Each(DecodeTargetIndication::kSwitch));
+
+ EXPECT_THAT(headers[0].generic->dependencies, IsEmpty()); // T0, 1
+ EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1)); // T1, 3
+ EXPECT_THAT(headers[2].generic->dependencies, ElementsAre(1)); // T0, 5
+ EXPECT_THAT(headers[3].generic->dependencies, ElementsAre(5, 3)); // T1, 7
+ EXPECT_THAT(headers[4].generic->dependencies, ElementsAre(5)); // T0, 9
+ EXPECT_THAT(headers[5].generic->dependencies, ElementsAre(9)); // T1, 11
+
+ EXPECT_THAT(headers[0].generic->chain_diffs, ElementsAre(0));
+ EXPECT_THAT(headers[1].generic->chain_diffs, ElementsAre(2));
+ EXPECT_THAT(headers[2].generic->chain_diffs, ElementsAre(4));
+ EXPECT_THAT(headers[3].generic->chain_diffs, ElementsAre(2));
+ EXPECT_THAT(headers[4].generic->chain_diffs, ElementsAre(4));
+ EXPECT_THAT(headers[5].generic->chain_diffs, ElementsAre(2));
+}
+
+TEST_F(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith3Layers) {
+ // Test with 3 temporal layers structure that is not used by webrtc, but used
+ // by chromium: https://imgur.com/pURAGvp
+ RtpPayloadParams params(/*ssrc=*/123, &state_, field_trials_);
+
+ EncodedImage image;
+ CodecSpecificInfo info;
+ info.codecType = kVideoCodecVP9;
+ info.codecSpecific.VP9.num_spatial_layers = 1;
+ info.codecSpecific.VP9.first_frame_in_picture = true;
+ info.end_of_picture = true;
+
+ RTPVideoHeader headers[9];
+ // Key frame.
+ image._frameType = VideoFrameType::kVideoFrameKey;
+ info.codecSpecific.VP9.inter_pic_predicted = false;
+ info.codecSpecific.VP9.num_ref_pics = 0;
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 0;
+ headers[0] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/1);
+
+ // Delta frames.
+ info.codecSpecific.VP9.inter_pic_predicted = true;
+ image._frameType = VideoFrameType::kVideoFrameDelta;
+
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 2;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 1;
+ headers[1] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/3);
+
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 1;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 2;
+ headers[2] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/5);
+
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 2;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 1;
+ headers[3] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/7);
+
+ info.codecSpecific.VP9.temporal_up_switch = false;
+ info.codecSpecific.VP9.temporal_idx = 0;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 4;
+ headers[4] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/9);
+
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 2;
+ info.codecSpecific.VP9.num_ref_pics = 2;
+ info.codecSpecific.VP9.p_diff[0] = 1;
+ info.codecSpecific.VP9.p_diff[1] = 3;
+ headers[5] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/11);
+
+ info.codecSpecific.VP9.temporal_up_switch = false;
+ info.codecSpecific.VP9.temporal_idx = 1;
+ info.codecSpecific.VP9.num_ref_pics = 2;
+ info.codecSpecific.VP9.p_diff[0] = 2;
+ info.codecSpecific.VP9.p_diff[1] = 4;
+ headers[6] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/13);
+
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 2;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 1;
+ headers[7] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/15);
+
+ info.codecSpecific.VP9.temporal_up_switch = true;
+ info.codecSpecific.VP9.temporal_idx = 0;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 4;
+ headers[8] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/17);
+
+ ASSERT_TRUE(headers[0].generic);
+ int num_decode_targets = headers[0].generic->decode_target_indications.size();
+ ASSERT_GE(num_decode_targets, 3);
+
+ for (int frame_idx = 0; frame_idx < 9; ++frame_idx) {
+ const RTPVideoHeader& header = headers[frame_idx];
+ ASSERT_TRUE(header.generic);
+ EXPECT_EQ(header.generic->spatial_index, 0);
+ EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
+ ASSERT_THAT(header.generic->decode_target_indications,
+ SizeIs(num_decode_targets));
+ // Expect only T0 frames are needed for the 1st decode target.
+ if (header.generic->temporal_index == 0) {
+ EXPECT_NE(header.generic->decode_target_indications[0],
+ DecodeTargetIndication::kNotPresent);
+ } else {
+ EXPECT_EQ(header.generic->decode_target_indications[0],
+ DecodeTargetIndication::kNotPresent);
+ }
+ // Expect only T0 and T1 frames are needed for the 2nd decode target.
+ if (header.generic->temporal_index <= 1) {
+ EXPECT_NE(header.generic->decode_target_indications[1],
+ DecodeTargetIndication::kNotPresent);
+ } else {
+ EXPECT_EQ(header.generic->decode_target_indications[1],
+ DecodeTargetIndication::kNotPresent);
+ }
+ // Expect all frames are needed for the 3rd decode target.
+ EXPECT_NE(header.generic->decode_target_indications[2],
+ DecodeTargetIndication::kNotPresent);
+ }
+
+ EXPECT_EQ(headers[0].generic->temporal_index, 0);
+ EXPECT_EQ(headers[1].generic->temporal_index, 2);
+ EXPECT_EQ(headers[2].generic->temporal_index, 1);
+ EXPECT_EQ(headers[3].generic->temporal_index, 2);
+ EXPECT_EQ(headers[4].generic->temporal_index, 0);
+ EXPECT_EQ(headers[5].generic->temporal_index, 2);
+ EXPECT_EQ(headers[6].generic->temporal_index, 1);
+ EXPECT_EQ(headers[7].generic->temporal_index, 2);
+ EXPECT_EQ(headers[8].generic->temporal_index, 0);
+
+ // Expect switch at every beginning of the pattern.
+ EXPECT_THAT(headers[0].generic->decode_target_indications,
+ Each(DecodeTargetIndication::kSwitch));
+ EXPECT_THAT(headers[8].generic->decode_target_indications,
+ Each(DecodeTargetIndication::kSwitch));
+
+ EXPECT_THAT(headers[0].generic->dependencies, IsEmpty()); // T0, 1
+ EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1)); // T2, 3
+ EXPECT_THAT(headers[2].generic->dependencies, ElementsAre(1)); // T1, 5
+ EXPECT_THAT(headers[3].generic->dependencies, ElementsAre(5)); // T2, 7
+ EXPECT_THAT(headers[4].generic->dependencies, ElementsAre(1)); // T0, 9
+ EXPECT_THAT(headers[5].generic->dependencies, ElementsAre(9, 5)); // T2, 11
+ EXPECT_THAT(headers[6].generic->dependencies, ElementsAre(9, 5)); // T1, 13
+ EXPECT_THAT(headers[7].generic->dependencies, ElementsAre(13)); // T2, 15
+ EXPECT_THAT(headers[8].generic->dependencies, ElementsAre(9)); // T0, 17
+
+ EXPECT_THAT(headers[0].generic->chain_diffs, ElementsAre(0));
+ EXPECT_THAT(headers[1].generic->chain_diffs, ElementsAre(2));
+ EXPECT_THAT(headers[2].generic->chain_diffs, ElementsAre(4));
+ EXPECT_THAT(headers[3].generic->chain_diffs, ElementsAre(6));
+ EXPECT_THAT(headers[4].generic->chain_diffs, ElementsAre(8));
+ EXPECT_THAT(headers[5].generic->chain_diffs, ElementsAre(2));
+ EXPECT_THAT(headers[6].generic->chain_diffs, ElementsAre(4));
+ EXPECT_THAT(headers[7].generic->chain_diffs, ElementsAre(6));
+ EXPECT_THAT(headers[8].generic->chain_diffs, ElementsAre(8));
+}
+
+TEST_F(RtpPayloadParamsVp9ToGenericTest, SpatialScalabilityKSvc) {
+ // 1---3--
+ // | ...
+ // 0---2--
+ RtpPayloadParams params(/*ssrc=*/123, &state_, field_trials_);
+
+ EncodedImage image;
+ CodecSpecificInfo info;
+ info.codecType = kVideoCodecVP9;
+ info.codecSpecific.VP9.num_spatial_layers = 2;
+ info.codecSpecific.VP9.first_frame_in_picture = true;
+
+ RTPVideoHeader headers[4];
+ // Key frame.
+ image._frameType = VideoFrameType::kVideoFrameKey;
+ image.SetSpatialIndex(0);
+ info.codecSpecific.VP9.inter_pic_predicted = false;
+ info.codecSpecific.VP9.inter_layer_predicted = false;
+ info.codecSpecific.VP9.non_ref_for_inter_layer_pred = false;
+ info.codecSpecific.VP9.num_ref_pics = 0;
+ info.codecSpecific.VP9.first_frame_in_picture = true;
+ info.end_of_picture = false;
+ headers[0] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/1);
+
+ image.SetSpatialIndex(1);
+ info.codecSpecific.VP9.inter_layer_predicted = true;
+ info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
+ info.codecSpecific.VP9.first_frame_in_picture = false;
+ info.end_of_picture = true;
+ headers[1] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/3);
+
+ // Delta frames.
+ info.codecSpecific.VP9.inter_pic_predicted = true;
+ image._frameType = VideoFrameType::kVideoFrameDelta;
+ info.codecSpecific.VP9.num_ref_pics = 1;
+ info.codecSpecific.VP9.p_diff[0] = 1;
+
+ image.SetSpatialIndex(0);
+ info.codecSpecific.VP9.inter_layer_predicted = false;
+ info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
+ info.codecSpecific.VP9.first_frame_in_picture = true;
+ info.end_of_picture = false;
+ headers[2] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/5);
+
+ image.SetSpatialIndex(1);
+ info.codecSpecific.VP9.inter_layer_predicted = false;
+ info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
+ info.codecSpecific.VP9.first_frame_in_picture = false;
+ info.end_of_picture = true;
+ headers[3] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/7);
+
+ ASSERT_TRUE(headers[0].generic);
+ int num_decode_targets = headers[0].generic->decode_target_indications.size();
+ // Rely on implementation detail there are always kMaxTemporalStreams temporal
+ // layers assumed, in particular assume Decode Target#0 matches layer S0T0,
+ // and Decode Target#kMaxTemporalStreams matches layer S1T0.
+ ASSERT_EQ(num_decode_targets, kMaxTemporalStreams * 2);
+
+ for (int frame_idx = 0; frame_idx < 4; ++frame_idx) {
+ const RTPVideoHeader& header = headers[frame_idx];
+ ASSERT_TRUE(header.generic);
+ EXPECT_EQ(header.generic->spatial_index, frame_idx % 2);
+ EXPECT_EQ(header.generic->temporal_index, 0);
+ EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
+ ASSERT_THAT(header.generic->decode_target_indications,
+ SizeIs(num_decode_targets));
+ }
+
+ // Expect S0 key frame is switch for both Decode Targets.
+ EXPECT_EQ(headers[0].generic->decode_target_indications[0],
+ DecodeTargetIndication::kSwitch);
+ EXPECT_EQ(headers[0].generic->decode_target_indications[kMaxTemporalStreams],
+ DecodeTargetIndication::kSwitch);
+ // S1 key frame is only needed for the 2nd Decode Targets.
+ EXPECT_EQ(headers[1].generic->decode_target_indications[0],
+ DecodeTargetIndication::kNotPresent);
+ EXPECT_NE(headers[1].generic->decode_target_indications[kMaxTemporalStreams],
+ DecodeTargetIndication::kNotPresent);
+ // Delta frames are only needed for their own Decode Targets.
+ EXPECT_NE(headers[2].generic->decode_target_indications[0],
+ DecodeTargetIndication::kNotPresent);
+ EXPECT_EQ(headers[2].generic->decode_target_indications[kMaxTemporalStreams],
+ DecodeTargetIndication::kNotPresent);
+ EXPECT_EQ(headers[3].generic->decode_target_indications[0],
+ DecodeTargetIndication::kNotPresent);
+ EXPECT_NE(headers[3].generic->decode_target_indications[kMaxTemporalStreams],
+ DecodeTargetIndication::kNotPresent);
+
+ EXPECT_THAT(headers[0].generic->dependencies, IsEmpty()); // S0, 1
+ EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1)); // S1, 3
+ EXPECT_THAT(headers[2].generic->dependencies, ElementsAre(1)); // S0, 5
+ EXPECT_THAT(headers[3].generic->dependencies, ElementsAre(3)); // S1, 7
+
+ EXPECT_THAT(headers[0].generic->chain_diffs, ElementsAre(0, 0));
+ EXPECT_THAT(headers[1].generic->chain_diffs, ElementsAre(2, 2));
+ EXPECT_THAT(headers[2].generic->chain_diffs, ElementsAre(4, 2));
+ EXPECT_THAT(headers[3].generic->chain_diffs, ElementsAre(2, 4));
+}
+
class RtpPayloadParamsH264ToGenericTest : public ::testing::Test {
public:
enum LayerSync { kNoSync, kSync };