Write and parse the generic video descriptor.

Bug: webrtc:9361
Change-Id: Id129a6ab7a86641c6e80827458ef0c40c5640855
Reviewed-on: https://webrtc-review.googlesource.com/99542
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24718}
diff --git a/modules/rtp_rtcp/source/rtp_sender_unittest.cc b/modules/rtp_rtcp/source/rtp_sender_unittest.cc
index f738a31..3c7a6d2 100644
--- a/modules/rtp_rtcp/source/rtp_sender_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_unittest.cc
@@ -21,6 +21,8 @@
 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
 #include "modules/rtp_rtcp/source/rtcp_packet/transport_feedback.h"
 #include "modules/rtp_rtcp/source/rtp_format_video_generic.h"
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
@@ -59,8 +61,10 @@
 const size_t kGenericHeaderLength = 1;
 const uint8_t kPayloadData[] = {47, 11, 32, 93, 89};
 const int64_t kDefaultExpectedRetransmissionTimeMs = 125;
+const int kGenericDescriptorId = 10;
 
 using ::testing::_;
+using ::testing::ElementsAre;
 using ::testing::ElementsAreArray;
 using ::testing::Invoke;
 
@@ -84,6 +88,8 @@
     receivers_extensions_.Register(kRtpExtensionVideoTiming,
                                    kVideoTimingExtensionId);
     receivers_extensions_.Register(kRtpExtensionMid, kMidExtensionId);
+    receivers_extensions_.Register(kRtpExtensionGenericFrameDescriptor,
+                                   kGenericDescriptorId);
   }
 
   bool SendRtp(const uint8_t* data,
@@ -2016,6 +2022,36 @@
             rtp_sender_video_->GetStorageType(header, kSettings, kRttMs));
 }
 
+TEST_P(RtpSenderVideoTest, PopulateGenericFrameDescriptor) {
+  const int64_t kFrameId = 100000;
+  uint8_t kFrame[100];
+  EXPECT_EQ(0, rtp_sender_->RegisterRtpHeaderExtension(
+                   kRtpExtensionGenericFrameDescriptor, kGenericDescriptorId));
+
+  RTPVideoHeader hdr;
+  RTPVideoHeader::GenericDescriptorInfo& generic = hdr.generic.emplace();
+  generic.frame_id = kFrameId;
+  generic.temporal_index = 3;
+  generic.spatial_index = 2;
+  generic.higher_spatial_layers.push_back(4);
+  generic.dependencies.push_back(kFrameId - 1);
+  generic.dependencies.push_back(kFrameId - 500);
+  rtp_sender_video_->SendVideo(kVideoCodecGeneric, kVideoFrameDelta, kPayload,
+                               kTimestamp, 0, kFrame, sizeof(kFrame), nullptr,
+                               &hdr, kDefaultExpectedRetransmissionTimeMs);
+
+  RtpGenericFrameDescriptor descriptor_wire;
+  EXPECT_EQ(1U, transport_.sent_packets_.size());
+  EXPECT_TRUE(
+      transport_.last_sent_packet()
+          .GetExtension<RtpGenericFrameDescriptorExtension>(&descriptor_wire));
+  EXPECT_EQ(static_cast<uint16_t>(generic.frame_id), descriptor_wire.FrameId());
+  EXPECT_EQ(generic.temporal_index, descriptor_wire.TemporalLayer());
+  EXPECT_THAT(descriptor_wire.FrameDependenciesDiffs(), ElementsAre(1, 500));
+  uint8_t spatial_bitmask = 0x14;
+  EXPECT_EQ(spatial_bitmask, descriptor_wire.SpatialLayersBitmask());
+}
+
 TEST_P(RtpSenderTest, OnOverheadChanged) {
   MockOverheadObserver mock_overhead_observer;
   rtp_sender_.reset(new RTPSender(
diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc
index 90ff013..8f69dda 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video.cc
@@ -24,6 +24,7 @@
 #include "modules/rtp_rtcp/source/rtp_format_video_generic.h"
 #include "modules/rtp_rtcp/source/rtp_format_vp8.h"
 #include "modules/rtp_rtcp/source/rtp_format_vp9.h"
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
 #include "rtc_base/checks.h"
@@ -65,6 +66,35 @@
   if (last_packet &&
       video_header.video_timing.flags != VideoSendTiming::kInvalid)
     packet->SetExtension<VideoTimingExtension>(video_header.video_timing);
+
+  if (video_header.generic) {
+    RtpGenericFrameDescriptor generic_descriptor;
+    generic_descriptor.SetFirstPacketInSubFrame(first_packet);
+    generic_descriptor.SetLastPacketInSubFrame(last_packet);
+    generic_descriptor.SetFirstSubFrameInFrame(true);
+    generic_descriptor.SetLastSubFrameInFrame(true);
+
+    if (first_packet) {
+      generic_descriptor.SetFrameId(
+          static_cast<uint16_t>(video_header.generic->frame_id));
+      for (int64_t dep : video_header.generic->dependencies) {
+        generic_descriptor.AddFrameDependencyDiff(
+            video_header.generic->frame_id - dep);
+      }
+
+      uint8_t spatial_bimask = 1 << video_header.generic->spatial_index;
+      for (int layer : video_header.generic->higher_spatial_layers) {
+        RTC_DCHECK_GT(layer, video_header.generic->spatial_index);
+        RTC_DCHECK_LT(layer, 8);
+        spatial_bimask |= 1 << layer;
+      }
+      generic_descriptor.SetSpatialLayersBitmask(spatial_bimask);
+
+      generic_descriptor.SetTemporalLayer(video_header.generic->temporal_index);
+    }
+    packet->SetExtension<RtpGenericFrameDescriptorExtension>(
+        generic_descriptor);
+  }
 }
 
 }  // namespace
diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc
index 870183a..098f23d 100644
--- a/video/rtp_video_stream_receiver.cc
+++ b/video/rtp_video_stream_receiver.cc
@@ -25,6 +25,7 @@
 #include "modules/rtp_rtcp/include/rtp_rtcp.h"
 #include "modules/rtp_rtcp/include/ulpfec_receiver.h"
 #include "modules/rtp_rtcp/source/rtp_format.h"
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
 #include "modules/rtp_rtcp/source/rtp_rtcp_config.h"
@@ -454,10 +455,8 @@
   webrtc_rtp_header.video_header().playout_delay.min_ms = -1;
   webrtc_rtp_header.video_header().playout_delay.max_ms = -1;
 
-  // Retrieve the video rotation information.
   packet.GetExtension<VideoOrientation>(
       &webrtc_rtp_header.video_header().rotation);
-
   packet.GetExtension<VideoContentTypeExtension>(
       &webrtc_rtp_header.video_header().content_type);
   packet.GetExtension<VideoTimingExtension>(
@@ -465,6 +464,40 @@
   packet.GetExtension<PlayoutDelayLimits>(
       &webrtc_rtp_header.video_header().playout_delay);
 
+  RtpGenericFrameDescriptor generic_descriptor_wire;
+  if (packet.GetExtension<RtpGenericFrameDescriptorExtension>(
+          &generic_descriptor_wire)) {
+    webrtc_rtp_header.video_header().is_first_packet_in_frame =
+        generic_descriptor_wire.FirstSubFrameInFrame() &&
+        generic_descriptor_wire.FirstPacketInSubFrame();
+    // TODO(philipel): Add is_last_packet_in_frame to the RtpVideoHeader and use
+    //                 the information from the generic descriptor to set it.
+
+    // For now we store the diffs in |generic_descriptor.dependencies|. They
+    // are later recalculated when the frame id is unwrapped.
+    // TODO(philipel): Remove RTPVideoHeader::GenericDescriptorInfo and use
+    //                 RtpGenericFrameDescriptor instead.
+    RTPVideoHeader::GenericDescriptorInfo& generic_descriptor =
+        webrtc_rtp_header.video_header().generic.emplace();
+    if (generic_descriptor_wire.FirstPacketInSubFrame()) {
+      generic_descriptor.frame_id = generic_descriptor_wire.FrameId();
+      for (uint16_t diff : generic_descriptor_wire.FrameDependenciesDiffs()) {
+        generic_descriptor.dependencies.push_back(diff);
+      }
+
+      generic_descriptor.temporal_index =
+          generic_descriptor_wire.TemporalLayer();
+      uint8_t spatial_bitmask = generic_descriptor_wire.SpatialLayersBitmask();
+      while (spatial_bitmask && !(spatial_bitmask & 1)) {
+        spatial_bitmask >>= 1;
+        ++generic_descriptor.spatial_index;
+      }
+
+      // Since the receiver doesn't care about higher spatial layer frames
+      // that depend on this frame, we don't parse them.
+    }
+  }
+
   OnReceivedPayloadData(parsed_payload.payload, parsed_payload.payload_length,
                         &webrtc_rtp_header);
 }