Do not propagate generic descriptor on receiving frame

The descriptor was used only by the frame decryptor.
The decryptor needs only the raw representation, which it can recreate
in a way compatible with the new version of the descriptor.

This relands commit abf73de8eae90e9ac7e88ce1d52728e8102e824f
with adjustments.

Change-Id: I935977179bef31d8e1023964b967658e9a7db92d
Bug: webrtc:10342
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/168489
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30532}
diff --git a/video/buffered_frame_decryptor.cc b/video/buffered_frame_decryptor.cc
index 90d14d3..ae83da9 100644
--- a/video/buffered_frame_decryptor.cc
+++ b/video/buffered_frame_decryptor.cc
@@ -11,7 +11,10 @@
 #include "video/buffered_frame_decryptor.h"
 
 #include <utility>
+#include <vector>
 
+#include "modules/rtp_rtcp/source/rtp_descriptor_authentication.h"
+#include "modules/video_coding/frame_object.h"
 #include "rtc_base/logging.h"
 #include "system_wrappers/include/field_trial.h"
 
@@ -60,9 +63,7 @@
     return FrameDecision::kStash;
   }
   // When using encryption we expect the frame to have the generic descriptor.
-  absl::optional<RtpGenericFrameDescriptor> descriptor =
-      frame->GetGenericFrameDescriptor();
-  if (!descriptor) {
+  if (frame->GetRtpVideoHeader().generic == absl::nullopt) {
     RTC_LOG(LS_ERROR) << "No generic frame descriptor found dropping frame.";
     return FrameDecision::kDrop;
   }
@@ -76,9 +77,9 @@
                                                      max_plaintext_byte_size);
 
   // Only enable authenticating the header if the field trial is enabled.
-  rtc::ArrayView<const uint8_t> additional_data;
+  std::vector<uint8_t> additional_data;
   if (generic_descriptor_auth_experiment_) {
-    additional_data = descriptor->GetByteRepresentation();
+    additional_data = RtpDescriptorAuthentication(frame->GetRtpVideoHeader());
   }
 
   // Attempt to decrypt the video frame.
diff --git a/video/buffered_frame_decryptor_unittest.cc b/video/buffered_frame_decryptor_unittest.cc
index 1b21acf..bbc08b0 100644
--- a/video/buffered_frame_decryptor_unittest.cc
+++ b/video/buffered_frame_decryptor_unittest.cc
@@ -57,6 +57,8 @@
   std::unique_ptr<video_coding::RtpFrameObject> CreateRtpFrameObject(
       bool key_frame) {
     seq_num_++;
+    RTPVideoHeader rtp_video_header;
+    rtp_video_header.generic.emplace();
 
     // clang-format off
     return std::make_unique<video_coding::RtpFrameObject>(
@@ -73,9 +75,8 @@
         kVideoCodecGeneric,
         kVideoRotation_0,
         VideoContentType::UNSPECIFIED,
-        RTPVideoHeader(),
+        rtp_video_header,
         /*color_space=*/absl::nullopt,
-        RtpGenericFrameDescriptor(),
         RtpPacketInfos(),
         EncodedImageBuffer::Create(/*size=*/0));
     // clang-format on
diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc
index 9f5fe02..cce557a 100644
--- a/video/rtp_video_stream_receiver.cc
+++ b/video/rtp_video_stream_receiver.cc
@@ -28,6 +28,7 @@
 #include "modules/rtp_rtcp/include/ulpfec_receiver.h"
 #include "modules/rtp_rtcp/source/create_video_rtp_depacketizer.h"
 #include "modules/rtp_rtcp/source/rtp_format.h"
+#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
@@ -367,51 +368,43 @@
   rtp_packet.GetExtension<PlayoutDelayLimits>(&video_header.playout_delay);
   rtp_packet.GetExtension<FrameMarkingExtension>(&video_header.frame_marking);
 
-  RtpGenericFrameDescriptor& generic_descriptor =
-      packet->generic_descriptor.emplace();
-  if (rtp_packet.GetExtension<RtpGenericFrameDescriptorExtension01>(
-          &generic_descriptor)) {
-    if (rtp_packet.HasExtension<RtpGenericFrameDescriptorExtension00>()) {
-      RTC_LOG(LS_WARNING) << "RTP packet had two different GFD versions.";
-      return;
-    }
-    generic_descriptor.SetByteRepresentation(
-        rtp_packet.GetRawExtension<RtpGenericFrameDescriptorExtension01>());
-  } else if ((rtp_packet.GetExtension<RtpGenericFrameDescriptorExtension00>(
-                 &generic_descriptor))) {
-    generic_descriptor.SetByteRepresentation(
-        rtp_packet.GetRawExtension<RtpGenericFrameDescriptorExtension00>());
-  } else {
-    packet->generic_descriptor = absl::nullopt;
+  if (rtp_packet.HasExtension<RtpGenericFrameDescriptorExtension00>() &&
+      rtp_packet.HasExtension<RtpGenericFrameDescriptorExtension01>()) {
+    RTC_LOG(LS_WARNING) << "RTP packet had two different GFD versions.";
+    return;
   }
-  if (packet->generic_descriptor != absl::nullopt) {
-    video_header.is_first_packet_in_frame =
-        packet->generic_descriptor->FirstPacketInSubFrame();
-    video_header.is_last_packet_in_frame =
-        packet->generic_descriptor->LastPacketInSubFrame();
 
-    if (packet->generic_descriptor->FirstPacketInSubFrame()) {
+  RtpGenericFrameDescriptor generic_descriptor;
+  bool has_generic_descriptor =
+      rtp_packet.GetExtension<RtpGenericFrameDescriptorExtension01>(
+          &generic_descriptor) ||
+      rtp_packet.GetExtension<RtpGenericFrameDescriptorExtension00>(
+          &generic_descriptor);
+  if (has_generic_descriptor) {
+    video_header.is_first_packet_in_frame =
+        generic_descriptor.FirstPacketInSubFrame();
+    video_header.is_last_packet_in_frame =
+        generic_descriptor.LastPacketInSubFrame();
+
+    if (generic_descriptor.FirstPacketInSubFrame()) {
       video_header.frame_type =
-          packet->generic_descriptor->FrameDependenciesDiffs().empty()
+          generic_descriptor.FrameDependenciesDiffs().empty()
               ? VideoFrameType::kVideoFrameKey
               : VideoFrameType::kVideoFrameDelta;
 
       auto& descriptor = video_header.generic.emplace();
       int64_t frame_id =
-          frame_id_unwrapper_.Unwrap(packet->generic_descriptor->FrameId());
+          frame_id_unwrapper_.Unwrap(generic_descriptor.FrameId());
       descriptor.frame_id = frame_id;
-      descriptor.spatial_index = packet->generic_descriptor->SpatialLayer();
-      descriptor.temporal_index = packet->generic_descriptor->TemporalLayer();
-      descriptor.discardable =
-          packet->generic_descriptor->Discardable().value_or(false);
-      for (uint16_t fdiff :
-           packet->generic_descriptor->FrameDependenciesDiffs()) {
+      descriptor.spatial_index = generic_descriptor.SpatialLayer();
+      descriptor.temporal_index = generic_descriptor.TemporalLayer();
+      descriptor.discardable = generic_descriptor.Discardable().value_or(false);
+      for (uint16_t fdiff : generic_descriptor.FrameDependenciesDiffs()) {
         descriptor.dependencies.push_back(frame_id - fdiff);
       }
     }
-
-    video_header.width = packet->generic_descriptor->Width();
-    video_header.height = packet->generic_descriptor->Height();
+    video_header.width = generic_descriptor.Width();
+    video_header.height = generic_descriptor.Height();
   }
 
   // Color space should only be transmitted in the last packet of a frame,
@@ -435,7 +428,7 @@
       // TODO(bugs.webrtc.org/10336): Implement support for reordering.
       RTC_LOG(LS_INFO)
           << "LossNotificationController does not support reordering.";
-    } else if (!packet->generic_descriptor) {
+    } else if (!has_generic_descriptor) {
       RTC_LOG(LS_WARNING) << "LossNotificationController requires generic "
                              "frame descriptor, but it is missing.";
     } else {