Width and height were not associated with the frame and provided to the decoder for H.264 streams that have NALUs before the SPS

Summary:
There is an issue in WebRTC's handling of certain H.264 bitstreams in which one or more RTP packets precede the packet containing the SPS.

Typically an IDR (key frame) will have the SPS/PPS (if present) or the IDR slice in the first packet.
But this is not required in all cases; for example, when packetization-mode = 0, each NALU can be in a separate packet, and certain NALUs, such as SEI or AUD, can exist before the SPS.

The way WebRTC associates width/height to encoded frames is by tracking the dependency of IDR slices to SPS/PPS.
RTP packets containing SPS/PPS have the correct width/height stored in them during parsing of the SPS in RtpDepacketizerH264::ProcessStapAOrSingleNalu.
IDR packets refer to SPS using ppsid, spsid and the width/height fields get transferred from packet containing SPS to IDR packet in H264SpsPpsTracker::CopyAndFixBitstream.

When packets are assembled into a single encoded H264 frame in PacketBuffer::FindFrames, the loop scans all the packets/NALUs backward, from the last RTP packet of the IDR to the first one.
Hence the order of NALUs during this scan is: last parts of the IDR slice -> middle parts of the IDR slice -> first IDR slice packet (this should have the correct width/height) -> RTP packet containing SPS/PPS (this should have the correct width/height).
start_index points to the first RTP packet of the frame and it is passed into RtpFrameObject's constructor. RtpFrameObject will use the width/height stored in the first RTP packet.

This works fine as long as the first RTP packet has width/height, which will be the case if the first RTP packet is the IDR or SPS.
In H.264 the first RTP packet may instead be an AUD or SEI; in those cases RtpFrameObject will create the IDR frame with width/height = 0, which causes problems for Android hardware decoders.
On Android, hardware decoders rely on the correct width/height to initialize themselves.

Verified on a real scenario that we have.
Simulated on AppRTCMobile on the iOS Simulator.
Added unit tests: ninja -C out/Default && ./out/Default/modules_unittests --gtest_filter=*FrameResolution*

Bug: webrtc:11025
Change-Id: Ie2273aae5e81fd62497e1add084876a3aa05af4d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/156260
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Commit-Queue: Shyam Sadhwani <shyamsadhwani@fb.com>
Cr-Commit-Position: refs/heads/master@{#29515}
diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc
index 7f0266d..b5aeb04 100644
--- a/modules/video_coding/packet_buffer.cc
+++ b/modules/video_coding/packet_buffer.cc
@@ -313,7 +313,8 @@
       bool has_h264_pps = false;
       bool has_h264_idr = false;
       bool is_h264_keyframe = false;
-
+      int idr_width = -1;
+      int idr_height = -1;
       while (true) {
         ++tested_packets;
         frame_size += data_buffer_[start_index].sizeBytes;
@@ -355,6 +356,15 @@
                has_h264_pps) ||
               (!sps_pps_idr_is_h264_keyframe_ && has_h264_idr)) {
             is_h264_keyframe = true;
+            // Store the resolution of key frame which is the packet with
+            // smallest index and valid resolution; typically it's the IDR or
+            // SPS packet; there may be packets preceding this packet, IDR's
+            // resolution will be applied to them.
+            if (data_buffer_[start_index].width() > 0 &&
+                data_buffer_[start_index].height() > 0) {
+              idr_width = data_buffer_[start_index].width();
+              idr_height = data_buffer_[start_index].height();
+            }
           }
         }
 
@@ -401,6 +411,12 @@
         if (is_h264_keyframe) {
           data_buffer_[first_packet_index].video_header.frame_type =
               VideoFrameType::kVideoFrameKey;
+          if (idr_width > 0 && idr_height > 0) {
+            // IDR frame was finalized and we have the correct resolution for
+            // IDR; update first packet to have same resolution as IDR.
+            data_buffer_[first_packet_index].video_header.width = idr_width;
+            data_buffer_[first_packet_index].video_header.height = idr_height;
+          }
         } else {
           data_buffer_[first_packet_index].video_header.frame_type =
               VideoFrameType::kVideoFrameDelta;
diff --git a/modules/video_coding/video_packet_buffer_unittest.cc b/modules/video_coding/video_packet_buffer_unittest.cc
index efe2ecc..67f2806 100644
--- a/modules/video_coding/video_packet_buffer_unittest.cc
+++ b/modules/video_coding/video_packet_buffer_unittest.cc
@@ -547,13 +547,15 @@
                              : ""),
         sps_pps_idr_is_keyframe_(sps_pps_idr_is_keyframe) {}
 
-  bool InsertH264(uint16_t seq_num,           // packet sequence number
-                  IsKeyFrame keyframe,        // is keyframe
-                  IsFirst first,              // is first packet of frame
-                  IsLast last,                // is last packet of frame
-                  uint32_t timestamp,         // rtp timestamp
-                  int data_size = 0,          // size of data
-                  uint8_t* data = nullptr) {  // data pointer
+  bool InsertH264(uint16_t seq_num,         // packet sequence number
+                  IsKeyFrame keyframe,      // is keyframe
+                  IsFirst first,            // is first packet of frame
+                  IsLast last,              // is last packet of frame
+                  uint32_t timestamp,       // rtp timestamp
+                  int data_size = 0,        // size of data
+                  uint8_t* data = nullptr,  // data pointer
+                  uint32_t width = 0,       // width of frame (SPS/IDR)
+                  uint32_t height = 0) {    // height of frame (SPS/IDR)
     VCMPacket packet;
     packet.video_header.codec = kVideoCodecH264;
     auto& h264_header =
@@ -571,6 +573,8 @@
         h264_header.nalus_length = 1;
       }
     }
+    packet.video_header.width = width;
+    packet.video_header.height = height;
     packet.video_header.is_first_packet_in_frame = first == kFirst;
     packet.video_header.is_last_packet_in_frame = last == kLast;
     packet.sizeBytes = data_size;
@@ -579,6 +583,43 @@
     return packet_buffer_.InsertPacket(&packet);
   }
 
+  bool InsertH264KeyFrameWithAud(
+      uint16_t seq_num,         // sequence number of the AUD packet
+      IsKeyFrame keyframe,      // forwarded to InsertH264 for the IDR
+      IsFirst first,            // must be kFirst, else returns false
+      IsLast last,              // forwarded to InsertH264 for the IDR
+      uint32_t timestamp,       // rtp timestamp used for both packets
+      int data_size = 0,        // size of the IDR payload
+      uint8_t* data = nullptr,  // IDR payload pointer
+      uint32_t width = 0,       // width set on the IDR packet
+      uint32_t height = 0) {    // height set on the IDR packet
+    VCMPacket packet;
+    packet.video_header.codec = kVideoCodecH264;
+    auto& h264_header =
+        packet.video_header.video_type_header.emplace<RTPVideoHeaderH264>();
+    packet.seqNum = seq_num;
+    packet.timestamp = timestamp;
+
+    // This helper only builds the start of a frame; reject anything else.
+    if (kFirst != first) {
+      return false;
+    }
+
+    // Insert an AUD NALU in its own packet, without setting width/height.
+    h264_header.nalus[0].type = H264::NaluType::kAud;
+    h264_header.nalus_length = 1;
+    packet.video_header.is_first_packet_in_frame = true;
+    packet.video_header.is_last_packet_in_frame = false;
+    packet.sizeBytes = 0;
+    packet.dataPtr = nullptr;
+    if (packet_buffer_.InsertPacket(&packet)) {
+      // AUD accepted: insert the IDR at seq_num + 1 with the resolution.
+      return InsertH264(seq_num + 1, keyframe, kNotFirst, last, timestamp,
+                        data_size, data, width, height);
+    }
+    return false;
+  }
+
   const bool sps_pps_idr_is_keyframe_;
 };
 
@@ -660,6 +701,61 @@
             0);
 }
 
+TEST_P(TestPacketBufferH264Parameterized, FrameResolution) {
+  uint16_t seq_num = 100;
+  uint8_t data_data[] = "some plain old data";
+  uint8_t* data = new uint8_t[sizeof(data_data)];  // heap copy for InsertH264
+  memcpy(data, data_data, sizeof(data_data));
+  uint32_t width = 640;
+  uint32_t height = 360;
+  uint32_t timestamp = 1000;
+  // Insert a key frame as one first-and-last packet carrying the resolution.
+  EXPECT_TRUE(InsertH264(seq_num, kKeyFrame, kFirst, kLast, timestamp,
+                         sizeof(data_data), data, width, height));
+
+  ASSERT_EQ(1UL, frames_from_callback_.size());  // one frame expected
+  EXPECT_EQ(frames_from_callback_[seq_num]->EncodedImage().size(),
+            sizeof(data_data));
+  EXPECT_EQ(frames_from_callback_[seq_num]->EncodedImage().capacity(),
+            sizeof(data_data));
+  EXPECT_EQ(width,
+            frames_from_callback_[seq_num]->EncodedImage()._encodedWidth);
+  EXPECT_EQ(height,
+            frames_from_callback_[seq_num]->EncodedImage()._encodedHeight);
+  EXPECT_EQ(memcmp(frames_from_callback_[seq_num]->data(), data_data,
+                   sizeof(data_data)),
+            0);
+}
+
+TEST_P(TestPacketBufferH264Parameterized, FrameResolutionNaluBeforeSPS) {
+  uint16_t seq_num = 100;  // sequence number of the leading AUD packet
+  uint8_t data_data[] = "some plain old data";
+  uint8_t* data = new uint8_t[sizeof(data_data)];  // heap copy for the IDR
+  memcpy(data, data_data, sizeof(data_data));
+  uint32_t width = 640;
+  uint32_t height = 360;
+  uint32_t timestamp = 1000;
+  // The frame starts with an AUD lacking resolution; the IDR follows it.
+  EXPECT_TRUE(InsertH264KeyFrameWithAud(seq_num, kKeyFrame, kFirst, kLast,
+                                        timestamp, sizeof(data_data), data,
+                                        width, height));
+
+  CheckFrame(seq_num);  // frame is looked up under the AUD's seq_num
+  ASSERT_EQ(1UL, frames_from_callback_.size());  // one frame expected
+  EXPECT_EQ(frames_from_callback_[seq_num]->EncodedImage().size(),
+            sizeof(data_data));
+  EXPECT_EQ(frames_from_callback_[seq_num]->EncodedImage().capacity(),
+            sizeof(data_data));
+  EXPECT_EQ(width,
+            frames_from_callback_[seq_num]->EncodedImage()._encodedWidth);
+  EXPECT_EQ(height,
+            frames_from_callback_[seq_num]->EncodedImage()._encodedHeight);
+
+  EXPECT_EQ(memcmp(frames_from_callback_[seq_num]->data(), data_data,
+                   sizeof(data_data)),
+            0);
+}
+
 TEST_F(TestPacketBuffer, FreeSlotsOnFrameCreation) {
   const uint16_t seq_num = Rand();