Ignore RTPFragmentationHeader when rewriting H264 SPS

RTPFragmentationHeader is already ignored by H264 packetizer
and thus doesn't need to be provided and calculated.

Bug: webrtc:6471
Change-Id: I45bc22827f0dc811457e3ebe477a16293501c2fb
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/179843
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Marco Paniconi <marpan@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31791}
diff --git a/common_video/h264/sps_vui_rewriter.cc b/common_video/h264/sps_vui_rewriter.cc
index 1c420a9..3fc794f 100644
--- a/common_video/h264/sps_vui_rewriter.cc
+++ b/common_video/h264/sps_vui_rewriter.cc
@@ -210,32 +210,25 @@
   return result;
 }
 
-void SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
+rtc::Buffer SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
     rtc::ArrayView<const uint8_t> buffer,
-    size_t num_nalus,
-    const size_t* nalu_offsets,
-    const size_t* nalu_lengths,
-    const webrtc::ColorSpace* color_space,
-    rtc::Buffer* output_buffer,
-    size_t* output_nalu_offsets,
-    size_t* output_nalu_lengths) {
+    const webrtc::ColorSpace* color_space) {
+  std::vector<H264::NaluIndex> nalus =
+      H264::FindNaluIndices(buffer.data(), buffer.size());
+
   // Allocate some extra space for potentially adding a missing VUI.
-  output_buffer->EnsureCapacity(buffer.size() + num_nalus * kMaxVuiSpsIncrease);
+  rtc::Buffer output_buffer(/*size=*/0, /*capacity=*/buffer.size() +
+                                            nalus.size() * kMaxVuiSpsIncrease);
 
-  const uint8_t* prev_nalu_ptr = buffer.data();
-  size_t prev_nalu_length = 0;
-
-  for (size_t i = 0; i < num_nalus; ++i) {
-    const uint8_t* nalu_ptr = buffer.data() + nalu_offsets[i];
-    const size_t nalu_length = nalu_lengths[i];
-
+  for (const H264::NaluIndex& nalu : nalus) {
     // Copy NAL unit start code.
-    const uint8_t* start_code_ptr = prev_nalu_ptr + prev_nalu_length;
+    const uint8_t* start_code_ptr = buffer.data() + nalu.start_offset;
     const size_t start_code_length =
-        (nalu_ptr - prev_nalu_ptr) - prev_nalu_length;
-    output_buffer->AppendData(start_code_ptr, start_code_length);
+        nalu.payload_start_offset - nalu.start_offset;
+    output_buffer.AppendData(start_code_ptr, start_code_length);
 
-    bool updated_sps = false;
+    const uint8_t* nalu_ptr = buffer.data() + nalu.payload_start_offset;
+    const size_t nalu_length = nalu.payload_size;
 
     if (H264::ParseNaluType(nalu_ptr[0]) == H264::NaluType::kSps) {
       // Check if stream uses picture order count type 0, and if so rewrite it
@@ -260,22 +253,15 @@
           nalu_ptr + H264::kNaluTypeSize, nalu_length - H264::kNaluTypeSize,
           &sps, color_space, &output_nalu, Direction::kOutgoing);
       if (result == ParseResult::kVuiRewritten) {
-        updated_sps = true;
-        output_nalu_offsets[i] = output_buffer->size();
-        output_nalu_lengths[i] = output_nalu.size();
-        output_buffer->AppendData(output_nalu.data(), output_nalu.size());
+        output_buffer.AppendData(output_nalu.data(), output_nalu.size());
+        continue;
       }
     }
 
-    if (!updated_sps) {
-      output_nalu_offsets[i] = output_buffer->size();
-      output_nalu_lengths[i] = nalu_length;
-      output_buffer->AppendData(nalu_ptr, nalu_length);
-    }
-
-    prev_nalu_ptr = nalu_ptr;
-    prev_nalu_length = nalu_length;
+    // vui wasn't rewritten, copy the nal unit as is.
+    output_buffer.AppendData(nalu_ptr, nalu_length);
   }
+  return output_buffer;
 }
 
 namespace {
diff --git a/common_video/h264/sps_vui_rewriter.h b/common_video/h264/sps_vui_rewriter.h
index 4cd4cb9..9e79c3f 100644
--- a/common_video/h264/sps_vui_rewriter.h
+++ b/common_video/h264/sps_vui_rewriter.h
@@ -50,20 +50,10 @@
       rtc::Buffer* destination,
       Direction Direction);
 
-  // Parses NAL units from |buffer| based on |nalu_offsets| and |nalu_lengths|
-  // and rewrites VUI in SPS blocks if necessary.
-  // The result is written to |output_buffer| and modified NAL unit offsets
-  // and lenghts are written to |output_nalu_offsets| and |output_nalu_lenghts|
-  // to account for any added data.
-  static void ParseOutgoingBitstreamAndRewriteSps(
+  // Parses NAL units from |buffer| and rewrites VUI in SPS blocks if necessary.
+  static rtc::Buffer ParseOutgoingBitstreamAndRewriteSps(
       rtc::ArrayView<const uint8_t> buffer,
-      size_t num_nalus,
-      const size_t* nalu_offsets,
-      const size_t* nalu_lengths,
-      const ColorSpace* color_space,
-      rtc::Buffer* output_buffer,
-      size_t* output_nalu_offsets,
-      size_t* output_nalu_lengths);
+      const ColorSpace* color_space);
 
  private:
   static ParseResult ParseAndRewriteSps(
diff --git a/common_video/h264/sps_vui_rewriter_unittest.cc b/common_video/h264/sps_vui_rewriter_unittest.cc
index e008948..9a9cce3 100644
--- a/common_video/h264/sps_vui_rewriter_unittest.cc
+++ b/common_video/h264/sps_vui_rewriter_unittest.cc
@@ -396,36 +396,14 @@
   GenerateFakeSps(kVuiNoFrameBuffering, &optimal_sps);
 
   rtc::Buffer buffer;
-  const size_t kNumNalus = 2;
-  size_t nalu_offsets[kNumNalus];
-  size_t nalu_lengths[kNumNalus];
   buffer.AppendData(kStartSequence);
-  nalu_offsets[0] = buffer.size();
-  nalu_lengths[0] = optimal_sps.size();
   buffer.AppendData(optimal_sps);
   buffer.AppendData(kStartSequence);
-  nalu_offsets[1] = buffer.size();
-  nalu_lengths[1] = sizeof(kIdr1);
   buffer.AppendData(kIdr1);
 
-  rtc::Buffer modified_buffer;
-  size_t modified_nalu_offsets[kNumNalus];
-  size_t modified_nalu_lengths[kNumNalus];
-
-  SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
-      buffer, kNumNalus, nalu_offsets, nalu_lengths, nullptr, &modified_buffer,
-      modified_nalu_offsets, modified_nalu_lengths);
-
   EXPECT_THAT(
-      std::vector<uint8_t>(modified_buffer.data(),
-                           modified_buffer.data() + modified_buffer.size()),
-      ::testing::ElementsAreArray(buffer.data(), buffer.size()));
-  EXPECT_THAT(std::vector<size_t>(modified_nalu_offsets,
-                                  modified_nalu_offsets + kNumNalus),
-              ::testing::ElementsAreArray(nalu_offsets, kNumNalus));
-  EXPECT_THAT(std::vector<size_t>(modified_nalu_lengths,
-                                  modified_nalu_lengths + kNumNalus),
-              ::testing::ElementsAreArray(nalu_lengths, kNumNalus));
+      SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(buffer, nullptr),
+      ::testing::ElementsAreArray(buffer));
 }
 
 TEST(SpsVuiRewriterOutgoingVuiTest, ParseOutgoingBitstreamNoVui) {
@@ -435,61 +413,28 @@
   GenerateFakeSps(kVuiNotPresent, &sps);
 
   rtc::Buffer buffer;
-  const size_t kNumNalus = 3;
-  size_t nalu_offsets[kNumNalus];
-  size_t nalu_lengths[kNumNalus];
   buffer.AppendData(kStartSequence);
-  nalu_offsets[0] = buffer.size();
-  nalu_lengths[0] = sizeof(kIdr1);
   buffer.AppendData(kIdr1);
   buffer.AppendData(kStartSequence);
-  nalu_offsets[1] = buffer.size();
-  nalu_lengths[1] = sizeof(kSpsNaluType) + sps.size();
   buffer.AppendData(kSpsNaluType);
   buffer.AppendData(sps);
   buffer.AppendData(kStartSequence);
-  nalu_offsets[2] = buffer.size();
-  nalu_lengths[2] = sizeof(kIdr2);
   buffer.AppendData(kIdr2);
 
   rtc::Buffer optimal_sps;
   GenerateFakeSps(kVuiNoFrameBuffering, &optimal_sps);
 
   rtc::Buffer expected_buffer;
-  size_t expected_nalu_offsets[kNumNalus];
-  size_t expected_nalu_lengths[kNumNalus];
   expected_buffer.AppendData(kStartSequence);
-  expected_nalu_offsets[0] = expected_buffer.size();
-  expected_nalu_lengths[0] = sizeof(kIdr1);
   expected_buffer.AppendData(kIdr1);
   expected_buffer.AppendData(kStartSequence);
-  expected_nalu_offsets[1] = expected_buffer.size();
-  expected_nalu_lengths[1] = sizeof(kSpsNaluType) + optimal_sps.size();
   expected_buffer.AppendData(kSpsNaluType);
   expected_buffer.AppendData(optimal_sps);
   expected_buffer.AppendData(kStartSequence);
-  expected_nalu_offsets[2] = expected_buffer.size();
-  expected_nalu_lengths[2] = sizeof(kIdr2);
   expected_buffer.AppendData(kIdr2);
 
-  rtc::Buffer modified_buffer;
-  size_t modified_nalu_offsets[kNumNalus];
-  size_t modified_nalu_lengths[kNumNalus];
-
-  SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
-      buffer, kNumNalus, nalu_offsets, nalu_lengths, nullptr, &modified_buffer,
-      modified_nalu_offsets, modified_nalu_lengths);
-
   EXPECT_THAT(
-      std::vector<uint8_t>(modified_buffer.data(),
-                           modified_buffer.data() + modified_buffer.size()),
-      ::testing::ElementsAreArray(expected_buffer.data(),
-                                  expected_buffer.size()));
-  EXPECT_THAT(std::vector<size_t>(modified_nalu_offsets,
-                                  modified_nalu_offsets + kNumNalus),
-              ::testing::ElementsAreArray(expected_nalu_offsets, kNumNalus));
-  EXPECT_THAT(std::vector<size_t>(modified_nalu_lengths,
-                                  modified_nalu_lengths + kNumNalus),
-              ::testing::ElementsAreArray(expected_nalu_lengths, kNumNalus));
+      SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(buffer, nullptr),
+      ::testing::ElementsAreArray(expected_buffer));
 }
 }  // namespace webrtc
diff --git a/video/frame_encode_metadata_writer.cc b/video/frame_encode_metadata_writer.cc
index 8ffb3ae..b30eeea 100644
--- a/video/frame_encode_metadata_writer.cc
+++ b/video/frame_encode_metadata_writer.cc
@@ -11,7 +11,6 @@
 #include "video/frame_encode_metadata_writer.h"
 
 #include <algorithm>
-#include <memory>
 #include <utility>
 
 #include "common_video/h264/sps_vui_rewriter.h"
@@ -202,36 +201,24 @@
   }
 }
 
-std::unique_ptr<RTPFragmentationHeader>
-FrameEncodeMetadataWriter::UpdateBitstream(
+void FrameEncodeMetadataWriter::UpdateBitstream(
     const CodecSpecificInfo* codec_specific_info,
-    const RTPFragmentationHeader* fragmentation,
     EncodedImage* encoded_image) {
   if (!codec_specific_info ||
-      codec_specific_info->codecType != kVideoCodecH264 || !fragmentation ||
+      codec_specific_info->codecType != kVideoCodecH264 ||
       encoded_image->_frameType != VideoFrameType::kVideoFrameKey) {
-    return nullptr;
+    return;
   }
 
-  rtc::Buffer modified_buffer;
-  std::unique_ptr<RTPFragmentationHeader> modified_fragmentation =
-      std::make_unique<RTPFragmentationHeader>();
-  modified_fragmentation->CopyFrom(*fragmentation);
-
   // Make sure that the data is not copied if owned by EncodedImage.
   const EncodedImage& buffer = *encoded_image;
-  SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
-      buffer, fragmentation->fragmentationVectorSize,
-      fragmentation->fragmentationOffset, fragmentation->fragmentationLength,
-      encoded_image->ColorSpace(), &modified_buffer,
-      modified_fragmentation->fragmentationOffset,
-      modified_fragmentation->fragmentationLength);
+  rtc::Buffer modified_buffer =
+      SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
+          buffer, encoded_image->ColorSpace());
 
   encoded_image->SetEncodedData(
       new rtc::RefCountedObject<EncodedImageBufferWrapper>(
           std::move(modified_buffer)));
-
-  return modified_fragmentation;
 }
 
 void FrameEncodeMetadataWriter::Reset() {
diff --git a/video/frame_encode_metadata_writer.h b/video/frame_encode_metadata_writer.h
index 32b5872..8847145 100644
--- a/video/frame_encode_metadata_writer.h
+++ b/video/frame_encode_metadata_writer.h
@@ -12,7 +12,6 @@
 #define VIDEO_FRAME_ENCODE_METADATA_WRITER_H_
 
 #include <list>
-#include <memory>
 #include <vector>
 
 #include "absl/types/optional.h"
@@ -37,10 +36,8 @@
 
   void FillTimingInfo(size_t simulcast_svc_idx, EncodedImage* encoded_image);
 
-  std::unique_ptr<RTPFragmentationHeader> UpdateBitstream(
-      const CodecSpecificInfo* codec_specific_info,
-      const RTPFragmentationHeader* fragmentation,
-      EncodedImage* encoded_image);
+  void UpdateBitstream(const CodecSpecificInfo* codec_specific_info,
+                       EncodedImage* encoded_image);
 
   void Reset();
 
diff --git a/video/frame_encode_metadata_writer_unittest.cc b/video/frame_encode_metadata_writer_unittest.cc
index 2f74599..936ebc4 100644
--- a/video/frame_encode_metadata_writer_unittest.cc
+++ b/video/frame_encode_metadata_writer_unittest.cc
@@ -463,13 +463,10 @@
 TEST(FrameEncodeMetadataWriterTest, DoesNotRewriteBitstreamWithoutCodecInfo) {
   uint8_t buffer[] = {1, 2, 3};
   EncodedImage image(buffer, sizeof(buffer), sizeof(buffer));
-  const RTPFragmentationHeader fragmentation;
 
   FakeEncodedImageCallback sink;
   FrameEncodeMetadataWriter encode_metadata_writer(&sink);
-  EXPECT_EQ(
-      encode_metadata_writer.UpdateBitstream(nullptr, &fragmentation, &image),
-      nullptr);
+  encode_metadata_writer.UpdateBitstream(nullptr, &image);
   EXPECT_EQ(image.data(), buffer);
   EXPECT_EQ(image.size(), sizeof(buffer));
 }
@@ -479,29 +476,10 @@
   EncodedImage image(buffer, sizeof(buffer), sizeof(buffer));
   CodecSpecificInfo codec_specific_info;
   codec_specific_info.codecType = kVideoCodecVP8;
-  const RTPFragmentationHeader fragmentation;
 
   FakeEncodedImageCallback sink;
   FrameEncodeMetadataWriter encode_metadata_writer(&sink);
-  EXPECT_EQ(encode_metadata_writer.UpdateBitstream(&codec_specific_info,
-                                                   &fragmentation, &image),
-            nullptr);
-  EXPECT_EQ(image.data(), buffer);
-  EXPECT_EQ(image.size(), sizeof(buffer));
-}
-
-TEST(FrameEncodeMetadataWriterTest,
-     DoesNotRewriteH264BitstreamWithoutFragmentation) {
-  uint8_t buffer[] = {1, 2, 3};
-  EncodedImage image(buffer, sizeof(buffer), sizeof(buffer));
-  CodecSpecificInfo codec_specific_info;
-  codec_specific_info.codecType = kVideoCodecH264;
-
-  FakeEncodedImageCallback sink;
-  FrameEncodeMetadataWriter encode_metadata_writer(&sink);
-  EXPECT_EQ(encode_metadata_writer.UpdateBitstream(&codec_specific_info,
-                                                   nullptr, &image),
-            nullptr);
+  encode_metadata_writer.UpdateBitstream(&codec_specific_info, &image);
   EXPECT_EQ(image.data(), buffer);
   EXPECT_EQ(image.size(), sizeof(buffer));
 }
@@ -521,24 +499,12 @@
   CodecSpecificInfo codec_specific_info;
   codec_specific_info.codecType = kVideoCodecH264;
 
-  RTPFragmentationHeader fragmentation;
-  fragmentation.VerifyAndAllocateFragmentationHeader(1);
-  fragmentation.fragmentationOffset[0] = 4;
-  fragmentation.fragmentationLength[0] = sizeof(original_sps) - 4;
-
   FakeEncodedImageCallback sink;
   FrameEncodeMetadataWriter encode_metadata_writer(&sink);
-  std::unique_ptr<RTPFragmentationHeader> modified_fragmentation =
-      encode_metadata_writer.UpdateBitstream(&codec_specific_info,
-                                             &fragmentation, &image);
+  encode_metadata_writer.UpdateBitstream(&codec_specific_info, &image);
 
-  ASSERT_NE(modified_fragmentation, nullptr);
   EXPECT_THAT(std::vector<uint8_t>(image.data(), image.data() + image.size()),
               testing::ElementsAreArray(kRewrittenSps));
-  ASSERT_THAT(modified_fragmentation->fragmentationVectorSize, 1U);
-  EXPECT_EQ(modified_fragmentation->fragmentationOffset[0], 4U);
-  EXPECT_EQ(modified_fragmentation->fragmentationLength[0],
-            sizeof(kRewrittenSps) - 4);
 }
 
 }  // namespace test
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index c0fcd1f..80b7963 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -1537,7 +1537,7 @@
 EncodedImageCallback::Result VideoStreamEncoder::OnEncodedImage(
     const EncodedImage& encoded_image,
     const CodecSpecificInfo* codec_specific_info,
-    const RTPFragmentationHeader* fragmentation) {
+    const RTPFragmentationHeader* /*fragmentation*/) {
   TRACE_EVENT_INSTANT1("webrtc", "VCMEncodedFrameCallback::Encoded",
                        "timestamp", encoded_image.Timestamp());
   const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
@@ -1545,9 +1545,8 @@
 
   frame_encode_metadata_writer_.FillTimingInfo(spatial_idx, &image_copy);
 
-  std::unique_ptr<RTPFragmentationHeader> fragmentation_copy =
-      frame_encode_metadata_writer_.UpdateBitstream(codec_specific_info,
-                                                    fragmentation, &image_copy);
+  frame_encode_metadata_writer_.UpdateBitstream(codec_specific_info,
+                                                &image_copy);
 
   // Piggyback ALR experiment group id and simulcast id into the content type.
   const uint8_t experiment_id =
@@ -1613,9 +1612,8 @@
     simulcast_id = encoded_image.SpatialIndex().value_or(0);
   }
 
-  EncodedImageCallback::Result result = sink_->OnEncodedImage(
-      image_copy, codec_specific_info,
-      fragmentation_copy ? fragmentation_copy.get() : fragmentation);
+  EncodedImageCallback::Result result =
+      sink_->OnEncodedImage(image_copy, codec_specific_info, nullptr);
 
   // We are only interested in propagating the meta-data about the image, not
   // encoded data itself, to the post encode function. Since we cannot be sure
diff --git a/video/video_stream_encoder_unittest.cc b/video/video_stream_encoder_unittest.cc
index 46f218e..b651c7b 100644
--- a/video/video_stream_encoder_unittest.cc
+++ b/video/video_stream_encoder_unittest.cc
@@ -1217,23 +1217,15 @@
       return std::move(last_encoded_image_data_);
     }
 
-    RTPFragmentationHeader GetLastFragmentation() {
-      MutexLock lock(&mutex_);
-      return std::move(last_fragmentation_);
-    }
-
    private:
     Result OnEncodedImage(
         const EncodedImage& encoded_image,
         const CodecSpecificInfo* codec_specific_info,
-        const RTPFragmentationHeader* fragmentation) override {
+        const RTPFragmentationHeader* /*fragmentation*/) override {
       MutexLock lock(&mutex_);
       EXPECT_TRUE(expect_frames_);
       last_encoded_image_data_ = std::vector<uint8_t>(
           encoded_image.data(), encoded_image.data() + encoded_image.size());
-      if (fragmentation) {
-        last_fragmentation_.CopyFrom(*fragmentation);
-      }
       uint32_t timestamp = encoded_image.Timestamp();
       if (last_timestamp_ != timestamp) {
         num_received_layers_ = 1;
@@ -1265,7 +1257,6 @@
     TestEncoder* test_encoder_;
     rtc::Event encoded_frame_event_;
     std::vector<uint8_t> last_encoded_image_data_;
-    RTPFragmentationHeader last_fragmentation_;
     uint32_t last_timestamp_ = 0;
     int64_t last_capture_time_ms_ = 0;
     uint32_t last_height_ = 0;
@@ -5445,10 +5436,6 @@
 
   EXPECT_THAT(sink_.GetLastEncodedImageData(),
               testing::ElementsAreArray(optimal_sps));
-  RTPFragmentationHeader last_fragmentation = sink_.GetLastFragmentation();
-  ASSERT_THAT(last_fragmentation.fragmentationVectorSize, 1U);
-  EXPECT_EQ(last_fragmentation.fragmentationOffset[0], 4U);
-  EXPECT_EQ(last_fragmentation.fragmentationLength[0], sizeof(optimal_sps) - 4);
 
   video_stream_encoder_->Stop();
 }
@@ -5476,10 +5463,6 @@
 
   EXPECT_THAT(sink_.GetLastEncodedImageData(),
               testing::ElementsAreArray(optimal_sps));
-  RTPFragmentationHeader last_fragmentation = sink_.GetLastFragmentation();
-  ASSERT_THAT(last_fragmentation.fragmentationVectorSize, 1U);
-  EXPECT_EQ(last_fragmentation.fragmentationOffset[0], 4U);
-  EXPECT_EQ(last_fragmentation.fragmentationLength[0], sizeof(optimal_sps) - 4);
 
   video_stream_encoder_->Stop();
 }