Padding is now used to check for continuity in the packet sequence.

BUG=webrtc:5514

Review-Url: https://codereview.webrtc.org/2051453002
Cr-Original-Commit-Position: refs/heads/master@{#13383}
Cr-Mirrored-From: https://chromium.googlesource.com/external/webrtc
Cr-Mirrored-Commit: 9b2ce6be093c24a4b8bca76841e757151701ddad
diff --git a/modules/video_coding/packet_buffer.cc b/modules/video_coding/packet_buffer.cc
index ad0054f..c5745d2 100644
--- a/modules/video_coding/packet_buffer.cc
+++ b/modules/video_coding/packet_buffer.cc
@@ -66,6 +66,12 @@
   if (AheadOf(seq_num, last_seq_num_))
     last_seq_num_ = seq_num;
 
+  // If this is a padding or FEC packet, don't insert it.
+  if (packet.sizeBytes == 0) {
+    reference_finder_.PaddingReceived(packet.seqNum);
+    return true;
+  }
+
   sequence_buffer_[index].frame_begin = packet.isFirstPacket;
   sequence_buffer_[index].frame_end = packet.markerBit;
   sequence_buffer_[index].seq_num = packet.seqNum;
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index 7e87d38..9f372b6 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -51,6 +51,42 @@
   }
 }
 
+void RtpFrameReferenceFinder::PaddingReceived(uint16_t seq_num) {
+  rtc::CritScope lock(&crit_);
+  auto clean_padding_to =
+      stashed_padding_.lower_bound(seq_num - kMaxPaddingAge);
+  stashed_padding_.erase(stashed_padding_.begin(), clean_padding_to);
+  stashed_padding_.insert(seq_num);
+  UpdateLastPictureIdWithPadding(seq_num);
+  RetryStashedFrames();
+}
+
+void RtpFrameReferenceFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) {
+  auto gop_seq_num_it = last_seq_num_gop_.upper_bound(seq_num);
+
+  // If this padding packet "belongs" to a group of pictures that we don't track
+  // anymore, do nothing.
+  if (gop_seq_num_it == last_seq_num_gop_.begin())
+    return;
+  --gop_seq_num_it;
+
+  // Calculate the next contiuous sequence number and search for it in
+  // the padding packets we have stashed.
+  uint16_t next_seq_num_with_padding = gop_seq_num_it->second.second + 1;
+  auto padding_seq_num_it =
+      stashed_padding_.lower_bound(next_seq_num_with_padding);
+
+  // While there still are padding packets and those padding packets are
+  // continuous, then advance the "last-picture-id-with-padding" and remove
+  // the stashed padding packet.
+  while (padding_seq_num_it != stashed_padding_.end() &&
+         *padding_seq_num_it == next_seq_num_with_padding) {
+    gop_seq_num_it->second.second = next_seq_num_with_padding;
+    ++next_seq_num_with_padding;
+    padding_seq_num_it = stashed_padding_.erase(padding_seq_num_it);
+  }
+}
+
 void RtpFrameReferenceFinder::RetryStashedFrames() {
   size_t num_stashed_frames = stashed_frames_.size();
 
@@ -84,8 +120,11 @@
     return;
   }
 
-  if (frame->frame_type() == kVideoFrameKey)
-    last_seq_num_gop_[frame->last_seq_num()] = frame->last_seq_num();
+  if (frame->frame_type() == kVideoFrameKey) {
+    last_seq_num_gop_.insert(std::make_pair(
+        frame->last_seq_num(),
+        std::make_pair(frame->last_seq_num(), frame->last_seq_num())));
+  }
 
   // We have received a frame but not yet a keyframe, stash this frame.
   if (last_seq_num_gop_.empty()) {
@@ -102,13 +141,21 @@
   // Find the last sequence number of the last frame for the keyframe
   // that this frame indirectly references.
   auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num());
+  if (seq_num_it == last_seq_num_gop_.begin()) {
+    LOG(LS_WARNING) << "Generic frame with packet range ["
+                    << frame->first_seq_num() << ", " << frame->last_seq_num()
+                    << "] has no Gop, dropping frame.";
+    return;
+  }
   seq_num_it--;
 
   // Make sure the packet sequence numbers are continuous, otherwise stash
   // this frame.
+  uint16_t last_picture_id_gop = seq_num_it->second.first;
+  uint16_t last_picture_id_with_padding_gop = seq_num_it->second.second;
   if (frame->frame_type() == kVideoFrameDelta) {
-    if (seq_num_it->second !=
-        static_cast<uint16_t>(frame->first_seq_num() - 1)) {
+    uint16_t prev_seq_num = frame->first_seq_num() - 1;
+    if (prev_seq_num != last_picture_id_with_padding_gop) {
       stashed_frames_.emplace(std::move(frame));
       return;
     }
@@ -120,10 +167,14 @@
   // picture id according to some incrementing counter.
   frame->picture_id = frame->last_seq_num();
   frame->num_references = frame->frame_type() == kVideoFrameDelta;
-  frame->references[0] = seq_num_it->second;
-  seq_num_it->second = frame->picture_id;
+  frame->references[0] = last_picture_id_gop;
+  if (AheadOf(frame->picture_id, last_picture_id_gop)) {
+    seq_num_it->second.first = frame->picture_id;
+    seq_num_it->second.second = frame->picture_id;
+  }
 
   last_picture_id_ = frame->picture_id;
+  UpdateLastPictureIdWithPadding(frame->picture_id);
   frame_callback_->OnCompleteFrame(std::move(frame));
   RetryStashedFrames();
 }
diff --git a/modules/video_coding/rtp_frame_reference_finder.h b/modules/video_coding/rtp_frame_reference_finder.h
index 5fd67ab..23c36c0 100644
--- a/modules/video_coding/rtp_frame_reference_finder.h
+++ b/modules/video_coding/rtp_frame_reference_finder.h
@@ -33,6 +33,7 @@
  public:
   explicit RtpFrameReferenceFinder(OnCompleteFrameCallback* frame_callback);
   void ManageFrame(std::unique_ptr<RtpFrameObject> frame);
+  void PaddingReceived(uint16_t seq_num);
 
  private:
   static const uint16_t kPicIdLength = 1 << 7;
@@ -41,9 +42,15 @@
   static const int kMaxStashedFrames = 10;
   static const int kMaxNotYetReceivedFrames = 20;
   static const int kMaxGofSaved = 15;
+  static const int kMaxPaddingAge = 100;
 
   rtc::CriticalSection crit_;
 
+  // Find the relevant group of pictures and update its "last-picture-id-with
+  // padding" sequence number.
+  void UpdateLastPictureIdWithPadding(uint16_t seq_num)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
   // Retry finding references for all frames that previously didn't have
   // all information needed.
   void RetryStashedFrames() EXCLUSIVE_LOCKS_REQUIRED(crit_);
@@ -93,15 +100,25 @@
   // All picture ids are unwrapped to 16 bits.
   uint16_t UnwrapPictureId(uint16_t picture_id) EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
-  // Holds the last sequence number of the last frame that has been created
-  // given the last sequence number of a given keyframe.
-  std::map<uint16_t, uint16_t, DescendingSeqNumComp<uint16_t>> last_seq_num_gop_
-      GUARDED_BY(crit_);
+
+  // For every group of pictures, hold two sequence numbers. The first being
+  // the sequence number of the last packet of the last completed frame, and
+  // the second being the sequence number of the last packet of the last
+  // completed frame advanced by any potential continuous packets of padding.
+  std::map<uint16_t,
+           std::pair<uint16_t, uint16_t>,
+           DescendingSeqNumComp<uint16_t>>
+      last_seq_num_gop_ GUARDED_BY(crit_);
 
   // Save the last picture id in order to detect when there is a gap in frames
   // that have not yet been fully received.
   int last_picture_id_ GUARDED_BY(crit_);
 
+  // Padding packets that have been received but that are not yet continuous
+  // with any group of pictures.
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> stashed_padding_
+      GUARDED_BY(crit_);
+
   // The last unwrapped picture id. Used to unwrap the picture id from a length
   // of |kPicIdLength| to 16 bits.
   int last_unwrap_ GUARDED_BY(crit_);
diff --git a/modules/video_coding/video_packet_buffer_unittest.cc b/modules/video_coding/video_packet_buffer_unittest.cc
index cffd392..8c3f555 100644
--- a/modules/video_coding/video_packet_buffer_unittest.cc
+++ b/modules/video_coding/video_packet_buffer_unittest.cc
@@ -29,7 +29,8 @@
   TestPacketBuffer()
       : rand_(0x8739211),
         packet_buffer_(new PacketBuffer(kStartSize, kMaxSize, this)),
-        frames_from_callback_(FrameComp()) {}
+        frames_from_callback_(FrameComp()),
+        dummy_data_(new uint8_t[kDummyDataSize]()) {}
 
   uint16_t Rand() { return rand_.Rand(std::numeric_limits<uint16_t>::max()); }
 
@@ -61,12 +62,17 @@
   };
 
   // Insert a generic packet into the packet buffer.
-  void InsertGeneric(uint16_t seq_num,              // packet sequence number
-                     bool keyframe,                 // is keyframe
-                     bool first,                    // is first packet of frame
-                     bool last,                     // is last packet of frame
-                     size_t data_size = 0,          // size of data
-                     uint8_t* data = nullptr) {     // data pointer
+  void InsertGeneric(uint16_t seq_num,           // packet sequence number
+                     bool keyframe,              // is keyframe
+                     bool first,                 // is first packet of frame
+                     bool last,                  // is last packet of frame
+                     int data_size = -1,         // size of data
+                     uint8_t* data = nullptr) {  // data pointer
+    if (data_size == -1) {
+      data_size = kDummyDataSize;
+      data = dummy_data_.get();
+    }
+
     VCMPacket packet;
     packet.codec = kVideoCodecGeneric;
     packet.seqNum = seq_num;
@@ -88,8 +94,13 @@
                  int32_t pid = kNoPictureId,    // picture id
                  uint8_t tid = kNoTemporalIdx,  // temporal id
                  int32_t tl0 = kNoTl0PicIdx,    // tl0 pic index
-                 size_t data_size = 0,          // size of data
+                 int data_size = -1,            // size of data
                  uint8_t* data = nullptr) {     // data pointer
+    if (data_size == -1) {
+      data_size = kDummyDataSize;
+      data = dummy_data_.get();
+    }
+
     VCMPacket packet;
     packet.codec = kVideoCodecVP8;
     packet.seqNum = seq_num;
@@ -117,8 +128,13 @@
                     uint8_t tid = kNoTemporalIdx,  // temporal id
                     int32_t tl0 = kNoTl0PicIdx,    // tl0 pic index
                     GofInfoVP9* ss = nullptr,      // scalability structure
-                    size_t data_size = 0,          // size of data
+                    int data_size = -1,            // size of data
                     uint8_t* data = nullptr) {     // data pointer
+    if (data_size == -1) {
+      data_size = kDummyDataSize;
+      data = dummy_data_.get();
+    }
+
     VCMPacket packet;
     packet.codec = kVideoCodecVP9;
     packet.seqNum = seq_num;
@@ -142,19 +158,24 @@
   }
 
   // Insert a Vp9 packet into the packet buffer.
-  void InsertVp9Flex(uint16_t seq_num,              // packet sequence number
-                     bool keyframe,                 // is keyframe
-                     bool first,                    // is first packet of frame
-                     bool last,                     // is last packet of frame
-                     bool inter,                    // depends on S-1 layer
-                     int32_t pid = kNoPictureId,    // picture id
-                     uint8_t sid = kNoSpatialIdx,   // spatial id
-                     uint8_t tid = kNoTemporalIdx,  // temporal id
-                     int32_t tl0 = kNoTl0PicIdx,    // tl0 pic index
-                     std::vector<uint8_t> refs =
-                       std::vector<uint8_t>(),      // frame references
-                     size_t data_size = 0,          // size of data
-                     uint8_t* data = nullptr) {     // data pointer
+  void InsertVp9Flex(
+      uint16_t seq_num,              // packet sequence number
+      bool keyframe,                 // is keyframe
+      bool first,                    // is first packet of frame
+      bool last,                     // is last packet of frame
+      bool inter,                    // depends on S-1 layer
+      int32_t pid = kNoPictureId,    // picture id
+      uint8_t sid = kNoSpatialIdx,   // spatial id
+      uint8_t tid = kNoTemporalIdx,  // temporal id
+      int32_t tl0 = kNoTl0PicIdx,    // tl0 pic index
+      std::vector<uint8_t> refs = std::vector<uint8_t>(),  // frame references
+      int data_size = -1,                                  // size of data
+      uint8_t* data = nullptr) {                           // data pointer
+    if (data_size == -1) {
+      data_size = kDummyDataSize;
+      data = dummy_data_.get();
+    }
+
     VCMPacket packet;
     packet.codec = kVideoCodecVP9;
     packet.seqNum = seq_num;
@@ -224,6 +245,7 @@
 
   const int kStartSize = 16;
   const int kMaxSize = 64;
+  const int kDummyDataSize = 4;
 
   Random rand_;
   std::unique_ptr<PacketBuffer> packet_buffer_;
@@ -238,6 +260,8 @@
   std::map<std::pair<uint16_t, uint8_t>,
            std::unique_ptr<FrameObject>,
            FrameComp> frames_from_callback_;
+
+  std::unique_ptr<uint8_t[]> dummy_data_;
 };
 
 TEST_F(TestPacketBuffer, InsertOnePacket) {
@@ -274,8 +298,8 @@
   packet.frameType = kVideoFrameKey;
   packet.isFirstPacket = true;
   packet.markerBit = false;
-  packet.sizeBytes = 0;
-  packet.dataPtr = nullptr;
+  packet.dataPtr = dummy_data_.get();
+  packet.sizeBytes = kDummyDataSize;
   packet.timesNacked = 0;
 
   packet_buffer_->InsertPacket(packet);
@@ -378,6 +402,8 @@
 TEST_F(TestPacketBuffer, DiscardOldPacket) {
   uint16_t seq_num = Rand();
   VCMPacket packet;
+  packet.dataPtr = dummy_data_.get();
+  packet.sizeBytes = kDummyDataSize;
   packet.seqNum = Rand();
   EXPECT_TRUE(packet_buffer_->InsertPacket(packet));
   packet.seqNum += 2;
@@ -397,6 +423,8 @@
 TEST_F(TestPacketBuffer, DiscardMultipleOldPackets) {
   uint16_t seq_num = Rand();
   VCMPacket packet;
+  packet.dataPtr = dummy_data_.get();
+  packet.sizeBytes = kDummyDataSize;
   packet.seqNum = seq_num;
   EXPECT_TRUE(packet_buffer_->InsertPacket(packet));
   packet.seqNum += 2;
@@ -472,9 +500,7 @@
   CheckReferencesVp8(seq_num + 3);
   EXPECT_TRUE(frames_from_callback_[std::make_pair(seq_num + 3, 0)]->
                                                           GetBitstream(result));
-  EXPECT_EQ(std::strcmp("many bitstream, such data",
-            reinterpret_cast<char*>(result)),
-            0);
+  EXPECT_EQ(memcmp(result, "many bitstream, such data", sizeof(result)), 0);
 }
 
 TEST_F(TestPacketBuffer, FreeSlotsOnFrameDestruction) {
@@ -515,6 +541,8 @@
 
 TEST_F(TestPacketBuffer, InvalidateFrameByClearing) {
   VCMPacket packet;
+  packet.dataPtr = dummy_data_.get();
+  packet.sizeBytes = kDummyDataSize;
   packet.codec = kVideoCodecGeneric;
   packet.frameType = kVideoFrameKey;
   packet.isFirstPacket = kT;
@@ -527,6 +555,49 @@
   EXPECT_FALSE(frames_from_callback_.begin()->second->GetBitstream(nullptr));
 }
 
+TEST_F(TestPacketBuffer, PaddingPackets) {
+  uint16_t seq_num = Rand();
+
+  //            seq_num    , kf, frst, lst
+  InsertGeneric(seq_num    , kT, kT  , kT);
+  InsertGeneric(seq_num + 1, kF, kF  , kF, 0, nullptr);
+  InsertGeneric(seq_num + 3, kF, kT  , kT);
+  EXPECT_EQ(1UL, frames_from_callback_.size());
+  InsertGeneric(seq_num + 2, kF, kF  , kF, 0, nullptr);
+  EXPECT_EQ(2UL, frames_from_callback_.size());
+}
+
+TEST_F(TestPacketBuffer, PaddingPacketsReordered) {
+  uint16_t seq_num = Rand();
+
+  //            seq_num    , kf, frst, lst
+  InsertGeneric(seq_num    , kT, kT  , kT);
+  InsertGeneric(seq_num + 1, kF, kF  , kF, 0, nullptr);
+  InsertGeneric(seq_num + 2, kF, kT  , kF);
+  InsertGeneric(seq_num + 4, kF, kF  , kF, 0, nullptr);
+  InsertGeneric(seq_num + 3, kF, kF  , kT);
+  EXPECT_EQ(2UL, frames_from_callback_.size());
+  CheckReferencesGeneric(seq_num);
+  CheckReferencesGeneric(seq_num + 3, seq_num);
+}
+
+TEST_F(TestPacketBuffer, PaddingPacketsReorderedMultipleKeyframes) {
+  uint16_t seq_num = Rand();
+
+  //            seq_num    , kf, frst, lst
+  InsertGeneric(seq_num    , kT, kT  , kT);
+  InsertGeneric(seq_num + 2, kF, kT  , kF);
+  InsertGeneric(seq_num + 1, kF, kF  , kF, 0, nullptr);
+  InsertGeneric(seq_num + 4, kF, kF  , kF, 0, nullptr);
+  InsertGeneric(seq_num + 5, kT, kT  , kT);
+  InsertGeneric(seq_num + 3, kF, kF  , kT);
+  InsertGeneric(seq_num + 6, kF, kF  , kF, 0, nullptr);
+  InsertGeneric(seq_num + 9, kF, kF  , kF, 0, nullptr);
+  InsertGeneric(seq_num + 8, kF, kF  , kT);
+  InsertGeneric(seq_num + 7, kF, kT  , kF);
+  EXPECT_EQ(4UL, frames_from_callback_.size());
+}
+
 TEST_F(TestPacketBuffer, Vp8NoPictureId) {
   uint16_t seq_num = Rand();