Reset |reference_finder_| on codec switch.

In this CL:
 - Moved critical section out of RtpFrameReferenceFinder.
 - RtpFrameReferenceFinder can now assign picture ids with an offset.
 - RtpVideoStreamReceiver will now reset the |reference_finder_| in case
   of a codec switch.

Bug: webrtc:10795, webrtc:10828
Change-Id: I22631c121a465c434de24af5ce8be2a647fe3556
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/154353
Reviewed-by: Åsa Persson <asapersson@webrtc.org>
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29317}
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index 0f79218..4932c70 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -25,17 +25,21 @@
 
 RtpFrameReferenceFinder::RtpFrameReferenceFinder(
     OnCompleteFrameCallback* frame_callback)
+    : RtpFrameReferenceFinder(frame_callback, /*picture_id_offset=*/0) {}
+
+RtpFrameReferenceFinder::RtpFrameReferenceFinder(
+    OnCompleteFrameCallback* frame_callback,
+    int64_t picture_id_offset)
     : last_picture_id_(-1),
       current_ss_idx_(0),
       cleared_to_seq_num_(-1),
-      frame_callback_(frame_callback) {}
+      frame_callback_(frame_callback),
+      picture_id_offset_(picture_id_offset) {}
 
 RtpFrameReferenceFinder::~RtpFrameReferenceFinder() = default;
 
 void RtpFrameReferenceFinder::ManageFrame(
     std::unique_ptr<RtpFrameObject> frame) {
-  rtc::CritScope lock(&crit_);
-
   // If we have cleared past this frame, drop it.
   if (cleared_to_seq_num_ != -1 &&
       AheadOf<uint16_t>(cleared_to_seq_num_, frame->first_seq_num())) {
@@ -51,7 +55,7 @@
       stashed_frames_.push_front(std::move(frame));
       break;
     case kHandOff:
-      frame_callback_->OnCompleteFrame(std::move(frame));
+      HandOffFrame(std::move(frame));
       RetryStashedFrames();
       break;
     case kDrop:
@@ -73,7 +77,7 @@
           break;
         case kHandOff:
           complete_frame = true;
-          frame_callback_->OnCompleteFrame(std::move(*frame_it));
+          HandOffFrame(std::move(*frame_it));
           RTC_FALLTHROUGH();
         case kDrop:
           frame_it = stashed_frames_.erase(frame_it);
@@ -82,6 +86,16 @@
   } while (complete_frame);
 }
 
+void RtpFrameReferenceFinder::HandOffFrame(
+    std::unique_ptr<RtpFrameObject> frame) {
+  // Shift the frame id and every reference by the same offset, then deliver.
+  frame->id.picture_id += picture_id_offset_;
+  for (size_t ref = 0; ref != frame->num_references; ++ref) {
+    frame->references[ref] += picture_id_offset_;
+  }
+  frame_callback_->OnCompleteFrame(std::move(frame));
+}
+
 RtpFrameReferenceFinder::FrameDecision
 RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) {
   absl::optional<RtpGenericFrameDescriptor> generic_descriptor =
@@ -110,7 +124,6 @@
 }
 
 void RtpFrameReferenceFinder::PaddingReceived(uint16_t seq_num) {
-  rtc::CritScope lock(&crit_);
   auto clean_padding_to =
       stashed_padding_.lower_bound(seq_num - kMaxPaddingAge);
   stashed_padding_.erase(stashed_padding_.begin(), clean_padding_to);
@@ -120,7 +133,6 @@
 }
 
 void RtpFrameReferenceFinder::ClearTo(uint16_t seq_num) {
-  rtc::CritScope lock(&crit_);
   cleared_to_seq_num_ = seq_num;
 
   auto it = stashed_frames_.begin();
diff --git a/modules/video_coding/rtp_frame_reference_finder.h b/modules/video_coding/rtp_frame_reference_finder.h
index 176bb66..715c1dd 100644
--- a/modules/video_coding/rtp_frame_reference_finder.h
+++ b/modules/video_coding/rtp_frame_reference_finder.h
@@ -42,6 +42,8 @@
 class RtpFrameReferenceFinder {
  public:
   explicit RtpFrameReferenceFinder(OnCompleteFrameCallback* frame_callback);
+  explicit RtpFrameReferenceFinder(OnCompleteFrameCallback* frame_callback,
+                                   int64_t picture_id_offset);
   ~RtpFrameReferenceFinder();
 
   // Manage this frame until:
@@ -77,83 +79,70 @@
     uint16_t last_picture_id;
   };
 
-  rtc::CriticalSection crit_;
-
   // Find the relevant group of pictures and update its "last-picture-id-with
   // padding" sequence number.
-  void UpdateLastPictureIdWithPadding(uint16_t seq_num)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void UpdateLastPictureIdWithPadding(uint16_t seq_num);
 
   // Retry stashed frames until no more complete frames are found.
-  void RetryStashedFrames() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void RetryStashedFrames();
 
-  FrameDecision ManageFrameInternal(RtpFrameObject* frame)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void HandOffFrame(std::unique_ptr<RtpFrameObject> frame);
+
+  FrameDecision ManageFrameInternal(RtpFrameObject* frame);
 
   FrameDecision ManageFrameGeneric(RtpFrameObject* frame,
-                                   const RtpGenericFrameDescriptor& descriptor)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+                                   const RtpGenericFrameDescriptor& descriptor);
 
   // Find references for frames with no or very limited information in the
   // descriptor. If |picture_id| is unspecified then packet sequence numbers
   // will be used to determine the references of the frames.
-  FrameDecision ManageFramePidOrSeqNum(RtpFrameObject* frame, int picture_id)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  FrameDecision ManageFramePidOrSeqNum(RtpFrameObject* frame, int picture_id);
 
   // Find references for Vp8 frames
-  FrameDecision ManageFrameVp8(RtpFrameObject* frame)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  FrameDecision ManageFrameVp8(RtpFrameObject* frame);
 
   // Updates necessary layer info state used to determine frame references for
   // Vp8.
   void UpdateLayerInfoVp8(RtpFrameObject* frame,
                           int64_t unwrapped_tl0,
-                          uint8_t temporal_idx)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+                          uint8_t temporal_idx);
 
   // Find references for Vp9 frames
-  FrameDecision ManageFrameVp9(RtpFrameObject* frame)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  FrameDecision ManageFrameVp9(RtpFrameObject* frame);
 
   // Check if we are missing a frame necessary to determine the references
   // for this frame.
-  bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info);
 
   // Updates which frames that have been received. If there is a gap,
   // missing frames will be added to |missing_frames_for_layer_| or
   // if this is an already missing frame then it will be removed.
-  void FrameReceivedVp9(uint16_t picture_id, GofInfo* info)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void FrameReceivedVp9(uint16_t picture_id, GofInfo* info);
 
   // Check if there is a frame with the up-switch flag set in the interval
   // (|pid_ref|, |picture_id|) with temporal layer smaller than |temporal_idx|.
   bool UpSwitchInIntervalVp9(uint16_t picture_id,
                              uint8_t temporal_idx,
-                             uint16_t pid_ref)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+                             uint16_t pid_ref);
 
   // Unwrap |frame|s picture id and its references to 16 bits.
-  void UnwrapPictureIds(RtpFrameObject* frame)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void UnwrapPictureIds(RtpFrameObject* frame);
 
   // Find references for H264 frames
-  FrameDecision ManageFrameH264(RtpFrameObject* frame)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  FrameDecision ManageFrameH264(RtpFrameObject* frame);
 
   // Update "last-picture-id-with-padding" sequence number for H264.
-  void UpdateLastPictureIdWithPaddingH264() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void UpdateLastPictureIdWithPaddingH264();
 
   // Update H264 layer info state used to determine frame references.
   void UpdateLayerInfoH264(RtpFrameObject* frame,
                            int64_t unwrapped_tl0,
-                           uint8_t temporal_idx)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+                           uint8_t temporal_idx);
 
   // Update H264 state for decodeable frames.
   void UpdateDataH264(RtpFrameObject* frame,
                       int64_t unwrapped_tl0,
-                      uint8_t temporal_idx) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
+                      uint8_t temporal_idx);
 
   // For every group of pictures, hold two sequence numbers. The first being
   // the sequence number of the last packet of the last completed frame, and
@@ -162,76 +151,73 @@
   std::map<uint16_t,
            std::pair<uint16_t, uint16_t>,
            DescendingSeqNumComp<uint16_t>>
-      last_seq_num_gop_ RTC_GUARDED_BY(crit_);
+      last_seq_num_gop_;
 
   // Save the last picture id in order to detect when there is a gap in frames
   // that have not yet been fully received.
-  int last_picture_id_ RTC_GUARDED_BY(crit_);
+  int last_picture_id_;
 
   // Padding packets that have been received but that are not yet continuous
   // with any group of pictures.
-  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> stashed_padding_
-      RTC_GUARDED_BY(crit_);
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> stashed_padding_;
 
   // Frames earlier than the last received frame that have not yet been
   // fully received.
   std::set<uint16_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>
-      not_yet_received_frames_ RTC_GUARDED_BY(crit_);
+      not_yet_received_frames_;
 
   // Sequence numbers of frames earlier than the last received frame that
   // have not yet been fully received.
-  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> not_yet_received_seq_num_
-      RTC_GUARDED_BY(crit_);
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> not_yet_received_seq_num_;
 
   // Frames that have been fully received but didn't have all the information
   // needed to determine their references.
-  std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_
-      RTC_GUARDED_BY(crit_);
+  std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_;
 
   // Holds the information about the last completed frame for a given temporal
   // layer given an unwrapped Tl0 picture index.
-  std::map<int64_t, std::array<int64_t, kMaxTemporalLayers>> layer_info_
-      RTC_GUARDED_BY(crit_);
+  std::map<int64_t, std::array<int64_t, kMaxTemporalLayers>> layer_info_;
 
   // Where the current scalability structure is in the
   // |scalability_structures_| array.
   uint8_t current_ss_idx_;
 
   // Holds received scalability structures.
-  std::array<GofInfoVP9, kMaxGofSaved> scalability_structures_
-      RTC_GUARDED_BY(crit_);
+  std::array<GofInfoVP9, kMaxGofSaved> scalability_structures_;
 
   // Holds the the Gof information for a given unwrapped TL0 picture index.
-  std::map<int64_t, GofInfo> gof_info_ RTC_GUARDED_BY(crit_);
+  std::map<int64_t, GofInfo> gof_info_;
 
   // Keep track of which picture id and which temporal layer that had the
   // up switch flag set.
   std::map<uint16_t, uint8_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>
-      up_switch_ RTC_GUARDED_BY(crit_);
+      up_switch_;
 
   // For every temporal layer, keep a set of which frames that are missing.
   std::array<std::set<uint16_t, DescendingSeqNumComp<uint16_t, kPicIdLength>>,
              kMaxTemporalLayers>
-      missing_frames_for_layer_ RTC_GUARDED_BY(crit_);
+      missing_frames_for_layer_;
 
   // How far frames have been cleared by sequence number. A frame will be
   // cleared if it contains a packet with a sequence number older than
   // |cleared_to_seq_num_|.
-  int cleared_to_seq_num_ RTC_GUARDED_BY(crit_);
+  int cleared_to_seq_num_;
 
   OnCompleteFrameCallback* frame_callback_;
 
-  SeqNumUnwrapper<uint16_t> generic_frame_id_unwrapper_ RTC_GUARDED_BY(crit_);
+  SeqNumUnwrapper<uint16_t> generic_frame_id_unwrapper_;
 
   // Unwrapper used to unwrap generic RTP streams. In a generic stream we derive
   // a picture id from the packet sequence number.
-  SeqNumUnwrapper<uint16_t> rtp_seq_num_unwrapper_ RTC_GUARDED_BY(crit_);
+  SeqNumUnwrapper<uint16_t> rtp_seq_num_unwrapper_;
 
   // Unwrapper used to unwrap VP8/VP9 streams which have their picture id
   // specified.
-  SeqNumUnwrapper<uint16_t, kPicIdLength> unwrapper_ RTC_GUARDED_BY(crit_);
+  SeqNumUnwrapper<uint16_t, kPicIdLength> unwrapper_;
 
-  SeqNumUnwrapper<uint8_t> tl0_unwrapper_ RTC_GUARDED_BY(crit_);
+  SeqNumUnwrapper<uint8_t> tl0_unwrapper_;
+
+  const int64_t picture_id_offset_;
 };
 
 }  // namespace video_coding
diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc
index ae9a3ca..dcc015d 100644
--- a/video/rtp_video_stream_receiver.cc
+++ b/video/rtp_video_stream_receiver.cc
@@ -10,6 +10,8 @@
 
 #include "video/rtp_video_stream_receiver.h"
 
+#include <algorithm>
+#include <limits>
 #include <utility>
 #include <vector>
 
@@ -539,6 +541,37 @@
     has_received_frame_ = true;
   }
 
+  rtc::CritScope lock(&reference_finder_lock_);
+  // Reset |reference_finder_| if |frame| is new and the codec has changed.
+  if (current_codec_) {
+    bool frame_is_newer =
+        AheadOf(frame->Timestamp(), last_assembled_frame_rtp_timestamp_);
+
+    if (frame->codec_type() != current_codec_) {
+      if (frame_is_newer) {
+        // When we reset the |reference_finder_| we don't want new picture ids
+        // to overlap with old picture ids. To ensure that doesn't happen we
+        // start from the |last_completed_picture_id_| and add an offset in case
+        // of reordering.
+        reference_finder_ =
+            std::make_unique<video_coding::RtpFrameReferenceFinder>(
+                this, last_completed_picture_id_ +
+                          std::numeric_limits<uint16_t>::max());
+        current_codec_ = frame->codec_type();
+      } else {
+        // Old frame from before the codec switch, discard it.
+        return;
+      }
+    }
+
+    if (frame_is_newer) {
+      last_assembled_frame_rtp_timestamp_ = frame->Timestamp();
+    }
+  } else {
+    current_codec_ = frame->codec_type();
+    last_assembled_frame_rtp_timestamp_ = frame->Timestamp();
+  }
+
   if (buffered_frame_decryptor_ == nullptr) {
     reference_finder_->ManageFrame(std::move(frame));
   } else {
@@ -555,11 +588,14 @@
     last_seq_num_for_pic_id_[rtp_frame->id.picture_id] =
         rtp_frame->last_seq_num();
   }
+  last_completed_picture_id_ =
+      std::max(last_completed_picture_id_, frame->id.picture_id);
   complete_frame_callback_->OnCompleteFrame(std::move(frame));
 }
 
 void RtpVideoStreamReceiver::OnDecryptedFrame(
     std::unique_ptr<video_coding::RtpFrameObject> frame) {
+  rtc::CritScope lock(&reference_finder_lock_);  // Finder has no internal lock.
   reference_finder_->ManageFrame(std::move(frame));
 }
 
@@ -750,7 +786,10 @@
 // RtpFrameReferenceFinder will need to know about padding to
 // correctly calculate frame references.
 void RtpVideoStreamReceiver::NotifyReceiverOfEmptyPacket(uint16_t seq_num) {
-  reference_finder_->PaddingReceived(seq_num);
+  {
+    rtc::CritScope lock(&reference_finder_lock_);
+    reference_finder_->PaddingReceived(seq_num);
+  }
   packet_buffer_.PaddingReceived(seq_num);
   if (nack_module_) {
     nack_module_->OnReceivedPacket(seq_num, /* is_keyframe = */ false,
@@ -828,6 +867,7 @@
   }
   if (seq_num != -1) {
     packet_buffer_.ClearTo(seq_num);
+    rtc::CritScope lock(&reference_finder_lock_);
     reference_finder_->ClearTo(seq_num);
   }
 }
diff --git a/video/rtp_video_stream_receiver.h b/video/rtp_video_stream_receiver.h
index 392bf55..1779fa6 100644
--- a/video/rtp_video_stream_receiver.h
+++ b/video/rtp_video_stream_receiver.h
@@ -274,7 +274,13 @@
   std::unique_ptr<LossNotificationController> loss_notification_controller_;
 
   video_coding::PacketBuffer packet_buffer_;
-  std::unique_ptr<video_coding::RtpFrameReferenceFinder> reference_finder_;
+
+  rtc::CriticalSection reference_finder_lock_;
+  std::unique_ptr<video_coding::RtpFrameReferenceFinder> reference_finder_
+      RTC_GUARDED_BY(reference_finder_lock_);
+  absl::optional<VideoCodecType> current_codec_;
+  uint32_t last_assembled_frame_rtp_timestamp_;
+
   rtc::CriticalSection last_seq_num_cs_;
   std::map<int64_t, uint16_t> last_seq_num_for_pic_id_
       RTC_GUARDED_BY(last_seq_num_cs_);
@@ -311,6 +317,8 @@
       RTC_PT_GUARDED_BY(network_tc_);
   std::atomic<bool> frames_decryptable_;
   absl::optional<ColorSpace> last_color_space_;
+
+  int64_t last_completed_picture_id_ = 0;
 };
 
 }  // namespace webrtc