Add BufferedFrameDecryptor to cleanly deal with receiving encrypted frames.

This change introduces a new class BufferedFrameDecryptor that is responsible
for decrypting received encrypted frames and passing them on to the
RtpReferenceFinder. This decoupling refactoring was triggered by a new
optimization also introduced in this patch to stash a small number of
undecryptable frames if no frames have ever been decrypted. The goal of this
optimization is to prevent re-fetching of key frames on low bandwidth networks
simply because the key to decrypt them had not arrived yet.

The optimization will stash 24 frames (about 1 second of video) in a ring buffer
and will attempt to re-decrypt previously received frames on the first valid
decryption. This allows the decoder to receive the key frame without having
to re-request it due to short key delivery latencies. In testing this is
actually hit quite often and saves an entire RTT, which can be up to 200ms on a
bad network.

As the scope of frame encryption increases in WebRTC and has more specialized
optimizations that do not apply to the general flow it makes sense to move it
to a more explicit bump in the stack protocol that is decoupled from the WebRTC
main flow, similar to how SRTP is utilized with srtp_protect and srtp_unprotect.

One advantage of this approach is the BufferedFrameDecryptor isn't even
constructed if FrameEncryption is not in use.

I have decided against merging the RtpReferenceFinder and EncryptedFrame stash
because it introduced a lot of complexity around the mixed scenario where some
of the frames in the stash are encrypted and others are not. In this case we
would need to mark certain frames as decrypted which appeared to introduce more
complexity than this simple decoupling.

Bug: webrtc:10022
Change-Id: Iab74f7b7d25ef1cdd15c4a76b5daae1cfa24932c
Reviewed-on: https://webrtc-review.googlesource.com/c/112221
Commit-Queue: Benjamin Wright <benwright@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25865}
diff --git a/modules/video_coding/frame_object.h b/modules/video_coding/frame_object.h
index b5a35e7..bb4513c 100644
--- a/modules/video_coding/frame_object.h
+++ b/modules/video_coding/frame_object.h
@@ -31,7 +31,7 @@
                  int times_nacked,
                  int64_t received_time);
 
-  ~RtpFrameObject();
+  ~RtpFrameObject() override;
   uint16_t first_seq_num() const;
   uint16_t last_seq_num() const;
   int times_nacked() const;
diff --git a/video/BUILD.gn b/video/BUILD.gn
index f7a6673..4a3efa2 100644
--- a/video/BUILD.gn
+++ b/video/BUILD.gn
@@ -10,6 +10,8 @@
 
 rtc_static_library("video") {
   sources = [
+    "buffered_frame_decryptor.cc",
+    "buffered_frame_decryptor.h",
     "call_stats.cc",
     "call_stats.h",
     "encoder_rtcp_feedback.cc",
@@ -439,6 +441,7 @@
 
     defines = []
     sources = [
+      "buffered_frame_decryptor_unittest.cc",
       "call_stats_unittest.cc",
       "cpu_scaling_tests.cc",
       "encoder_rtcp_feedback_unittest.cc",
@@ -485,6 +488,7 @@
       "../api:fake_frame_decryptor",
       "../api:fake_frame_encryptor",
       "../api:libjingle_peerconnection_api",
+      "../api:mock_frame_decryptor",
       "../api:simulated_network_api",
       "../api/test/video:function_video_factory",
       "../api/video:builtin_video_bitrate_allocator_factory",
diff --git a/video/buffered_frame_decryptor.cc b/video/buffered_frame_decryptor.cc
new file mode 100644
index 0000000..5aab132
--- /dev/null
+++ b/video/buffered_frame_decryptor.cc
@@ -0,0 +1,103 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "video/buffered_frame_decryptor.h"
+
+#include <utility>
+
+#include "rtc_base/logging.h"
+#include "rtc_base/system/fallthrough.h"
+
+namespace webrtc {
+
+BufferedFrameDecryptor::BufferedFrameDecryptor(
+    OnDecryptedFrameCallback* decrypted_frame_callback,
+    rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor)
+    : frame_decryptor_(std::move(frame_decryptor)),
+      decrypted_frame_callback_(decrypted_frame_callback) {}
+
+BufferedFrameDecryptor::~BufferedFrameDecryptor() {}
+
+void BufferedFrameDecryptor::ManageEncryptedFrame(
+    std::unique_ptr<video_coding::RtpFrameObject> encrypted_frame) {
+  switch (DecryptFrame(encrypted_frame.get())) {
+    case FrameDecision::kStash:
+      if (stashed_frames_.size() >= kMaxStashedFrames) {
+        stashed_frames_.pop_front();
+      }
+      stashed_frames_.push_back(std::move(encrypted_frame));
+      break;
+    case FrameDecision::kDecrypted:
+      RetryStashedFrames();
+      decrypted_frame_callback_->OnDecryptedFrame(std::move(encrypted_frame));
+      break;
+    case FrameDecision::kDrop:
+      break;
+  }
+}
+
+BufferedFrameDecryptor::FrameDecision BufferedFrameDecryptor::DecryptFrame(
+    video_coding::RtpFrameObject* frame) {
+  // Decryption is impossible without a frame decryptor, so drop the frame.
+  if (frame_decryptor_ == nullptr) {
+    RTC_LOG(LS_WARNING) << "Frame decryption required but not attached to this "
+                           "stream. Dropping frame.";
+    return FrameDecision::kDrop;
+  }
+  // When using encryption we expect the frame to have the generic descriptor.
+  absl::optional<RtpGenericFrameDescriptor> descriptor =
+      frame->GetGenericFrameDescriptor();
+  if (!descriptor) {
+    RTC_LOG(LS_ERROR) << "No generic frame descriptor found dropping frame.";
+    return FrameDecision::kDrop;
+  }
+  // Retrieve the bitstream of the encrypted video frame.
+  rtc::ArrayView<const uint8_t> encrypted_frame_bitstream(frame->Buffer(),
+                                                          frame->size());
+  // Retrieve the maximum possible size of the decrypted payload.
+  const size_t max_plaintext_byte_size =
+      frame_decryptor_->GetMaxPlaintextByteSize(cricket::MEDIA_TYPE_VIDEO,
+                                                frame->size());
+  RTC_CHECK_LE(max_plaintext_byte_size, frame->size());
+  // Place the decrypted frame inline into the existing frame.
+  rtc::ArrayView<uint8_t> inline_decrypted_bitstream(frame->MutableBuffer(),
+                                                     max_plaintext_byte_size);
+  // Attempt to decrypt the video frame.
+  size_t bytes_written = 0;
+  if (frame_decryptor_->Decrypt(
+          cricket::MEDIA_TYPE_VIDEO, /*csrcs=*/{},
+          /*additional_data=*/nullptr, encrypted_frame_bitstream,
+          inline_decrypted_bitstream, &bytes_written) != 0) {
+    // Only stash frames if we have never decrypted a frame before.
+    return first_frame_decrypted_ ? FrameDecision::kDrop
+                                  : FrameDecision::kStash;
+  }
+  RTC_CHECK_LE(bytes_written, max_plaintext_byte_size);
+  // Update the frame to contain just the written bytes.
+  frame->SetSize(bytes_written);
+
+  // Indicate that all future frames that fail to decrypt should be dropped.
+  if (!first_frame_decrypted_) {
+    first_frame_decrypted_ = true;
+  }
+
+  return FrameDecision::kDecrypted;
+}
+
+void BufferedFrameDecryptor::RetryStashedFrames() {
+  for (auto& frame : stashed_frames_) {
+    if (DecryptFrame(frame.get()) == FrameDecision::kDecrypted) {
+      decrypted_frame_callback_->OnDecryptedFrame(std::move(frame));
+    }
+  }
+  stashed_frames_.clear();
+}
+
+}  // namespace webrtc
diff --git a/video/buffered_frame_decryptor.h b/video/buffered_frame_decryptor.h
new file mode 100644
index 0000000..be6ff9a
--- /dev/null
+++ b/video/buffered_frame_decryptor.h
@@ -0,0 +1,80 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VIDEO_BUFFERED_FRAME_DECRYPTOR_H_
+#define VIDEO_BUFFERED_FRAME_DECRYPTOR_H_
+
+#include <deque>
+#include <memory>
+
+#include "api/crypto/cryptooptions.h"
+#include "api/crypto/framedecryptorinterface.h"
+#include "modules/include/module_common_types.h"
+#include "modules/video_coding/frame_object.h"
+
+namespace webrtc {
+
+// This callback is provided during the construction of the
+// BufferedFrameDecryptor and is called each time a frame is successfully
+// decrypted by the buffer.
+class OnDecryptedFrameCallback {
+ public:
+  virtual ~OnDecryptedFrameCallback() = default;
+  // Called each time a decrypted frame is returned.
+  virtual void OnDecryptedFrame(
+      std::unique_ptr<video_coding::RtpFrameObject> frame) = 0;
+};
+
+// The BufferedFrameDecryptor is responsible for deciding when to pass
+// decrypted received frames onto the OnDecryptedFrameCallback. Frames can be
+// delayed when frame encryption is enabled but the key hasn't arrived yet. In
+// this case we stash about 1 second of encrypted frames instead of dropping
+// them to prevent re-requesting the key frame. This optimization is
+// particularly important on low bandwidth networks. Note stashing is only ever
+// done if we have never successfully decrypted a frame before. After the first
+// successful decryption payloads will never be stashed.
+class BufferedFrameDecryptor final {
+ public:
+  // Constructs a BufferedFrameDecryptor that stashes up to kMaxStashedFrames.
+  explicit BufferedFrameDecryptor(
+      OnDecryptedFrameCallback* decrypted_frame_callback,
+      rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor);
+  ~BufferedFrameDecryptor();
+  // This object cannot be copied.
+  BufferedFrameDecryptor(const BufferedFrameDecryptor&) = delete;
+  BufferedFrameDecryptor& operator=(const BufferedFrameDecryptor&) = delete;
+  // Determines whether the frame should be stashed, dropped or handed off to
+  // the OnDecryptedFrameCallback.
+  void ManageEncryptedFrame(
+      std::unique_ptr<video_coding::RtpFrameObject> encrypted_frame);
+
+ private:
+  // Represents what should be done with a given frame.
+  enum class FrameDecision { kStash, kDecrypted, kDrop };
+
+  // Attempts to decrypt the frame, if it fails and no prior frames have been
+  // decrypted it will return kStash. Otherwise failed decryptions will return
+  // kDrop. Successful decryptions will always return kDecrypted.
+  FrameDecision DecryptFrame(video_coding::RtpFrameObject* frame);
+  // Retries all the stashed frames; this is triggered each time a kDecrypted
+  // event occurs.
+  void RetryStashedFrames();
+
+  static const size_t kMaxStashedFrames = 24;
+
+  bool first_frame_decrypted_ = false;
+  const rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor_;
+  OnDecryptedFrameCallback* const decrypted_frame_callback_;
+  std::deque<std::unique_ptr<video_coding::RtpFrameObject>> stashed_frames_;
+};
+
+}  // namespace webrtc
+
+#endif  // VIDEO_BUFFERED_FRAME_DECRYPTOR_H_
diff --git a/video/buffered_frame_decryptor_unittest.cc b/video/buffered_frame_decryptor_unittest.cc
new file mode 100644
index 0000000..95c647e
--- /dev/null
+++ b/video/buffered_frame_decryptor_unittest.cc
@@ -0,0 +1,206 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "video/buffered_frame_decryptor.h"
+
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "absl/memory/memory.h"
+#include "api/test/mock_frame_decryptor.h"
+#include "modules/video_coding/packet_buffer.h"
+#include "rtc_base/refcountedobject.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+using ::testing::Return;
+
+namespace webrtc {
+namespace {
+
+class FakePacketBuffer : public video_coding::PacketBuffer {
+ public:
+  FakePacketBuffer() : PacketBuffer(nullptr, 0, 0, nullptr) {}
+  ~FakePacketBuffer() override {}
+
+  VCMPacket* GetPacket(uint16_t seq_num) override {
+    auto packet_it = packets_.find(seq_num);
+    return packet_it == packets_.end() ? nullptr : &packet_it->second;
+  }
+
+  bool InsertPacket(VCMPacket* packet) override {
+    packets_[packet->seqNum] = *packet;
+    return true;
+  }
+
+  bool GetBitstream(const video_coding::RtpFrameObject& frame,
+                    uint8_t* destination) override {
+    return true;
+  }
+
+  void ReturnFrame(video_coding::RtpFrameObject* frame) override {
+    packets_.erase(frame->first_seq_num());
+  }
+
+ private:
+  std::map<uint16_t, VCMPacket> packets_;
+};
+
+}  // namespace
+
+class BufferedFrameDecryptorTest
+    : public ::testing::Test,
+      public OnDecryptedFrameCallback,
+      public video_coding::OnReceivedFrameCallback {
+ public:
+  // Implements the OnDecryptedFrameCallback interface.
+  void OnDecryptedFrame(
+      std::unique_ptr<video_coding::RtpFrameObject> frame) override {
+    decrypted_frame_call_count_++;
+  }
+
+  // Implements the OnReceivedFrameCallback interface.
+  void OnReceivedFrame(
+      std::unique_ptr<video_coding::RtpFrameObject> frame) override {}
+
+  // Returns a new fake RtpFrameObject; it abstracts the difficult construction
+  // of the RtpFrameObject to simplify testing.
+  std::unique_ptr<video_coding::RtpFrameObject> CreateRtpFrameObject(
+      bool key_frame) {
+    seq_num_++;
+
+    VCMPacket packet;
+    packet.codec = kVideoCodecGeneric;
+    packet.seqNum = seq_num_;
+    packet.frameType = key_frame ? kVideoFrameKey : kVideoFrameDelta;
+    packet.generic_descriptor = RtpGenericFrameDescriptor();
+    fake_packet_buffer_->InsertPacket(&packet);
+    packet.seqNum = seq_num_;
+    packet.is_last_packet_in_frame = true;
+    fake_packet_buffer_->InsertPacket(&packet);
+
+    return std::unique_ptr<video_coding::RtpFrameObject>(
+        new video_coding::RtpFrameObject(fake_packet_buffer_.get(), seq_num_,
+                                         seq_num_, 0, 0, 0));
+  }
+
+ protected:
+  BufferedFrameDecryptorTest() : fake_packet_buffer_(new FakePacketBuffer()) {}
+  void SetUp() override {
+    fake_packet_data_ = std::vector<uint8_t>(100);
+    decrypted_frame_call_count_ = 0;
+    seq_num_ = 0;
+    mock_frame_decryptor_ = new rtc::RefCountedObject<MockFrameDecryptor>();
+    buffered_frame_decryptor_ = absl::make_unique<BufferedFrameDecryptor>(
+        this, mock_frame_decryptor_.get());
+  }
+
+  static const size_t kMaxStashedFrames;
+
+  std::vector<uint8_t> fake_packet_data_;
+  rtc::scoped_refptr<FakePacketBuffer> fake_packet_buffer_;
+  rtc::scoped_refptr<MockFrameDecryptor> mock_frame_decryptor_;
+  std::unique_ptr<BufferedFrameDecryptor> buffered_frame_decryptor_;
+  size_t decrypted_frame_call_count_;
+  uint16_t seq_num_;
+};
+
+const size_t BufferedFrameDecryptorTest::kMaxStashedFrames = 24;
+
+// Callback should always be triggered on a successful decryption.
+TEST_F(BufferedFrameDecryptorTest, CallbackCalledOnSuccessfulDecryption) {
+  EXPECT_CALL(*mock_frame_decryptor_, Decrypt).Times(1).WillOnce(Return(0));
+  EXPECT_CALL(*mock_frame_decryptor_, GetMaxPlaintextByteSize)
+      .Times(1)
+      .WillOnce(Return(0));
+  buffered_frame_decryptor_->ManageEncryptedFrame(CreateRtpFrameObject(true));
+  EXPECT_EQ(decrypted_frame_call_count_, static_cast<size_t>(1));
+}
+
+// An initial fail to decrypt should not trigger the callback.
+TEST_F(BufferedFrameDecryptorTest, CallbackNotCalledOnFailedDecryption) {
+  EXPECT_CALL(*mock_frame_decryptor_, Decrypt).Times(1).WillOnce(Return(1));
+  EXPECT_CALL(*mock_frame_decryptor_, GetMaxPlaintextByteSize)
+      .Times(1)
+      .WillOnce(Return(0));
+  buffered_frame_decryptor_->ManageEncryptedFrame(CreateRtpFrameObject(true));
+  EXPECT_EQ(decrypted_frame_call_count_, static_cast<size_t>(0));
+}
+
+// Initial failures should be stored and retried after the first successful
+// decryption.
+TEST_F(BufferedFrameDecryptorTest, DelayedCallbackOnBufferedFrames) {
+  EXPECT_CALL(*mock_frame_decryptor_, Decrypt)
+      .Times(3)
+      .WillOnce(Return(1))
+      .WillOnce(Return(0))
+      .WillOnce(Return(0));
+  EXPECT_CALL(*mock_frame_decryptor_, GetMaxPlaintextByteSize)
+      .Times(3)
+      .WillRepeatedly(Return(0));
+
+  // The first decrypt will fail stashing the first frame.
+  buffered_frame_decryptor_->ManageEncryptedFrame(CreateRtpFrameObject(true));
+  EXPECT_EQ(decrypted_frame_call_count_, static_cast<size_t>(0));
+  // The second call will succeed playing back both frames.
+  buffered_frame_decryptor_->ManageEncryptedFrame(CreateRtpFrameObject(false));
+  EXPECT_EQ(decrypted_frame_call_count_, static_cast<size_t>(2));
+}
+
+// Subsequent failures to decrypt after the first successful decryption should
+// be discarded.
+TEST_F(BufferedFrameDecryptorTest, FTDDiscardedAfterFirstSuccess) {
+  EXPECT_CALL(*mock_frame_decryptor_, Decrypt)
+      .Times(4)
+      .WillOnce(Return(1))
+      .WillOnce(Return(0))
+      .WillOnce(Return(0))
+      .WillOnce(Return(1));
+  EXPECT_CALL(*mock_frame_decryptor_, GetMaxPlaintextByteSize)
+      .Times(4)
+      .WillRepeatedly(Return(0));
+
+  // The first decrypt will fail stashing the first frame.
+  buffered_frame_decryptor_->ManageEncryptedFrame(CreateRtpFrameObject(true));
+  EXPECT_EQ(decrypted_frame_call_count_, static_cast<size_t>(0));
+  // The second call will succeed playing back both frames.
+  buffered_frame_decryptor_->ManageEncryptedFrame(CreateRtpFrameObject(false));
+  EXPECT_EQ(decrypted_frame_call_count_, static_cast<size_t>(2));
+  // A new failure call will not result in an additional decrypted frame
+  // callback.
+  buffered_frame_decryptor_->ManageEncryptedFrame(CreateRtpFrameObject(true));
+  EXPECT_EQ(decrypted_frame_call_count_, static_cast<size_t>(2));
+}
+
+// Validate that the maximum number of stashed frames cannot be exceeded even if
+// more than its maximum arrives before the first successful decryption.
+TEST_F(BufferedFrameDecryptorTest, MaximumNumberOfFramesStored) {
+  const size_t failed_to_decrypt_count = kMaxStashedFrames * 2;
+  EXPECT_CALL(*mock_frame_decryptor_, Decrypt)
+      .Times(failed_to_decrypt_count)
+      .WillRepeatedly(Return(1));
+  EXPECT_CALL(*mock_frame_decryptor_, GetMaxPlaintextByteSize)
+      .WillRepeatedly(Return(0));
+
+  for (size_t i = 0; i < failed_to_decrypt_count; ++i) {
+    buffered_frame_decryptor_->ManageEncryptedFrame(CreateRtpFrameObject(true));
+  }
+  EXPECT_EQ(decrypted_frame_call_count_, static_cast<size_t>(0));
+
+  EXPECT_CALL(*mock_frame_decryptor_, Decrypt)
+      .Times(kMaxStashedFrames + 1)
+      .WillRepeatedly(Return(0));
+  buffered_frame_decryptor_->ManageEncryptedFrame(CreateRtpFrameObject(true));
+  EXPECT_EQ(decrypted_frame_call_count_, kMaxStashedFrames + 1);
+}
+
+}  // namespace webrtc
diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc
index 4116cad..cfc5082 100644
--- a/video/rtp_video_stream_receiver.cc
+++ b/video/rtp_video_stream_receiver.cc
@@ -113,8 +113,7 @@
                                     packet_router)),
       complete_frame_callback_(complete_frame_callback),
       keyframe_request_sender_(keyframe_request_sender),
-      has_received_frame_(false),
-      frame_decryptor_(frame_decryptor) {
+      has_received_frame_(false) {
   constexpr bool remb_candidate = true;
   packet_router_->AddReceiveRtpModule(rtp_rtcp_.get(), remb_candidate);
 
@@ -169,6 +168,12 @@
       clock_, kPacketBufferStartSize, packet_buffer_max_size, this);
   reference_finder_ =
       absl::make_unique<video_coding::RtpFrameReferenceFinder>(this);
+  // Only construct the buffered frame decryptor if frame encryption is in use.
+  if (frame_decryptor != nullptr ||
+      config_.crypto_options.sframe.require_frame_encryption) {
+    buffered_frame_decryptor_ =
+        absl::make_unique<BufferedFrameDecryptor>(this, frame_decryptor);
+  }
 }
 
 RtpVideoStreamReceiver::~RtpVideoStreamReceiver() {
@@ -319,7 +324,7 @@
 
       std::vector<uint32_t> csrcs = packet.Csrcs();
       contributing_sources_.Update(now_ms, csrcs,
-                                   /* audio level */absl::nullopt);
+                                   /* audio level */ absl::nullopt);
     }
     // Periodically log the RTP header of incoming packets.
     if (now_ms - last_packet_log_ms_ > kPacketLogIntervalMs) {
@@ -380,6 +385,7 @@
 
 void RtpVideoStreamReceiver::OnReceivedFrame(
     std::unique_ptr<video_coding::RtpFrameObject> frame) {
+  RTC_DCHECK_RUN_ON(&network_tc_);
   // Request a key frame as soon as possible.
   bool key_frame_requested = false;
   if (!has_received_frame_) {
@@ -389,55 +395,11 @@
       keyframe_request_sender_->RequestKeyFrame();
     }
   }
-
-  // Optionally attempt to decrypt the raw video frame if it was provided.
-  if (frame_decryptor_ != nullptr) {
-    // When using encryption we expect the frame to have the generic descriptor.
-    absl::optional<RtpGenericFrameDescriptor> descriptor =
-        frame->GetGenericFrameDescriptor();
-    if (!descriptor) {
-      RTC_LOG(LS_ERROR) << "No generic frame descriptor found dropping frame.";
-      return;
-    }
-
-    // Retrieve the bitstream of the encrypted video frame.
-    rtc::ArrayView<const uint8_t> encrypted_frame_bitstream(frame->Buffer(),
-                                                            frame->size());
-    // Retrieve the maximum possible size of the decrypted payload.
-    const size_t max_plaintext_byte_size =
-        frame_decryptor_->GetMaxPlaintextByteSize(cricket::MEDIA_TYPE_VIDEO,
-                                                  frame->size());
-    RTC_CHECK(max_plaintext_byte_size <= frame->size());
-    // Place the decrypted frame inline into the existing frame.
-    rtc::ArrayView<uint8_t> inline_decrypted_bitstream(frame->MutableBuffer(),
-                                                       max_plaintext_byte_size);
-
-    // Attempt to decrypt the video frame.
-    size_t bytes_written = 0;
-    if (frame_decryptor_->Decrypt(
-            cricket::MEDIA_TYPE_VIDEO, /*csrcs=*/{},
-            /*additional_data=*/nullptr, encrypted_frame_bitstream,
-            inline_decrypted_bitstream, &bytes_written) != 0) {
-      return;
-    }
-
-    if (!has_received_decrypted_frame_ && !key_frame_requested) {
-      has_received_decrypted_frame_ = true;
-      if (frame->FrameType() != kVideoFrameKey) {
-        keyframe_request_sender_->RequestKeyFrame();
-      }
-    }
-
-    RTC_CHECK(bytes_written <= max_plaintext_byte_size);
-    // Update the frame to contain just the written bytes.
-    frame->SetSize(bytes_written);
-  } else if (config_.crypto_options.sframe.require_frame_encryption) {
-    RTC_LOG(LS_WARNING) << "Frame decryption required but not attached to this "
-                           "stream. Dropping  frame.";
-    return;
+  if (buffered_frame_decryptor_ == nullptr) {
+    reference_finder_->ManageFrame(std::move(frame));
+  } else {
+    buffered_frame_decryptor_->ManageEncryptedFrame(std::move(frame));
   }
-
-  reference_finder_->ManageFrame(std::move(frame));
 }
 
 void RtpVideoStreamReceiver::OnCompleteFrame(
@@ -452,6 +414,11 @@
   complete_frame_callback_->OnCompleteFrame(std::move(frame));
 }
 
+void RtpVideoStreamReceiver::OnDecryptedFrame(
+    std::unique_ptr<video_coding::RtpFrameObject> frame) {
+  reference_finder_->ManageFrame(std::move(frame));
+}
+
 void RtpVideoStreamReceiver::UpdateRtt(int64_t max_rtt_ms) {
   if (nack_module_)
     nack_module_->UpdateRtt(max_rtt_ms);
diff --git a/video/rtp_video_stream_receiver.h b/video/rtp_video_stream_receiver.h
index a2006d1..ec3f354 100644
--- a/video/rtp_video_stream_receiver.h
+++ b/video/rtp_video_stream_receiver.h
@@ -38,6 +38,9 @@
 #include "rtc_base/criticalsection.h"
 #include "rtc_base/numerics/sequence_number_util.h"
 #include "rtc_base/sequenced_task_checker.h"
+#include "rtc_base/thread_annotations.h"
+#include "rtc_base/thread_checker.h"
+#include "video/buffered_frame_decryptor.h"
 
 namespace webrtc {
 
@@ -56,7 +59,8 @@
                                public VCMFrameTypeCallback,
                                public VCMPacketRequestCallback,
                                public video_coding::OnReceivedFrameCallback,
-                               public video_coding::OnCompleteFrameCallback {
+                               public video_coding::OnCompleteFrameCallback,
+                               public OnDecryptedFrameCallback {
  public:
   RtpVideoStreamReceiver(
       Transport* transport,
@@ -130,6 +134,10 @@
   void OnCompleteFrame(
       std::unique_ptr<video_coding::EncodedFrame> frame) override;
 
+  // Implements OnDecryptedFrameCallback.
+  void OnDecryptedFrame(
+      std::unique_ptr<video_coding::RtpFrameObject> frame) override;
+
   // Called by VideoReceiveStream when stats are updated.
   void UpdateRtt(int64_t max_rtt_ms);
 
@@ -207,10 +215,13 @@
   absl::optional<int64_t> last_received_rtp_system_time_ms_
       RTC_GUARDED_BY(rtp_sources_lock_);
 
-  // E2EE Video Frame Decryptor (Optional)
-  rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor_;
-  // Set to true on the first successsfully decrypted frame.
-  bool has_received_decrypted_frame_ = false;
+  // Used to validate the buffered frame decryptor is always run on the correct
+  // thread.
+  rtc::ThreadChecker network_tc_;
+  // Handles incoming encrypted frames and forwards them to the
+  // rtp_reference_finder if they are decryptable.
+  std::unique_ptr<BufferedFrameDecryptor> buffered_frame_decryptor_
+      RTC_PT_GUARDED_BY(network_tc_);
 };
 
 }  // namespace webrtc