Skip cropping for frames that can't be converted to i420.

Some downstream clients have custom frame types that can't be converted.
The rest of EncodeVideoFrame is protected against these frames, but the
crop code assumes ToI420 always succeeds.

Bug: None
Change-Id: I8f4279e3975d3ae8cd1da59f7e84fafe0404fd15
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/141646
Commit-Queue: Noah Richards <noahric@chromium.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28256}
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index 942b82e..ebaaa44 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -1218,6 +1218,12 @@
   VideoFrame out_frame(video_frame);
   // Crop frame if needed.
   if (crop_width_ > 0 || crop_height_ > 0) {
+    // If the frame can't be converted to I420, drop it.
+    auto i420_buffer = video_frame.video_frame_buffer()->ToI420();
+    if (!i420_buffer) {
+      RTC_LOG(LS_ERROR) << "Frame conversion for crop failed, dropping frame.";
+      return;
+    }
     int cropped_width = video_frame.width() - crop_width_;
     int cropped_height = video_frame.height() - crop_height_;
     rtc::scoped_refptr<I420Buffer> cropped_buffer =
@@ -1226,17 +1232,16 @@
     // happen after SinkWants signaled correctly from ReconfigureEncoder.
     VideoFrame::UpdateRect update_rect = video_frame.update_rect();
     if (crop_width_ < 4 && crop_height_ < 4) {
-      cropped_buffer->CropAndScaleFrom(
-          *video_frame.video_frame_buffer()->ToI420(), crop_width_ / 2,
-          crop_height_ / 2, cropped_width, cropped_height);
+      cropped_buffer->CropAndScaleFrom(*i420_buffer, crop_width_ / 2,
+                                       crop_height_ / 2, cropped_width,
+                                       cropped_height);
       update_rect.offset_x -= crop_width_ / 2;
       update_rect.offset_y -= crop_height_ / 2;
       update_rect.Intersect(
           VideoFrame::UpdateRect{0, 0, cropped_width, cropped_height});
 
     } else {
-      cropped_buffer->ScaleFrom(
-          *video_frame.video_frame_buffer()->ToI420().get());
+      cropped_buffer->ScaleFrom(*i420_buffer);
       if (!update_rect.IsEmpty()) {
         // Since we can't reason about pixels after scaling, we invalidate whole
         // picture, if anything changed.
diff --git a/video/video_stream_encoder_unittest.cc b/video/video_stream_encoder_unittest.cc
index 12e2163..7214a64 100644
--- a/video/video_stream_encoder_unittest.cc
+++ b/video/video_stream_encoder_unittest.cc
@@ -24,6 +24,7 @@
 #include "api/video_codecs/vp8_temporal_layers.h"
 #include "api/video_codecs/vp8_temporal_layers_factory.h"
 #include "common_video/h264/h264_common.h"
+#include "common_video/include/video_frame_buffer.h"
 #include "media/base/video_adapter.h"
 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
 #include "modules/video_coding/utility/default_video_bitrate_allocator.h"
@@ -80,6 +81,29 @@
   rtc::Event* const event_;
 };
 
+// A fake native buffer that can't be converted to I420.
+class FakeNativeBuffer : public webrtc::VideoFrameBuffer {
+ public:
+  FakeNativeBuffer(rtc::Event* event, int width, int height)
+      : event_(event), width_(width), height_(height) {}
+  webrtc::VideoFrameBuffer::Type type() const override { return Type::kNative; }
+  int width() const override { return width_; }
+  int height() const override { return height_; }
+  rtc::scoped_refptr<webrtc::I420BufferInterface> ToI420() override {
+    return nullptr;
+  }
+
+ private:
+  friend class rtc::RefCountedObject<FakeNativeBuffer>;
+  ~FakeNativeBuffer() override {
+    if (event_)
+      event_->Set();
+  }
+  rtc::Event* const event_;
+  const int width_;
+  const int height_;
+};
+
 class CpuOveruseDetectorProxy : public OveruseFrameDetector {
  public:
   explicit CpuOveruseDetectorProxy(CpuOveruseMetricsObserver* metrics_observer)
@@ -174,6 +198,35 @@
   const int framerate_;
 };
 
+// Simulates simulcast behavior and makes highest stream resolutions divisible
+// by 4.
+class CroppingVideoStreamFactory
+    : public VideoEncoderConfig::VideoStreamFactoryInterface {
+ public:
+  explicit CroppingVideoStreamFactory(size_t num_temporal_layers, int framerate)
+      : num_temporal_layers_(num_temporal_layers), framerate_(framerate) {
+    EXPECT_GT(num_temporal_layers, 0u);
+    EXPECT_GT(framerate, 0);
+  }
+
+ private:
+  std::vector<VideoStream> CreateEncoderStreams(
+      int width,
+      int height,
+      const VideoEncoderConfig& encoder_config) override {
+    std::vector<VideoStream> streams = test::CreateVideoStreams(
+        width - width % 4, height - height % 4, encoder_config);
+    for (VideoStream& stream : streams) {
+      stream.num_temporal_layers = num_temporal_layers_;
+      stream.max_framerate = framerate_;
+    }
+    return streams;
+  }
+
+  const size_t num_temporal_layers_;
+  const int framerate_;
+};
+
 class AdaptingFrameForwarder : public test::FrameForwarder {
  public:
   AdaptingFrameForwarder() : adaptation_enabled_(false) {}
@@ -420,6 +473,28 @@
     return frame;
   }
 
+  VideoFrame CreateFakeNativeFrame(int64_t ntp_time_ms,
+                                   rtc::Event* destruction_event,
+                                   int width,
+                                   int height) const {
+    VideoFrame frame =
+        VideoFrame::Builder()
+            .set_video_frame_buffer(new rtc::RefCountedObject<FakeNativeBuffer>(
+                destruction_event, width, height))
+            .set_timestamp_rtp(99)
+            .set_timestamp_ms(99)
+            .set_rotation(kVideoRotation_0)
+            .build();
+    frame.set_ntp_time_ms(ntp_time_ms);
+    return frame;
+  }
+
+  VideoFrame CreateFakeNativeFrame(int64_t ntp_time_ms,
+                                   rtc::Event* destruction_event) const {
+    return CreateFakeNativeFrame(ntp_time_ms, destruction_event, codec_width_,
+                                 codec_height_);
+  }
+
   void VerifyAllocatedBitrate(const VideoBitrateAllocation& expected_bitrate) {
     MockBitrateObserver bitrate_observer;
     video_stream_encoder_->SetBitrateAllocationObserver(&bitrate_observer);
@@ -1057,6 +1132,46 @@
   video_stream_encoder_->Stop();
 }
 
+TEST_F(VideoStreamEncoderTest, DropFrameWithFailedI420Conversion) {
+  video_stream_encoder_->OnBitrateUpdated(
+      DataRate::bps(kTargetBitrateBps), DataRate::bps(kTargetBitrateBps), 0, 0);
+
+  rtc::Event frame_destroyed_event;
+  video_source_.IncomingCapturedFrame(
+      CreateFakeNativeFrame(1, &frame_destroyed_event));
+  ExpectDroppedFrame();
+  EXPECT_TRUE(frame_destroyed_event.Wait(kDefaultTimeoutMs));
+  video_stream_encoder_->Stop();
+}
+
+TEST_F(VideoStreamEncoderTest, DropFrameWithFailedI420ConversionWithCrop) {
+  // Use the cropping factory.
+  video_encoder_config_.video_stream_factory =
+      new rtc::RefCountedObject<CroppingVideoStreamFactory>(1, 30);
+  video_stream_encoder_->ConfigureEncoder(std::move(video_encoder_config_),
+                                          kMaxPayloadLength);
+  video_stream_encoder_->WaitUntilTaskQueueIsIdle();
+
+  // Capture a frame at codec_width_/codec_height_.
+  video_stream_encoder_->OnBitrateUpdated(
+      DataRate::bps(kTargetBitrateBps), DataRate::bps(kTargetBitrateBps), 0, 0);
+  video_source_.IncomingCapturedFrame(CreateFrame(1, nullptr));
+  WaitForEncodedFrame(1);
+  // The encoder will have been configured once.
+  EXPECT_EQ(1, sink_.number_of_reconfigurations());
+  EXPECT_EQ(codec_width_, fake_encoder_.codec_config().width);
+  EXPECT_EQ(codec_height_, fake_encoder_.codec_config().height);
+
+  // Now send in a fake frame that needs to be cropped as the width/height
+  // aren't divisible by 4 (see CreateEncoderStreams above).
+  rtc::Event frame_destroyed_event;
+  video_source_.IncomingCapturedFrame(CreateFakeNativeFrame(
+      2, &frame_destroyed_event, codec_width_ + 1, codec_height_ + 1));
+  ExpectDroppedFrame();
+  EXPECT_TRUE(frame_destroyed_event.Wait(kDefaultTimeoutMs));
+  video_stream_encoder_->Stop();
+}
+
 TEST_F(VideoStreamEncoderTest,
        ConfigureEncoderTriggersOnEncoderConfigurationChanged) {
   video_stream_encoder_->OnBitrateUpdated(
@@ -3397,36 +3512,6 @@
 }
 
 TEST_F(VideoStreamEncoderTest, AcceptsFullHdAdaptedDownSimulcastFrames) {
-  // Simulates simulcast behavior and makes highest stream resolutions divisible
-  // by 4.
-  class CroppingVideoStreamFactory
-      : public VideoEncoderConfig::VideoStreamFactoryInterface {
-   public:
-    explicit CroppingVideoStreamFactory(size_t num_temporal_layers,
-                                        int framerate)
-        : num_temporal_layers_(num_temporal_layers), framerate_(framerate) {
-      EXPECT_GT(num_temporal_layers, 0u);
-      EXPECT_GT(framerate, 0);
-    }
-
-   private:
-    std::vector<VideoStream> CreateEncoderStreams(
-        int width,
-        int height,
-        const VideoEncoderConfig& encoder_config) override {
-      std::vector<VideoStream> streams = test::CreateVideoStreams(
-          width - width % 4, height - height % 4, encoder_config);
-      for (VideoStream& stream : streams) {
-        stream.num_temporal_layers = num_temporal_layers_;
-        stream.max_framerate = framerate_;
-      }
-      return streams;
-    }
-
-    const size_t num_temporal_layers_;
-    const int framerate_;
-  };
-
   const int kFrameWidth = 1920;
   const int kFrameHeight = 1080;
   // 3/4 of 1920.