generatekeyframe: allow simulcast_encoder_adapter to request per-layer keyframes

Per-layer keyframe requests are honored in the case of separate encoders.

drive-by: add helper function to expect per-layer keyframes

BUG=chromium:1354101

Change-Id: Ib645a621add899f035bea319f035dcb0b2617510
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/281002
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Philipp Hancke <phancke@microsoft.com>
Cr-Commit-Position: refs/heads/main@{#38734}
diff --git a/media/engine/simulcast_encoder_adapter.cc b/media/engine/simulcast_encoder_adapter.cc
index e7f6205..38e06b3 100644
--- a/media/engine/simulcast_encoder_adapter.cc
+++ b/media/engine/simulcast_encoder_adapter.cc
@@ -461,24 +461,14 @@
     }
   }
 
-  // All active streams should generate a key frame if
-  // a key frame is requested by any stream.
   bool is_keyframe_needed = false;
-  if (frame_types) {
-    for (const auto& frame_type : *frame_types) {
-      if (frame_type == VideoFrameType::kVideoFrameKey) {
-        is_keyframe_needed = true;
-        break;
-      }
-    }
-  }
-
-  if (!is_keyframe_needed) {
-    for (const auto& layer : stream_contexts_) {
-      if (layer.is_keyframe_needed()) {
-        is_keyframe_needed = true;
-        break;
-      }
+  for (const auto& layer : stream_contexts_) {
+    if (layer.is_keyframe_needed()) {
+      // This is legacy behavior, generating a keyframe on all layers
+      // when generating one for a layer that became active for the first time
+      // or after being disabled
+      is_keyframe_needed = true;
+      break;
     }
   }
 
@@ -501,17 +491,40 @@
     // frame types for all streams should be passed to the encoder unchanged.
     // Otherwise a single per-encoder frame type is passed.
     std::vector<VideoFrameType> stream_frame_types(
-        bypass_mode_ ? total_streams_count_ : 1);
+        bypass_mode_ ? total_streams_count_ : 1,
+        VideoFrameType::kVideoFrameDelta);
+    bool keyframe_requested = false;
     if (is_keyframe_needed) {
       std::fill(stream_frame_types.begin(), stream_frame_types.end(),
                 VideoFrameType::kVideoFrameKey);
-      layer.OnKeyframe(frame_timestamp);
-    } else {
-      if (layer.ShouldDropFrame(frame_timestamp)) {
-        continue;
+      keyframe_requested = true;
+    } else if (frame_types) {
+      if (bypass_mode_) {
+        // In bypass mode, requesting a key frame on any layer triggers a
+        // key frame request on all layers.
+        for (const auto& frame_type : *frame_types) {
+          if (frame_type == VideoFrameType::kVideoFrameKey) {
+            std::fill(stream_frame_types.begin(), stream_frame_types.end(),
+                      VideoFrameType::kVideoFrameKey);
+            keyframe_requested = true;
+            break;
+          }
+        }
+      } else {
+        size_t stream_idx = static_cast<size_t>(layer.stream_idx());
+        // Only read this layer's entry if `frame_types` actually has one;
+        // a shorter vector must not be indexed out of bounds.
+        if (frame_types->size() > stream_idx &&
+            (*frame_types)[stream_idx] == VideoFrameType::kVideoFrameKey) {
+          stream_frame_types[0] = VideoFrameType::kVideoFrameKey;
+          keyframe_requested = true;
+        }
       }
-      std::fill(stream_frame_types.begin(), stream_frame_types.end(),
-                VideoFrameType::kVideoFrameDelta);
+    }
+    if (keyframe_requested) {
+      layer.OnKeyframe(frame_timestamp);
+    } else if (layer.ShouldDropFrame(frame_timestamp)) {
+      continue;
     }
 
     // If scaling isn't required, because the input resolution
diff --git a/media/engine/simulcast_encoder_adapter_unittest.cc b/media/engine/simulcast_encoder_adapter_unittest.cc
index e0e3ea8..76ab152 100644
--- a/media/engine/simulcast_encoder_adapter_unittest.cc
+++ b/media/engine/simulcast_encoder_adapter_unittest.cc
@@ -1089,6 +1089,89 @@
   EXPECT_EQ(0, adapter_->Encode(input_frame, &frame_types));
 }
 
+TEST_F(TestSimulcastEncoderAdapterFake, GeneratesKeyFramesOnRequestedLayers) {
+  // Set up common settings for three streams.
+  SimulcastTestFixtureImpl::DefaultSettings(
+      &codec_, static_cast<const int*>(kTestTemporalLayerProfile),
+      kVideoCodecVP8);
+  rate_allocator_.reset(new SimulcastRateAllocator(codec_));
+  adapter_->RegisterEncodeCompleteCallback(this);
+
+  // Input data.
+  rtc::scoped_refptr<VideoFrameBuffer> buffer(I420Buffer::Create(1280, 720));
+
+  // Encode with three streams.
+  codec_.startBitrate = 3000;
+  EXPECT_EQ(0, adapter_->InitEncode(&codec_, kSettings));
+
+  std::vector<VideoFrameType> frame_types;
+  frame_types.resize(3, VideoFrameType::kVideoFrameKey);
+
+  std::vector<VideoFrameType> expected_keyframe(1,
+                                                VideoFrameType::kVideoFrameKey);
+  std::vector<VideoFrameType> expected_deltaframe(
+      1, VideoFrameType::kVideoFrameDelta);
+
+  std::vector<MockVideoEncoder*> original_encoders =
+      helper_->factory()->encoders();
+  ASSERT_EQ(3u, original_encoders.size());
+  EXPECT_CALL(*original_encoders[0],
+              Encode(_, ::testing::Pointee(::testing::Eq(expected_keyframe))))
+      .WillOnce(Return(WEBRTC_VIDEO_CODEC_OK));
+  EXPECT_CALL(*original_encoders[1],
+              Encode(_, ::testing::Pointee(::testing::Eq(expected_keyframe))))
+      .WillOnce(Return(WEBRTC_VIDEO_CODEC_OK));
+  EXPECT_CALL(*original_encoders[2],
+              Encode(_, ::testing::Pointee(::testing::Eq(expected_keyframe))))
+      .WillOnce(Return(WEBRTC_VIDEO_CODEC_OK));
+  VideoFrame first_frame = VideoFrame::Builder()
+                               .set_video_frame_buffer(buffer)
+                               .set_timestamp_rtp(0)
+                               .set_timestamp_ms(0)
+                               .build();
+  EXPECT_EQ(0, adapter_->Encode(first_frame, &frame_types));
+
+  // Request [key, delta, delta].
+  EXPECT_CALL(*original_encoders[0],
+              Encode(_, ::testing::Pointee(::testing::Eq(expected_keyframe))))
+      .WillOnce(Return(WEBRTC_VIDEO_CODEC_OK));
+  EXPECT_CALL(*original_encoders[1],
+              Encode(_, ::testing::Pointee(::testing::Eq(expected_deltaframe))))
+      .WillOnce(Return(WEBRTC_VIDEO_CODEC_OK));
+  EXPECT_CALL(*original_encoders[2],
+              Encode(_, ::testing::Pointee(::testing::Eq(expected_deltaframe))))
+      .WillOnce(Return(WEBRTC_VIDEO_CODEC_OK));
+  frame_types[0] = VideoFrameType::kVideoFrameKey;
+  frame_types[1] = VideoFrameType::kVideoFrameDelta;
+  frame_types[2] = VideoFrameType::kVideoFrameDelta;
+  VideoFrame second_frame = VideoFrame::Builder()
+                                .set_video_frame_buffer(buffer)
+                                .set_timestamp_rtp(10000)
+                                .set_timestamp_ms(100000)
+                                .build();
+  EXPECT_EQ(0, adapter_->Encode(second_frame, &frame_types));
+
+  // Request [delta, key, delta].
+  EXPECT_CALL(*original_encoders[0],
+              Encode(_, ::testing::Pointee(::testing::Eq(expected_deltaframe))))
+      .WillOnce(Return(WEBRTC_VIDEO_CODEC_OK));
+  EXPECT_CALL(*original_encoders[1],
+              Encode(_, ::testing::Pointee(::testing::Eq(expected_keyframe))))
+      .WillOnce(Return(WEBRTC_VIDEO_CODEC_OK));
+  EXPECT_CALL(*original_encoders[2],
+              Encode(_, ::testing::Pointee(::testing::Eq(expected_deltaframe))))
+      .WillOnce(Return(WEBRTC_VIDEO_CODEC_OK));
+  frame_types[0] = VideoFrameType::kVideoFrameDelta;
+  frame_types[1] = VideoFrameType::kVideoFrameKey;
+  frame_types[2] = VideoFrameType::kVideoFrameDelta;
+  VideoFrame third_frame = VideoFrame::Builder()
+                               .set_video_frame_buffer(buffer)
+                               .set_timestamp_rtp(20000)
+                               .set_timestamp_ms(200000)
+                               .build();
+  EXPECT_EQ(0, adapter_->Encode(third_frame, &frame_types));
+}
+
 TEST_F(TestSimulcastEncoderAdapterFake, TestFailureReturnCodesFromEncodeCalls) {
   SimulcastTestFixtureImpl::DefaultSettings(
       &codec_, static_cast<const int*>(kTestTemporalLayerProfile),
diff --git a/modules/video_coding/utility/simulcast_test_fixture_impl.cc b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
index 84cd2e1..1bb60f5 100644
--- a/modules/video_coding/utility/simulcast_test_fixture_impl.cc
+++ b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
@@ -333,45 +333,34 @@
   EXPECT_EQ(0, encoder_->InitEncode(&settings_, kSettings));
 }
 
+void SimulcastTestFixtureImpl::ExpectStream(VideoFrameType frame_type,
+                                            int scaleResolutionDownBy) {
+  EXPECT_CALL(
+      encoder_callback_,
+      OnEncodedImage(AllOf(Field(&EncodedImage::_frameType, frame_type),
+                           Field(&EncodedImage::_encodedWidth,
+                                 kDefaultWidth / scaleResolutionDownBy),
+                           Field(&EncodedImage::_encodedHeight,
+                                 kDefaultHeight / scaleResolutionDownBy)),
+                     _))
+      .Times(1)
+      .WillRepeatedly(Return(
+          EncodedImageCallback::Result(EncodedImageCallback::Result::OK, 0)));
+}
+
 void SimulcastTestFixtureImpl::ExpectStreams(
     VideoFrameType frame_type,
     const std::vector<bool> expected_streams_active) {
   ASSERT_EQ(static_cast<int>(expected_streams_active.size()),
             kNumberOfSimulcastStreams);
   if (expected_streams_active[0]) {
-    EXPECT_CALL(
-        encoder_callback_,
-        OnEncodedImage(
-            AllOf(Field(&EncodedImage::_frameType, frame_type),
-                  Field(&EncodedImage::_encodedWidth, kDefaultWidth / 4),
-                  Field(&EncodedImage::_encodedHeight, kDefaultHeight / 4)),
-            _))
-        .Times(1)
-        .WillRepeatedly(Return(
-            EncodedImageCallback::Result(EncodedImageCallback::Result::OK, 0)));
+    ExpectStream(frame_type, 4);
   }
   if (expected_streams_active[1]) {
-    EXPECT_CALL(
-        encoder_callback_,
-        OnEncodedImage(
-            AllOf(Field(&EncodedImage::_frameType, frame_type),
-                  Field(&EncodedImage::_encodedWidth, kDefaultWidth / 2),
-                  Field(&EncodedImage::_encodedHeight, kDefaultHeight / 2)),
-            _))
-        .Times(1)
-        .WillRepeatedly(Return(
-            EncodedImageCallback::Result(EncodedImageCallback::Result::OK, 0)));
+    ExpectStream(frame_type, 2);
   }
   if (expected_streams_active[2]) {
-    EXPECT_CALL(encoder_callback_,
-                OnEncodedImage(
-                    AllOf(Field(&EncodedImage::_frameType, frame_type),
-                          Field(&EncodedImage::_encodedWidth, kDefaultWidth),
-                          Field(&EncodedImage::_encodedHeight, kDefaultHeight)),
-                    _))
-        .Times(1)
-        .WillRepeatedly(Return(
-            EncodedImageCallback::Result(EncodedImageCallback::Result::OK, 0)));
+    ExpectStream(frame_type, 1);
   }
 }
 
diff --git a/modules/video_coding/utility/simulcast_test_fixture_impl.h b/modules/video_coding/utility/simulcast_test_fixture_impl.h
index cdfdc60..4e7bc97 100644
--- a/modules/video_coding/utility/simulcast_test_fixture_impl.h
+++ b/modules/video_coding/utility/simulcast_test_fixture_impl.h
@@ -66,6 +66,7 @@
   void SetRates(uint32_t bitrate_kbps, uint32_t fps);
   void RunActiveStreamsTest(std::vector<bool> active_streams);
   void UpdateActiveStreams(std::vector<bool> active_streams);
+  void ExpectStream(VideoFrameType frame_type, int scaleResolutionDownBy);
   void ExpectStreams(VideoFrameType frame_type,
                      std::vector<bool> expected_streams_active);
   void ExpectStreams(VideoFrameType frame_type, int expected_video_streams);