Always call aom_codec_encode for every spatial layer in the libaom AV1 encoder wrapper.

Bug: none
Change-Id: I8556c4ba14393b958f4012fe9942af5523aae356
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/236341
Reviewed-by: Marco Paniconi <marpan@google.com>
Reviewed-by: Jerome Jiang <jianj@google.com>
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#35280}
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index 9c7cc89..5990772 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -616,20 +616,38 @@
   const uint32_t duration =
       kRtpTicksPerSecond / static_cast<float>(encoder_settings_.maxFramerate);
 
-  for (size_t i = 0; i < layer_frames.size(); ++i) {
-    ScalableVideoController::LayerFrameConfig& layer_frame = layer_frames[i];
-    const bool end_of_picture = i == layer_frames.size() - 1;
+  const size_t num_spatial_layers =
+      svc_params_ ? svc_params_->number_spatial_layers : 1;
+  size_t next_layer_frame_index = 0;
+  for (size_t i = 0; i < num_spatial_layers; ++i) {
+    // The libaom AV1 encoder requires `aom_codec_encode` to be called for
+    // every spatial layer, even if the configured bitrate for that layer is
+    // zero. For zero-bitrate spatial layers, no frames will be produced.
+    absl::optional<ScalableVideoController::LayerFrameConfig>
+        non_encoded_layer_frame;
+    ScalableVideoController::LayerFrameConfig* layer_frame;
+    if (next_layer_frame_index < layer_frames.size() &&
+        layer_frames[next_layer_frame_index].SpatialId() ==
+            static_cast<int>(i)) {
+      layer_frame = &layer_frames[next_layer_frame_index];
+      next_layer_frame_index++;
+    } else {
+      // For layers that are not encoded, only the spatial id matters.
+      non_encoded_layer_frame.emplace().S(i);
+      layer_frame = &*non_encoded_layer_frame;
+    }
+    const bool end_of_picture = (next_layer_frame_index == layer_frames.size());
 
     aom_enc_frame_flags_t flags =
-        layer_frame.IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0;
+        layer_frame->IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0;
 
     if (SvcEnabled()) {
-      SetSvcLayerId(layer_frame);
-      SetSvcRefFrameConfig(layer_frame);
+      SetSvcLayerId(*layer_frame);
+      SetSvcRefFrameConfig(*layer_frame);
 
       aom_codec_err_t ret =
           aom_codec_control(&ctx_, AV1E_SET_ERROR_RESILIENT_MODE,
-                            layer_frame.TemporalId() > 0 ? 1 : 0);
+                            layer_frame->TemporalId() > 0 ? 1 : 0);
       if (ret != AOM_CODEC_OK) {
         RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
                             << " on control AV1E_SET_ERROR_RESILIENT_MODE.";
@@ -646,6 +664,10 @@
       return WEBRTC_VIDEO_CODEC_ERROR;
     }
 
+    if (non_encoded_layer_frame) {
+      continue;
+    }
+
     // Get encoded image data.
     EncodedImage encoded_image;
     aom_codec_iter_t iter = nullptr;
@@ -663,9 +685,10 @@
             /*size=*/pkt->data.frame.sz));
 
         if ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0) {
-          layer_frame.Keyframe();
+          layer_frame->Keyframe();
         }
-        encoded_image._frameType = layer_frame.IsKeyframe()
+
+        encoded_image._frameType = layer_frame->IsKeyframe()
                                        ? VideoFrameType::kVideoFrameKey
                                        : VideoFrameType::kVideoFrameDelta;
         encoded_image.SetTimestamp(frame.timestamp());
@@ -675,8 +698,8 @@
         // If encoded image width/height info are added to aom_codec_cx_pkt_t,
         // use those values in lieu of the values in frame.
         if (svc_params_) {
-          int n = svc_params_->scaling_factor_num[layer_frame.SpatialId()];
-          int d = svc_params_->scaling_factor_den[layer_frame.SpatialId()];
+          int n = svc_params_->scaling_factor_num[layer_frame->SpatialId()];
+          int d = svc_params_->scaling_factor_den[layer_frame->SpatialId()];
           encoded_image._encodedWidth = cfg_.g_w * n / d;
           encoded_image._encodedHeight = cfg_.g_h * n / d;
         } else {
@@ -702,9 +725,9 @@
       CodecSpecificInfo codec_specific_info;
       codec_specific_info.codecType = kVideoCodecAV1;
       codec_specific_info.end_of_picture = end_of_picture;
-      bool is_keyframe = layer_frame.IsKeyframe();
+      bool is_keyframe = layer_frame->IsKeyframe();
       codec_specific_info.generic_frame_info =
-          svc_controller_->OnEncodeDone(std::move(layer_frame));
+          svc_controller_->OnEncodeDone(std::move(*layer_frame));
       if (is_keyframe && codec_specific_info.generic_frame_info) {
         codec_specific_info.template_structure =
             svc_controller_->DependencyStructure();
@@ -747,6 +770,7 @@
 
   svc_controller_->OnRatesUpdated(parameters.bitrate);
   cfg_.rc_target_bitrate = parameters.bitrate.get_sum_kbps();
+  aom_codec_err_t error_code = aom_codec_enc_config_set(&ctx_, &cfg_);
 
   if (SvcEnabled()) {
     for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
@@ -769,9 +793,6 @@
   // Set frame rate to closest integer value.
   encoder_settings_.maxFramerate =
       static_cast<uint32_t>(parameters.framerate_fps + 0.5);
-
-  // Update encoder context.
-  aom_codec_err_t error_code = aom_codec_enc_config_set(&ctx_, &cfg_);
   if (error_code != AOM_CODEC_OK) {
     RTC_LOG(LS_WARNING) << "Error configuring encoder, error code: "
                         << error_code;
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc
index 6e92e50..0c67e4d 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc
@@ -25,6 +25,7 @@
 namespace {
 
 using ::testing::ElementsAre;
+using ::testing::Eq;
 using ::testing::Field;
 using ::testing::IsEmpty;
 using ::testing::SizeIs;
@@ -85,6 +86,51 @@
             0b01);
 }
 
+TEST(LibaomAv1EncoderTest,
+     SpatialScalabilityInTemporalUnitReportedAsDeltaFrame) {
+  std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder();
+  VideoCodec codec_settings = DefaultCodecSettings();
+  codec_settings.SetScalabilityMode("L2T1");
+  ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()),
+            WEBRTC_VIDEO_CODEC_OK);
+
+  VideoEncoder::RateControlParameters rate_parameters;
+  rate_parameters.framerate_fps = 30;
+  rate_parameters.bitrate.SetBitrate(/*spatial_index=*/0, 0, 300'000);
+  rate_parameters.bitrate.SetBitrate(/*spatial_index=*/1, 0, 300'000);
+  encoder->SetRates(rate_parameters);
+
+  std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames =
+      EncodedVideoFrameProducer(*encoder).SetNumInputFrames(1).Encode();
+  ASSERT_THAT(encoded_frames, SizeIs(2));
+  EXPECT_THAT(encoded_frames[0].encoded_image._frameType,
+              Eq(VideoFrameType::kVideoFrameKey));
+  EXPECT_THAT(encoded_frames[1].encoded_image._frameType,
+              Eq(VideoFrameType::kVideoFrameDelta));
+}
+
+TEST(LibaomAv1EncoderTest, NoBitrateOnTopSpatialLayerProduceDeltaFrames) {
+  std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder();
+  VideoCodec codec_settings = DefaultCodecSettings();
+  codec_settings.SetScalabilityMode("L2T1");
+  ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()),
+            WEBRTC_VIDEO_CODEC_OK);
+
+  VideoEncoder::RateControlParameters rate_parameters;
+  rate_parameters.framerate_fps = 30;
+  rate_parameters.bitrate.SetBitrate(/*spatial_index=*/0, 0, 300'000);
+  rate_parameters.bitrate.SetBitrate(/*spatial_index=*/1, 0, 0);
+  encoder->SetRates(rate_parameters);
+
+  std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames =
+      EncodedVideoFrameProducer(*encoder).SetNumInputFrames(2).Encode();
+  ASSERT_THAT(encoded_frames, SizeIs(2));
+  EXPECT_THAT(encoded_frames[0].encoded_image._frameType,
+              Eq(VideoFrameType::kVideoFrameKey));
+  EXPECT_THAT(encoded_frames[1].encoded_image._frameType,
+              Eq(VideoFrameType::kVideoFrameDelta));
+}
+
 TEST(LibaomAv1EncoderTest, SetsEndOfPictureForLastFrameInTemporalUnit) {
   VideoBitrateAllocation allocation;
   allocation.SetBitrate(0, 0, 30000);