// test/fuzzers/vp9_encoder_references_fuzzer.cc - src - Git at Google

 /*
  *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include <stdint.h>

 #include "absl/algorithm/container.h"
 #include "absl/base/macros.h"
 #include "absl/container/inlined_vector.h"
 #include "api/array_view.h"
 #include "api/transport/webrtc_key_value_config.h"
 #include "api/video/video_frame.h"
 #include "api/video_codecs/video_codec.h"
 #include "api/video_codecs/video_encoder.h"
 #include "modules/video_coding/codecs/interface/mock_libvpx_interface.h"
 #include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
 #include "modules/video_coding/frame_dependencies_calculator.h"
 #include "rtc_base/numerics/safe_compare.h"
 #include "test/fuzzers/fuzz_data_helper.h"
 #include "test/gmock.h"

 // Fuzzer simulates various svc configurations and libvpx encoder dropping
 // layer frames.
 // Validates vp9 encoder wrapper produces consistent frame references.
 namespace webrtc {
 namespace {

 using test::FuzzDataHelper;
 using ::testing::NiceMock;

 constexpr int kBitrateEnabledBps = 100'000;

 // Encoded-frame sink that validates the references reported by the VP9
 // encoder wrapper.
 //
 // Every layer frame passed to OnEncodedImage() is stored in a small ring
 // buffer and checked against the frames stored before it. Any inconsistency
 // is reported through RTC_CHECK.
 class FrameValidator : public EncodedImageCallback {
  public:
   ~FrameValidator() override = default;

   // Records the layer frame and validates its VP9-specific references and,
   // when `generic_frame_info` is set, its generic references as well.
   // `codec_specific_info` must be non-null and must describe a VP9 frame.
   // Always returns Result::OK.
   Result OnEncodedImage(const EncodedImage& encoded_image,
                         const CodecSpecificInfo* codec_specific_info) override {
     RTC_CHECK(codec_specific_info);
     RTC_CHECK_EQ(codec_specific_info->codecType, kVideoCodecVP9);
     const CodecSpecificInfoVP9& vp9_info =
         codec_specific_info->codecSpecific.VP9;
     // The first layer frame of a picture starts a new picture id.
     if (vp9_info.first_frame_in_picture) {
       ++picture_id_;
     }
     const int64_t frame_id = frame_id_++;
     LayerFrame& layer_frame = frames_[frame_id % kMaxFrameHistorySize];
     layer_frame.picture_id = picture_id_;
     layer_frame.spatial_id = encoded_image.SpatialIndex().value_or(0);
     layer_frame.frame_id = frame_id;
     layer_frame.temporal_id = vp9_info.temporal_idx;
     if (layer_frame.temporal_id == kNoTemporalIdx) {
       // Frames without a temporal index are treated as temporal layer 0.
       layer_frame.temporal_id = 0;
     }
     layer_frame.vp9_non_ref_for_inter_layer_pred =
         vp9_info.non_ref_for_inter_layer_pred;
     CheckVp9References(layer_frame, vp9_info);

     if (codec_specific_info->generic_frame_info.has_value()) {
       const GenericFrameInfo& generic_info =
           *codec_specific_info->generic_frame_info;
       absl::InlinedVector<int64_t, 5> frame_dependencies =
           dependencies_calculator_.FromBuffersUsage(
               frame_id, generic_info.encoder_buffers);

       CheckGenericReferences(frame_dependencies, generic_info);
       CheckGenericAndCodecSpecificReferencesAreConsistent(
           frame_dependencies, *codec_specific_info, layer_frame);
     }

     return Result(Result::OK);
   }

  private:
   // With 4 spatial layers and patterns up to 8 pictures, it should be enough to
   // keep the last 32 frames to validate dependencies.
   static constexpr size_t kMaxFrameHistorySize = 32;

   // Snapshot of the properties of one produced layer frame.
   struct LayerFrame {
     // -1 never equals a real frame id, so looking up a history slot that was
     // never written fails the check in Frame() deterministically instead of
     // reading indeterminate memory.
     int64_t frame_id = -1;
     int64_t picture_id = -1;
     int spatial_id = 0;
     int temporal_id = 0;
     bool vp9_non_ref_for_inter_layer_pred = false;
   };

   // Validates the VP9-specific description of `layer_frame` against the
   // previously produced frame and, in non-flexible mode, against the last
   // received group-of-frames description.
   void CheckVp9References(const LayerFrame& layer_frame,
                           const CodecSpecificInfoVP9& vp9_info) {
     if (layer_frame.frame_id == 0) {
       // The very first frame has nothing to predict from.
       RTC_CHECK(!vp9_info.inter_layer_predicted);
     } else {
       const LayerFrame& previous_frame = Frame(layer_frame.frame_id - 1);
       if (vp9_info.inter_layer_predicted) {
         // Inter-layer prediction uses the previous layer frame, which must
         // belong to the same picture and be usable as such a reference.
         RTC_CHECK(!previous_frame.vp9_non_ref_for_inter_layer_pred);
         RTC_CHECK_EQ(layer_frame.picture_id, previous_frame.picture_id);
       }
       if (previous_frame.picture_id == layer_frame.picture_id) {
         // Layer frames of one picture arrive in increasing spatial order.
         RTC_CHECK_GT(layer_frame.spatial_id, previous_frame.spatial_id);
         // The check below would fail for temporal shift structures. Remove it
         // or move it to !flexible_mode section when vp9 encoder starts
         // supporting such structures.
         RTC_CHECK_EQ(layer_frame.temporal_id, previous_frame.temporal_id);
       }
     }
     if (!vp9_info.flexible_mode) {
       // Remember the most recent non-empty group-of-frames description.
       if (vp9_info.gof.num_frames_in_gof > 0) {
         gof_.CopyGofInfoVP9(vp9_info.gof);
       }
       RTC_CHECK_EQ(gof_.temporal_idx[vp9_info.gof_idx],
                    layer_frame.temporal_id);
     }
   }

   // Checks that every dependency is a known frame whose spatial and temporal
   // ids do not exceed the ones in `generic_info`.
   void CheckGenericReferences(rtc::ArrayView<const int64_t> frame_dependencies,
                               const GenericFrameInfo& generic_info) const {
     for (int64_t dependency_frame_id : frame_dependencies) {
       RTC_CHECK_GE(dependency_frame_id, 0);
       const LayerFrame& dependency = Frame(dependency_frame_id);
       RTC_CHECK_GE(generic_info.spatial_id, dependency.spatial_id);
       RTC_CHECK_GE(generic_info.temporal_id, dependency.temporal_id);
     }
   }

   // Checks that the dependencies derived from the generic buffer usage agree
   // with the VP9-specific fields (`p_diff`, `inter_layer_predicted`,
   // `inter_pic_predicted`) of the same frame.
   void CheckGenericAndCodecSpecificReferencesAreConsistent(
       rtc::ArrayView<const int64_t> frame_dependencies,
       const CodecSpecificInfo& info,
       const LayerFrame& layer_frame) const {
     const CodecSpecificInfoVP9& vp9_info = info.codecSpecific.VP9;
     const GenericFrameInfo& generic_info = *info.generic_frame_info;

     RTC_CHECK_EQ(generic_info.spatial_id, layer_frame.spatial_id);
     RTC_CHECK_EQ(generic_info.temporal_id, layer_frame.temporal_id);
     auto picture_id_diffs =
         rtc::MakeArrayView(vp9_info.p_diff, vp9_info.num_ref_pics);
     // One dependency per referenced picture plus one for the inter-layer
     // reference, if any.
     RTC_CHECK_EQ(
         frame_dependencies.size(),
         picture_id_diffs.size() + (vp9_info.inter_layer_predicted ? 1 : 0));
     for (int64_t dependency_frame_id : frame_dependencies) {
       RTC_CHECK_GE(dependency_frame_id, 0);
       const LayerFrame& dependency = Frame(dependency_frame_id);
       if (dependency.spatial_id != layer_frame.spatial_id) {
         // Inter-layer reference: same picture, lower spatial layer.
         RTC_CHECK(vp9_info.inter_layer_predicted);
         RTC_CHECK_EQ(layer_frame.picture_id, dependency.picture_id);
         RTC_CHECK_GT(layer_frame.spatial_id, dependency.spatial_id);
       } else {
         // Inter-picture reference: same spatial layer, and the picture id
         // distance has to be listed in `p_diff`.
         RTC_CHECK(vp9_info.inter_pic_predicted);
         RTC_CHECK_EQ(layer_frame.spatial_id, dependency.spatial_id);
         RTC_CHECK(absl::c_linear_search(
             picture_id_diffs, layer_frame.picture_id - dependency.picture_id));
       }
     }
   }

   // Returns the stored frame with the given id. Crashes when the history slot
   // holds a different frame, i.e. the frame was overwritten or never stored.
   const LayerFrame& Frame(int64_t frame_id) const {
     const LayerFrame& frame = frames_[frame_id % kMaxFrameHistorySize];
     RTC_CHECK_EQ(frame.frame_id, frame_id);
     return frame;
   }

   GofInfoVP9 gof_;
   int64_t frame_id_ = 0;
   int64_t picture_id_ = 1;
   FrameDependenciesCalculator dependencies_calculator_;
   // Ring buffer indexed by frame_id % kMaxFrameHistorySize.
   LayerFrame frames_[kMaxFrameHistorySize];
 };

 class FieldTrials : public WebRtcKeyValueConfig {
  public:
   explicit FieldTrials(FuzzDataHelper& config)
       : flags_(config.ReadOrDefaultValue<uint8_t>(0)) {}

   ~FieldTrials() override = default;
   std::string Lookup(absl::string_view key) const override {
     static constexpr absl::string_view kBinaryFieldTrials[] = {
         "WebRTC-Vp9DependencyDescriptor",
         "WebRTC-Vp9ExternalRefCtrl",
         "WebRTC-Vp9IssueKeyFrameOnLayerDeactivation",
     };
     for (size_t i = 0; i < ABSL_ARRAYSIZE(kBinaryFieldTrials); ++i) {
       if (key == kBinaryFieldTrials[i]) {
         return (flags_ & (1u << i)) ? "Enabled" : "Disabled";
       }
     }

     // Ignore following field trials.
     if (key == "WebRTC-CongestionWindow" ||
         key == "WebRTC-UseBaseHeavyVP8TL3RateAllocation" ||
         key == "WebRTC-SimulcastUpswitchHysteresisPercent" ||
         key == "WebRTC-SimulcastScreenshareUpswitchHysteresisPercent" ||
         key == "WebRTC-VideoRateControl" ||
         key == "WebRTC-VP9-PerformanceFlags" ||
         key == "WebRTC-VP9VariableFramerateScreenshare" ||
         key == "WebRTC-VP9QualityScaler") {
       return "";
     }
     // Crash when using unexpected field trial to decide if it should be fuzzed
     // or have a constant value.
     RTC_CHECK(false) << "Unfuzzed field trial " << key << "\n";
   }

  private:
   const uint8_t flags_;
 };

 // Builds VP9 codec settings from 16 bits of fuzz data (0 when unavailable):
 //   bits 0-1: number of spatial layers - 1
 //   bits 2-3: number of temporal layers - 1
 //   bits 4-5: inter layer prediction mode (value 3 is mapped to 0)
 //   bit 6:    flexible mode
 //   bit 7:    frame dropping
 VideoCodec CodecSettings(FuzzDataHelper& rng) {
   const uint16_t config = rng.ReadOrDefaultValue<uint16_t>(0);
   // Test up to 4 spatial and 4 temporal layers.
   const int num_spatial_layers = 1 + (config & 0b11);
   const int num_temporal_layers = 1 + ((config >> 2) & 0b11);
   const int top_spatial_id = num_spatial_layers - 1;

   VideoCodec codec_settings = {};
   codec_settings.codecType = kVideoCodecVP9;
   codec_settings.mode = VideoCodecMode::kRealtimeVideo;
   codec_settings.maxFramerate = 30;
   // The overall resolution equals the resolution of the top spatial layer;
   // each lower layer is half the size of the one above it.
   codec_settings.width = 320 << top_spatial_id;
   codec_settings.height = 180 << top_spatial_id;
   if (num_spatial_layers > 1) {
     for (int sid = 0; sid <= top_spatial_id; ++sid) {
       SpatialLayer& spatial_layer = codec_settings.spatialLayers[sid];
       spatial_layer.width = 320 << sid;
       spatial_layer.height = 180 << sid;
       spatial_layer.maxFramerate = codec_settings.maxFramerate;
       spatial_layer.numberOfTemporalLayers = num_temporal_layers;
       spatial_layer.targetBitrate = kBitrateEnabledBps * num_temporal_layers;
     }
   }
   codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers;
   codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers;

   // There are only 3 valid values.
   int inter_layer_pred = (config >> 4) & 0b11;
   if (inter_layer_pred >= 3) {
     inter_layer_pred = 0;
   }
   codec_settings.VP9()->interLayerPred =
       static_cast<InterLayerPredMode>(inter_layer_pred);
   codec_settings.VP9()->flexibleMode = ((config >> 6) & 1u) != 0;
   codec_settings.VP9()->frameDroppingOn = ((config >> 7) & 1u) != 0;
   return codec_settings;
 }

 // Fixed encoder settings shared by every fuzzer run.
 VideoEncoder::Settings EncoderSettings() {
   const VideoEncoder::Capabilities capabilities(false);
   return VideoEncoder::Settings(capabilities,
                                 /*number_of_cores=*/1,
                                 /*max_payload_size=*/0);
 }

 // Tells whether `allocation` is a configuration the fuzzer is allowed to pass
 // to the encoder. VP9 encoder doesn't support certain configurations, and
 // BitrateAllocator shouldn't produce them.
 //
 // Returns false when
 //  - the allocation turns off all the layers,
 //  - some temporal layer has no enabled spatial layer, or
 //  - the enabled spatial layers of some temporal layer are not contiguous.
 bool IsSupported(int num_spatial_layers,
                  int num_temporal_layers,
                  const VideoBitrateAllocation& allocation) {
   // Ignore allocation that turns off all the layers.
   // In such a case it is up to upper layer code not to call Encode.
   if (allocation.get_sum_bps() == 0) {
     return false;
   }

   for (int tid = 0; tid < num_temporal_layers; ++tid) {
     int first_enabled_sid = -1;
     int last_enabled_sid = -1;
     int enabled_count = 0;
     for (int sid = 0; sid < num_spatial_layers; ++sid) {
       const bool enabled = allocation.GetBitrate(sid, tid) > 0;
       if (!enabled) {
         continue;
       }
       if (enabled_count == 0) {
         first_enabled_sid = sid;
       }
       last_enabled_sid = sid;
       ++enabled_count;
     }

     // Each temporal layer should be enabled because skipping a full frame is
     // not supported in non-flexible mode.
     const bool none_enabled = enabled_count == 0;
     // To avoid odd spatial dependencies, there should be no gaps in active
     // spatial layers.
     const bool contiguous =
         last_enabled_sid - first_enabled_sid + 1 == enabled_count;
     if (none_enabled || !contiguous) {
       return false;
     }
   }

   return true;
 }

 // Holds the libvpx-side values used by the fake libvpx in this fuzzer: the
 // last configuration, the registered output callback, svc controls and the
 // packet handed back to the encoder.
 struct LibvpxState {
   // Prepares `pkt` as a frame packet backed by `pkt_buffer` and marks the
   // spatial layer id as not set (-1).
   LibvpxState() {
     pkt.kind = VPX_CODEC_CX_FRAME_PKT;
     pkt.data.frame.buf = pkt_buffer;
     pkt.data.frame.sz = ABSL_ARRAYSIZE(pkt_buffer);
     layer_id.spatial_layer_id = -1;
   }

   // `pkt` points into this object's own `pkt_buffer`; a copy would keep
   // pointing at the buffer of the object it was copied from, so copying is
   // disallowed.
   LibvpxState(const LibvpxState&) = delete;
   LibvpxState& operator=(const LibvpxState&) = delete;

   uint8_t pkt_buffer[1000] = {};
   vpx_codec_enc_cfg_t config = {};
   vpx_codec_priv_output_cx_pkt_cb_pair_t callback = {};
   vpx_image_t img = {};
   vpx_svc_ref_frame_config_t ref_config = {};
   vpx_svc_layer_id_t layer_id = {};
   vpx_svc_frame_drop_t frame_drop = {};
   vpx_codec_cx_pkt pkt = {};
 };

 // Fake libvpx: every call reports success, and the arguments the fuzzer cares
 // about are mirrored into (or served from) an externally owned LibvpxState.
 class StubLibvpx : public NiceMock<MockLibvpxInterface> {
  public:
   // `state` must be non-null; it is not owned by this object.
   explicit StubLibvpx(LibvpxState* state) : state_(state) { RTC_CHECK(state_); }

   // Stores the passed configuration as the current one.
   vpx_codec_err_t codec_enc_config_default(vpx_codec_iface_t* iface,
                                            vpx_codec_enc_cfg_t* cfg,
                                            unsigned int usage) const override {
     state_->config = *cfg;
     return VPX_CODEC_OK;
   }

   // Only clears the error state of `ctx`.
   vpx_codec_err_t codec_enc_init(vpx_codec_ctx_t* ctx,
                                  vpx_codec_iface_t* iface,
                                  const vpx_codec_enc_cfg_t* cfg,
                                  vpx_codec_flags_t flags) const override {
     RTC_CHECK(ctx);
     ctx->err = VPX_CODEC_OK;
     return VPX_CODEC_OK;
   }

   // Records format and display size in the shared image and returns it.
   vpx_image_t* img_wrap(vpx_image_t* img,
                         vpx_img_fmt_t fmt,
                         unsigned int d_w,
                         unsigned int d_h,
                         unsigned int stride_align,
                         unsigned char* img_data) const override {
     vpx_image_t& wrapped = state_->img;
     wrapped.fmt = fmt;
     wrapped.d_w = d_w;
     wrapped.d_h = d_h;
     return &wrapped;
   }

   // Prepares the output packet: key frame flag when a key frame is forced,
   // no flags otherwise, and the requested duration.
   vpx_codec_err_t codec_encode(vpx_codec_ctx_t* ctx,
                                const vpx_image_t* img,
                                vpx_codec_pts_t pts,
                                uint64_t duration,
                                vpx_enc_frame_flags_t flags,
                                uint64_t deadline) const override {
     const bool force_key_frame = (flags & VPX_EFLAG_FORCE_KF) != 0;
     state_->pkt.data.frame.flags = force_key_frame ? VPX_FRAME_IS_KEY : 0;
     state_->pkt.data.frame.duration = duration;
     return VPX_CODEC_OK;
   }

   // Captures the output callback registered by the encoder.
   vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx,
                                 vp8e_enc_control_id ctrl_id,
                                 void* param) const override {
     if (ctrl_id == VP9E_REGISTER_CX_CALLBACK) {
       const auto* callback =
           static_cast<vpx_codec_priv_output_cx_pkt_cb_pair_t*>(param);
       state_->callback = *callback;
     }
     return VPX_CODEC_OK;
   }

   // Stores or returns the svc reference frame configuration.
   vpx_codec_err_t codec_control(
       vpx_codec_ctx_t* ctx,
       vp8e_enc_control_id ctrl_id,
       vpx_svc_ref_frame_config_t* param) const override {
     if (ctrl_id == VP9E_SET_SVC_REF_FRAME_CONFIG) {
       state_->ref_config = *param;
     } else if (ctrl_id == VP9E_GET_SVC_REF_FRAME_CONFIG) {
       *param = state_->ref_config;
     }
     return VPX_CODEC_OK;
   }

   // Stores or returns the svc layer id.
   vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx,
                                 vp8e_enc_control_id ctrl_id,
                                 vpx_svc_layer_id_t* param) const override {
     if (ctrl_id == VP9E_SET_SVC_LAYER_ID) {
       state_->layer_id = *param;
     } else if (ctrl_id == VP9E_GET_SVC_LAYER_ID) {
       *param = state_->layer_id;
     }
     return VPX_CODEC_OK;
   }

   // Stores the svc frame drop configuration.
   vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx,
                                 vp8e_enc_control_id ctrl_id,
                                 vpx_svc_frame_drop_t* param) const override {
     if (ctrl_id == VP9E_SET_SVC_FRAME_DROP_LAYER) {
       state_->frame_drop = *param;
     }
     return VPX_CODEC_OK;
   }

   // Replaces the current configuration.
   vpx_codec_err_t codec_enc_config_set(
       vpx_codec_ctx_t* ctx,
       const vpx_codec_enc_cfg_t* cfg) const override {
     state_->config = *cfg;
     return VPX_CODEC_OK;
   }

  private:
   LibvpxState* const state_;
 };

 // Kinds of fuzzer actions; the value is matched against the two lowest bits
 // of an action byte.
 enum Actions {
   kEncode = 0,
   kSetRates = 1,
 };

 // When a layer frame is marked for drop, drops all layer frames from that
 // picture with larger spatial ids.
 // Bit i of `layers_mask` is set when spatial layer i should be produced.
 // Returns true when layer `sid` or any layer below it is missing in the mask.
 constexpr bool DropAbove(uint8_t layers_mask, int sid) {
   // Bits of the layers 0..sid.
   const uint8_t layers_up_to_sid =
       static_cast<uint8_t>((uint8_t{1} << (sid + 1)) - 1);
   return (~layers_mask & layers_up_to_sid) != 0;
 }
 // inline unittests
 static_assert(!DropAbove(0b1011, /*sid=*/0), "");
 static_assert(!DropAbove(0b1011, /*sid=*/1), "");
 static_assert(DropAbove(0b1011, /*sid=*/2), "");
 static_assert(DropAbove(0b1011, /*sid=*/3), "");

 // When a layer frame is marked for drop, drops all layer frames from that
 // picture with smaller spatial ids.
 // Bit i of `layers_mask` is set when spatial layer i should be produced.
 // Returns true when layer `sid` or any layer above it, up to `num_layers` - 1,
 // is missing in the mask. Requires 0 <= sid < num_layers.
 constexpr bool DropBelow(uint8_t layers_mask, int sid, int num_layers) {
   // Bits of the layers sid..num_layers-1, shifted down by `sid`.
   const int layers_from_sid = (1 << (num_layers - sid)) - 1;
   // Mask bits of layers that do not exist (>= num_layers) are ignored;
   // otherwise a mask such as 0b1111 with only 2 layers would count as a drop
   // although every existing layer is requested.
   return ((layers_mask >> sid) & layers_from_sid) != layers_from_sid;
 }
 // inline unittests
 static_assert(DropBelow(0b1101, /*sid=*/0, 4) == true, "");
 static_assert(DropBelow(0b1101, /*sid=*/1, 4) == true, "");
 static_assert(DropBelow(0b1101, /*sid=*/2, 4) == false, "");
 static_assert(DropBelow(0b1101, /*sid=*/3, 4) == false, "");
 static_assert(DropBelow(0b1111, /*sid=*/0, 2) == false, "");
 static_assert(DropBelow(0b1101, /*sid=*/0, 2) == true, "");

 }  // namespace

 void FuzzOneInput(const uint8_t* data, size_t size) {
   FuzzDataHelper helper(rtc::MakeArrayView(data, size));

   FrameValidator validator;
   FieldTrials field_trials(helper);
   // Setup call callbacks for the fake
   LibvpxState state;

   // Initialize encoder
   LibvpxVp9Encoder encoder(cricket::VideoCodec(),
                            std::make_unique<StubLibvpx>(&state), field_trials);
   VideoCodec codec = CodecSettings(helper);
   if (encoder.InitEncode(&codec, EncoderSettings()) != WEBRTC_VIDEO_CODEC_OK) {
     return;
   }
   RTC_CHECK_EQ(encoder.RegisterEncodeCompleteCallback(&validator),
                WEBRTC_VIDEO_CODEC_OK);
   {
     // Enable all the layers initially. Encoder doesn't support producing
     // frames when no layers are enabled.
     LibvpxVp9Encoder::RateControlParameters parameters;
     parameters.framerate_fps = 30.0;
     for (int sid = 0; sid < codec.VP9()->numberOfSpatialLayers; ++sid) {
       for (int tid = 0; tid < codec.VP9()->numberOfTemporalLayers; ++tid) {
         parameters.bitrate.SetBitrate(sid, tid, kBitrateEnabledBps);
       }
     }
     encoder.SetRates(parameters);
   }

   std::vector<VideoFrameType> frame_types(1);
   VideoFrame fake_image = VideoFrame::Builder()
                               .set_video_frame_buffer(I420Buffer::Create(
                                   int{codec.width}, int{codec.height}))
                               .build();

   // Start producing frames at random.
   while (helper.CanReadBytes(1)) {
     uint8_t action = helper.Read<uint8_t>();
     switch (action & 0b11) {
       case kEncode: {
         // bitmask of the action: SSSS-K00, where
         // four S bit indicate which spatial layers should be produced,
         // K bit indicates if frame should be a key frame.
         frame_types[0] = (action & 0b100) ? VideoFrameType::kVideoFrameKey
                                           : VideoFrameType::kVideoFrameDelta;
         encoder.Encode(fake_image, &frame_types);
         uint8_t encode_spatial_layers = (action >> 4);
         for (size_t sid = 0; sid < state.config.ss_number_layers; ++sid) {
           if (state.config.ss_target_bitrate[sid] == 0) {
             // Don't encode disabled spatial layers.
             continue;
           }
           bool drop = true;
           switch (state.frame_drop.framedrop_mode) {
             case FULL_SUPERFRAME_DROP:
               drop = encode_spatial_layers == 0;
               break;
             case LAYER_DROP:
               drop = (encode_spatial_layers & (1 << sid)) == 0;
               break;
             case CONSTRAINED_LAYER_DROP:
               drop = DropBelow(encode_spatial_layers, sid,
                                state.config.ss_number_layers);
               break;
             case CONSTRAINED_FROM_ABOVE_DROP:
               drop = DropAbove(encode_spatial_layers, sid);
               break;
           }
           if (!drop) {
             state.layer_id.spatial_layer_id = sid;
             state.callback.output_cx_pkt(&state.pkt, state.callback.user_priv);
           }
         }
       } break;
       case kSetRates: {
         // bitmask of the action: (S2)(S1)(S0)01,
         // where Sx is number of temporal layers to enable for spatial layer x
         // In pariculat Sx = 0 indicates spatial layer x should be disabled.
         LibvpxVp9Encoder::RateControlParameters parameters;
         parameters.framerate_fps = 30.0;
         for (int sid = 0; sid < codec.VP9()->numberOfSpatialLayers; ++sid) {
           int temporal_layers = (action >> ((1 + sid) * 2)) & 0b11;
           for (int tid = 0; tid < temporal_layers; ++tid) {
             parameters.bitrate.SetBitrate(sid, tid, kBitrateEnabledBps);
           }
         }
         if (IsSupported(codec.VP9()->numberOfSpatialLayers,
                         codec.VP9()->numberOfTemporalLayers,
                         parameters.bitrate)) {
           encoder.SetRates(parameters);
         }
       } break;
       default:
         // Unspecificed values are noop.
         break;
     }
   }
 }
 }  // namespace webrtc
	/*
	* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include <stdint.h>

	#include "absl/algorithm/container.h"
	#include "absl/base/macros.h"
	#include "absl/container/inlined_vector.h"
	#include "api/array_view.h"
	#include "api/transport/webrtc_key_value_config.h"
	#include "api/video/video_frame.h"
	#include "api/video_codecs/video_codec.h"
	#include "api/video_codecs/video_encoder.h"
	#include "modules/video_coding/codecs/interface/mock_libvpx_interface.h"
	#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
	#include "modules/video_coding/frame_dependencies_calculator.h"
	#include "rtc_base/numerics/safe_compare.h"
	#include "test/fuzzers/fuzz_data_helper.h"
	#include "test/gmock.h"

	// Fuzzer simulates various svc configurations and libvpx encoder dropping
	// layer frames.
	// Validates vp9 encoder wrapper produces consistent frame references.
	namespace webrtc {
	namespace {

	using test::FuzzDataHelper;
	using ::testing::NiceMock;

	constexpr int kBitrateEnabledBps = 100'000;

	// Encoded-frame sink that validates the references reported by the VP9
	// encoder wrapper.
	//
	// Every layer frame passed to OnEncodedImage() is stored in a small ring
	// buffer and checked against the frames stored before it. Any inconsistency
	// is reported through RTC_CHECK.
	class FrameValidator : public EncodedImageCallback {
	 public:
	  ~FrameValidator() override = default;

	  // Records the layer frame and validates its VP9-specific references and,
	  // when `generic_frame_info` is set, its generic references as well.
	  // `codec_specific_info` must be non-null and must describe a VP9 frame.
	  // Always returns Result::OK.
	  Result OnEncodedImage(const EncodedImage& encoded_image,
	                        const CodecSpecificInfo* codec_specific_info) override {
	    RTC_CHECK(codec_specific_info);
	    RTC_CHECK_EQ(codec_specific_info->codecType, kVideoCodecVP9);
	    const CodecSpecificInfoVP9& vp9_info =
	        codec_specific_info->codecSpecific.VP9;
	    // The first layer frame of a picture starts a new picture id.
	    if (vp9_info.first_frame_in_picture) {
	      ++picture_id_;
	    }
	    const int64_t frame_id = frame_id_++;
	    LayerFrame& layer_frame = frames_[frame_id % kMaxFrameHistorySize];
	    layer_frame.picture_id = picture_id_;
	    layer_frame.spatial_id = encoded_image.SpatialIndex().value_or(0);
	    layer_frame.frame_id = frame_id;
	    layer_frame.temporal_id = vp9_info.temporal_idx;
	    if (layer_frame.temporal_id == kNoTemporalIdx) {
	      // Frames without a temporal index are treated as temporal layer 0.
	      layer_frame.temporal_id = 0;
	    }
	    layer_frame.vp9_non_ref_for_inter_layer_pred =
	        vp9_info.non_ref_for_inter_layer_pred;
	    CheckVp9References(layer_frame, vp9_info);

	    if (codec_specific_info->generic_frame_info.has_value()) {
	      const GenericFrameInfo& generic_info =
	          *codec_specific_info->generic_frame_info;
	      absl::InlinedVector<int64_t, 5> frame_dependencies =
	          dependencies_calculator_.FromBuffersUsage(
	              frame_id, generic_info.encoder_buffers);

	      CheckGenericReferences(frame_dependencies, generic_info);
	      CheckGenericAndCodecSpecificReferencesAreConsistent(
	          frame_dependencies, *codec_specific_info, layer_frame);
	    }

	    return Result(Result::OK);
	  }

	 private:
	  // With 4 spatial layers and patterns up to 8 pictures, it should be enough to
	  // keep the last 32 frames to validate dependencies.
	  static constexpr size_t kMaxFrameHistorySize = 32;

	  // Snapshot of the properties of one produced layer frame.
	  struct LayerFrame {
	    // -1 never equals a real frame id, so looking up a history slot that was
	    // never written fails the check in Frame() deterministically instead of
	    // reading indeterminate memory.
	    int64_t frame_id = -1;
	    int64_t picture_id = -1;
	    int spatial_id = 0;
	    int temporal_id = 0;
	    bool vp9_non_ref_for_inter_layer_pred = false;
	  };

	  // Validates the VP9-specific description of `layer_frame` against the
	  // previously produced frame and, in non-flexible mode, against the last
	  // received group-of-frames description.
	  void CheckVp9References(const LayerFrame& layer_frame,
	                          const CodecSpecificInfoVP9& vp9_info) {
	    if (layer_frame.frame_id == 0) {
	      // The very first frame has nothing to predict from.
	      RTC_CHECK(!vp9_info.inter_layer_predicted);
	    } else {
	      const LayerFrame& previous_frame = Frame(layer_frame.frame_id - 1);
	      if (vp9_info.inter_layer_predicted) {
	        // Inter-layer prediction uses the previous layer frame, which must
	        // belong to the same picture and be usable as such a reference.
	        RTC_CHECK(!previous_frame.vp9_non_ref_for_inter_layer_pred);
	        RTC_CHECK_EQ(layer_frame.picture_id, previous_frame.picture_id);
	      }
	      if (previous_frame.picture_id == layer_frame.picture_id) {
	        // Layer frames of one picture arrive in increasing spatial order.
	        RTC_CHECK_GT(layer_frame.spatial_id, previous_frame.spatial_id);
	        // The check below would fail for temporal shift structures. Remove it
	        // or move it to !flexible_mode section when vp9 encoder starts
	        // supporting such structures.
	        RTC_CHECK_EQ(layer_frame.temporal_id, previous_frame.temporal_id);
	      }
	    }
	    if (!vp9_info.flexible_mode) {
	      // Remember the most recent non-empty group-of-frames description.
	      if (vp9_info.gof.num_frames_in_gof > 0) {
	        gof_.CopyGofInfoVP9(vp9_info.gof);
	      }
	      RTC_CHECK_EQ(gof_.temporal_idx[vp9_info.gof_idx],
	                   layer_frame.temporal_id);
	    }
	  }

	  // Checks that every dependency is a known frame whose spatial and temporal
	  // ids do not exceed the ones in `generic_info`.
	  void CheckGenericReferences(rtc::ArrayView<const int64_t> frame_dependencies,
	                              const GenericFrameInfo& generic_info) const {
	    for (int64_t dependency_frame_id : frame_dependencies) {
	      RTC_CHECK_GE(dependency_frame_id, 0);
	      const LayerFrame& dependency = Frame(dependency_frame_id);
	      RTC_CHECK_GE(generic_info.spatial_id, dependency.spatial_id);
	      RTC_CHECK_GE(generic_info.temporal_id, dependency.temporal_id);
	    }
	  }

	  // Checks that the dependencies derived from the generic buffer usage agree
	  // with the VP9-specific fields (`p_diff`, `inter_layer_predicted`,
	  // `inter_pic_predicted`) of the same frame.
	  void CheckGenericAndCodecSpecificReferencesAreConsistent(
	      rtc::ArrayView<const int64_t> frame_dependencies,
	      const CodecSpecificInfo& info,
	      const LayerFrame& layer_frame) const {
	    const CodecSpecificInfoVP9& vp9_info = info.codecSpecific.VP9;
	    const GenericFrameInfo& generic_info = *info.generic_frame_info;

	    RTC_CHECK_EQ(generic_info.spatial_id, layer_frame.spatial_id);
	    RTC_CHECK_EQ(generic_info.temporal_id, layer_frame.temporal_id);
	    auto picture_id_diffs =
	        rtc::MakeArrayView(vp9_info.p_diff, vp9_info.num_ref_pics);
	    // One dependency per referenced picture plus one for the inter-layer
	    // reference, if any.
	    RTC_CHECK_EQ(
	        frame_dependencies.size(),
	        picture_id_diffs.size() + (vp9_info.inter_layer_predicted ? 1 : 0));
	    for (int64_t dependency_frame_id : frame_dependencies) {
	      RTC_CHECK_GE(dependency_frame_id, 0);
	      const LayerFrame& dependency = Frame(dependency_frame_id);
	      if (dependency.spatial_id != layer_frame.spatial_id) {
	        // Inter-layer reference: same picture, lower spatial layer.
	        RTC_CHECK(vp9_info.inter_layer_predicted);
	        RTC_CHECK_EQ(layer_frame.picture_id, dependency.picture_id);
	        RTC_CHECK_GT(layer_frame.spatial_id, dependency.spatial_id);
	      } else {
	        // Inter-picture reference: same spatial layer, and the picture id
	        // distance has to be listed in `p_diff`.
	        RTC_CHECK(vp9_info.inter_pic_predicted);
	        RTC_CHECK_EQ(layer_frame.spatial_id, dependency.spatial_id);
	        RTC_CHECK(absl::c_linear_search(
	            picture_id_diffs, layer_frame.picture_id - dependency.picture_id));
	      }
	    }
	  }

	  // Returns the stored frame with the given id. Crashes when the history slot
	  // holds a different frame, i.e. the frame was overwritten or never stored.
	  const LayerFrame& Frame(int64_t frame_id) const {
	    const LayerFrame& frame = frames_[frame_id % kMaxFrameHistorySize];
	    RTC_CHECK_EQ(frame.frame_id, frame_id);
	    return frame;
	  }

	  GofInfoVP9 gof_;
	  int64_t frame_id_ = 0;
	  int64_t picture_id_ = 1;
	  FrameDependenciesCalculator dependencies_calculator_;
	  // Ring buffer indexed by frame_id % kMaxFrameHistorySize.
	  LayerFrame frames_[kMaxFrameHistorySize];
	};

	class FieldTrials : public WebRtcKeyValueConfig {
	public:
	explicit FieldTrials(FuzzDataHelper& config)
	: flags_(config.ReadOrDefaultValue<uint8_t>(0)) {}

	~FieldTrials() override = default;
	std::string Lookup(absl::string_view key) const override {
	static constexpr absl::string_view kBinaryFieldTrials[] = {
	"WebRTC-Vp9DependencyDescriptor",
	"WebRTC-Vp9ExternalRefCtrl",
	"WebRTC-Vp9IssueKeyFrameOnLayerDeactivation",
	};
	for (size_t i = 0; i < ABSL_ARRAYSIZE(kBinaryFieldTrials); ++i) {
	if (key == kBinaryFieldTrials[i]) {
	return (flags_ & (1u << i)) ? "Enabled" : "Disabled";
	}
	}

	// Ignore following field trials.
	if (key == "WebRTC-CongestionWindow" \|\|
	key == "WebRTC-UseBaseHeavyVP8TL3RateAllocation" \|\|
	key == "WebRTC-SimulcastUpswitchHysteresisPercent" \|\|
	key == "WebRTC-SimulcastScreenshareUpswitchHysteresisPercent" \|\|
	key == "WebRTC-VideoRateControl" \|\|
	key == "WebRTC-VP9-PerformanceFlags" \|\|
	key == "WebRTC-VP9VariableFramerateScreenshare" \|\|
	key == "WebRTC-VP9QualityScaler") {
	return "";
	}
	// Crash when using unexpected field trial to decide if it should be fuzzed
	// or have a constant value.
	RTC_CHECK(false) << "Unfuzzed field trial " << key << "\n";
	}

	private:
	const uint8_t flags_;
	};

	// Builds VP9 codec settings from 16 bits of fuzz data (0 when unavailable):
	//   bits 0-1: number of spatial layers - 1
	//   bits 2-3: number of temporal layers - 1
	//   bits 4-5: inter layer prediction mode (value 3 is mapped to 0)
	//   bit 6:    flexible mode
	//   bit 7:    frame dropping
	VideoCodec CodecSettings(FuzzDataHelper& rng) {
	  const uint16_t config = rng.ReadOrDefaultValue<uint16_t>(0);
	  // Test up to 4 spatial and 4 temporal layers.
	  const int num_spatial_layers = 1 + (config & 0b11);
	  const int num_temporal_layers = 1 + ((config >> 2) & 0b11);
	  const int top_spatial_id = num_spatial_layers - 1;

	  VideoCodec codec_settings = {};
	  codec_settings.codecType = kVideoCodecVP9;
	  codec_settings.mode = VideoCodecMode::kRealtimeVideo;
	  codec_settings.maxFramerate = 30;
	  // The overall resolution equals the resolution of the top spatial layer;
	  // each lower layer is half the size of the one above it.
	  codec_settings.width = 320 << top_spatial_id;
	  codec_settings.height = 180 << top_spatial_id;
	  if (num_spatial_layers > 1) {
	    for (int sid = 0; sid <= top_spatial_id; ++sid) {
	      SpatialLayer& spatial_layer = codec_settings.spatialLayers[sid];
	      spatial_layer.width = 320 << sid;
	      spatial_layer.height = 180 << sid;
	      spatial_layer.maxFramerate = codec_settings.maxFramerate;
	      spatial_layer.numberOfTemporalLayers = num_temporal_layers;
	      spatial_layer.targetBitrate = kBitrateEnabledBps * num_temporal_layers;
	    }
	  }
	  codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers;
	  codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers;

	  // There are only 3 valid values.
	  int inter_layer_pred = (config >> 4) & 0b11;
	  if (inter_layer_pred >= 3) {
	    inter_layer_pred = 0;
	  }
	  codec_settings.VP9()->interLayerPred =
	      static_cast<InterLayerPredMode>(inter_layer_pred);
	  codec_settings.VP9()->flexibleMode = ((config >> 6) & 1u) != 0;
	  codec_settings.VP9()->frameDroppingOn = ((config >> 7) & 1u) != 0;
	  return codec_settings;
	}

	// Fixed encoder settings shared by every fuzzer run.
	VideoEncoder::Settings EncoderSettings() {
	  // The argument-name comments had lost their `*` characters
	  // ("/number_of_cores=/1"), which is not valid C++; they are restored here.
	  return VideoEncoder::Settings(VideoEncoder::Capabilities(false),
	                                /*number_of_cores=*/1,
	                                /*max_payload_size=*/0);
	}

	// Tells whether `allocation` is a configuration the fuzzer is allowed to pass
	// to the encoder. VP9 encoder doesn't support certain configurations, and
	// BitrateAllocator shouldn't produce them.
	//
	// Returns false when
	//  - the allocation turns off all the layers,
	//  - some temporal layer has no enabled spatial layer, or
	//  - the enabled spatial layers of some temporal layer are not contiguous.
	bool IsSupported(int num_spatial_layers,
	                 int num_temporal_layers,
	                 const VideoBitrateAllocation& allocation) {
	  // Ignore allocation that turns off all the layers.
	  // In such a case it is up to upper layer code not to call Encode.
	  if (allocation.get_sum_bps() == 0) {
	    return false;
	  }

	  for (int tid = 0; tid < num_temporal_layers; ++tid) {
	    int first_enabled_sid = -1;
	    int last_enabled_sid = -1;
	    int enabled_count = 0;
	    for (int sid = 0; sid < num_spatial_layers; ++sid) {
	      const bool enabled = allocation.GetBitrate(sid, tid) > 0;
	      if (!enabled) {
	        continue;
	      }
	      if (enabled_count == 0) {
	        first_enabled_sid = sid;
	      }
	      last_enabled_sid = sid;
	      ++enabled_count;
	    }

	    // Each temporal layer should be enabled because skipping a full frame is
	    // not supported in non-flexible mode.
	    const bool none_enabled = enabled_count == 0;
	    // To avoid odd spatial dependencies, there should be no gaps in active
	    // spatial layers.
	    const bool contiguous =
	        last_enabled_sid - first_enabled_sid + 1 == enabled_count;
	    if (none_enabled || !contiguous) {
	      return false;
	    }
	  }

	  return true;
	}

	// Holds the libvpx-side values used by the fake libvpx in this fuzzer: the
	// last configuration, the registered output callback, svc controls and the
	// packet handed back to the encoder.
	struct LibvpxState {
	  // Prepares `pkt` as a frame packet backed by `pkt_buffer` and marks the
	  // spatial layer id as not set (-1).
	  LibvpxState() {
	    pkt.kind = VPX_CODEC_CX_FRAME_PKT;
	    pkt.data.frame.buf = pkt_buffer;
	    pkt.data.frame.sz = ABSL_ARRAYSIZE(pkt_buffer);
	    layer_id.spatial_layer_id = -1;
	  }

	  // `pkt` points into this object's own `pkt_buffer`; a copy would keep
	  // pointing at the buffer of the object it was copied from, so copying is
	  // disallowed.
	  LibvpxState(const LibvpxState&) = delete;
	  LibvpxState& operator=(const LibvpxState&) = delete;

	  uint8_t pkt_buffer[1000] = {};
	  vpx_codec_enc_cfg_t config = {};
	  vpx_codec_priv_output_cx_pkt_cb_pair_t callback = {};
	  vpx_image_t img = {};
	  vpx_svc_ref_frame_config_t ref_config = {};
	  vpx_svc_layer_id_t layer_id = {};
	  vpx_svc_frame_drop_t frame_drop = {};
	  vpx_codec_cx_pkt pkt = {};
	};

	// Fake libvpx interface. Instead of encoding anything, every override
	// records the relevant arguments into the LibvpxState supplied at
	// construction (or reads them back for the GET controls) and reports
	// VPX_CODEC_OK. The state object is not owned and must outlive the stub.
	class StubLibvpx : public NiceMock<MockLibvpxInterface> {
	 public:
	  explicit StubLibvpx(LibvpxState* state) : state_(state) { RTC_CHECK(state_); }

	  // Stores the configuration passed by the caller; `cfg` is left untouched.
	  vpx_codec_err_t codec_enc_config_default(vpx_codec_iface_t* iface,
	                                           vpx_codec_enc_cfg_t* cfg,
	                                           unsigned int usage) const override {
	    state_->config = *cfg;
	    return VPX_CODEC_OK;
	  }

	  // Marks the codec context as error free.
	  vpx_codec_err_t codec_enc_init(vpx_codec_ctx_t* ctx,
	                                 vpx_codec_iface_t* iface,
	                                 const vpx_codec_enc_cfg_t* cfg,
	                                 vpx_codec_flags_t flags) const override {
	    RTC_CHECK(ctx);
	    ctx->err = VPX_CODEC_OK;
	    return VPX_CODEC_OK;
	  }

	  // Records the format and display size and returns the image kept in the
	  // shared state; `img`, `stride_align` and `img_data` are ignored.
	  vpx_image_t* img_wrap(vpx_image_t* img,
	                        vpx_img_fmt_t fmt,
	                        unsigned int d_w,
	                        unsigned int d_h,
	                        unsigned int stride_align,
	                        unsigned char* img_data) const override {
	    state_->img.fmt = fmt;
	    state_->img.d_w = d_w;
	    state_->img.d_h = d_h;
	    return &state_->img;
	  }

	  // Prepares the output packet: it is flagged as a key frame only when a key
	  // frame was forced, and carries the requested duration. The packet itself
	  // is not delivered here.
	  vpx_codec_err_t codec_encode(vpx_codec_ctx_t* ctx,
	                               const vpx_image_t* img,
	                               vpx_codec_pts_t pts,
	                               uint64_t duration,
	                               vpx_enc_frame_flags_t flags,
	                               uint64_t deadline) const override {
	    if (flags & VPX_EFLAG_FORCE_KF) {
	      state_->pkt.data.frame.flags = VPX_FRAME_IS_KEY;
	    } else {
	      state_->pkt.data.frame.flags = 0;
	    }
	    state_->pkt.data.frame.duration = duration;
	    return VPX_CODEC_OK;
	  }

	  // Untyped control. Only VP9E_REGISTER_CX_CALLBACK is handled: `param` is
	  // then a pointer to the callback pair, which is copied into the state.
	  vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx,
	                                vp8e_enc_control_id ctrl_id,
	                                void* param) const override {
	    if (ctrl_id == VP9E_REGISTER_CX_CALLBACK) {
	      state_->callback =
	          *static_cast<vpx_codec_priv_output_cx_pkt_cb_pair_t*>(param);
	    }
	    return VPX_CODEC_OK;
	  }

	  // Stores or returns the svc reference frame configuration.
	  vpx_codec_err_t codec_control(
	      vpx_codec_ctx_t* ctx,
	      vp8e_enc_control_id ctrl_id,
	      vpx_svc_ref_frame_config_t* param) const override {
	    switch (ctrl_id) {
	      case VP9E_SET_SVC_REF_FRAME_CONFIG:
	        state_->ref_config = *param;
	        break;
	      case VP9E_GET_SVC_REF_FRAME_CONFIG:
	        *param = state_->ref_config;
	        break;
	      default:
	        break;
	    }
	    return VPX_CODEC_OK;
	  }

	  // Stores or returns the svc layer id.
	  vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx,
	                                vp8e_enc_control_id ctrl_id,
	                                vpx_svc_layer_id_t* param) const override {
	    switch (ctrl_id) {
	      case VP9E_SET_SVC_LAYER_ID:
	        state_->layer_id = *param;
	        break;
	      case VP9E_GET_SVC_LAYER_ID:
	        *param = state_->layer_id;
	        break;
	      default:
	        break;
	    }
	    return VPX_CODEC_OK;
	  }

	  // Stores the svc frame drop configuration.
	  vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx,
	                                vp8e_enc_control_id ctrl_id,
	                                vpx_svc_frame_drop_t* param) const override {
	    if (ctrl_id == VP9E_SET_SVC_FRAME_DROP_LAYER) {
	      state_->frame_drop = *param;
	    }
	    return VPX_CODEC_OK;
	  }

	  // Stores the updated encoder configuration.
	  vpx_codec_err_t codec_enc_config_set(
	      vpx_codec_ctx_t* ctx,
	      const vpx_codec_enc_cfg_t* cfg) const override {
	    state_->config = *cfg;
	    return VPX_CODEC_OK;
	  }

	 private:
	  // Not owned; checked to be non-null in the constructor.
	  LibvpxState* const state_;
	};

	// Kinds of fuzzer actions. The kind is taken from the two lowest bits of an
	// action byte; values without an enumerator are treated as a no-op.
	enum Actions {
	  kEncode = 0,    // Encode one picture.
	  kSetRates = 1,  // Change the bitrate allocation.
	};

	// When a layer frame is marked for drop, drops all layer frames of that
	// picture with larger spatial ids.
	// `layers_mask` has bit i set when spatial layer i is marked for encoding.
	// Returns true when layer `sid` has to be dropped, i.e. when `sid` itself or
	// any layer with a smaller spatial id is not set in `layers_mask`.
	constexpr bool DropAbove(uint8_t layers_mask, int sid) {
	  // Bits of all the layers up to and including `sid`.
	  const uint8_t full_mask = static_cast<uint8_t>((uint8_t{1} << (sid + 1)) - 1);
	  return (layers_mask & full_mask) != full_mask;
	}
	// inline unittests
	static_assert(DropAbove(0b1011, /*sid=*/0) == false, "");
	static_assert(DropAbove(0b1011, /*sid=*/1) == false, "");
	static_assert(DropAbove(0b1011, /*sid=*/2) == true, "");
	static_assert(DropAbove(0b1011, /*sid=*/3) == true, "");

	// When a layer frame is marked for drop, drops all layer frames of that
	// picture with smaller spatial ids.
	// `layers_mask` has bit i set when spatial layer i is marked for encoding,
	// `num_layers` is the number of spatial layers; `sid` must be smaller than
	// `num_layers`.
	// Returns true when layer `sid` has to be dropped, i.e. when `sid` itself or
	// any layer with a larger spatial id is not set in `layers_mask`.
	constexpr bool DropBelow(uint8_t layers_mask, int sid, int num_layers) {
	  // Bits of the layers [sid, num_layers), shifted down to start at bit 0.
	  const int expected = (1 << (num_layers - sid)) - 1;
	  // Bits at or above `num_layers` do not describe any layer; they are masked
	  // out so that they cannot force a drop.
	  return ((layers_mask >> sid) & expected) != expected;
	}
	// inline unittests
	static_assert(DropBelow(0b1101, /*sid=*/0, 4) == true, "");
	static_assert(DropBelow(0b1101, /*sid=*/1, 4) == true, "");
	static_assert(DropBelow(0b1101, /*sid=*/2, 4) == false, "");
	static_assert(DropBelow(0b1101, /*sid=*/3, 4) == false, "");
	static_assert(DropBelow(0b1111, /*sid=*/0, 3) == false, "");

	} // namespace

	void FuzzOneInput(const uint8_t* data, size_t size) {
	FuzzDataHelper helper(rtc::MakeArrayView(data, size));

	FrameValidator validator;
	FieldTrials field_trials(helper);
	// Setup call callbacks for the fake
	LibvpxState state;

	// Initialize encoder
	LibvpxVp9Encoder encoder(cricket::VideoCodec(),
	std::make_unique<StubLibvpx>(&state), field_trials);
	VideoCodec codec = CodecSettings(helper);
	if (encoder.InitEncode(&codec, EncoderSettings()) != WEBRTC_VIDEO_CODEC_OK) {
	return;
	}
	RTC_CHECK_EQ(encoder.RegisterEncodeCompleteCallback(&validator),
	WEBRTC_VIDEO_CODEC_OK);
	{
	// Enable all the layers initially. Encoder doesn't support producing
	// frames when no layers are enabled.
	LibvpxVp9Encoder::RateControlParameters parameters;
	parameters.framerate_fps = 30.0;
	for (int sid = 0; sid < codec.VP9()->numberOfSpatialLayers; ++sid) {
	for (int tid = 0; tid < codec.VP9()->numberOfTemporalLayers; ++tid) {
	parameters.bitrate.SetBitrate(sid, tid, kBitrateEnabledBps);
	}
	}
	encoder.SetRates(parameters);
	}

	std::vector<VideoFrameType> frame_types(1);
	VideoFrame fake_image = VideoFrame::Builder()
	.set_video_frame_buffer(I420Buffer::Create(
	int{codec.width}, int{codec.height}))
	.build();

	// Start producing frames at random.
	while (helper.CanReadBytes(1)) {
	uint8_t action = helper.Read<uint8_t>();
	switch (action & 0b11) {
	case kEncode: {
	// bitmask of the action: SSSS-K00, where
	// four S bit indicate which spatial layers should be produced,
	// K bit indicates if frame should be a key frame.
	frame_types[0] = (action & 0b100) ? VideoFrameType::kVideoFrameKey
	: VideoFrameType::kVideoFrameDelta;
	encoder.Encode(fake_image, &frame_types);
	uint8_t encode_spatial_layers = (action >> 4);
	for (size_t sid = 0; sid < state.config.ss_number_layers; ++sid) {
	if (state.config.ss_target_bitrate[sid] == 0) {
	// Don't encode disabled spatial layers.
	continue;
	}
	bool drop = true;
	switch (state.frame_drop.framedrop_mode) {
	case FULL_SUPERFRAME_DROP:
	drop = encode_spatial_layers == 0;
	break;
	case LAYER_DROP:
	drop = (encode_spatial_layers & (1 << sid)) == 0;
	break;
	case CONSTRAINED_LAYER_DROP:
	drop = DropBelow(encode_spatial_layers, sid,
	state.config.ss_number_layers);
	break;
	case CONSTRAINED_FROM_ABOVE_DROP:
	drop = DropAbove(encode_spatial_layers, sid);
	break;
	}
	if (!drop) {
	state.layer_id.spatial_layer_id = sid;
	state.callback.output_cx_pkt(&state.pkt, state.callback.user_priv);
	}
	}
	} break;
	case kSetRates: {
	// bitmask of the action: (S2)(S1)(S0)01,
	// where Sx is number of temporal layers to enable for spatial layer x
	// In pariculat Sx = 0 indicates spatial layer x should be disabled.
	LibvpxVp9Encoder::RateControlParameters parameters;
	parameters.framerate_fps = 30.0;
	for (int sid = 0; sid < codec.VP9()->numberOfSpatialLayers; ++sid) {
	int temporal_layers = (action >> ((1 + sid) * 2)) & 0b11;
	for (int tid = 0; tid < temporal_layers; ++tid) {
	parameters.bitrate.SetBitrate(sid, tid, kBitrateEnabledBps);
	}
	}
	if (IsSupported(codec.VP9()->numberOfSpatialLayers,
	codec.VP9()->numberOfTemporalLayers,
	parameters.bitrate)) {
	encoder.SetRates(parameters);
	}
	} break;
	default:
	// Unspecificed values are noop.
	break;
	}
	}
	}
	} // namespace webrtc