| /* |
| * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include <stdint.h> |
| |
| #include "absl/algorithm/container.h" |
| #include "absl/base/macros.h" |
| #include "absl/container/inlined_vector.h" |
| #include "api/array_view.h" |
| #include "api/transport/webrtc_key_value_config.h" |
| #include "api/video/video_frame.h" |
| #include "api/video_codecs/video_codec.h" |
| #include "api/video_codecs/video_encoder.h" |
| #include "modules/video_coding/codecs/interface/mock_libvpx_interface.h" |
| #include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h" |
| #include "modules/video_coding/frame_dependencies_calculator.h" |
| #include "rtc_base/numerics/safe_compare.h" |
| #include "test/fuzzers/fuzz_data_helper.h" |
| #include "test/gmock.h" |
| |
| // Fuzzer simulates various svc configurations and libvpx encoder dropping |
| // layer frames. |
| // Validates vp9 encoder wrapper produces consistent frame references. |
| namespace webrtc { |
| namespace { |
| |
| using test::FuzzDataHelper; |
| using ::testing::NiceMock; |
| |
// Bitrate, in bits per second, that the fuzzer assigns to every layer it wants
// to be enabled.
constexpr int kBitrateEnabledBps = 100 * 1000;
| |
| class FrameValidator : public EncodedImageCallback { |
| public: |
| ~FrameValidator() override = default; |
| |
| Result OnEncodedImage(const EncodedImage& encoded_image, |
| const CodecSpecificInfo* codec_specific_info) override { |
| RTC_CHECK(codec_specific_info); |
| RTC_CHECK_EQ(codec_specific_info->codecType, kVideoCodecVP9); |
| if (codec_specific_info->codecSpecific.VP9.first_frame_in_picture) { |
| ++picture_id_; |
| } |
| int64_t frame_id = frame_id_++; |
| LayerFrame& layer_frame = frames_[frame_id % kMaxFrameHistorySize]; |
| layer_frame.picture_id = picture_id_; |
| layer_frame.spatial_id = encoded_image.SpatialIndex().value_or(0); |
| layer_frame.frame_id = frame_id; |
| layer_frame.temporal_id = |
| codec_specific_info->codecSpecific.VP9.temporal_idx; |
| if (layer_frame.temporal_id == kNoTemporalIdx) { |
| layer_frame.temporal_id = 0; |
| } |
| layer_frame.vp9_non_ref_for_inter_layer_pred = |
| codec_specific_info->codecSpecific.VP9.non_ref_for_inter_layer_pred; |
| CheckVp9References(layer_frame, codec_specific_info->codecSpecific.VP9); |
| |
| if (codec_specific_info->generic_frame_info.has_value()) { |
| absl::InlinedVector<int64_t, 5> frame_dependencies = |
| dependencies_calculator_.FromBuffersUsage( |
| frame_id, |
| codec_specific_info->generic_frame_info->encoder_buffers); |
| |
| CheckGenericReferences(frame_dependencies, |
| *codec_specific_info->generic_frame_info); |
| CheckGenericAndCodecSpecificReferencesAreConsistent( |
| frame_dependencies, *codec_specific_info, layer_frame); |
| } |
| |
| return Result(Result::OK); |
| } |
| |
| private: |
| // With 4 spatial layers and patterns up to 8 pictures, it should be enough to |
| // keep the last 32 frames to validate dependencies. |
| static constexpr size_t kMaxFrameHistorySize = 32; |
| struct LayerFrame { |
| int64_t frame_id; |
| int64_t picture_id; |
| int spatial_id; |
| int temporal_id; |
| bool vp9_non_ref_for_inter_layer_pred; |
| }; |
| |
| void CheckVp9References(const LayerFrame& layer_frame, |
| const CodecSpecificInfoVP9& vp9_info) { |
| if (layer_frame.frame_id == 0) { |
| RTC_CHECK(!vp9_info.inter_layer_predicted); |
| } else { |
| const LayerFrame& previous_frame = Frame(layer_frame.frame_id - 1); |
| if (vp9_info.inter_layer_predicted) { |
| RTC_CHECK(!previous_frame.vp9_non_ref_for_inter_layer_pred); |
| RTC_CHECK_EQ(layer_frame.picture_id, previous_frame.picture_id); |
| } |
| if (previous_frame.picture_id == layer_frame.picture_id) { |
| RTC_CHECK_GT(layer_frame.spatial_id, previous_frame.spatial_id); |
| // The check below would fail for temporal shift structures. Remove it |
| // or move it to !flexible_mode section when vp9 encoder starts |
| // supporting such structures. |
| RTC_CHECK_EQ(layer_frame.temporal_id, previous_frame.temporal_id); |
| } |
| } |
| if (!vp9_info.flexible_mode) { |
| if (vp9_info.gof.num_frames_in_gof > 0) { |
| gof_.CopyGofInfoVP9(vp9_info.gof); |
| } |
| RTC_CHECK_EQ(gof_.temporal_idx[vp9_info.gof_idx], |
| layer_frame.temporal_id); |
| } |
| } |
| |
| void CheckGenericReferences(rtc::ArrayView<const int64_t> frame_dependencies, |
| const GenericFrameInfo& generic_info) const { |
| for (int64_t dependency_frame_id : frame_dependencies) { |
| RTC_CHECK_GE(dependency_frame_id, 0); |
| const LayerFrame& dependency = Frame(dependency_frame_id); |
| RTC_CHECK_GE(generic_info.spatial_id, dependency.spatial_id); |
| RTC_CHECK_GE(generic_info.temporal_id, dependency.temporal_id); |
| } |
| } |
| |
| void CheckGenericAndCodecSpecificReferencesAreConsistent( |
| rtc::ArrayView<const int64_t> frame_dependencies, |
| const CodecSpecificInfo& info, |
| const LayerFrame& layer_frame) const { |
| const CodecSpecificInfoVP9& vp9_info = info.codecSpecific.VP9; |
| const GenericFrameInfo& generic_info = *info.generic_frame_info; |
| |
| RTC_CHECK_EQ(generic_info.spatial_id, layer_frame.spatial_id); |
| RTC_CHECK_EQ(generic_info.temporal_id, layer_frame.temporal_id); |
| auto picture_id_diffs = |
| rtc::MakeArrayView(vp9_info.p_diff, vp9_info.num_ref_pics); |
| RTC_CHECK_EQ( |
| frame_dependencies.size(), |
| picture_id_diffs.size() + (vp9_info.inter_layer_predicted ? 1 : 0)); |
| for (int64_t dependency_frame_id : frame_dependencies) { |
| RTC_CHECK_GE(dependency_frame_id, 0); |
| const LayerFrame& dependency = Frame(dependency_frame_id); |
| if (dependency.spatial_id != layer_frame.spatial_id) { |
| RTC_CHECK(vp9_info.inter_layer_predicted); |
| RTC_CHECK_EQ(layer_frame.picture_id, dependency.picture_id); |
| RTC_CHECK_GT(layer_frame.spatial_id, dependency.spatial_id); |
| } else { |
| RTC_CHECK(vp9_info.inter_pic_predicted); |
| RTC_CHECK_EQ(layer_frame.spatial_id, dependency.spatial_id); |
| RTC_CHECK(absl::c_linear_search( |
| picture_id_diffs, layer_frame.picture_id - dependency.picture_id)); |
| } |
| } |
| } |
| |
| const LayerFrame& Frame(int64_t frame_id) const { |
| auto& frame = frames_[frame_id % kMaxFrameHistorySize]; |
| RTC_CHECK_EQ(frame.frame_id, frame_id); |
| return frame; |
| } |
| |
| GofInfoVP9 gof_; |
| int64_t frame_id_ = 0; |
| int64_t picture_id_ = 1; |
| FrameDependenciesCalculator dependencies_calculator_; |
| LayerFrame frames_[kMaxFrameHistorySize]; |
| }; |
| |
| class FieldTrials : public WebRtcKeyValueConfig { |
| public: |
| explicit FieldTrials(FuzzDataHelper& config) |
| : flags_(config.ReadOrDefaultValue<uint8_t>(0)) {} |
| |
| ~FieldTrials() override = default; |
| std::string Lookup(absl::string_view key) const override { |
| static constexpr absl::string_view kBinaryFieldTrials[] = { |
| "WebRTC-Vp9DependencyDescriptor", |
| "WebRTC-Vp9ExternalRefCtrl", |
| "WebRTC-Vp9IssueKeyFrameOnLayerDeactivation", |
| }; |
| for (size_t i = 0; i < ABSL_ARRAYSIZE(kBinaryFieldTrials); ++i) { |
| if (key == kBinaryFieldTrials[i]) { |
| return (flags_ & (1u << i)) ? "Enabled" : "Disabled"; |
| } |
| } |
| |
| // Ignore following field trials. |
| if (key == "WebRTC-CongestionWindow" || |
| key == "WebRTC-UseBaseHeavyVP8TL3RateAllocation" || |
| key == "WebRTC-SimulcastUpswitchHysteresisPercent" || |
| key == "WebRTC-SimulcastScreenshareUpswitchHysteresisPercent" || |
| key == "WebRTC-VideoRateControl" || |
| key == "WebRTC-VP9-PerformanceFlags" || |
| key == "WebRTC-VP9VariableFramerateScreenshare" || |
| key == "WebRTC-VP9QualityScaler") { |
| return ""; |
| } |
| // Crash when using unexpected field trial to decide if it should be fuzzed |
| // or have a constant value. |
| RTC_CHECK(false) << "Unfuzzed field trial " << key << "\n"; |
| } |
| |
| private: |
| const uint8_t flags_; |
| }; |
| |
| VideoCodec CodecSettings(FuzzDataHelper& rng) { |
| uint16_t config = rng.ReadOrDefaultValue<uint16_t>(0); |
| // Test up to to 4 spatial and 4 temporal layers. |
| int num_spatial_layers = 1 + (config & 0b11); |
| int num_temporal_layers = 1 + ((config >> 2) & 0b11); |
| |
| VideoCodec codec_settings = {}; |
| codec_settings.codecType = kVideoCodecVP9; |
| codec_settings.maxFramerate = 30; |
| codec_settings.width = 320 << (num_spatial_layers - 1); |
| codec_settings.height = 180 << (num_spatial_layers - 1); |
| if (num_spatial_layers > 1) { |
| for (int sid = 0; sid < num_spatial_layers; ++sid) { |
| SpatialLayer& spatial_layer = codec_settings.spatialLayers[sid]; |
| codec_settings.width = 320 << sid; |
| codec_settings.height = 180 << sid; |
| spatial_layer.width = codec_settings.width; |
| spatial_layer.height = codec_settings.height; |
| spatial_layer.targetBitrate = kBitrateEnabledBps * num_temporal_layers; |
| spatial_layer.maxFramerate = codec_settings.maxFramerate; |
| spatial_layer.numberOfTemporalLayers = num_temporal_layers; |
| } |
| } |
| codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers; |
| codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers; |
| int inter_layer_pred = (config >> 4) & 0b11; |
| // There are only 3 valid values. |
| codec_settings.VP9()->interLayerPred = static_cast<InterLayerPredMode>( |
| inter_layer_pred < 3 ? inter_layer_pred : 0); |
| codec_settings.VP9()->flexibleMode = (config & (1u << 6)) != 0; |
| codec_settings.VP9()->frameDroppingOn = (config & (1u << 7)) != 0; |
| codec_settings.mode = VideoCodecMode::kRealtimeVideo; |
| return codec_settings; |
| } |
| |
| VideoEncoder::Settings EncoderSettings() { |
| return VideoEncoder::Settings(VideoEncoder::Capabilities(false), |
| /*number_of_cores=*/1, |
| /*max_payload_size=*/0); |
| } |
| |
| bool IsSupported(int num_spatial_layers, |
| int num_temporal_layers, |
| const VideoBitrateAllocation& allocation) { |
| // VP9 encoder doesn't support certain configurations. |
| // BitrateAllocator shouldn't produce them. |
| if (allocation.get_sum_bps() == 0) { |
| // Ignore allocation that turns off all the layers. |
| // In such a case it is up to upper layer code not to call Encode. |
| return false; |
| } |
| |
| for (int tid = 0; tid < num_temporal_layers; ++tid) { |
| int min_enabled_spatial_id = -1; |
| int max_enabled_spatial_id = -1; |
| int num_enabled_spatial_layers = 0; |
| for (int sid = 0; sid < num_spatial_layers; ++sid) { |
| if (allocation.GetBitrate(sid, tid) > 0) { |
| if (min_enabled_spatial_id == -1) { |
| min_enabled_spatial_id = sid; |
| } |
| max_enabled_spatial_id = sid; |
| ++num_enabled_spatial_layers; |
| } |
| } |
| if (num_enabled_spatial_layers == 0) { |
| // Each temporal layer should be enabled because skipping a full frame is |
| // not supported in non-flexible mode. |
| return false; |
| } |
| if (max_enabled_spatial_id - min_enabled_spatial_id + 1 != |
| num_enabled_spatial_layers) { |
| // To avoid odd spatial dependencies, there should be no gaps in active |
| // spatial layers. |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| struct LibvpxState { |
| LibvpxState() { |
| pkt.kind = VPX_CODEC_CX_FRAME_PKT; |
| pkt.data.frame.buf = pkt_buffer; |
| pkt.data.frame.sz = ABSL_ARRAYSIZE(pkt_buffer); |
| layer_id.spatial_layer_id = -1; |
| } |
| |
| uint8_t pkt_buffer[1000] = {}; |
| vpx_codec_enc_cfg_t config = {}; |
| vpx_codec_priv_output_cx_pkt_cb_pair_t callback = {}; |
| vpx_image_t img = {}; |
| vpx_svc_ref_frame_config_t ref_config = {}; |
| vpx_svc_layer_id_t layer_id = {}; |
| vpx_svc_frame_drop_t frame_drop = {}; |
| vpx_codec_cx_pkt pkt = {}; |
| }; |
| |
| class StubLibvpx : public NiceMock<MockLibvpxInterface> { |
| public: |
| explicit StubLibvpx(LibvpxState* state) : state_(state) { RTC_CHECK(state_); } |
| |
| vpx_codec_err_t codec_enc_config_default(vpx_codec_iface_t* iface, |
| vpx_codec_enc_cfg_t* cfg, |
| unsigned int usage) const override { |
| state_->config = *cfg; |
| return VPX_CODEC_OK; |
| } |
| |
| vpx_codec_err_t codec_enc_init(vpx_codec_ctx_t* ctx, |
| vpx_codec_iface_t* iface, |
| const vpx_codec_enc_cfg_t* cfg, |
| vpx_codec_flags_t flags) const override { |
| RTC_CHECK(ctx); |
| ctx->err = VPX_CODEC_OK; |
| return VPX_CODEC_OK; |
| } |
| |
| vpx_image_t* img_wrap(vpx_image_t* img, |
| vpx_img_fmt_t fmt, |
| unsigned int d_w, |
| unsigned int d_h, |
| unsigned int stride_align, |
| unsigned char* img_data) const override { |
| state_->img.fmt = fmt; |
| state_->img.d_w = d_w; |
| state_->img.d_h = d_h; |
| return &state_->img; |
| } |
| |
| vpx_codec_err_t codec_encode(vpx_codec_ctx_t* ctx, |
| const vpx_image_t* img, |
| vpx_codec_pts_t pts, |
| uint64_t duration, |
| vpx_enc_frame_flags_t flags, |
| uint64_t deadline) const override { |
| if (flags & VPX_EFLAG_FORCE_KF) { |
| state_->pkt.data.frame.flags = VPX_FRAME_IS_KEY; |
| } else { |
| state_->pkt.data.frame.flags = 0; |
| } |
| state_->pkt.data.frame.duration = duration; |
| return VPX_CODEC_OK; |
| } |
| |
| vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, |
| vp8e_enc_control_id ctrl_id, |
| void* param) const override { |
| if (ctrl_id == VP9E_REGISTER_CX_CALLBACK) { |
| state_->callback = |
| *reinterpret_cast<vpx_codec_priv_output_cx_pkt_cb_pair_t*>(param); |
| } |
| return VPX_CODEC_OK; |
| } |
| |
| vpx_codec_err_t codec_control( |
| vpx_codec_ctx_t* ctx, |
| vp8e_enc_control_id ctrl_id, |
| vpx_svc_ref_frame_config_t* param) const override { |
| switch (ctrl_id) { |
| case VP9E_SET_SVC_REF_FRAME_CONFIG: |
| state_->ref_config = *param; |
| break; |
| case VP9E_GET_SVC_REF_FRAME_CONFIG: |
| *param = state_->ref_config; |
| break; |
| default: |
| break; |
| } |
| return VPX_CODEC_OK; |
| } |
| |
| vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, |
| vp8e_enc_control_id ctrl_id, |
| vpx_svc_layer_id_t* param) const override { |
| switch (ctrl_id) { |
| case VP9E_SET_SVC_LAYER_ID: |
| state_->layer_id = *param; |
| break; |
| case VP9E_GET_SVC_LAYER_ID: |
| *param = state_->layer_id; |
| break; |
| default: |
| break; |
| } |
| return VPX_CODEC_OK; |
| } |
| |
| vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, |
| vp8e_enc_control_id ctrl_id, |
| vpx_svc_frame_drop_t* param) const override { |
| if (ctrl_id == VP9E_SET_SVC_FRAME_DROP_LAYER) { |
| state_->frame_drop = *param; |
| } |
| return VPX_CODEC_OK; |
| } |
| |
| vpx_codec_err_t codec_enc_config_set( |
| vpx_codec_ctx_t* ctx, |
| const vpx_codec_enc_cfg_t* cfg) const override { |
| state_->config = *cfg; |
| return VPX_CODEC_OK; |
| } |
| |
| private: |
| LibvpxState* const state_; |
| }; |
| |
// Kind of action to run, taken from the two lowest bits of an action byte.
// Values without an enumerator are treated as a no-op by the fuzzer loop.
enum Actions {
  kEncode = 0,
  kSetRates = 1,
};
| |
// When a layer frame is marked for drop, drops all layer frames of that
// picture with larger spatial ids.
// `layers_mask` has bit i set when spatial layer i should be produced; the
// layer `sid` is dropped unless all layers in [0, sid] are set.
constexpr bool DropAbove(uint8_t layers_mask, int sid) {
  const uint8_t required_layers = static_cast<uint8_t>((1 << (sid + 1)) - 1);
  const uint8_t missing_layers = required_layers & ~layers_mask;
  return missing_layers != 0;
}
// inline unittests
static_assert(!DropAbove(0b1011, /*sid=*/0), "");
static_assert(!DropAbove(0b1011, /*sid=*/1), "");
static_assert(DropAbove(0b1011, /*sid=*/2), "");
static_assert(DropAbove(0b1011, /*sid=*/3), "");
| |
// When a layer frame is marked for drop, drops all layer frames of that
// picture with smaller spatial ids.
// `layers_mask` has bit i set when spatial layer i should be produced; the
// layer `sid` is dropped unless all layers in [sid, num_layers) are set.
// Bits at or above `num_layers` describe layers that do not exist and are
// ignored, so they can not force a drop.
constexpr bool DropBelow(uint8_t layers_mask, int sid, int num_layers) {
  const int required_layers = (1 << (num_layers - sid)) - 1;
  return ((layers_mask >> sid) & required_layers) != required_layers;
}
// inline unittests
static_assert(DropBelow(0b1101, /*sid=*/0, 4) == true, "");
static_assert(DropBelow(0b1101, /*sid=*/1, 4) == true, "");
static_assert(DropBelow(0b1101, /*sid=*/2, 4) == false, "");
static_assert(DropBelow(0b1101, /*sid=*/3, 4) == false, "");
// Bits of non-existing layers must not affect the result.
static_assert(DropBelow(0b1111, /*sid=*/0, 2) == false, "");
static_assert(DropBelow(0b0111, /*sid=*/1, 2) == false, "");
static_assert(DropBelow(0b1101, /*sid=*/0, 2) == true, "");
| |
| } // namespace |
| |
| void FuzzOneInput(const uint8_t* data, size_t size) { |
| FuzzDataHelper helper(rtc::MakeArrayView(data, size)); |
| |
| FrameValidator validator; |
| FieldTrials field_trials(helper); |
| // Setup call callbacks for the fake |
| LibvpxState state; |
| |
| // Initialize encoder |
| LibvpxVp9Encoder encoder(cricket::VideoCodec(), |
| std::make_unique<StubLibvpx>(&state), field_trials); |
| VideoCodec codec = CodecSettings(helper); |
| if (encoder.InitEncode(&codec, EncoderSettings()) != WEBRTC_VIDEO_CODEC_OK) { |
| return; |
| } |
| RTC_CHECK_EQ(encoder.RegisterEncodeCompleteCallback(&validator), |
| WEBRTC_VIDEO_CODEC_OK); |
| { |
| // Enable all the layers initially. Encoder doesn't support producing |
| // frames when no layers are enabled. |
| LibvpxVp9Encoder::RateControlParameters parameters; |
| parameters.framerate_fps = 30.0; |
| for (int sid = 0; sid < codec.VP9()->numberOfSpatialLayers; ++sid) { |
| for (int tid = 0; tid < codec.VP9()->numberOfTemporalLayers; ++tid) { |
| parameters.bitrate.SetBitrate(sid, tid, kBitrateEnabledBps); |
| } |
| } |
| encoder.SetRates(parameters); |
| } |
| |
| std::vector<VideoFrameType> frame_types(1); |
| VideoFrame fake_image = VideoFrame::Builder() |
| .set_video_frame_buffer(I420Buffer::Create( |
| int{codec.width}, int{codec.height})) |
| .build(); |
| |
| // Start producing frames at random. |
| while (helper.CanReadBytes(1)) { |
| uint8_t action = helper.Read<uint8_t>(); |
| switch (action & 0b11) { |
| case kEncode: { |
| // bitmask of the action: SSSS-K00, where |
| // four S bit indicate which spatial layers should be produced, |
| // K bit indicates if frame should be a key frame. |
| frame_types[0] = (action & 0b100) ? VideoFrameType::kVideoFrameKey |
| : VideoFrameType::kVideoFrameDelta; |
| encoder.Encode(fake_image, &frame_types); |
| uint8_t encode_spatial_layers = (action >> 4); |
| for (size_t sid = 0; sid < state.config.ss_number_layers; ++sid) { |
| if (state.config.ss_target_bitrate[sid] == 0) { |
| // Don't encode disabled spatial layers. |
| continue; |
| } |
| bool drop = true; |
| switch (state.frame_drop.framedrop_mode) { |
| case FULL_SUPERFRAME_DROP: |
| drop = encode_spatial_layers == 0; |
| break; |
| case LAYER_DROP: |
| drop = (encode_spatial_layers & (1 << sid)) == 0; |
| break; |
| case CONSTRAINED_LAYER_DROP: |
| drop = DropBelow(encode_spatial_layers, sid, |
| state.config.ss_number_layers); |
| break; |
| case CONSTRAINED_FROM_ABOVE_DROP: |
| drop = DropAbove(encode_spatial_layers, sid); |
| break; |
| } |
| if (!drop) { |
| state.layer_id.spatial_layer_id = sid; |
| state.callback.output_cx_pkt(&state.pkt, state.callback.user_priv); |
| } |
| } |
| } break; |
| case kSetRates: { |
| // bitmask of the action: (S2)(S1)(S0)01, |
| // where Sx is number of temporal layers to enable for spatial layer x |
| // In pariculat Sx = 0 indicates spatial layer x should be disabled. |
| LibvpxVp9Encoder::RateControlParameters parameters; |
| parameters.framerate_fps = 30.0; |
| for (int sid = 0; sid < codec.VP9()->numberOfSpatialLayers; ++sid) { |
| int temporal_layers = (action >> ((1 + sid) * 2)) & 0b11; |
| for (int tid = 0; tid < temporal_layers; ++tid) { |
| parameters.bitrate.SetBitrate(sid, tid, kBitrateEnabledBps); |
| } |
| } |
| if (IsSupported(codec.VP9()->numberOfSpatialLayers, |
| codec.VP9()->numberOfTemporalLayers, |
| parameters.bitrate)) { |
| encoder.SetRates(parameters); |
| } |
| } break; |
| default: |
| // Unspecificed values are noop. |
| break; |
| } |
| } |
| } |
| } // namespace webrtc |