/*
* Copyright 2024 The WebRTC project authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "video/corruption_detection/frame_instrumentation_generator.h"
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <optional>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/types/variant.h"
#include "api/scoped_refptr.h"
#include "api/video/encoded_image.h"
#include "api/video/video_codec_type.h"
#include "api/video/video_frame.h"
#include "api/video/video_frame_buffer.h"
#include "api/video/video_frame_type.h"
#include "api/video_codecs/video_codec.h"
#include "common_video/frame_instrumentation_data.h"
#include "modules/include/module_common_types_public.h"
#include "modules/video_coding/utility/qp_parser.h"
#include "rtc_base/logging.h"
#include "video/corruption_detection/generic_mapping_functions.h"
#include "video/corruption_detection/halton_frame_sampler.h"
namespace webrtc {
namespace {
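
// Derives corruption-detection filter settings for an encoded image. Uses the
// QP reported on the image when available, and otherwise falls back to
// parsing the QP out of the bitstream for the given layer.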
std::optional<FilterSettings> GetCorruptionFilterSettings(
const EncodedImage& encoded_image,
VideoCodecType video_codec_type,
int layer_id) {
/* TODO: b/358039777 - Uncomment when parameters are available in EncodedImage
if (encoded_image.CorruptionDetectionParameters()) {
return FilterSettings{
.std_dev = encoded_image.CorruptionDetectionParameters()->std_dev,
.luma_error_threshold =
encoded_image.CorruptionDetectionParameters()->luma_error_threshold,
.chroma_error_threshold = encoded_image.CorruptionDetectionParameters()
->chroma_error_threshold};
}
*/
int qp = encoded_image.qp_;
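  // A QP of -1 means the encoder did not report one; parse it from the
  // bitstream instead.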
if (qp == -1) {
std::optional<uint32_t> parsed_qp = QpParser().Parse(
video_codec_type, layer_id, encoded_image.data(), encoded_image.size());
if (!parsed_qp.has_value()) {
RTC_LOG(LS_VERBOSE) << "Missing QP for "
<< CodecTypeToPayloadString(video_codec_type)
<< " layer " << layer_id << ".";
return std::nullopt;
}
qp = *parsed_qp;
}
return GetCorruptionFilterSettings(qp, video_codec_type);
}

}  // namespace

FrameInstrumentationGenerator::FrameInstrumentationGenerator(
VideoCodecType video_codec_type)
    : video_codec_type_(video_codec_type) {}

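// Example wiring (a sketch; the capture source and the encoder-callback
// hookup are illustrative, not part of this file):
//   FrameInstrumentationGenerator generator(kVideoCodecVP8);
//   generator.OnCapturedFrame(captured_frame);  // before encoding
//   auto instrumentation = generator.OnEncodedImage(encoded_image);
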
void FrameInstrumentationGenerator::OnCapturedFrame(VideoFrame frame) {
captured_frames_.push(frame);
}

std::optional<
absl::variant<FrameInstrumentationSyncData, FrameInstrumentationData>>
FrameInstrumentationGenerator::OnEncodedImage(
const EncodedImage& encoded_image) {
uint32_t rtp_timestamp_encoded_image = encoded_image.RtpTimestamp();
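  // Drop queued captured frames that are older than this encoded image; they
  // can no longer be matched.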
while (!captured_frames_.empty() &&
IsNewerTimestamp(rtp_timestamp_encoded_image,
captured_frames_.front().rtp_timestamp())) {
captured_frames_.pop();
}
if (captured_frames_.empty() ||
captured_frames_.front().rtp_timestamp() != rtp_timestamp_encoded_image) {
RTC_LOG(LS_VERBOSE) << "No captured frames for RTC timestamp "
<< rtp_timestamp_encoded_image << ".";
return std::nullopt;
}
VideoFrame captured_frame = captured_frames_.front();
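  // Spatial (SVC) and simulcast indices are mutually exclusive on an encoded
  // image, so whichever one is set identifies the layer.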
int layer_id = std::max(encoded_image.SpatialIndex().value_or(0),
encoded_image.SimulcastIndex().value_or(0));
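  // Detect key frames; for SVC, an upper layer that shares the RTP timestamp
  // of a stored lower-layer key frame counts as a key frame as well.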
bool is_key_frame =
encoded_image.FrameType() == VideoFrameType::kVideoFrameKey;
if (is_key_frame) {
contexts_.erase(layer_id);
} else {
for (const auto& [unused, context] : contexts_) {
if (context.rtp_timestamp_of_last_key_frame ==
rtp_timestamp_encoded_image) {
// Upper layer of an SVC key frame.
is_key_frame = true;
break;
}
}
}
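  // Key frames start a fresh context for the layer; delta frames are only
  // instrumented if a context was already established by an earlier key frame.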
if (is_key_frame) {
contexts_[layer_id].rtp_timestamp_of_last_key_frame =
encoded_image.RtpTimestamp();
} else if (contexts_.find(layer_id) == contexts_.end()) {
RTC_LOG(LS_INFO) << "The first frame of a spatial or simulcast layer is "
"not a key frame.";
return std::nullopt;
}
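  // Communicate the sampler's current index so the receive side can stay in
  // sync with the Halton sequence.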
int sequence_index = contexts_[layer_id].frame_sampler.GetCurrentIndex();
// TODO: b/358039777 - Maybe allow other sample sizes as well
std::vector<HaltonFrameSampler::Coordinates> sample_coordinates =
contexts_[layer_id]
.frame_sampler.GetSampleCoordinatesForFrameIfFrameShouldBeSampled(
is_key_frame, captured_frame.rtp_timestamp(),
/*sample_size=*/13);
if (sample_coordinates.empty()) {
if (!is_key_frame) {
return std::nullopt;
}
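    // Not sampled, but key frames still announce the sequence index so the
    // receiver can resynchronize.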
return FrameInstrumentationSyncData{.sequence_index = sequence_index,
.communicate_upper_bits = true};
}
std::optional<FilterSettings> filter_settings =
GetCorruptionFilterSettings(encoded_image, video_codec_type_, layer_id);
if (!filter_settings.has_value()) {
return std::nullopt;
}
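  // Sampling reads I420 data; convert the captured buffer if it is in another
  // format.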
scoped_refptr<I420BufferInterface> captured_frame_buffer_as_i420 =
captured_frame.video_frame_buffer()->ToI420();
if (!captured_frame_buffer_as_i420) {
RTC_LOG(LS_ERROR) << "Failed to convert "
<< VideoFrameBufferTypeToString(
captured_frame.video_frame_buffer()->type())
<< " image to I420.";
return std::nullopt;
}
FrameInstrumentationData data = {
.sequence_index = sequence_index,
.communicate_upper_bits = is_key_frame,
.std_dev = filter_settings->std_dev,
.luma_error_threshold = filter_settings->luma_error_threshold,
.chroma_error_threshold = filter_settings->chroma_error_threshold};
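  // Sample the captured (pre-encode) frame at the Halton coordinates, using
  // the encoded resolution and the Gaussian std_dev from the filter settings.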
std::vector<FilteredSample> samples = GetSampleValuesForFrame(
captured_frame_buffer_as_i420, sample_coordinates,
encoded_image._encodedWidth, encoded_image._encodedHeight,
filter_settings->std_dev);
data.sample_values.reserve(samples.size());
absl::c_transform(samples, std::back_inserter(data.sample_values),
[](const FilteredSample& sample) { return sample.value; });
return data;
}

}  // namespace webrtc