Calculate video encode PSNR (in supported codecs).
PSNR is reported separately for the Y, U and V components; applications can do a weighted average.
https://w3c.github.io/webrtc-stats/#dom-rtcoutboundrtpstreamstats-psnrsum
Depends on
https://github.com/cisco/openh264/pull/3824 (for OpenH264)
https://chromium-review.googlesource.com/c/webm/libvpx/+/6167966 (libvpx)
https://aomedia-review.googlesource.com/c/aom/+/196501 (libaom)
This CL implements the codec changes,
https://webrtc-review.googlesource.com/c/src/+/375021
is a follow-up to wire up getStats.
BUG=webrtc:388070060
Change-Id: I7046158a7b6e4183a9ec939fcac94eee9d65530d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/368960
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Philipp Hancke <phancke@meta.com>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#45312}
diff --git a/api/video/encoded_image.h b/api/video/encoded_image.h
index 34df30f..b8400c0 100644
--- a/api/video/encoded_image.h
+++ b/api/video/encoded_image.h
@@ -78,6 +78,13 @@
// cleaned up. Direct use of its members is strongly discouraged.
class RTC_EXPORT EncodedImage {
public:
+ // Peak signal to noise ratio, Y/U/V components.
+ struct Psnr {
+ double y = 0.0;
+ double u = 0.0;
+ double v = 0.0;
+ };
+
EncodedImage();
EncodedImage(EncodedImage&&);
EncodedImage(const EncodedImage&);
@@ -260,6 +267,9 @@
EncodedImage::Timing video_timing() const { return timing_; }
EncodedImage::Timing* video_timing_mutable() { return &timing_; }
+ std::optional<Psnr> psnr() const { return psnr_; }
+ void set_psnr(std::optional<Psnr> psnr) { psnr_ = psnr; }
+
private:
size_t capacity() const { return encoded_data_ ? encoded_data_->size() : 0; }
@@ -296,6 +306,9 @@
// used.
std::optional<CorruptionDetectionFilterSettings>
corruption_detection_filter_settings_;
+
+ // Encoders may compute PSNR for a frame.
+ std::optional<Psnr> psnr_;
};
} // namespace webrtc
diff --git a/call/BUILD.gn b/call/BUILD.gn
index 2b79626..3f5c615 100644
--- a/call/BUILD.gn
+++ b/call/BUILD.gn
@@ -411,6 +411,7 @@
"../api/adaptation:resource_adaptation_api",
"../api/crypto:options",
"../api/units:data_rate",
+ "../api/video:encoded_image",
"../api/video:video_frame",
"../api/video:video_rtp_headers",
"../api/video:video_stream_encoder",
diff --git a/call/video_send_stream.h b/call/video_send_stream.h
index bfcc15c..c5a24d2 100644
--- a/call/video_send_stream.h
+++ b/call/video_send_stream.h
@@ -27,6 +27,7 @@
#include "api/rtp_sender_interface.h"
#include "api/scoped_refptr.h"
#include "api/units/data_rate.h"
+#include "api/video/encoded_image.h"
#include "api/video/video_content_type.h"
#include "api/video/video_frame.h"
#include "api/video/video_source_interface.h"
@@ -94,6 +95,8 @@
double encode_frame_rate = 0.0;
int frames_encoded = 0;
std::optional<uint64_t> qp_sum;
+ EncodedImage::Psnr psnr_sum;
+ uint64_t psnr_measurements = 0;
uint64_t total_encode_time_ms = 0;
uint64_t total_encoded_bytes_target = 0;
uint32_t huge_frames_sent = 0;
diff --git a/experiments/field_trials.py b/experiments/field_trials.py
index c0b5a7b..c9d6474 100755
--- a/experiments/field_trials.py
+++ b/experiments/field_trials.py
@@ -197,6 +197,9 @@
FieldTrial('WebRTC-VP9-SvcForSimulcast',
347737882,
date(2024, 10, 1)),
+ FieldTrial('WebRTC-Video-CalculatePsnr',
+ 388070060,
+ date(2026, 1, 1)),
FieldTrial('WebRTC-Video-EnableRetransmitAllLayers',
42225262,
date(2024, 4, 1)),
diff --git a/media/BUILD.gn b/media/BUILD.gn
index 98a675a..4bfd8b4 100644
--- a/media/BUILD.gn
+++ b/media/BUILD.gn
@@ -340,6 +340,7 @@
"../api/units:data_rate",
"../api/units:time_delta",
"../api/units:timestamp",
+ "../api/video:encoded_image",
"../api/video:recordable_encoded_frame",
"../api/video:video_frame",
"../api/video:video_rtp_headers",
diff --git a/media/base/media_channel.h b/media/base/media_channel.h
index 373f160..d569c19 100644
--- a/media/base/media_channel.h
+++ b/media/base/media_channel.h
@@ -43,6 +43,7 @@
#include "api/units/data_rate.h"
#include "api/units/time_delta.h"
#include "api/units/timestamp.h"
+#include "api/video/encoded_image.h"
#include "api/video/recordable_encoded_frame.h"
#include "api/video/video_content_type.h"
#include "api/video/video_sink_interface.h"
@@ -596,8 +597,12 @@
// https://w3c.github.io/webrtc-stats/#dom-rtcoutboundrtpstreamstats-totalencodedbytestarget
uint64_t total_encoded_bytes_target = 0;
bool has_entered_low_resolution = false;
+ // https://w3c.github.io/webrtc-stats/#dom-rtcoutboundrtpstreamstats-qpsum
std::optional<uint64_t> qp_sum;
VideoContentType content_type = VideoContentType::UNSPECIFIED;
+ // https://w3c.github.io/webrtc-stats/#dom-rtcoutboundrtpstreamstats-psnrsum
+ webrtc::EncodedImage::Psnr psnr_sum;
+ uint32_t psnr_measurements = 0;
uint32_t frames_sent = 0;
// https://w3c.github.io/webrtc-stats/#dom-rtcvideosenderstats-hugeframessent
uint32_t huge_frames_sent = 0;
diff --git a/media/engine/webrtc_video_engine.cc b/media/engine/webrtc_video_engine.cc
index faa378f..44fbebf 100644
--- a/media/engine/webrtc_video_engine.cc
+++ b/media/engine/webrtc_video_engine.cc
@@ -2537,6 +2537,8 @@
info.report_block_datas.push_back(*stream_stats.report_block_data);
}
info.qp_sum = stream_stats.qp_sum;
+ info.psnr_sum = stream_stats.psnr_sum;
+ info.psnr_measurements = stream_stats.psnr_measurements;
info.total_encode_time_ms = stream_stats.total_encode_time_ms;
info.total_encoded_bytes_target = stream_stats.total_encoded_bytes_target;
info.huge_frames_sent = stream_stats.huge_frames_sent;
@@ -2589,6 +2591,12 @@
}
info.qp_sum = *info.qp_sum + *infos[i].qp_sum;
}
+ if (infos[i].psnr_measurements > 0) {
+ info.psnr_measurements += infos[i].psnr_measurements;
+ info.psnr_sum.y += infos[i].psnr_sum.y;
+ info.psnr_sum.u += infos[i].psnr_sum.u;
+ info.psnr_sum.v += infos[i].psnr_sum.v;
+ }
info.frames_encoded += infos[i].frames_encoded;
info.frames_sent += infos[i].frames_sent;
info.total_encode_time_ms += infos[i].total_encode_time_ms;
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index b100541..b0dd698 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -388,6 +388,18 @@
}
}
+rtc_library("frame_sampler") {
+ visibility = [ "*" ]
+ sources = [
+ "utility/frame_sampler.cc",
+ "utility/frame_sampler.h",
+ ]
+ deps = [
+ "..:module_api_public",
+ "../../api/video:video_frame",
+ ]
+}
+
rtc_library("video_coding_utility") {
visibility = [ "*" ]
sources = [
@@ -488,6 +500,7 @@
defines = []
deps = [
":codec_globals_headers",
+ ":frame_sampler",
":video_codec_interface",
":video_coding_utility",
"../../api:scoped_refptr",
@@ -514,6 +527,7 @@
"../../rtc_base:timeutils",
"../../rtc_base/system:rtc_export",
"../../system_wrappers:metrics",
+ "../rtp_rtcp:rtp_rtcp_format",
"svc:scalability_structures",
"svc:scalable_video_controller",
"//third_party/abseil-cpp/absl/base:nullability",
@@ -575,6 +589,7 @@
deps = [
":codec_globals_headers",
+ ":frame_sampler",
":video_codec_interface",
":video_coding_utility",
":webrtc_libvpx_interface",
@@ -608,6 +623,7 @@
"../../rtc_base/experiments:field_trial_parser",
"../../rtc_base/experiments:rate_control_settings",
"../../system_wrappers:metrics",
+ "../rtp_rtcp:rtp_rtcp_format",
"svc:scalability_mode_util",
"//third_party/abseil-cpp/absl/algorithm:container",
"//third_party/abseil-cpp/absl/base:nullability",
@@ -701,6 +717,7 @@
deps = [
":codec_globals_headers",
+ ":frame_sampler",
":video_codec_interface",
":video_coding_utility",
":webrtc_libvpx_interface",
@@ -1209,6 +1226,7 @@
"utility/corruption_detection_settings_generator_unittest.cc",
"utility/decoded_frames_history_unittest.cc",
"utility/frame_dropper_unittest.cc",
+ "utility/frame_sampler_unittest.cc",
"utility/framerate_controller_deprecated_unittest.cc",
"utility/ivf_file_reader_unittest.cc",
"utility/ivf_file_writer_unittest.cc",
@@ -1233,6 +1251,7 @@
":encoded_frame",
":frame_dependencies_calculator",
":frame_helpers",
+ ":frame_sampler",
":h264_sprop_parameter_sets",
":h26x_packet_buffer",
":nack_requester",
diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn
index e9fb4ac..02aac0a 100644
--- a/modules/video_coding/codecs/av1/BUILD.gn
+++ b/modules/video_coding/codecs/av1/BUILD.gn
@@ -60,6 +60,8 @@
]
deps = [
"../..:video_codec_interface",
+ "../..:video_coding_utility",
+ "../../:frame_sampler",
"../../../../api:field_trials_view",
"../../../../api:scoped_refptr",
"../../../../api/environment",
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index 8c64a56..8a49d3f 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -41,6 +41,7 @@
#include "modules/video_coding/include/video_error_codes.h"
#include "modules/video_coding/svc/create_scalability_structure.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
+#include "modules/video_coding/utility/frame_sampler.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/encoder_info_settings.h"
#include "rtc_base/logging.h"
@@ -154,6 +155,11 @@
// TODO(webrtc:351644568): Remove this kill-switch after the feature is fully
// deployed.
const bool post_encode_frame_drop_;
+
+ // Determine whether the frame should be sampled for PSNR.
+ FrameSampler psnr_frame_sampler_;
+ // TODO(webrtc:388070060): Remove after rollout.
+ const bool calculate_psnr_;
};
int32_t VerifyCodecSettings(const VideoCodec& codec_settings) {
@@ -195,7 +201,9 @@
timestamp_(0),
encoder_info_override_(env.field_trials()),
post_encode_frame_drop_(!env.field_trials().IsDisabled(
- "WebRTC-LibaomAv1Encoder-PostEncodeFrameDrop")) {}
+ "WebRTC-LibaomAv1Encoder-PostEncodeFrameDrop")),
+ calculate_psnr_(
+ env.field_trials().IsEnabled("WebRTC-Video-CalculatePsnr")) {}
LibaomAv1Encoder::~LibaomAv1Encoder() {
Release();
@@ -747,6 +755,11 @@
aom_enc_frame_flags_t flags =
layer_frame->IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0;
+#ifdef AOM_EFLAG_CALCULATE_PSNR
+ if (calculate_psnr_ && psnr_frame_sampler_.ShouldBeSampled(frame)) {
+ flags |= AOM_EFLAG_CALCULATE_PSNR;
+ }
+#endif
if (SvcEnabled()) {
SetSvcLayerId(*layer_frame);
@@ -770,10 +783,10 @@
// Get encoded image data.
EncodedImage encoded_image;
+ const aom_codec_cx_pkt_t* pkt;
aom_codec_iter_t iter = nullptr;
int data_pkt_count = 0;
- while (const aom_codec_cx_pkt_t* pkt =
- aom_codec_get_cx_data(&ctx_, &iter)) {
+ while ((pkt = aom_codec_get_cx_data(&ctx_, &iter)) != nullptr) {
if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
if (data_pkt_count > 0) {
RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
@@ -817,6 +830,12 @@
encoded_image.SetColorSpace(frame.color_space());
++data_pkt_count;
+ } else if (pkt->kind == AOM_CODEC_PSNR_PKT) {
+ // PSNR index: 0: total, 1: Y, 2: U, 3: V
+ encoded_image.set_psnr(
+ EncodedImage::Psnr({.y = pkt->data.psnr.psnr[1],
+ .u = pkt->data.psnr.psnr[2],
+ .v = pkt->data.psnr.psnr[3]}));
}
}
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
index 1b0f7a0..42d581a 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
@@ -198,7 +198,9 @@
number_of_cores_(0),
encoded_image_callback_(nullptr),
has_reported_init_(false),
- has_reported_error_(false) {
+ has_reported_error_(false),
+ calculate_psnr_(
+ env.field_trials().IsEnabled("WebRTC-Video-CalculatePsnr")) {
downscaled_buffers_.reserve(kMaxSimulcastStreams - 1);
encoded_images_.reserve(kMaxSimulcastStreams);
encoders_.reserve(kMaxSimulcastStreams);
@@ -464,6 +466,8 @@
RTC_DCHECK_EQ(configurations_[0].width, frame_buffer->width());
RTC_DCHECK_EQ(configurations_[0].height, frame_buffer->height());
+ bool calculate_psnr =
+ calculate_psnr_ && psnr_frame_sampler_.ShouldBeSampled(input_frame);
// Encode image for each layer.
for (size_t i = 0; i < encoders_.size(); ++i) {
// EncodeFrame input.
@@ -472,6 +476,9 @@
pictures_[i].iPicHeight = configurations_[i].height;
pictures_[i].iColorFormat = EVideoFormatType::videoFormatI420;
pictures_[i].uiTimeStamp = input_frame.ntp_time_ms();
+ pictures_[i].bPsnrY = calculate_psnr;
+ pictures_[i].bPsnrU = calculate_psnr;
+ pictures_[i].bPsnrV = calculate_psnr;
// Downscale images on second and ongoing layers.
if (i == 0) {
pictures_[i].iStride[0] = frame_buffer->StrideY();
@@ -564,6 +571,15 @@
h264_bitstream_parser_.ParseBitstream(encoded_images_[i]);
encoded_images_[i].qp_ =
h264_bitstream_parser_.GetLastSliceQp().value_or(-1);
+ if (calculate_psnr) {
+ encoded_images_[i].set_psnr(EncodedImage::Psnr({
+ .y = info.sLayerInfo[info.iLayerNum - 1].rPsnr[0],
+ .u = info.sLayerInfo[info.iLayerNum - 1].rPsnr[1],
+ .v = info.sLayerInfo[info.iLayerNum - 1].rPsnr[2],
+ }));
+ } else {
+ encoded_images_[i].set_psnr(std::nullopt);
+ }
// Deliver encoded image.
CodecSpecificInfo codec_specific;
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.h b/modules/video_coding/codecs/h264/h264_encoder_impl.h
index cae05a1..54c0930 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.h
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.h
@@ -38,6 +38,7 @@
#include "modules/video_coding/codecs/h264/include/h264.h"
#include "modules/video_coding/codecs/h264/include/h264_globals.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
+#include "modules/video_coding/utility/frame_sampler.h"
#include "third_party/openh264/src/codec/api/wels/codec_app_def.h"
#if defined(WEBRTC_WIN) && !defined(__clang__)
@@ -126,6 +127,11 @@
bool has_reported_error_;
std::vector<uint8_t> tl0sync_limit_;
+
+ // Determine whether the frame should be sampled for PSNR.
+ FrameSampler psnr_frame_sampler_;
+ // TODO(webrtc:388070060): Remove after rollout.
+ const bool calculate_psnr_;
};
} // namespace webrtc
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
index 5446d83..7ff38e2 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
@@ -44,6 +44,7 @@
#include "api/video_codecs/vp8_frame_buffer_controller.h"
#include "api/video_codecs/vp8_frame_config.h"
#include "api/video_codecs/vp8_temporal_layers_factory.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/video_coding/codecs/interface/common_constants.h"
#include "modules/video_coding/codecs/interface/libvpx_interface.h"
#include "modules/video_coding/codecs/vp8/include/vp8.h"
@@ -94,8 +95,7 @@
constexpr int kTokenPartitions = VP8_ONE_TOKENPARTITION;
constexpr uint32_t kVp832ByteAlign = 32u;
-constexpr int kRtpTicksPerSecond = 90000;
-constexpr int kRtpTicksPerMs = kRtpTicksPerSecond / 1000;
+constexpr int kRtpTicksPerMs = kVideoPayloadTypeFrequency / 1000;
// If internal frame dropping is enabled, force the encoder to output a frame
// on an encode request after this timeout even if this causes some
@@ -340,7 +340,9 @@
encoder_info_override_(env_.field_trials()),
max_frame_drop_interval_(ParseFrameDropInterval(env_.field_trials())),
android_specific_threading_settings_(env_.field_trials().IsEnabled(
- "WebRTC-LibvpxVp8Encoder-AndroidSpecificThreadingSettings")) {
+ "WebRTC-LibvpxVp8Encoder-AndroidSpecificThreadingSettings")),
+ calculate_psnr_(
+ env.field_trials().IsEnabled("WebRTC-Video-CalculatePsnr")) {
// TODO(eladalon/ilnik): These reservations might be wasting memory.
// InitEncode() is resizing to the actual size, which might be smaller.
raw_images_.reserve(kMaxSimulcastStreams);
@@ -602,7 +604,7 @@
}
// setting the time base of the codec
vpx_configs_[0].g_timebase.num = 1;
- vpx_configs_[0].g_timebase.den = kRtpTicksPerSecond;
+ vpx_configs_[0].g_timebase.den = kVideoPayloadTypeFrequency;
vpx_configs_[0].g_lag_in_frames = 0; // 0- no frame lagging
// Set the error resilience mode for temporal layers (but not simulcast).
@@ -1091,6 +1093,14 @@
flags[i] = send_key_frame ? VPX_EFLAG_FORCE_KF : EncodeFlags(tl_configs[i]);
}
+#ifdef VPX_EFLAG_CALCULATE_PSNR
+ if (calculate_psnr_ && psnr_frame_sampler_.ShouldBeSampled(frame)) {
+ for (size_t i = 0; i < encoders_.size(); ++i) {
+ flags[i] |= VPX_EFLAG_CALCULATE_PSNR;
+ }
+ }
+#endif
+
// Scale and map buffers and set `raw_images_` to hold pointers to the result.
// Because `raw_images_` are set to hold pointers to the prepared buffers, we
// need to keep these buffers alive through reference counting until after
@@ -1152,7 +1162,7 @@
// rate control seems to be off with that setup. Using the average input
// frame rate to calculate an average duration for now.
RTC_DCHECK_GT(codec_.maxFramerate, 0);
- uint32_t duration = kRtpTicksPerSecond / codec_.maxFramerate;
+ uint32_t duration = kVideoPayloadTypeFrequency / codec_.maxFramerate;
int error = WEBRTC_VIDEO_CODEC_OK;
int num_tries = 0;
@@ -1237,6 +1247,7 @@
++encoder_idx, --stream_idx) {
vpx_codec_iter_t iter = nullptr;
encoded_images_[encoder_idx].set_size(0);
+ encoded_images_[encoder_idx].set_psnr(std::nullopt);
encoded_images_[encoder_idx]._frameType = VideoFrameType::kVideoFrameDelta;
CodecSpecificInfo codec_specific;
const vpx_codec_cx_pkt_t* pkt = nullptr;
@@ -1263,11 +1274,19 @@
encoded_pos += pkt->data.frame.sz;
break;
}
+ case VPX_CODEC_PSNR_PKT:
+ // PSNR index: 0: total, 1: Y, 2: U, 3: V
+ encoded_images_[encoder_idx].set_psnr(
+ EncodedImage::Psnr({.y = pkt->data.psnr.psnr[1],
+ .u = pkt->data.psnr.psnr[2],
+ .v = pkt->data.psnr.psnr[3]}));
+ break;
default:
break;
}
// End of frame
- if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
+ if (pkt->kind == VPX_CODEC_CX_FRAME_PKT &&
+ (pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
// check if encoded frame is a key frame
if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
encoded_images_[encoder_idx]._frameType =
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h
index 9b50887..e4c3dc3 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h
@@ -34,6 +34,7 @@
#include "modules/video_coding/codecs/vp8/include/vp8.h"
#include "modules/video_coding/include/video_codec_interface.h"
#include "modules/video_coding/utility/corruption_detection_settings_generator.h"
+#include "modules/video_coding/utility/frame_sampler.h"
#include "modules/video_coding/utility/framerate_controller_deprecated.h"
#include "rtc_base/experiments/encoder_info_settings.h"
#include "rtc_base/experiments/rate_control_settings.h"
@@ -155,6 +156,11 @@
std::unique_ptr<CorruptionDetectionSettingsGenerator>
corruption_detection_settings_generator_;
+
+ // Determine whether the frame should be sampled for PSNR.
+ FrameSampler psnr_frame_sampler_;
+ // TODO(webrtc:388070060): Remove after rollout.
+ const bool calculate_psnr_;
};
} // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
index d941cd6..b07e523 100644
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@@ -290,7 +290,9 @@
performance_flags_(ParsePerformanceFlagsFromTrials(env.field_trials())),
num_steady_state_frames_(0),
config_changed_(true),
- encoder_info_override_(env.field_trials()) {
+ encoder_info_override_(env.field_trials()),
+ calculate_psnr_(
+ env.field_trials().IsEnabled("WebRTC-Video-CalculatePsnr")) {
codec_ = {};
memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
}
@@ -667,7 +669,7 @@
config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0;
// Setting the time base of the codec.
config_->g_timebase.num = 1;
- config_->g_timebase.den = 90000;
+ config_->g_timebase.den = kVideoPayloadTypeFrequency;
config_->g_lag_in_frames = 0; // 0- no frame lagging
config_->g_threads = 1;
// Rate control settings.
@@ -841,9 +843,11 @@
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
- const vpx_codec_err_t rv = libvpx_->codec_enc_init(
- encoder_, vpx_codec_vp9_cx(), config_,
- config_->g_bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH);
+ vpx_codec_flags_t flags =
+ config_->g_bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH;
+
+ const vpx_codec_err_t rv =
+ libvpx_->codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, flags);
if (rv != VPX_CODEC_OK) {
RTC_LOG(LS_ERROR) << "Init error: " << libvpx_->codec_err_to_string(rv);
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
@@ -1233,6 +1237,11 @@
if (force_key_frame_) {
flags = VPX_EFLAG_FORCE_KF;
}
+#ifdef VPX_EFLAG_CALCULATE_PSNR
+ if (calculate_psnr_ && psnr_frame_sampler_.ShouldBeSampled(input_image)) {
+ flags |= VPX_EFLAG_CALCULATE_PSNR;
+ }
+#endif
if (svc_controller_) {
vpx_svc_ref_frame_config_t ref_config = Vp9References(layer_frames_);
@@ -1245,8 +1254,9 @@
if (VideoCodecMode::kScreensharing == codec_.mode) {
for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
ref_config.duration[sl_idx] = static_cast<int64_t>(
- 90000 / (std::min(static_cast<float>(codec_.maxFramerate),
- framerate_controller_[sl_idx].GetTargetRate())));
+ kVideoPayloadTypeFrequency /
+ (std::min(static_cast<float>(codec_.maxFramerate),
+ framerate_controller_[sl_idx].GetTargetRate())));
}
}
@@ -1270,7 +1280,8 @@
framerate_controller_[num_active_spatial_layers_ - 1]
.GetTargetRate())
: codec_.maxFramerate;
- uint32_t duration = static_cast<uint32_t>(90000 / target_framerate_fps);
+ uint32_t duration =
+ static_cast<uint32_t>(kVideoPayloadTypeFrequency / target_framerate_fps);
const vpx_codec_err_t rv = libvpx_->codec_encode(
encoder_, raw_, timestamp_, duration, flags, VPX_DL_REALTIME);
if (rv != VPX_CODEC_OK) {
@@ -1772,6 +1783,22 @@
int qp = -1;
libvpx_->codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
encoded_image_.qp_ = qp;
+ // Pull PSNR which is not pushed for VP9.
+ // TODO: bugs.webrtc.org/388070060 - check SVC behavior.
+ // TODO: bugs.webrtc.org/388070060 - this is broken for simulcast which seems
+ // to be using kSVC.
+ vpx_codec_iter_t iter = nullptr;
+ const vpx_codec_cx_pkt_t* cx_data = nullptr;
+ encoded_image_.set_psnr(std::nullopt);
+ while ((cx_data = vpx_codec_get_cx_data(encoder_, &iter)) != nullptr) {
+ if (cx_data->kind == VPX_CODEC_PSNR_PKT) {
+ // PSNR index: 0: total, 1: Y, 2: U, 3: V
+ encoded_image_.set_psnr(
+ EncodedImage::Psnr({.y = cx_data->data.psnr.psnr[1],
+ .u = cx_data->data.psnr.psnr[2],
+ .v = cx_data->data.psnr.psnr[3]}));
+ }
+ }
const bool end_of_picture = encoded_image_.SpatialIndex().value_or(0) + 1 ==
num_active_spatial_layers_;
diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
index d5ce610..5ccc3d7 100644
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
@@ -40,6 +40,7 @@
#include "modules/video_coding/include/video_codec_interface.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "modules/video_coding/svc/simulcast_to_svc_converter.h"
+#include "modules/video_coding/utility/frame_sampler.h"
#include "modules/video_coding/utility/framerate_controller_deprecated.h"
#include "rtc_base/containers/flat_map.h"
#include "rtc_base/experiments/encoder_info_settings.h"
@@ -248,6 +249,11 @@
bool config_changed_;
const LibvpxVp9EncoderInfoSettings encoder_info_override_;
+
+ // Determine whether the frame should be sampled for PSNR.
+ FrameSampler psnr_frame_sampler_;
+ // TODO(webrtc:388070060): Remove after rollout.
+ const bool calculate_psnr_;
};
} // namespace webrtc
diff --git a/modules/video_coding/utility/frame_sampler.cc b/modules/video_coding/utility/frame_sampler.cc
new file mode 100644
index 0000000..b08f891
--- /dev/null
+++ b/modules/video_coding/utility/frame_sampler.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/utility/frame_sampler.h"
+
+#include "api/video/video_frame.h"
+#include "modules/include/module_common_types_public.h"
+
+namespace webrtc {
+
+constexpr int kTimestampDifference =
+    90'000 - 1;  // Sample at most once per 90,000 RTP ticks (1 s at 90 kHz).
+
+bool FrameSampler::ShouldBeSampled(const VideoFrame& frame) {
+ if (!last_rtp_timestamp_sampled_.has_value() ||
+ (IsNewerTimestamp(frame.rtp_timestamp(),
+ *last_rtp_timestamp_sampled_ + kTimestampDifference))) {
+ last_rtp_timestamp_sampled_ = frame.rtp_timestamp();
+ return true;
+ }
+ return false;
+}
+
+} // namespace webrtc
diff --git a/modules/video_coding/utility/frame_sampler.h b/modules/video_coding/utility/frame_sampler.h
new file mode 100644
index 0000000..403c1e7
--- /dev/null
+++ b/modules/video_coding/utility/frame_sampler.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_UTILITY_FRAME_SAMPLER_H_
+#define MODULES_VIDEO_CODING_UTILITY_FRAME_SAMPLER_H_
+
+#include <cstdint>
+#include <optional>
+
+#include "api/video/video_frame.h"
+
+namespace webrtc {
+
+// Determine whether the frame should be sampled for operations
+// not done for every frame but only some of them. An example strategy
+// would be to require a minimum time elapsed between two frames based
+// on the RTP timestamp difference.
+class FrameSampler {
+ public:
+ FrameSampler() = default;
+ FrameSampler(const FrameSampler&) = delete;
+ FrameSampler& operator=(const FrameSampler&) = delete;
+
+ bool ShouldBeSampled(const VideoFrame& frame);
+
+ private:
+ std::optional<uint32_t> last_rtp_timestamp_sampled_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_VIDEO_CODING_UTILITY_FRAME_SAMPLER_H_
diff --git a/modules/video_coding/utility/frame_sampler_unittest.cc b/modules/video_coding/utility/frame_sampler_unittest.cc
new file mode 100644
index 0000000..0587338
--- /dev/null
+++ b/modules/video_coding/utility/frame_sampler_unittest.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/utility/frame_sampler.h"
+
+#include "api/make_ref_counted.h"
+#include "api/video/i420_buffer.h"
+#include "api/video/video_frame.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(FrameSampler, SamplesBasedOnRtpTimestamp) {
+ FrameSampler sampler;
+
+ auto buffer = make_ref_counted<I420Buffer>(320, 240);
+ VideoFrame frame =
+ VideoFrame::Builder().set_video_frame_buffer(buffer).build();
+
+ frame.set_rtp_timestamp(0);
+ EXPECT_TRUE(sampler.ShouldBeSampled(frame));
+ frame.set_rtp_timestamp(45'000);
+ EXPECT_FALSE(sampler.ShouldBeSampled(frame));
+ frame.set_rtp_timestamp(90'000);
+ EXPECT_TRUE(sampler.ShouldBeSampled(frame));
+}
+
+TEST(FrameSampler, RtpTimestampWraparound) {
+ FrameSampler sampler;
+
+ auto buffer = make_ref_counted<I420Buffer>(320, 240);
+ VideoFrame frame =
+ VideoFrame::Builder().set_video_frame_buffer(buffer).build();
+
+ // RTP timestamp wraps at 2**32.
+ frame.set_rtp_timestamp(0xffff'ffff - 4000);
+ EXPECT_TRUE(sampler.ShouldBeSampled(frame));
+ frame.set_rtp_timestamp(41'000);
+ EXPECT_FALSE(sampler.ShouldBeSampled(frame));
+ frame.set_rtp_timestamp(86'000);
+ EXPECT_TRUE(sampler.ShouldBeSampled(frame));
+}
+
+} // namespace webrtc