| /* |
| * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "modules/video_coding/codecs/test/stats.h" |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <numeric> |
| |
| #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" |
| #include "rtc_base/checks.h" |
| #include "test/statistics.h" |
| |
| namespace webrtc { |
| namespace test { |
| |
namespace {
// Largest deviation, in percent of the target bitrate, between the measured
// and the target bitrate for which the target still counts as reached.
constexpr int kMaxBitrateMismatchPercent = 20;
}  // namespace
| |
| std::string FrameStatistics::ToString() const { |
| std::stringstream ss; |
| ss << "frame_number " << frame_number; |
| ss << " decoded_width " << decoded_width; |
| ss << " decoded_height " << decoded_height; |
| ss << " simulcast_svc_idx " << simulcast_svc_idx; |
| ss << " temporal_layer_idx " << temporal_layer_idx; |
| ss << " inter_layer_predicted " << inter_layer_predicted; |
| ss << " frame_type " << frame_type; |
| ss << " length_bytes " << length_bytes; |
| ss << " qp " << qp; |
| ss << " psnr " << psnr; |
| ss << " psnr_y " << psnr_y; |
| ss << " psnr_u " << psnr_u; |
| ss << " psnr_v " << psnr_v; |
| ss << " ssim " << ssim; |
| ss << " encode_time_us " << encode_time_us; |
| ss << " decode_time_us " << decode_time_us; |
| ss << " rtp_timestamp " << rtp_timestamp; |
| ss << " target_bitrate_kbps " << target_bitrate_kbps; |
| return ss.str(); |
| } |
| |
| std::string VideoStatistics::ToString(std::string prefix) const { |
| std::stringstream ss; |
| ss << prefix << "target_bitrate_kbps: " << target_bitrate_kbps; |
| ss << "\n" << prefix << "input_framerate_fps: " << input_framerate_fps; |
| ss << "\n" << prefix << "spatial_layer_idx: " << spatial_layer_idx; |
| ss << "\n" << prefix << "temporal_layer_idx: " << temporal_layer_idx; |
| ss << "\n" << prefix << "width: " << width; |
| ss << "\n" << prefix << "height: " << height; |
| ss << "\n" << prefix << "length_bytes: " << length_bytes; |
| ss << "\n" << prefix << "bitrate_kbps: " << bitrate_kbps; |
| ss << "\n" << prefix << "framerate_fps: " << framerate_fps; |
| ss << "\n" << prefix << "enc_speed_fps: " << enc_speed_fps; |
| ss << "\n" << prefix << "dec_speed_fps: " << dec_speed_fps; |
| ss << "\n" << prefix << "avg_delay_sec: " << avg_delay_sec; |
| ss << "\n" |
| << prefix << "max_key_frame_delay_sec: " << max_key_frame_delay_sec; |
| ss << "\n" |
| << prefix << "max_delta_frame_delay_sec: " << max_delta_frame_delay_sec; |
| ss << "\n" |
| << prefix << "time_to_reach_target_bitrate_sec: " |
| << time_to_reach_target_bitrate_sec; |
| ss << "\n" |
| << prefix << "avg_key_frame_size_bytes: " << avg_key_frame_size_bytes; |
| ss << "\n" |
| << prefix << "avg_delta_frame_size_bytes: " << avg_delta_frame_size_bytes; |
| ss << "\n" << prefix << "avg_qp: " << avg_qp; |
| ss << "\n" << prefix << "avg_psnr: " << avg_psnr; |
| ss << "\n" << prefix << "min_psnr: " << min_psnr; |
| ss << "\n" << prefix << "avg_ssim: " << avg_ssim; |
| ss << "\n" << prefix << "min_ssim: " << min_ssim; |
| ss << "\n" << prefix << "num_input_frames: " << num_input_frames; |
| ss << "\n" << prefix << "num_encoded_frames: " << num_encoded_frames; |
| ss << "\n" << prefix << "num_decoded_frames: " << num_decoded_frames; |
| ss << "\n" |
| << prefix |
| << "num_dropped_frames: " << num_input_frames - num_encoded_frames; |
| ss << "\n" << prefix << "num_key_frames: " << num_key_frames; |
| ss << "\n" << prefix << "num_spatial_resizes: " << num_spatial_resizes; |
| ss << "\n" << prefix << "max_nalu_size_bytes: " << max_nalu_size_bytes; |
| return ss.str(); |
| } |
| |
| FrameStatistics* Stats::AddFrame(size_t timestamp, size_t layer_idx) { |
| RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) == |
| rtp_timestamp_to_frame_num_[layer_idx].end()); |
| const size_t frame_num = layer_stats_[layer_idx].size(); |
| rtp_timestamp_to_frame_num_[layer_idx][timestamp] = frame_num; |
| layer_stats_[layer_idx].emplace_back(frame_num, timestamp); |
| return &layer_stats_[layer_idx].back(); |
| } |
| |
| FrameStatistics* Stats::GetFrame(size_t frame_num, size_t layer_idx) { |
| RTC_CHECK_LT(frame_num, layer_stats_[layer_idx].size()); |
| return &layer_stats_[layer_idx][frame_num]; |
| } |
| |
| FrameStatistics* Stats::GetFrameWithTimestamp(size_t timestamp, |
| size_t layer_idx) { |
| RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) != |
| rtp_timestamp_to_frame_num_[layer_idx].end()); |
| |
| return GetFrame(rtp_timestamp_to_frame_num_[layer_idx][timestamp], layer_idx); |
| } |
| |
| std::vector<VideoStatistics> Stats::SliceAndCalcLayerVideoStatistic( |
| size_t first_frame_num, |
| size_t last_frame_num) { |
| std::vector<VideoStatistics> layer_stats; |
| |
| size_t num_spatial_layers = 0; |
| size_t num_temporal_layers = 0; |
| GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers, |
| &num_temporal_layers); |
| RTC_CHECK_GT(num_spatial_layers, 0); |
| RTC_CHECK_GT(num_temporal_layers, 0); |
| |
| for (size_t spatial_layer_idx = 0; spatial_layer_idx < num_spatial_layers; |
| ++spatial_layer_idx) { |
| for (size_t temporal_layer_idx = 0; |
| temporal_layer_idx < num_temporal_layers; ++temporal_layer_idx) { |
| VideoStatistics layer_stat = SliceAndCalcVideoStatistic( |
| first_frame_num, last_frame_num, spatial_layer_idx, |
| temporal_layer_idx, false); |
| layer_stats.push_back(layer_stat); |
| } |
| } |
| |
| return layer_stats; |
| } |
| |
| VideoStatistics Stats::SliceAndCalcAggregatedVideoStatistic( |
| size_t first_frame_num, |
| size_t last_frame_num) { |
| size_t num_spatial_layers = 0; |
| size_t num_temporal_layers = 0; |
| GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers, |
| &num_temporal_layers); |
| RTC_CHECK_GT(num_spatial_layers, 0); |
| RTC_CHECK_GT(num_temporal_layers, 0); |
| |
| return SliceAndCalcVideoStatistic(first_frame_num, last_frame_num, |
| num_spatial_layers - 1, |
| num_temporal_layers - 1, true); |
| } |
| |
| void Stats::PrintFrameStatistics() { |
| for (size_t frame_num = 0; frame_num < layer_stats_[0].size(); ++frame_num) { |
| for (const auto& it : layer_stats_) { |
| const FrameStatistics& frame_stat = it.second[frame_num]; |
| printf("\n%s", frame_stat.ToString().c_str()); |
| } |
| } |
| } |
| |
| size_t Stats::Size(size_t spatial_layer_idx) { |
| return layer_stats_[spatial_layer_idx].size(); |
| } |
| |
| void Stats::Clear() { |
| layer_stats_.clear(); |
| rtp_timestamp_to_frame_num_.clear(); |
| } |
| |
| FrameStatistics Stats::AggregateFrameStatistic( |
| size_t frame_num, |
| size_t spatial_layer_idx, |
| bool aggregate_independent_layers) { |
| FrameStatistics frame_stat = *GetFrame(frame_num, spatial_layer_idx); |
| bool inter_layer_predicted = frame_stat.inter_layer_predicted; |
| while (spatial_layer_idx-- > 0) { |
| if (aggregate_independent_layers || inter_layer_predicted) { |
| FrameStatistics* base_frame_stat = GetFrame(frame_num, spatial_layer_idx); |
| frame_stat.length_bytes += base_frame_stat->length_bytes; |
| frame_stat.target_bitrate_kbps += base_frame_stat->target_bitrate_kbps; |
| |
| inter_layer_predicted = base_frame_stat->inter_layer_predicted; |
| } |
| } |
| |
| return frame_stat; |
| } |
| |
| size_t Stats::CalcLayerTargetBitrateKbps(size_t first_frame_num, |
| size_t last_frame_num, |
| size_t spatial_layer_idx, |
| size_t temporal_layer_idx, |
| bool aggregate_independent_layers) { |
| size_t target_bitrate_kbps = 0; |
| |
| // We don't know if superframe includes all required spatial layers because |
| // of possible frame drops. Run through all frames in specified range, find |
| // and return maximum target bitrate. Assume that target bitrate in frame |
| // statistic is specified per temporal layer. |
| for (size_t frame_num = first_frame_num; frame_num <= last_frame_num; |
| ++frame_num) { |
| FrameStatistics superframe = AggregateFrameStatistic( |
| frame_num, spatial_layer_idx, aggregate_independent_layers); |
| |
| if (superframe.temporal_layer_idx <= temporal_layer_idx) { |
| target_bitrate_kbps = |
| std::max(target_bitrate_kbps, superframe.target_bitrate_kbps); |
| } |
| } |
| |
| RTC_DCHECK_GT(target_bitrate_kbps, 0); |
| return target_bitrate_kbps; |
| } |
| |
| VideoStatistics Stats::SliceAndCalcVideoStatistic( |
| size_t first_frame_num, |
| size_t last_frame_num, |
| size_t spatial_layer_idx, |
| size_t temporal_layer_idx, |
| bool aggregate_independent_layers) { |
| VideoStatistics video_stat; |
| |
| float buffer_level_bits = 0.0f; |
| Statistics buffer_level_sec; |
| |
| Statistics key_frame_size_bytes; |
| Statistics delta_frame_size_bytes; |
| |
| Statistics frame_encoding_time_us; |
| Statistics frame_decoding_time_us; |
| |
| Statistics psnr_y; |
| Statistics psnr_u; |
| Statistics psnr_v; |
| Statistics psnr; |
| Statistics ssim; |
| Statistics qp; |
| |
| size_t rtp_timestamp_first_frame = 0; |
| size_t rtp_timestamp_prev_frame = 0; |
| |
| FrameStatistics last_successfully_decoded_frame(0, 0); |
| |
| const size_t target_bitrate_kbps = CalcLayerTargetBitrateKbps( |
| first_frame_num, last_frame_num, spatial_layer_idx, temporal_layer_idx, |
| aggregate_independent_layers); |
| |
| for (size_t frame_num = first_frame_num; frame_num <= last_frame_num; |
| ++frame_num) { |
| FrameStatistics frame_stat = AggregateFrameStatistic( |
| frame_num, spatial_layer_idx, aggregate_independent_layers); |
| |
| float time_since_first_frame_sec = |
| 1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_first_frame) / |
| kVideoPayloadTypeFrequency; |
| float time_since_prev_frame_sec = |
| 1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_prev_frame) / |
| kVideoPayloadTypeFrequency; |
| |
| if (frame_stat.temporal_layer_idx > temporal_layer_idx) { |
| continue; |
| } |
| |
| buffer_level_bits -= time_since_prev_frame_sec * 1000 * target_bitrate_kbps; |
| buffer_level_bits = std::max(0.0f, buffer_level_bits); |
| buffer_level_bits += 8.0 * frame_stat.length_bytes; |
| buffer_level_sec.AddSample(buffer_level_bits / |
| (1000 * target_bitrate_kbps)); |
| |
| video_stat.length_bytes += frame_stat.length_bytes; |
| |
| if (frame_stat.encoding_successful) { |
| ++video_stat.num_encoded_frames; |
| |
| if (frame_stat.frame_type == kVideoFrameKey) { |
| key_frame_size_bytes.AddSample(frame_stat.length_bytes); |
| ++video_stat.num_key_frames; |
| } else { |
| delta_frame_size_bytes.AddSample(frame_stat.length_bytes); |
| } |
| |
| frame_encoding_time_us.AddSample(frame_stat.encode_time_us); |
| qp.AddSample(frame_stat.qp); |
| |
| video_stat.max_nalu_size_bytes = std::max(video_stat.max_nalu_size_bytes, |
| frame_stat.max_nalu_size_bytes); |
| } |
| |
| if (frame_stat.decoding_successful) { |
| ++video_stat.num_decoded_frames; |
| |
| video_stat.width = frame_stat.decoded_width; |
| video_stat.height = frame_stat.decoded_height; |
| |
| psnr_y.AddSample(frame_stat.psnr_y); |
| psnr_u.AddSample(frame_stat.psnr_u); |
| psnr_v.AddSample(frame_stat.psnr_v); |
| psnr.AddSample(frame_stat.psnr); |
| ssim.AddSample(frame_stat.ssim); |
| |
| if (video_stat.num_decoded_frames > 1) { |
| if (last_successfully_decoded_frame.decoded_width != |
| frame_stat.decoded_width || |
| last_successfully_decoded_frame.decoded_height != |
| frame_stat.decoded_height) { |
| ++video_stat.num_spatial_resizes; |
| } |
| } |
| |
| frame_decoding_time_us.AddSample(frame_stat.decode_time_us); |
| last_successfully_decoded_frame = frame_stat; |
| } |
| |
| if (video_stat.num_input_frames > 0) { |
| if (video_stat.time_to_reach_target_bitrate_sec == 0.0f) { |
| const float curr_kbps = |
| 8.0 * video_stat.length_bytes / 1000 / time_since_first_frame_sec; |
| const float bitrate_mismatch_percent = |
| 100 * std::fabs(curr_kbps - target_bitrate_kbps) / |
| target_bitrate_kbps; |
| if (bitrate_mismatch_percent < kMaxBitrateMismatchPercent) { |
| video_stat.time_to_reach_target_bitrate_sec = |
| time_since_first_frame_sec; |
| } |
| } |
| } |
| |
| rtp_timestamp_prev_frame = frame_stat.rtp_timestamp; |
| if (video_stat.num_input_frames == 0) { |
| rtp_timestamp_first_frame = frame_stat.rtp_timestamp; |
| } |
| |
| ++video_stat.num_input_frames; |
| } |
| |
| const size_t num_frames = last_frame_num - first_frame_num + 1; |
| const size_t timestamp_delta = |
| GetFrame(first_frame_num + 1, spatial_layer_idx)->rtp_timestamp - |
| GetFrame(first_frame_num, spatial_layer_idx)->rtp_timestamp; |
| const float input_framerate_fps = |
| 1.0 * kVideoPayloadTypeFrequency / timestamp_delta; |
| const float duration_sec = num_frames / input_framerate_fps; |
| |
| video_stat.target_bitrate_kbps = target_bitrate_kbps; |
| video_stat.input_framerate_fps = input_framerate_fps; |
| |
| video_stat.spatial_layer_idx = spatial_layer_idx; |
| video_stat.temporal_layer_idx = temporal_layer_idx; |
| |
| video_stat.bitrate_kbps = |
| static_cast<size_t>(8 * video_stat.length_bytes / 1000 / duration_sec); |
| video_stat.framerate_fps = video_stat.num_encoded_frames / duration_sec; |
| |
| video_stat.enc_speed_fps = 1000000 / frame_encoding_time_us.Mean(); |
| video_stat.dec_speed_fps = 1000000 / frame_decoding_time_us.Mean(); |
| |
| video_stat.avg_delay_sec = buffer_level_sec.Mean(); |
| video_stat.max_key_frame_delay_sec = |
| 8 * key_frame_size_bytes.Max() / 1000 / target_bitrate_kbps; |
| video_stat.max_delta_frame_delay_sec = |
| 8 * delta_frame_size_bytes.Max() / 1000 / target_bitrate_kbps; |
| |
| video_stat.avg_key_frame_size_bytes = key_frame_size_bytes.Mean(); |
| video_stat.avg_delta_frame_size_bytes = delta_frame_size_bytes.Mean(); |
| video_stat.avg_qp = qp.Mean(); |
| |
| video_stat.avg_psnr_y = psnr_y.Mean(); |
| video_stat.avg_psnr_u = psnr_u.Mean(); |
| video_stat.avg_psnr_v = psnr_v.Mean(); |
| video_stat.avg_psnr = psnr.Mean(); |
| video_stat.min_psnr = psnr.Min(); |
| video_stat.avg_ssim = ssim.Mean(); |
| video_stat.min_ssim = ssim.Min(); |
| |
| return video_stat; |
| } |
| |
| void Stats::GetNumberOfEncodedLayers(size_t first_frame_num, |
| size_t last_frame_num, |
| size_t* num_encoded_spatial_layers, |
| size_t* num_encoded_temporal_layers) { |
| *num_encoded_spatial_layers = 0; |
| *num_encoded_temporal_layers = 0; |
| |
| const size_t num_spatial_layers = layer_stats_.size(); |
| |
| for (size_t frame_num = first_frame_num; frame_num <= last_frame_num; |
| ++frame_num) { |
| for (size_t spatial_layer_idx = 0; spatial_layer_idx < num_spatial_layers; |
| ++spatial_layer_idx) { |
| FrameStatistics* frame_stat = GetFrame(frame_num, spatial_layer_idx); |
| if (frame_stat->encoding_successful) { |
| *num_encoded_spatial_layers = std::max( |
| *num_encoded_spatial_layers, frame_stat->simulcast_svc_idx + 1); |
| *num_encoded_temporal_layers = std::max( |
| *num_encoded_temporal_layers, frame_stat->temporal_layer_idx + 1); |
| } |
| } |
| } |
| } |
| |
| } // namespace test |
| } // namespace webrtc |