blob: d29db274088da987bd1e780a2d41633fca950729 [file] [log] [blame]
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/video_processing/main/source/content_analysis.h"
#include <math.h>
#include <stdlib.h>
#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
#include "webrtc/system_wrappers/include/tick_util.h"
namespace webrtc {
// Builds an analyzer with no buffers allocated; the buffers are created
// lazily by Initialize() once the first frame's dimensions are known.
// |runtime_cpu_detection| allows the SSE2 kernels to be selected when the
// build targets the x86 family and the CPU reports SSE2 support.
VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection)
    : orig_frame_(NULL),
      prev_frame_(NULL),
      width_(0),
      height_(0),
      skip_num_(1),
      border_(8),
      motion_magnitude_(0.0f),
      spatial_pred_err_(0.0f),
      spatial_pred_err_h_(0.0f),
      spatial_pred_err_v_(0.0f),
      first_frame_(true),
      ca_Init_(false),
      content_metrics_(NULL) {
  // The portable C kernels are the default choice.
  TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C;
  ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C;

  if (runtime_cpu_detection) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    const bool has_sse2 = WebRtc_GetCPUInfo(kSSE2) != 0;
    if (has_sse2) {
      TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2;
      ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2;
    }
#endif
  }

  // Puts the size / first-frame bookkeeping into its released state.
  Release();
}
// Frees the metrics object and the previous-frame buffer via Release().
VPMContentAnalysis::~VPMContentAnalysis() {
  this->Release();
}
// Analyzes the Y plane of |inputFrame| and returns the refreshed metrics.
// Returns NULL for a zero-size frame, or when (re)initialization for the
// frame's dimensions fails. The returned object is owned by this class.
VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics(
    const VideoFrame& inputFrame) {
  if (inputFrame.IsZeroSize()) {
    return NULL;
  }

  // A change of native dimensions requires the buffers to be rebuilt.
  const bool size_changed =
      width_ != inputFrame.width() || height_ != inputFrame.height();
  if (size_changed &&
      Initialize(inputFrame.width(), inputFrame.height()) != VPM_OK) {
    return NULL;
  }

  // Only the Y plane is analyzed.
  // NOTE(review): the kernels and the memcpy below address the plane as
  // width_ * height_ contiguous bytes - presumably the Y stride equals the
  // frame width; confirm against VideoFrame.
  orig_frame_ = inputFrame.buffer(kYPlane);

  // Spatial metrics: the three spatial prediction errors.
  (this->*ComputeSpatialMetrics)();

  // Motion metrics need a previous frame to compare against.
  if (!first_frame_) {
    ComputeMotionMetrics();
  }

  // Keep the current Y plane as the reference for the next call.
  memcpy(prev_frame_, orig_frame_, width_ * height_);
  first_frame_ = false;
  ca_Init_ = true;

  return ContentMetrics();
}
// Frees the metrics object and the previous-frame buffer and resets the
// stored dimensions, so the next frame triggers a fresh Initialize().
// Always returns VPM_OK.
int32_t VPMContentAnalysis::Release() {
  // delete / delete[] on a null pointer is a no-op, so no guards are needed.
  delete content_metrics_;
  content_metrics_ = NULL;

  delete[] prev_frame_;
  prev_frame_ = NULL;

  first_frame_ = true;
  height_ = 0;
  width_ = 0;
  return VPM_OK;
}
int32_t VPMContentAnalysis::Initialize(int width, int height) {
width_ = width;
height_ = height;
first_frame_ = true;
// skip parameter: # of skipped rows: for complexity reduction
// temporal also currently uses it for column reduction.
skip_num_ = 1;
// use skipNum = 2 for 4CIF, WHD
if ( (height_ >= 576) && (width_ >= 704) ) {
skip_num_ = 2;
}
// use skipNum = 4 for FULLL_HD images
if ( (height_ >= 1080) && (width_ >= 1920) ) {
skip_num_ = 4;
}
if (content_metrics_ != NULL) {
delete content_metrics_;
}
if (prev_frame_ != NULL) {
delete [] prev_frame_;
}
// Spatial Metrics don't work on a border of 8. Minimum processing
// block size is 16 pixels. So make sure the width and height support this.
if (width_ <= 32 || height_ <= 32) {
ca_Init_ = false;
return VPM_PARAMETER_ERROR;
}
content_metrics_ = new VideoContentMetrics();
if (content_metrics_ == NULL) {
return VPM_MEMORY;
}
prev_frame_ = new uint8_t[width_ * height_]; // Y only.
if (prev_frame_ == NULL) return VPM_MEMORY;
return VPM_OK;
}
// Computes the motion metrics. Currently a single metric is produced: the
// normalized temporal difference (MAD), evaluated by whichever
// TemporalDiffMetric kernel was selected in the constructor.
// Always returns VPM_OK; the kernel's own return value is not inspected.
int32_t VPMContentAnalysis::ComputeMotionMetrics() {
  static_cast<void>((this->*TemporalDiffMetric)());
  return VPM_OK;
}
// Normalized temporal difference (MAD), used as the motion-level metric.
// The mean absolute difference between the current and previous Y planes is
// divided by the spatial contrast (pixel standard deviation), because images
// with more contrast tend to show larger temporal differences.
// To limit complexity only every skip_num_-th row inside the border is
// sampled, and the sampled width is rounded down to a multiple of 16.
// Writes motion_magnitude_ and always returns VPM_OK.
int32_t VPMContentAnalysis::TemporalDiffMetric_C() {
  const int rows = height_;
  const int stride = width_;
  const int col_end = ((width_ - 2 * border_) & -16) + border_;

  uint32_t abs_diff_total = 0;
  uint32_t luma_total = 0;
  uint64_t luma_sq_total = 0;
  uint32_t samples = 0;

  for (int row = border_; row < rows - border_; row += skip_num_) {
    const uint8_t* cur_row = orig_frame_ + row * stride;
    const uint8_t* prev_row = prev_frame_ + row * stride;
    for (int col = border_; col < col_end; ++col) {
      const int cur = cur_row[col];
      const int prev = prev_row[col];
      abs_diff_total += static_cast<uint32_t>(abs(cur - prev));
      luma_total += static_cast<uint32_t>(cur);
      luma_sq_total += static_cast<uint64_t>(cur * cur);
      ++samples;
    }
  }

  // Default: no motion. Also covers the case where nothing was sampled.
  motion_magnitude_ = 0.0f;
  if (abs_diff_total != 0) {
    // Normalize over all sampled pixels.
    const float count = static_cast<float>(samples);
    const float mean_abs_diff = static_cast<float>(abs_diff_total) / count;
    const float mean = static_cast<float>(luma_total) / count;
    const float mean_sq = static_cast<float>(luma_sq_total) / count;

    // Variance = E[x^2] - E[x]^2; only a positive value can be normalized by.
    float contrast = mean_sq - (mean * mean);
    if (contrast > 0.0) {
      contrast = sqrt(contrast);
      motion_magnitude_ = mean_abs_diff / contrast;
    }
  }
  return VPM_OK;
}
// Compute spatial metrics:
// To reduce complexity, we compute the metric for a reduced set of points
// (every skip_num_-th row inside the border, width rounded down to a
// multiple of 16).
// The spatial metrics are rough estimates of the prediction error cost for
// each QM spatial mode: 2x2, 1x2, 2x1.
// The metrics are a simple estimate of the up-sampling prediction error,
// estimated assuming sub-sampling for decimation (no filtering),
// and up-sampling back up with simple bilinear interpolation.
//
// Writes spatial_pred_err_, spatial_pred_err_h_ and spatial_pred_err_v_ and
// always returns VPM_OK. When the sampled pixels sum to zero (an all-black
// analysis window, or no samples at all) the three metrics are set to 0
// instead of the NaN/Inf a division by zero would otherwise store.
int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() {
  const int sizei = height_;
  const int sizej = width_;

  // Sum of the sampled pixel values: used to normalize the spatial metrics.
  uint32_t pixelSum = 0;
  uint32_t spatialErrSum = 0;
  uint32_t spatialErrVSum = 0;
  uint32_t spatialErrHSum = 0;

  // Make sure the work section is a multiple of 16.
  const int width_end = ((sizej - 2 * border_) & -16) + border_;

  for (int i = border_; i < sizei - border_; i += skip_num_) {
    for (int j = border_; j < width_end; j++) {
      const int center = i * sizej + j;
      const int refPixel = orig_frame_[center];
      const int bottPixel = orig_frame_[center + sizej];
      const int topPixel = orig_frame_[center - sizej];
      const int rightPixel = orig_frame_[center + 1];
      const int leftPixel = orig_frame_[center - 1];

      // |4 * ref - (4 neighbours)|, |2 * ref - (vertical pair)| and
      // |2 * ref - (horizontal pair)|; the scale factors are removed by the
      // shifts after the loop.
      spatialErrSum += static_cast<uint32_t>(abs(
          (refPixel << 2) - (bottPixel + topPixel + leftPixel + rightPixel)));
      spatialErrVSum += static_cast<uint32_t>(
          abs((refPixel << 1) - (bottPixel + topPixel)));
      spatialErrHSum += static_cast<uint32_t>(
          abs((refPixel << 1) - (leftPixel + rightPixel)));
      pixelSum += static_cast<uint32_t>(refPixel);
    }
  }

  // Nothing to normalize by: report zero error rather than NaN/Inf.
  if (pixelSum == 0) {
    spatial_pred_err_ = 0.0f;
    spatial_pred_err_h_ = 0.0f;
    spatial_pred_err_v_ = 0.0f;
    return VPM_OK;
  }

  // Normalize over all pixels.
  const float spatialErr = static_cast<float>(spatialErrSum >> 2);
  const float spatialErrH = static_cast<float>(spatialErrHSum >> 1);
  const float spatialErrV = static_cast<float>(spatialErrVSum >> 1);
  const float norm = static_cast<float>(pixelSum);

  // 2X2:
  spatial_pred_err_ = spatialErr / norm;
  // 1X2:
  spatial_pred_err_h_ = spatialErrH / norm;
  // 2X1:
  spatial_pred_err_v_ = spatialErrV / norm;
  return VPM_OK;
}
// Copies the most recently computed spatial and motion values into the
// metrics object and returns it (still owned by this class).
// Returns NULL when no frame has been analyzed yet, or when the metrics
// object is gone: Release() clears content_metrics_ but leaves ca_Init_
// untouched, so the flag alone is not enough to make the dereference safe.
VideoContentMetrics* VPMContentAnalysis::ContentMetrics() {
  if (!ca_Init_ || content_metrics_ == NULL) {
    return NULL;
  }

  content_metrics_->spatial_pred_err = spatial_pred_err_;
  content_metrics_->spatial_pred_err_h = spatial_pred_err_h_;
  content_metrics_->spatial_pred_err_v = spatial_pred_err_v_;
  // Motion metric: normalized temporal difference (MAD).
  content_metrics_->motion_magnitude = motion_magnitude_;
  return content_metrics_;
}
} // namespace webrtc