// webrtc/modules/video_coding/main/source/qm_select.h - src - Git at Google

 /*
  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
 #define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_

 #include "webrtc/common_types.h"
 #include "webrtc/typedefs.h"

 /******************************************************/
 /* Quality Modes: Resolution and Robustness settings  */
 /******************************************************/

 namespace webrtc {
 struct VideoContentMetrics;

 // Describes a codec resolution setting: frame size, frame rate, the spatial
 // and temporal scale factors, and flags marking a spatial/temporal change.
 struct VCMResolutionScale {
   // Defaults: 640x480 at 30.0 frame rate, unity scale factors, no change
   // flagged.
   VCMResolutionScale() {
     codec_width = 640;
     codec_height = 480;
     frame_rate = 30.0f;
     spatial_width_fact = 1.0f;
     spatial_height_fact = 1.0f;
     temporal_fact = 1.0f;
     change_resolution_spatial = false;
     change_resolution_temporal = false;
   }

   uint16_t codec_width;             // Codec frame width.
   uint16_t codec_height;            // Codec frame height.
   float frame_rate;                 // Codec frame rate.
   float spatial_width_fact;         // Spatial scale factor for the width.
   float spatial_height_fact;        // Spatial scale factor for the height.
   float temporal_fact;              // Temporal (frame rate) scale factor.
   bool change_resolution_spatial;   // True when the spatial size changes.
   bool change_resolution_temporal;  // True when the frame rate changes.
 };

 // Image (frame size) classes, listed in order of increasing pixel count.
 // The values are contiguous from 0 so they can index kSizeOfImageType.
 enum ImageType {
   kQCIF = 0,      // 176x144
   kHCIF = 1,      // 264x216 = half(~3/4x3/4) CIF.
   kQVGA = 2,      // 320x240 = quarter VGA.
   kCIF = 3,       // 352x288
   kHVGA = 4,      // 480x360 = half(~3/4x3/4) VGA.
   kVGA = 5,       // 640x480
   kQFULLHD = 6,   // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD.
   kWHD = 7,       // 1280x720
   kFULLHD = 8,    // 1920x1080
   kNumImageTypes  // Number of image types; must stay last.
 };

 // Pixel count (width * height) of every ImageType, indexed by ImageType.
 const uint32_t kSizeOfImageType[kNumImageTypes] = {
   176 * 144,    // kQCIF    = 25344
   264 * 216,    // kHCIF    = 57024
   320 * 240,    // kQVGA    = 76800
   352 * 288,    // kCIF     = 101376
   480 * 360,    // kHVGA    = 172800
   640 * 480,    // kVGA     = 307200
   960 * 540,    // kQFULLHD = 518400
   1280 * 720,   // kWHD     = 921600
   1920 * 1080   // kFULLHD  = 2073600
 };

 // Frame rate level classes, from lowest to highest.
 enum FrameRateLevelClass {
   kFrameRateLow = 0,
   kFrameRateMiddle1 = 1,
   kFrameRateMiddle2 = 2,
   kFrameRateHigh = 3
 };

 // Level assigned to a content feature; kDefault is the initial/reset level.
 enum ContentLevelClass {
   kLow = 0,
   kHigh = 1,
   kDefault = 2
 };

 // A content feature: a magnitude (|value|) together with its level class.
 struct VCMContFeature {
   // A new feature starts in the same state Reset() produces.
   VCMContFeature() {
     Reset();
   }
   // Return to the initial state: zero magnitude, default level.
   void Reset() {
     level = kDefault;
     value = 0.0f;
   }
   float value;              // Feature magnitude.
   ContentLevelClass level;  // Level class of the feature.
 };

 // Direction of a resolution change.
 enum UpDownAction {
   kUpResolution = 0,
   kDownResolution = 1
 };

 // Spatial down-sampling modes. The values index kFactorWidthSpatial and
 // kFactorHeightSpatial.
 enum SpatialAction {
   kNoChangeSpatial = 0,
   kOneHalfSpatialUniform = 1,     // 3/4 x 3/4: 9/16 ~1/2 pixel reduction.
   kOneQuarterSpatialUniform = 2,  // 1/2 x 1/2: 1/4 pixel reduction.
   kNumModesSpatial                // Number of spatial modes; must stay last.
 };

 // Temporal (frame rate) down-sampling modes. The values index
 // kFactorTemporal.
 enum TemporalAction {
   kNoChangeTemporal = 0,
   kTwoThirdsTemporal = 1,  // 2/3 frame rate reduction
   kOneHalfTemporal = 2,    // 1/2 frame rate reduction
   kNumModesTemporal        // Number of temporal modes; must stay last.
 };

 struct ResolutionAction {
   ResolutionAction()
       : spatial(kNoChangeSpatial),
         temporal(kNoChangeTemporal) {
   }
   SpatialAction spatial;
   TemporalAction temporal;
 };

 // Down-sampling factors for spatial (width and height), and temporal.
 // Width factor for each SpatialAction, in enum order: no change (1),
 // 3/4 scaling (4/3) and 1/2 scaling (2).
 const float kFactorWidthSpatial[kNumModesSpatial] =
     { 1.0f, 4.0f / 3.0f, 2.0f };

 // Height factor for each SpatialAction, in enum order; the values match
 // the width factors.
 const float kFactorHeightSpatial[kNumModesSpatial] =
     { 1.0f, 4.0f / 3.0f, 2.0f };

 // Frame rate factor for each TemporalAction, in enum order: no change (1),
 // 2/3 frame rate (1.5) and 1/2 frame rate (2).
 const float kFactorTemporal[kNumModesTemporal] =
     { 1.0f, 1.5f, 2.0f };

 // Encoder state, judged from rate mis-match and buffer behavior.
 enum EncoderState {
   // Low rate mis-match, stable buffer levels.
   kStableEncoding = 0,
   // Significant over-shooting of target rate, buffer under-flow, etc.
   kStressedEncoding = 1,
   // Significant under-shooting of target rate.
   kEasyEncoding = 2
 };

 // QmMethod class: main class for resolution and robustness settings.
 // Abstract base (Reset() is pure virtual) of VCMQmResolution and
 // VCMQmRobustness. It stores the content metrics pointer, the current and
 // native frame size / frame rate, and the motion/spatial content features.

 class VCMQmMethod {
  public:
   VCMQmMethod();
   virtual ~VCMQmMethod();

   // Reset values
   void ResetQM();
   // Must be implemented by every derived class.
   virtual void Reset() = 0;

   // Compute content class.
   uint8_t ComputeContentClass();

   // Update with the content metrics.
   // The pointer is to const data; NOTE(review): presumably it is stored in
   // |content_metrics_| without taking ownership -- confirm in the .cc file.
   void UpdateContent(const VideoContentMetrics* content_metrics);

   // Compute spatial texture magnitude and level.
   // Spatial texture is a spatial prediction error measure.
   void ComputeSpatial();

   // Compute motion magnitude and level for NFD metric.
   // NFD is normalized frame difference (normalized by spatial variance).
   void ComputeMotionNFD();

   // Get the imageType (CIF, VGA, HD, etc) for the system width/height.
   ImageType GetImageType(uint16_t width, uint16_t height);

   // Return the closest image type.
   ImageType FindClosestImageType(uint16_t width, uint16_t height);

   // Get the frame rate level.
   FrameRateLevelClass FrameRateLevel(float frame_rate);

  protected:
   // Content Data.
   const VideoContentMetrics* content_metrics_;

   // Encoder frame sizes and native frame sizes.
   uint16_t width_;
   uint16_t height_;
   float user_frame_rate_;
   uint16_t native_width_;
   uint16_t native_height_;
   float native_frame_rate_;
   float aspect_ratio_;
   // Image type and frame rate level, for the current encoder resolution.
   ImageType image_type_;
   FrameRateLevelClass framerate_level_;
   // Content class data.
   VCMContFeature motion_;
   VCMContFeature spatial_;
   uint8_t content_class_;
   // NOTE(review): presumably true once the object has been initialized --
   // confirm in the .cc file.
   bool init_;
 };

 // Resolution settings class.
 // Accumulates rate, frame rate and packet loss updates and selects a
 // spatial and/or temporal resolution action, returned to the caller as a
 // VCMResolutionScale through SelectResolution().

 class VCMQmResolution : public VCMQmMethod {
  public:
   VCMQmResolution();
   virtual ~VCMQmResolution();

   // Reset all quantities.
   virtual void Reset();

   // Reset rate quantities and counters after every SelectResolution() call.
   void ResetRates();

   // Reset down-sampling state.
   void ResetDownSamplingState();

   // Get the encoder state.
   EncoderState GetEncoderState();

   // Initialize after SetEncodingData in media_opt.
   // NOTE(review): the meaning of the int return value (presumably a status
   // code) and the unit of |bitrate| are not visible here -- confirm in the
   // .cc file.
   int Initialize(float bitrate,
                  float user_framerate,
                  uint16_t width,
                  uint16_t height,
                  int num_layers);

   // Update the encoder frame size.
   void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height);

   // Update with the size of the latest encoded frame, after every encoded
   // frame.
   void UpdateEncodedSize(size_t encoded_size);

   // Update with new target bitrate, actual encoder sent rate, frame_rate,
   // loss rate: every ~1 sec from SetTargetRates in media_opt.
   void UpdateRates(float target_bitrate,
                    float encoder_sent_rate,
                    float incoming_framerate,
                    uint8_t packet_loss);

   // Extract ST (spatio-temporal) resolution action.
   // Inputs: qm: Reference to the quality modes pointer.
   // Output: the spatial and/or temporal scale change.
   // NOTE(review): the meaning of the int return value is not visible here
   // (presumably a status code) -- confirm in the .cc file.
   int SelectResolution(VCMResolutionScale** qm);

  private:
   // Set the default resolution action.
   void SetDefaultAction();

   // Compute rates for the selection of down-sampling action.
   void ComputeRatesForSelection();

   // Compute the encoder state.
   void ComputeEncoderState();

   // Return true if the action is to go back up in resolution.
   bool GoingUpResolution();

   // Return true if the action is to go down in resolution.
   bool GoingDownResolution();

   // Check the condition for going up in resolution by the scale factors:
   // |fac_width|, |fac_height|, |fac_temp|.
   // |scale_fac| is a scale factor for the transition rate.
   bool ConditionForGoingUp(float fac_width,
                            float fac_height,
                            float fac_temp,
                            float scale_fac);

   // Get the bitrate threshold for the resolution action.
   // The case |fac_width| = |fac_height| = |fac_temp| = 1 is for the
   // down-sampling action.
   // |scale_fac| is a scale factor for the transition rate.
   float GetTransitionRate(float fac_width,
                           float fac_height,
                           float fac_temp,
                           float scale_fac);

   // Update the down-sampling state.
   void UpdateDownsamplingState(UpDownAction up_down);

   // Update the codec frame size and frame rate.
   void UpdateCodecResolution();

   // Return a state based on average target rate relative transition rate.
   uint8_t RateClass(float transition_rate);

   // Adjust the action selected from the table.
   void AdjustAction();

   // Convert 2 stages of 3/4 (=9/16) spatial decimation to 1/2.
   void ConvertSpatialFractionalToWhole();

   // Returns true if the new frame sizes, under the selected spatial action,
   // are of even size.
   bool EvenFrameSize();

   // Insert latest down-sampling action into the history list.
   void InsertLatestDownAction();

   // Remove the last (first element) down-sampling action from the list.
   void RemoveLastDownAction();

   // Check constraints on the amount of down-sampling allowed.
   void ConstrainAmountOfDownSampling();

   // For going up in resolution: pick spatial or temporal action,
   // if both actions were separately selected.
   void PickSpatialOrTemporal();

   // Select the directional (1x2 or 2x1) spatial down-sampling action.
   void SelectSpatialDirectionMode(float transition_rate);

   // Number of entries in |down_action_history_|.
   enum { kDownActionHistorySize = 10};

   // NOTE(review): presumably the scale object handed out through
   // SelectResolution(); ownership is not visible here -- confirm in the .cc.
   VCMResolutionScale* qm_;
   // Encoder rate control parameters.
   float target_bitrate_;
   float incoming_framerate_;
   float per_frame_bandwidth_;
   float buffer_level_;

   // Data accumulated every ~1sec from MediaOpt.
   float sum_target_rate_;
   float sum_incoming_framerate_;
   float sum_rate_MM_;
   float sum_rate_MM_sgn_;
   float sum_packet_loss_;
   // Counters.
   uint32_t frame_cnt_;
   uint32_t frame_cnt_delta_;
   uint32_t update_rate_cnt_;
   uint32_t low_buffer_cnt_;

   // Resolution state parameters.
   float state_dec_factor_spatial_;
   float state_dec_factor_temporal_;

   // Quantities used for selection.
   float avg_target_rate_;
   float avg_incoming_framerate_;
   float avg_ratio_buffer_low_;
   float avg_rate_mismatch_;
   float avg_rate_mismatch_sgn_;
   float avg_packet_loss_;
   EncoderState encoder_state_;
   ResolutionAction action_;
   // Short history of the down-sampling actions from the Initialize() state.
   // This is needed for going up in resolution. Since the total amount of
   // down-sampling actions are constrained, the length of the list need not be
   // large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample.
   ResolutionAction down_action_history_[kDownActionHistorySize];
   // NOTE(review): presumably the |num_layers| value given to Initialize() --
   // confirm in the .cc file.
   int num_layers_;
 };

 // Robustness settings class.
 // Derives from VCMQmMethod and keeps the network parameters seen on the
 // previous update (total rate, RTT, packet loss, code rate delta).

 class VCMQmRobustness : public VCMQmMethod {
  public:
   VCMQmRobustness();
   // Spelled |virtual| to match VCMQmResolution and Reset() below; the
   // destructor is virtual either way because the base destructor is.
   virtual ~VCMQmRobustness();

   // Implements the pure virtual VCMQmMethod::Reset().
   virtual void Reset();

   // Adjust FEC rate based on content: every ~1 sec from SetTargetRates.
   // Returns an adjustment factor.
   float AdjustFecFactor(uint8_t code_rate_delta,
                         float total_rate,
                         float framerate,
                         int64_t rtt_time,
                         uint8_t packet_loss);

   // Set the UEP protection on/off.
   // NOTE(review): the meaning of the bool |frame_type| and of the returned
   // bool is not visible in this header -- confirm in the .cc file.
   bool SetUepProtection(uint8_t code_rate_delta,
                         float total_rate,
                         uint8_t packet_loss,
                         bool frame_type);

  private:
   // Previous state of network parameters.
   float prev_total_rate_;
   int64_t prev_rtt_time_;
   uint8_t prev_packet_loss_;
   uint8_t prev_code_rate_delta_;
 };
 }  // namespace webrtc
 #endif  // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
	/*
	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
	#define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_

	#include "webrtc/common_types.h"
	#include "webrtc/typedefs.h"

	/******************************************************/
	/* Quality Modes: Resolution and Robustness settings */
	/******************************************************/

	namespace webrtc {
	struct VideoContentMetrics;

	// Describes a codec resolution setting: frame size, frame rate, the spatial
	// and temporal scale factors, and flags marking a spatial/temporal change.
	struct VCMResolutionScale {
	  // Defaults: 640x480 at 30.0 frame rate, unity scale factors, no change
	  // flagged.
	  VCMResolutionScale() {
	    codec_width = 640;
	    codec_height = 480;
	    frame_rate = 30.0f;
	    spatial_width_fact = 1.0f;
	    spatial_height_fact = 1.0f;
	    temporal_fact = 1.0f;
	    change_resolution_spatial = false;
	    change_resolution_temporal = false;
	  }

	  uint16_t codec_width;             // Codec frame width.
	  uint16_t codec_height;            // Codec frame height.
	  float frame_rate;                 // Codec frame rate.
	  float spatial_width_fact;         // Spatial scale factor for the width.
	  float spatial_height_fact;        // Spatial scale factor for the height.
	  float temporal_fact;              // Temporal (frame rate) scale factor.
	  bool change_resolution_spatial;   // True when the spatial size changes.
	  bool change_resolution_temporal;  // True when the frame rate changes.
	};

	// Image (frame size) classes, listed in order of increasing pixel count.
	// The values are contiguous from 0 so they can index kSizeOfImageType.
	enum ImageType {
	  kQCIF = 0,      // 176x144
	  kHCIF = 1,      // 264x216 = half(~3/4x3/4) CIF.
	  kQVGA = 2,      // 320x240 = quarter VGA.
	  kCIF = 3,       // 352x288
	  kHVGA = 4,      // 480x360 = half(~3/4x3/4) VGA.
	  kVGA = 5,       // 640x480
	  kQFULLHD = 6,   // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD.
	  kWHD = 7,       // 1280x720
	  kFULLHD = 8,    // 1920x1080
	  kNumImageTypes  // Number of image types; must stay last.
	};

	// Pixel count (width * height) of every ImageType, indexed by ImageType.
	const uint32_t kSizeOfImageType[kNumImageTypes] = {
	  176 * 144,    // kQCIF    = 25344
	  264 * 216,    // kHCIF    = 57024
	  320 * 240,    // kQVGA    = 76800
	  352 * 288,    // kCIF     = 101376
	  480 * 360,    // kHVGA    = 172800
	  640 * 480,    // kVGA     = 307200
	  960 * 540,    // kQFULLHD = 518400
	  1280 * 720,   // kWHD     = 921600
	  1920 * 1080   // kFULLHD  = 2073600
	};

	// Frame rate level classes, from lowest to highest.
	enum FrameRateLevelClass {
	  kFrameRateLow = 0,
	  kFrameRateMiddle1 = 1,
	  kFrameRateMiddle2 = 2,
	  kFrameRateHigh = 3
	};

	// Level assigned to a content feature; kDefault is the initial/reset level.
	enum ContentLevelClass {
	  kLow = 0,
	  kHigh = 1,
	  kDefault = 2
	};

	// A content feature: a magnitude (|value|) together with its level class.
	struct VCMContFeature {
	  // A new feature starts in the same state Reset() produces.
	  VCMContFeature() {
	    Reset();
	  }
	  // Return to the initial state: zero magnitude, default level.
	  void Reset() {
	    level = kDefault;
	    value = 0.0f;
	  }
	  float value;              // Feature magnitude.
	  ContentLevelClass level;  // Level class of the feature.
	};

	// Direction of a resolution change.
	enum UpDownAction {
	  kUpResolution = 0,
	  kDownResolution = 1
	};

	// Spatial down-sampling modes. The values index kFactorWidthSpatial and
	// kFactorHeightSpatial.
	enum SpatialAction {
	  kNoChangeSpatial = 0,
	  kOneHalfSpatialUniform = 1,     // 3/4 x 3/4: 9/16 ~1/2 pixel reduction.
	  kOneQuarterSpatialUniform = 2,  // 1/2 x 1/2: 1/4 pixel reduction.
	  kNumModesSpatial                // Number of spatial modes; must stay last.
	};

	// Temporal (frame rate) down-sampling modes. The values index
	// kFactorTemporal.
	enum TemporalAction {
	  kNoChangeTemporal = 0,
	  kTwoThirdsTemporal = 1,  // 2/3 frame rate reduction
	  kOneHalfTemporal = 2,    // 1/2 frame rate reduction
	  kNumModesTemporal        // Number of temporal modes; must stay last.
	};

	struct ResolutionAction {
	ResolutionAction()
	: spatial(kNoChangeSpatial),
	temporal(kNoChangeTemporal) {
	}
	SpatialAction spatial;
	TemporalAction temporal;
	};

	// Down-sampling factors for spatial (width and height), and temporal.
	// Width factor for each SpatialAction, in enum order: no change (1),
	// 3/4 scaling (4/3) and 1/2 scaling (2).
	const float kFactorWidthSpatial[kNumModesSpatial] =
	{ 1.0f, 4.0f / 3.0f, 2.0f };

	// Height factor for each SpatialAction, in enum order; the values match
	// the width factors.
	const float kFactorHeightSpatial[kNumModesSpatial] =
	{ 1.0f, 4.0f / 3.0f, 2.0f };

	// Frame rate factor for each TemporalAction, in enum order: no change (1),
	// 2/3 frame rate (1.5) and 1/2 frame rate (2).
	const float kFactorTemporal[kNumModesTemporal] =
	{ 1.0f, 1.5f, 2.0f };

	// Encoder state, judged from rate mis-match and buffer behavior.
	enum EncoderState {
	  // Low rate mis-match, stable buffer levels.
	  kStableEncoding = 0,
	  // Significant over-shooting of target rate, buffer under-flow, etc.
	  kStressedEncoding = 1,
	  // Significant under-shooting of target rate.
	  kEasyEncoding = 2
	};

	// QmMethod class: main class for resolution and robustness settings.
	// Abstract base (Reset() is pure virtual) of VCMQmResolution and
	// VCMQmRobustness. It stores the content metrics pointer, the current and
	// native frame size / frame rate, and the motion/spatial content features.

	class VCMQmMethod {
	public:
	VCMQmMethod();
	virtual ~VCMQmMethod();

	// Reset values
	void ResetQM();
	// Must be implemented by every derived class.
	virtual void Reset() = 0;

	// Compute content class.
	uint8_t ComputeContentClass();

	// Update with the content metrics.
	// The pointer is to const data; NOTE(review): presumably it is stored in
	// |content_metrics_| without taking ownership -- confirm in the .cc file.
	void UpdateContent(const VideoContentMetrics* content_metrics);

	// Compute spatial texture magnitude and level.
	// Spatial texture is a spatial prediction error measure.
	void ComputeSpatial();

	// Compute motion magnitude and level for NFD metric.
	// NFD is normalized frame difference (normalized by spatial variance).
	void ComputeMotionNFD();

	// Get the imageType (CIF, VGA, HD, etc) for the system width/height.
	ImageType GetImageType(uint16_t width, uint16_t height);

	// Return the closest image type.
	ImageType FindClosestImageType(uint16_t width, uint16_t height);

	// Get the frame rate level.
	FrameRateLevelClass FrameRateLevel(float frame_rate);

	protected:
	// Content Data.
	const VideoContentMetrics* content_metrics_;

	// Encoder frame sizes and native frame sizes.
	uint16_t width_;
	uint16_t height_;
	float user_frame_rate_;
	uint16_t native_width_;
	uint16_t native_height_;
	float native_frame_rate_;
	float aspect_ratio_;
	// Image type and frame rate level, for the current encoder resolution.
	ImageType image_type_;
	FrameRateLevelClass framerate_level_;
	// Content class data.
	VCMContFeature motion_;
	VCMContFeature spatial_;
	uint8_t content_class_;
	// NOTE(review): presumably true once the object has been initialized --
	// confirm in the .cc file.
	bool init_;
	};

	// Resolution settings class.
	// Accumulates rate, frame rate and packet loss updates and selects a
	// spatial and/or temporal resolution action, returned to the caller as a
	// VCMResolutionScale through SelectResolution().

	class VCMQmResolution : public VCMQmMethod {
	public:
	VCMQmResolution();
	virtual ~VCMQmResolution();

	// Reset all quantities.
	virtual void Reset();

	// Reset rate quantities and counters after every SelectResolution() call.
	void ResetRates();

	// Reset down-sampling state.
	void ResetDownSamplingState();

	// Get the encoder state.
	EncoderState GetEncoderState();

	// Initialize after SetEncodingData in media_opt.
	// NOTE(review): the meaning of the int return value (presumably a status
	// code) and the unit of |bitrate| are not visible here -- confirm in the
	// .cc file.
	int Initialize(float bitrate,
	float user_framerate,
	uint16_t width,
	uint16_t height,
	int num_layers);

	// Update the encoder frame size.
	void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height);

	// Update with the size of the latest encoded frame, after every encoded
	// frame.
	void UpdateEncodedSize(size_t encoded_size);

	// Update with new target bitrate, actual encoder sent rate, frame_rate,
	// loss rate: every ~1 sec from SetTargetRates in media_opt.
	void UpdateRates(float target_bitrate,
	float encoder_sent_rate,
	float incoming_framerate,
	uint8_t packet_loss);

	// Extract ST (spatio-temporal) resolution action.
	// Inputs: qm: Reference to the quality modes pointer.
	// Output: the spatial and/or temporal scale change.
	// NOTE(review): the meaning of the int return value is not visible here
	// (presumably a status code) -- confirm in the .cc file.
	int SelectResolution(VCMResolutionScale** qm);

	private:
	// Set the default resolution action.
	void SetDefaultAction();

	// Compute rates for the selection of down-sampling action.
	void ComputeRatesForSelection();

	// Compute the encoder state.
	void ComputeEncoderState();

	// Return true if the action is to go back up in resolution.
	bool GoingUpResolution();

	// Return true if the action is to go down in resolution.
	bool GoingDownResolution();

	// Check the condition for going up in resolution by the scale factors:
	// |fac_width|, |fac_height|, |fac_temp|.
	// |scale_fac| is a scale factor for the transition rate.
	bool ConditionForGoingUp(float fac_width,
	float fac_height,
	float fac_temp,
	float scale_fac);

	// Get the bitrate threshold for the resolution action.
	// The case |fac_width| = |fac_height| = |fac_temp| = 1 is for the
	// down-sampling action.
	// |scale_fac| is a scale factor for the transition rate.
	float GetTransitionRate(float fac_width,
	float fac_height,
	float fac_temp,
	float scale_fac);

	// Update the down-sampling state.
	void UpdateDownsamplingState(UpDownAction up_down);

	// Update the codec frame size and frame rate.
	void UpdateCodecResolution();

	// Return a state based on average target rate relative transition rate.
	uint8_t RateClass(float transition_rate);

	// Adjust the action selected from the table.
	void AdjustAction();

	// Convert 2 stages of 3/4 (=9/16) spatial decimation to 1/2.
	void ConvertSpatialFractionalToWhole();

	// Returns true if the new frame sizes, under the selected spatial action,
	// are of even size.
	bool EvenFrameSize();

	// Insert latest down-sampling action into the history list.
	void InsertLatestDownAction();

	// Remove the last (first element) down-sampling action from the list.
	void RemoveLastDownAction();

	// Check constraints on the amount of down-sampling allowed.
	void ConstrainAmountOfDownSampling();

	// For going up in resolution: pick spatial or temporal action,
	// if both actions were separately selected.
	void PickSpatialOrTemporal();

	// Select the directional (1x2 or 2x1) spatial down-sampling action.
	void SelectSpatialDirectionMode(float transition_rate);

	// Number of entries in |down_action_history_|.
	enum { kDownActionHistorySize = 10};

	// NOTE(review): presumably the scale object handed out through
	// SelectResolution(); ownership is not visible here -- confirm in the .cc.
	VCMResolutionScale* qm_;
	// Encoder rate control parameters.
	float target_bitrate_;
	float incoming_framerate_;
	float per_frame_bandwidth_;
	float buffer_level_;

	// Data accumulated every ~1sec from MediaOpt.
	float sum_target_rate_;
	float sum_incoming_framerate_;
	float sum_rate_MM_;
	float sum_rate_MM_sgn_;
	float sum_packet_loss_;
	// Counters.
	uint32_t frame_cnt_;
	uint32_t frame_cnt_delta_;
	uint32_t update_rate_cnt_;
	uint32_t low_buffer_cnt_;

	// Resolution state parameters.
	float state_dec_factor_spatial_;
	float state_dec_factor_temporal_;

	// Quantities used for selection.
	float avg_target_rate_;
	float avg_incoming_framerate_;
	float avg_ratio_buffer_low_;
	float avg_rate_mismatch_;
	float avg_rate_mismatch_sgn_;
	float avg_packet_loss_;
	EncoderState encoder_state_;
	ResolutionAction action_;
	// Short history of the down-sampling actions from the Initialize() state.
	// This is needed for going up in resolution. Since the total amount of
	// down-sampling actions are constrained, the length of the list need not be
	// large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample.
	ResolutionAction down_action_history_[kDownActionHistorySize];
	// NOTE(review): presumably the |num_layers| value given to Initialize() --
	// confirm in the .cc file.
	int num_layers_;
	};

	// Robustness settings class.
	// Derives from VCMQmMethod and keeps the network parameters seen on the
	// previous update (total rate, RTT, packet loss, code rate delta).

	class VCMQmRobustness : public VCMQmMethod {
	 public:
	  VCMQmRobustness();
	  // Spelled |virtual| to match VCMQmResolution and Reset() below; the
	  // destructor is virtual either way because the base destructor is.
	  virtual ~VCMQmRobustness();

	  // Implements the pure virtual VCMQmMethod::Reset().
	  virtual void Reset();

	  // Adjust FEC rate based on content: every ~1 sec from SetTargetRates.
	  // Returns an adjustment factor.
	  float AdjustFecFactor(uint8_t code_rate_delta,
	                        float total_rate,
	                        float framerate,
	                        int64_t rtt_time,
	                        uint8_t packet_loss);

	  // Set the UEP protection on/off.
	  // NOTE(review): the meaning of the bool |frame_type| and of the returned
	  // bool is not visible in this header -- confirm in the .cc file.
	  bool SetUepProtection(uint8_t code_rate_delta,
	                        float total_rate,
	                        uint8_t packet_loss,
	                        bool frame_type);

	 private:
	  // Previous state of network parameters.
	  float prev_total_rate_;
	  int64_t prev_rtt_time_;
	  uint8_t prev_packet_loss_;
	  uint8_t prev_code_rate_delta_;
	};
	} // namespace webrtc
	#endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_