In ksvc controller reuse unused frame configuration
vp9 encoder wrapper rely on that behaviour
to generate vp9-specific temporal references
Bug: webrtc:11999
Change-Id: I35536af4eca76450e2f72777e06ad3af872a5800
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/211340
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33445}
diff --git a/modules/video_coding/svc/scalability_structure_key_svc.cc b/modules/video_coding/svc/scalability_structure_key_svc.cc
index c430aa4..1cee80e 100644
--- a/modules/video_coding/svc/scalability_structure_key_svc.cc
+++ b/modules/video_coding/svc/scalability_structure_key_svc.cc
@@ -22,28 +22,6 @@
#include "rtc_base/logging.h"
namespace webrtc {
-namespace {
-// Values to use as LayerFrameConfig::Id
-enum : int { kKey, kDelta };
-
-DecodeTargetIndication
-Dti(int sid, int tid, const ScalableVideoController::LayerFrameConfig& config) {
- if (config.IsKeyframe() || config.Id() == kKey) {
- RTC_DCHECK_EQ(config.TemporalId(), 0);
- return sid < config.SpatialId() ? DecodeTargetIndication::kNotPresent
- : DecodeTargetIndication::kSwitch;
- }
-
- if (sid != config.SpatialId() || tid < config.TemporalId()) {
- return DecodeTargetIndication::kNotPresent;
- }
- if (tid == config.TemporalId() && tid > 0) {
- return DecodeTargetIndication::kDiscardable;
- }
- return DecodeTargetIndication::kSwitch;
-}
-
-} // namespace
constexpr int ScalabilityStructureKeySvc::kMaxNumSpatialLayers;
constexpr int ScalabilityStructureKeySvc::kMaxNumTemporalLayers;
@@ -88,6 +66,25 @@
return false;
}
+DecodeTargetIndication ScalabilityStructureKeySvc::Dti(
+ int sid,
+ int tid,
+ const LayerFrameConfig& config) {
+ if (config.IsKeyframe() || config.Id() == kKey) {
+ RTC_DCHECK_EQ(config.TemporalId(), 0);
+ return sid < config.SpatialId() ? DecodeTargetIndication::kNotPresent
+ : DecodeTargetIndication::kSwitch;
+ }
+
+ if (sid != config.SpatialId() || tid < config.TemporalId()) {
+ return DecodeTargetIndication::kNotPresent;
+ }
+ if (tid == config.TemporalId() && tid > 0) {
+ return DecodeTargetIndication::kDiscardable;
+ }
+ return DecodeTargetIndication::kSwitch;
+}
+
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureKeySvc::KeyframeConfig() {
std::vector<LayerFrameConfig> configs;
@@ -129,7 +126,7 @@
continue;
}
configs.emplace_back();
- configs.back().Id(kDelta).S(sid).T(0).ReferenceAndUpdate(
+ configs.back().Id(kDeltaT0).S(sid).T(0).ReferenceAndUpdate(
BufferIndex(sid, /*tid=*/0));
}
return configs;
@@ -145,7 +142,7 @@
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
- config.Id(kDelta).S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0));
+ config.Id(kDeltaT1).S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0));
if (num_temporal_layers_ > 2) {
config.Update(BufferIndex(sid, /*tid=*/1));
}
@@ -154,7 +151,7 @@
}
std::vector<ScalableVideoController::LayerFrameConfig>
-ScalabilityStructureKeySvc::T2Config() {
+ScalabilityStructureKeySvc::T2Config(FramePattern pattern) {
std::vector<LayerFrameConfig> configs;
configs.reserve(num_spatial_layers_);
for (int sid = 0; sid < num_spatial_layers_; ++sid) {
@@ -163,7 +160,7 @@
}
configs.emplace_back();
ScalableVideoController::LayerFrameConfig& config = configs.back();
- config.Id(kDelta).S(sid).T(2);
+ config.Id(pattern).S(sid).T(2);
if (can_reference_t1_frame_for_spatial_id_[sid]) {
config.Reference(BufferIndex(sid, /*tid=*/1));
} else {
@@ -173,6 +170,37 @@
return configs;
}
+ScalabilityStructureKeySvc::FramePattern
+ScalabilityStructureKeySvc::NextPattern(FramePattern last_pattern) const {
+ switch (last_pattern) {
+ case kNone:
+ return kKey;
+ case kDeltaT2B:
+ return kDeltaT0;
+ case kDeltaT2A:
+ if (TemporalLayerIsActive(1)) {
+ return kDeltaT1;
+ }
+ return kDeltaT0;
+ case kDeltaT1:
+ if (TemporalLayerIsActive(2)) {
+ return kDeltaT2B;
+ }
+ return kDeltaT0;
+ case kDeltaT0:
+ case kKey:
+ if (TemporalLayerIsActive(2)) {
+ return kDeltaT2A;
+ }
+ if (TemporalLayerIsActive(1)) {
+ return kDeltaT1;
+ }
+ return kDeltaT0;
+ }
+ RTC_NOTREACHED();
+ return kNone;
+}
+
std::vector<ScalableVideoController::LayerFrameConfig>
ScalabilityStructureKeySvc::NextFrameConfig(bool restart) {
if (active_decode_targets_.none()) {
@@ -184,37 +212,19 @@
last_pattern_ = kNone;
}
- switch (last_pattern_) {
- case kNone:
- last_pattern_ = kDeltaT0;
+ FramePattern current_pattern = NextPattern(last_pattern_);
+ switch (current_pattern) {
+ case kKey:
return KeyframeConfig();
- case kDeltaT2B:
- last_pattern_ = kDeltaT0;
- return T0Config();
- case kDeltaT2A:
- if (TemporalLayerIsActive(1)) {
- last_pattern_ = kDeltaT1;
- return T1Config();
- }
- last_pattern_ = kDeltaT0;
+ case kDeltaT0:
return T0Config();
case kDeltaT1:
- if (TemporalLayerIsActive(2)) {
- last_pattern_ = kDeltaT2B;
- return T2Config();
- }
- last_pattern_ = kDeltaT0;
- return T0Config();
- case kDeltaT0:
- if (TemporalLayerIsActive(2)) {
- last_pattern_ = kDeltaT2A;
- return T2Config();
- } else if (TemporalLayerIsActive(1)) {
- last_pattern_ = kDeltaT1;
- return T1Config();
- }
- last_pattern_ = kDeltaT0;
- return T0Config();
+ return T1Config();
+ case kDeltaT2A:
+ case kDeltaT2B:
+ return T2Config(current_pattern);
+ case kNone:
+ break;
}
RTC_NOTREACHED();
return {};
@@ -222,6 +232,11 @@
GenericFrameInfo ScalabilityStructureKeySvc::OnEncodeDone(
const LayerFrameConfig& config) {
+ // When encoder drops all frames for a temporal unit, it is better to reuse
+ // old temporal pattern rather than switch to next one, thus switch to next
+ // pattern defered here from the `NextFrameConfig`.
+ // In particular creating VP9 references rely on this behavior.
+ last_pattern_ = static_cast<FramePattern>(config.Id());
if (config.TemporalId() == 1) {
can_reference_t1_frame_for_spatial_id_.set(config.SpatialId());
}
diff --git a/modules/video_coding/svc/scalability_structure_key_svc.h b/modules/video_coding/svc/scalability_structure_key_svc.h
index 110c2a8..b66f6f8 100644
--- a/modules/video_coding/svc/scalability_structure_key_svc.h
+++ b/modules/video_coding/svc/scalability_structure_key_svc.h
@@ -32,8 +32,9 @@
void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override;
private:
- enum FramePattern {
+ enum FramePattern : int {
kNone,
+ kKey,
kDeltaT0,
kDeltaT2A,
kDeltaT1,
@@ -53,10 +54,16 @@
active_decode_targets_.set(sid * num_temporal_layers_ + tid, value);
}
bool TemporalLayerIsActive(int tid) const;
+ static DecodeTargetIndication Dti(int sid,
+ int tid,
+ const LayerFrameConfig& config);
+
std::vector<LayerFrameConfig> KeyframeConfig();
std::vector<LayerFrameConfig> T0Config();
std::vector<LayerFrameConfig> T1Config();
- std::vector<LayerFrameConfig> T2Config();
+ std::vector<LayerFrameConfig> T2Config(FramePattern pattern);
+
+ FramePattern NextPattern(FramePattern last_pattern) const;
const int num_spatial_layers_;
const int num_temporal_layers_;
diff --git a/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc b/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
index 34ec747..5f923bb 100644
--- a/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
+++ b/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
@@ -62,14 +62,108 @@
// Simulate T1 frame dropped by the encoder,
// i.e. retrieve config, but skip calling OnEncodeDone.
structure.NextFrameConfig(/*restart=*/false);
- // one more temporal units (T2)
+ // one more temporal unit.
wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
- ASSERT_THAT(frames, SizeIs(9));
+ EXPECT_THAT(frames, SizeIs(9));
+ EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
+TEST(ScalabilityStructureL3T3KeyTest,
+ SkippingFrameReusePreviousFrameConfiguration) {
+ std::vector<GenericFrameInfo> frames;
+ ScalabilityStructureL3T3Key structure;
+ ScalabilityStructureWrapper wrapper(structure);
+
+ // 1st 2 temporal units (T0 and T2)
+ wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+ ASSERT_THAT(frames, SizeIs(6));
+ ASSERT_EQ(frames[0].temporal_id, 0);
+ ASSERT_EQ(frames[3].temporal_id, 2);
+
+ // Simulate a frame dropped by the encoder,
+ // i.e. retrieve config, but skip calling OnEncodeDone.
+ structure.NextFrameConfig(/*restart=*/false);
+ // two more temporal unit, expect temporal pattern continues
+ wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+ ASSERT_THAT(frames, SizeIs(12));
+ // Expect temporal pattern continues as if there were no dropped frames.
+ EXPECT_EQ(frames[6].temporal_id, 1);
+ EXPECT_EQ(frames[9].temporal_id, 2);
+}
+
+TEST(ScalabilityStructureL3T3KeyTest, SkippingKeyFrameTriggersNewKeyFrame) {
+ std::vector<GenericFrameInfo> frames;
+ ScalabilityStructureL3T3Key structure;
+ ScalabilityStructureWrapper wrapper(structure);
+
+ // Ask for a key frame config, but do not return any frames
+ structure.NextFrameConfig(/*restart=*/false);
+
+ // Ask for more frames, expect they start with a key frame.
+ wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+ ASSERT_THAT(frames, SizeIs(6));
+ ASSERT_EQ(frames[0].temporal_id, 0);
+ ASSERT_EQ(frames[3].temporal_id, 2);
+ EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
+TEST(ScalabilityStructureL3T3KeyTest,
+ SkippingT2FrameAndDisablingT2LayerProduceT1AsNextFrame) {
+ std::vector<GenericFrameInfo> frames;
+ ScalabilityStructureL3T3Key structure;
+ ScalabilityStructureWrapper wrapper(structure);
+
+ wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+ // Ask for next (T2) frame config, but do not return any frames
+ auto config = structure.NextFrameConfig(/*restart=*/false);
+ ASSERT_THAT(config, Not(IsEmpty()));
+ ASSERT_EQ(config.front().TemporalId(), 2);
+
+ // Disable T2 layer,
+ structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/2, /*s2=*/2));
+ // Expect instead of reusing unused config, T1 config is generated.
+ config = structure.NextFrameConfig(/*restart=*/false);
+ ASSERT_THAT(config, Not(IsEmpty()));
+ EXPECT_EQ(config.front().TemporalId(), 1);
+}
+
+TEST(ScalabilityStructureL3T3KeyTest, EnableT2LayerWhileProducingT1Frame) {
+ std::vector<GenericFrameInfo> frames;
+ ScalabilityStructureL3T3Key structure;
+ ScalabilityStructureWrapper wrapper(structure);
+
+ // Disable T2 layer,
+ structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/2, /*s2=*/2));
+
+ // Generate the key frame.
+ wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+ ASSERT_THAT(frames, SizeIs(3));
EXPECT_EQ(frames[0].temporal_id, 0);
- EXPECT_EQ(frames[3].temporal_id, 2);
- // T1 frames were dropped by the encoder.
+
+ // Ask for next (T1) frame config, but do not return any frames yet.
+ auto config = structure.NextFrameConfig(/*restart=*/false);
+ ASSERT_THAT(config, Not(IsEmpty()));
+ ASSERT_EQ(config.front().TemporalId(), 1);
+
+ // Reenable T2 layer.
+ structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3, /*s2=*/3));
+
+ // Finish encoding previously requested config.
+ for (auto layer_config : config) {
+ GenericFrameInfo info = structure.OnEncodeDone(layer_config);
+ EXPECT_EQ(info.temporal_id, 1);
+ frames.push_back(info);
+ }
+ ASSERT_THAT(frames, SizeIs(6));
+
+ // Generate more frames, expect T2 pattern resumes.
+ wrapper.GenerateFrames(/*num_temporal_units=*/4, frames);
+ ASSERT_THAT(frames, SizeIs(18));
EXPECT_EQ(frames[6].temporal_id, 2);
+ EXPECT_EQ(frames[9].temporal_id, 0);
+ EXPECT_EQ(frames[12].temporal_id, 2);
+ EXPECT_EQ(frames[15].temporal_id, 1);
EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
}