Make video quality analyzer compatible with real SFU in the network

Bug: webrtc:11557
Change-Id: I8ab1fb0896e267f30856a45df6099bd9aae9bc03
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174801
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Commit-Queue: Artem Titov <titovartem@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31216}
diff --git a/api/test/peerconnection_quality_test_fixture.h b/api/test/peerconnection_quality_test_fixture.h
index 99858fc..d55647a 100644
--- a/api/test/peerconnection_quality_test_fixture.h
+++ b/api/test/peerconnection_quality_test_fixture.h
@@ -131,6 +131,10 @@
   //    available layer and won't restore lower layers, so analyzer won't
   //    receive required data which will cause wrong results or test failures.
   struct VideoSimulcastConfig {
+    explicit VideoSimulcastConfig(int simulcast_streams_count)
+        : simulcast_streams_count(simulcast_streams_count) {
+      RTC_CHECK_GT(simulcast_streams_count, 1);
+    }
     VideoSimulcastConfig(int simulcast_streams_count, int target_spatial_index)
         : simulcast_streams_count(simulcast_streams_count),
           target_spatial_index(target_spatial_index) {
@@ -152,7 +156,10 @@
     //    in such case |target_spatial_index| will specify the top interesting
     //    spatial layer and all layers below, including target one will be
     //    processed. All layers above target one will be dropped.
-    int target_spatial_index;
+    // If not specified than whatever stream will be received will be analyzed.
+    // It requires Selective Forwarding Unit (SFU) to be configured in the
+    // network.
+    absl::optional<int> target_spatial_index;
   };
 
   // Contains properties of single video stream.
diff --git a/test/pc/e2e/BUILD.gn b/test/pc/e2e/BUILD.gn
index 7022804..182bbfd 100644
--- a/test/pc/e2e/BUILD.gn
+++ b/test/pc/e2e/BUILD.gn
@@ -242,6 +242,7 @@
       ":echo_emulation",
       ":peer_configurer",
       ":peer_connection_quality_test_params",
+      ":quality_analyzing_video_encoder",
       ":test_peer",
       ":video_quality_analyzer_injection_helper",
       "../..:copy_to_file_audio_capturer",
diff --git a/test/pc/e2e/analyzer/video/default_video_quality_analyzer.cc b/test/pc/e2e/analyzer/video/default_video_quality_analyzer.cc
index 239d7e1..786509d 100644
--- a/test/pc/e2e/analyzer/video/default_video_quality_analyzer.cc
+++ b/test/pc/e2e/analyzer/video/default_video_quality_analyzer.cc
@@ -198,7 +198,7 @@
   }
   it->second.encoded_time = Now();
   it->second.encoded_image_size = encoded_image.size();
-  it->second.target_encode_bitrate = stats.target_encode_bitrate;
+  it->second.target_encode_bitrate += stats.target_encode_bitrate;
 }
 
 void DefaultVideoQualityAnalyzer::OnFrameDropped(
diff --git a/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.cc b/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.cc
index 6ab2938..2e7b8f4 100644
--- a/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.cc
+++ b/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.cc
@@ -267,16 +267,14 @@
 
     discard = ShouldDiscard(frame_id, encoded_image);
     if (!discard) {
-      std::string stream_label = analyzer_->GetStreamLabel(frame_id);
-      absl::optional<int> required_spatial_index =
-          stream_required_spatial_index_[stream_label];
       target_encode_bitrate = bitrate_allocation_.GetSpatialLayerSum(
-          required_spatial_index.value_or(0));
+          encoded_image.SpatialIndex().value_or(0));
     }
   }
 
   if (!discard) {
-    // Analyzer should see only encoded images, that weren't discarded.
+    // Analyzer should see only encoded images, that weren't discarded. But all
+    // not discarded layers have to be passed.
     VideoQualityAnalyzerInterface::EncoderStats stats;
     stats.target_encode_bitrate = target_encode_bitrate;
     analyzer_->OnFrameEncoded(frame_id, encoded_image, stats);
@@ -312,6 +310,9 @@
   absl::optional<int> required_spatial_index =
       stream_required_spatial_index_[stream_label];
   if (required_spatial_index) {
+    if (*required_spatial_index == kAnalyzeAnySpatialStream) {
+      return false;
+    }
     absl::optional<int> cur_spatial_index = encoded_image.SpatialIndex();
     if (!cur_spatial_index) {
       cur_spatial_index = 0;
diff --git a/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h b/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h
index 03231be..3307dc7 100644
--- a/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h
+++ b/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h
@@ -29,6 +29,11 @@
 namespace webrtc {
 namespace webrtc_pc_e2e {
 
+// Tells QualityAnalyzingVideoEncoder that it shouldn't mark any spatial stream
+// as to be discarded. In such case the top stream will be passed to
+// VideoQualityAnalyzerInterface as a reference.
+constexpr int kAnalyzeAnySpatialStream = -1;
+
 // QualityAnalyzingVideoEncoder is used to wrap origin video encoder and inject
 // VideoQualityAnalyzerInterface before and after encoder.
 //
@@ -136,6 +141,12 @@
   const int id_;
   std::unique_ptr<VideoEncoder> delegate_;
   const double bitrate_multiplier_;
+  // Contains mapping from stream label to optional spatial index.
+  // If we have stream label "Foo" and mapping contains
+  // 1. |absl::nullopt| means "Foo" isn't simulcast/SVC stream
+  // 2. |kAnalyzeAnySpatialStream| means all simulcast/SVC streams are required
+  // 3. Concrete value means that particular simulcast/SVC stream have to be
+  //    analyzed.
   std::map<std::string, absl::optional<int>> stream_required_spatial_index_;
   EncodedImageDataInjector* const injector_;
   VideoQualityAnalyzerInterface* const analyzer_;
diff --git a/test/pc/e2e/peer_configurer.cc b/test/pc/e2e/peer_configurer.cc
index 08fcf17..eabe1ab 100644
--- a/test/pc/e2e/peer_configurer.cc
+++ b/test/pc/e2e/peer_configurer.cc
@@ -135,6 +135,11 @@
 
       if (video_config.simulcast_config) {
         has_simulcast = true;
+        if (video_config.simulcast_config->target_spatial_index) {
+          RTC_CHECK_GE(*video_config.simulcast_config->target_spatial_index, 0);
+          RTC_CHECK_LT(*video_config.simulcast_config->target_spatial_index,
+                       video_config.simulcast_config->simulcast_streams_count);
+        }
         RTC_CHECK(!video_config.max_encode_bitrate_bps)
             << "Setting max encode bitrate is not implemented for simulcast.";
         RTC_CHECK(!video_config.min_encode_bitrate_bps)
diff --git a/test/pc/e2e/test_peer_factory.cc b/test/pc/e2e/test_peer_factory.cc
index 0d08f8e..009c446 100644
--- a/test/pc/e2e/test_peer_factory.cc
+++ b/test/pc/e2e/test_peer_factory.cc
@@ -19,6 +19,7 @@
 #include "media/engine/webrtc_media_engine_defaults.h"
 #include "modules/audio_processing/aec_dump/aec_dump_factory.h"
 #include "p2p/client/basic_port_allocator.h"
+#include "test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h"
 #include "test/pc/e2e/echo/echo_emulation.h"
 #include "test/pc/e2e/peer_configurer.h"
 #include "test/testsupport/copy_to_file_audio_capturer.h"
@@ -60,6 +61,12 @@
   }
 }
 
+// Returns mapping from stream label to optional spatial index.
+// If we have stream label "Foo" and mapping contains
+// 1. |absl::nullopt| means "Foo" isn't simulcast/SVC stream
+// 2. |kAnalyzeAnySpatialStream| means all simulcast/SVC streams are required
+// 3. Concrete value means that particular simulcast/SVC stream have to be
+//    analyzed.
 std::map<std::string, absl::optional<int>>
 CalculateRequiredSpatialIndexPerStream(
     const std::vector<VideoConfig>& video_configs) {
@@ -70,6 +77,9 @@
     absl::optional<int> spatial_index;
     if (video_config.simulcast_config) {
       spatial_index = video_config.simulcast_config->target_spatial_index;
+      if (!spatial_index) {
+        spatial_index = kAnalyzeAnySpatialStream;
+      }
     }
     bool res = out.insert({*video_config.stream_label, spatial_index}).second;
     RTC_DCHECK(res) << "Duplicate video_config.stream_label="