Reland "Add spatial index to EncodedImage."

This is a reland of da0898dfae3b0a013ca8ad3828e9adfdc749748d

Original change's description:
> Add spatial index to EncodedImage.
>
> Replaces the VP8 simulcast index and VP9 spatial index formely part of
> CodecSpecificInfo.
>
> Bug: webrtc:9378
> Change-Id: I80eafd63fbdee0a25864338196a690628b4bd3d2
> Reviewed-on: https://webrtc-review.googlesource.com/83161
> Commit-Queue: Niels Moller <nisse@webrtc.org>
> Reviewed-by: Erik Språng <sprang@webrtc.org>
> Reviewed-by: Sebastian Jansson <srte@webrtc.org>
> Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
> Reviewed-by: Philip Eliasson <philipel@webrtc.org>
> Reviewed-by: Rasmus Brandt <brandtr@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#24485}

Tbr: magjed@webrtc.org
Bug: webrtc:9378
Change-Id: Iff20b656581ef63317e073833d1a326f7118fdfd
Reviewed-on: https://webrtc-review.googlesource.com/96780
Commit-Queue: Niels Moller <nisse@webrtc.org>
Reviewed-by: Sebastian Jansson <srte@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24507}
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index 608688b..0dbe183 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -22,6 +22,7 @@
 
 namespace {
 void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
+                                   absl::optional<int> spatial_index,
                                    RTPVideoHeader* rtp) {
   rtp->codec = info.codecType;
   switch (info.codecType) {
@@ -31,7 +32,7 @@
       rtp->vp8().temporalIdx = info.codecSpecific.VP8.temporalIdx;
       rtp->vp8().layerSync = info.codecSpecific.VP8.layerSync;
       rtp->vp8().keyIdx = info.codecSpecific.VP8.keyIdx;
-      rtp->simulcastIdx = info.codecSpecific.VP8.simulcastIdx;
+      rtp->simulcastIdx = spatial_index.value_or(0);
       return;
     }
     case kVideoCodecVP9: {
@@ -44,13 +45,16 @@
       vp9_header.non_ref_for_inter_layer_pred =
           info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
       vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
-      vp9_header.spatial_idx = info.codecSpecific.VP9.spatial_idx;
       vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
       vp9_header.inter_layer_predicted =
           info.codecSpecific.VP9.inter_layer_predicted;
       vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
       vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
-
+      if (vp9_header.num_spatial_layers > 1) {
+        vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
+      } else {
+        vp9_header.spatial_idx = kNoSpatialIdx;
+      }
       if (info.codecSpecific.VP9.ss_data_available) {
         vp9_header.spatial_layer_resolution_present =
             info.codecSpecific.VP9.spatial_layer_resolution_present;
@@ -75,13 +79,13 @@
       auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
       h264_header.packetization_mode =
           info.codecSpecific.H264.packetization_mode;
-      rtp->simulcastIdx = info.codecSpecific.H264.simulcast_idx;
+      rtp->simulcastIdx = spatial_index.value_or(0);
       return;
     }
     case kVideoCodecMultiplex:
     case kVideoCodecGeneric:
       rtp->codec = kVideoCodecGeneric;
-      rtp->simulcastIdx = info.codecSpecific.generic.simulcast_idx;
+      rtp->simulcastIdx = spatial_index.value_or(0);
       return;
     default:
       return;
@@ -131,7 +135,8 @@
     int64_t shared_frame_id) {
   RTPVideoHeader rtp_video_header;
   if (codec_specific_info) {
-    PopulateRtpWithCodecSpecifics(*codec_specific_info, &rtp_video_header);
+    PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
+                                  &rtp_video_header);
   }
   rtp_video_header.rotation = image.rotation_;
   rtp_video_header.content_type = image.content_type_;
diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc
index 4d0b017..b0c0027 100644
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@@ -38,11 +38,11 @@
   EncodedImage encoded_image;
   encoded_image.rotation_ = kVideoRotation_90;
   encoded_image.content_type_ = VideoContentType::SCREENSHARE;
+  encoded_image.SetSpatialIndex(1);
 
   CodecSpecificInfo codec_info;
   memset(&codec_info, 0, sizeof(CodecSpecificInfo));
   codec_info.codecType = kVideoCodecVP8;
-  codec_info.codecSpecific.VP8.simulcastIdx = 1;
   codec_info.codecSpecific.VP8.temporalIdx = 0;
   codec_info.codecSpecific.VP8.keyIdx = kNoKeyIdx;
   codec_info.codecSpecific.VP8.layerSync = false;
@@ -52,7 +52,6 @@
       params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   codec_info.codecType = kVideoCodecVP8;
-  codec_info.codecSpecific.VP8.simulcastIdx = 1;
   codec_info.codecSpecific.VP8.temporalIdx = 1;
   codec_info.codecSpecific.VP8.layerSync = true;
 
@@ -79,13 +78,12 @@
   EncodedImage encoded_image;
   encoded_image.rotation_ = kVideoRotation_90;
   encoded_image.content_type_ = VideoContentType::SCREENSHARE;
-
+  encoded_image.SetSpatialIndex(0);
   CodecSpecificInfo codec_info;
   memset(&codec_info, 0, sizeof(CodecSpecificInfo));
   codec_info.codecType = kVideoCodecVP9;
   codec_info.codecSpecific.VP9.num_spatial_layers = 3;
   codec_info.codecSpecific.VP9.first_frame_in_picture = true;
-  codec_info.codecSpecific.VP9.spatial_idx = 0;
   codec_info.codecSpecific.VP9.temporal_idx = 2;
   codec_info.codecSpecific.VP9.end_of_picture = false;
 
@@ -100,7 +98,7 @@
   EXPECT_EQ(kPictureId + 1, vp9_header.picture_id);
   EXPECT_EQ(kTl0PicIdx, vp9_header.tl0_pic_idx);
   EXPECT_EQ(vp9_header.temporal_idx, codec_info.codecSpecific.VP9.temporal_idx);
-  EXPECT_EQ(vp9_header.spatial_idx, codec_info.codecSpecific.VP9.spatial_idx);
+  EXPECT_EQ(vp9_header.spatial_idx, encoded_image.SpatialIndex());
   EXPECT_EQ(vp9_header.num_spatial_layers,
             codec_info.codecSpecific.VP9.num_spatial_layers);
   EXPECT_EQ(vp9_header.end_of_picture,
@@ -108,9 +106,9 @@
 
   // Next spatial layer.
   codec_info.codecSpecific.VP9.first_frame_in_picture = false;
-  codec_info.codecSpecific.VP9.spatial_idx += 1;
   codec_info.codecSpecific.VP9.end_of_picture = true;
 
+  encoded_image.SetSpatialIndex(1);
   header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   EXPECT_EQ(kVideoRotation_90, header.rotation);
@@ -119,7 +117,7 @@
   EXPECT_EQ(kPictureId + 1, vp9_header.picture_id);
   EXPECT_EQ(kTl0PicIdx, vp9_header.tl0_pic_idx);
   EXPECT_EQ(vp9_header.temporal_idx, codec_info.codecSpecific.VP9.temporal_idx);
-  EXPECT_EQ(vp9_header.spatial_idx, codec_info.codecSpecific.VP9.spatial_idx);
+  EXPECT_EQ(vp9_header.spatial_idx, encoded_image.SpatialIndex());
   EXPECT_EQ(vp9_header.num_spatial_layers,
             codec_info.codecSpecific.VP9.num_spatial_layers);
   EXPECT_EQ(vp9_header.end_of_picture,
@@ -154,7 +152,6 @@
   CodecSpecificInfo codec_info;
   memset(&codec_info, 0, sizeof(CodecSpecificInfo));
   codec_info.codecType = kVideoCodecVP8;
-  codec_info.codecSpecific.VP8.simulcastIdx = 0;
 
   RtpPayloadParams params(kSsrc1, &state);
   RTPVideoHeader header =
diff --git a/call/rtp_video_sender.cc b/call/rtp_video_sender.cc
index 1fbd102..a2df830 100644
--- a/call/rtp_video_sender.cc
+++ b/call/rtp_video_sender.cc
@@ -95,21 +95,6 @@
   return modules;
 }
 
-absl::optional<size_t> GetSimulcastIdx(const CodecSpecificInfo* info) {
-  if (!info)
-    return absl::nullopt;
-  switch (info->codecType) {
-    case kVideoCodecVP8:
-      return absl::optional<size_t>(info->codecSpecific.VP8.simulcastIdx);
-    case kVideoCodecH264:
-      return absl::optional<size_t>(info->codecSpecific.H264.simulcast_idx);
-    case kVideoCodecMultiplex:
-    case kVideoCodecGeneric:
-      return absl::optional<size_t>(info->codecSpecific.generic.simulcast_idx);
-    default:
-      return absl::nullopt;
-  }
-}
 bool PayloadTypeSupportsSkippingFecPackets(const std::string& payload_name) {
   const VideoCodecType codecType = PayloadStringToCodecType(payload_name);
   if (codecType == kVideoCodecVP8 || codecType == kVideoCodecVP9) {
@@ -320,7 +305,14 @@
     return Result(Result::ERROR_SEND_FAILED);
 
   shared_frame_id_++;
-  size_t stream_index = GetSimulcastIdx(codec_specific_info).value_or(0);
+  size_t stream_index = 0;
+  if (codec_specific_info &&
+      (codec_specific_info->codecType == kVideoCodecVP8 ||
+       codec_specific_info->codecType == kVideoCodecH264 ||
+       codec_specific_info->codecType == kVideoCodecGeneric)) {
+    // Map spatial index to simulcast.
+    stream_index = encoded_image.SpatialIndex().value_or(0);
+  }
   RTC_DCHECK_LT(stream_index, rtp_modules_.size());
   RTPVideoHeader rtp_video_header = params_[stream_index].GetRtpVideoHeader(
       encoded_image, codec_specific_info, shared_frame_id_);
diff --git a/call/rtp_video_sender_unittest.cc b/call/rtp_video_sender_unittest.cc
index 9e3290d..bd8a6a6 100644
--- a/call/rtp_video_sender_unittest.cc
+++ b/call/rtp_video_sender_unittest.cc
@@ -169,44 +169,41 @@
 
 TEST(RtpVideoSenderTest, SendSimulcastSetActive) {
   uint8_t payload = 'a';
-  EncodedImage encoded_image;
-  encoded_image.SetTimestamp(1);
-  encoded_image.capture_time_ms_ = 2;
-  encoded_image._frameType = kVideoFrameKey;
-  encoded_image._buffer = &payload;
-  encoded_image._length = 1;
+  EncodedImage encoded_image_1;
+  encoded_image_1.SetTimestamp(1);
+  encoded_image_1.capture_time_ms_ = 2;
+  encoded_image_1._frameType = kVideoFrameKey;
+  encoded_image_1._buffer = &payload;
+  encoded_image_1._length = 1;
 
   RtpVideoSenderTestFixture test({kSsrc1, kSsrc2}, kPayloadType, {});
 
-  CodecSpecificInfo codec_info_1;
-  memset(&codec_info_1, 0, sizeof(CodecSpecificInfo));
-  codec_info_1.codecType = kVideoCodecVP8;
-  codec_info_1.codecSpecific.VP8.simulcastIdx = 0;
+  CodecSpecificInfo codec_info;
+  memset(&codec_info, 0, sizeof(CodecSpecificInfo));
+  codec_info.codecType = kVideoCodecVP8;
 
   test.router()->SetActive(true);
   EXPECT_EQ(EncodedImageCallback::Result::OK,
             test.router()
-                ->OnEncodedImage(encoded_image, &codec_info_1, nullptr)
+                ->OnEncodedImage(encoded_image_1, &codec_info, nullptr)
                 .error);
 
-  CodecSpecificInfo codec_info_2;
-  memset(&codec_info_2, 0, sizeof(CodecSpecificInfo));
-  codec_info_2.codecType = kVideoCodecVP8;
-  codec_info_2.codecSpecific.VP8.simulcastIdx = 1;
+  EncodedImage encoded_image_2(encoded_image_1);
+  encoded_image_2.SetSpatialIndex(1);
   EXPECT_EQ(EncodedImageCallback::Result::OK,
             test.router()
-                ->OnEncodedImage(encoded_image, &codec_info_2, nullptr)
+                ->OnEncodedImage(encoded_image_2, &codec_info, nullptr)
                 .error);
 
   // Inactive.
   test.router()->SetActive(false);
   EXPECT_NE(EncodedImageCallback::Result::OK,
             test.router()
-                ->OnEncodedImage(encoded_image, &codec_info_1, nullptr)
+                ->OnEncodedImage(encoded_image_1, &codec_info, nullptr)
                 .error);
   EXPECT_NE(EncodedImageCallback::Result::OK,
             test.router()
-                ->OnEncodedImage(encoded_image, &codec_info_2, nullptr)
+                ->OnEncodedImage(encoded_image_2, &codec_info, nullptr)
                 .error);
 }
 
@@ -216,22 +213,19 @@
 // be sent if both modules are inactive.
 TEST(RtpVideoSenderTest, SendSimulcastSetActiveModules) {
   uint8_t payload = 'a';
-  EncodedImage encoded_image;
-  encoded_image.SetTimestamp(1);
-  encoded_image.capture_time_ms_ = 2;
-  encoded_image._frameType = kVideoFrameKey;
-  encoded_image._buffer = &payload;
-  encoded_image._length = 1;
+  EncodedImage encoded_image_1;
+  encoded_image_1.SetTimestamp(1);
+  encoded_image_1.capture_time_ms_ = 2;
+  encoded_image_1._frameType = kVideoFrameKey;
+  encoded_image_1._buffer = &payload;
+  encoded_image_1._length = 1;
+  EncodedImage encoded_image_2(encoded_image_1);
+  encoded_image_2.SetSpatialIndex(1);
 
   RtpVideoSenderTestFixture test({kSsrc1, kSsrc2}, kPayloadType, {});
-  CodecSpecificInfo codec_info_1;
-  memset(&codec_info_1, 0, sizeof(CodecSpecificInfo));
-  codec_info_1.codecType = kVideoCodecVP8;
-  codec_info_1.codecSpecific.VP8.simulcastIdx = 0;
-  CodecSpecificInfo codec_info_2;
-  memset(&codec_info_2, 0, sizeof(CodecSpecificInfo));
-  codec_info_2.codecType = kVideoCodecVP8;
-  codec_info_2.codecSpecific.VP8.simulcastIdx = 1;
+  CodecSpecificInfo codec_info;
+  memset(&codec_info, 0, sizeof(CodecSpecificInfo));
+  codec_info.codecType = kVideoCodecVP8;
 
   // Only setting one stream to active will still set the payload router to
   // active and allow sending data on the active stream.
@@ -239,7 +233,7 @@
   test.router()->SetActiveModules(active_modules);
   EXPECT_EQ(EncodedImageCallback::Result::OK,
             test.router()
-                ->OnEncodedImage(encoded_image, &codec_info_1, nullptr)
+                ->OnEncodedImage(encoded_image_1, &codec_info, nullptr)
                 .error);
 
   // Setting both streams to inactive will turn the payload router to
@@ -250,11 +244,11 @@
   // because the payload router is inactive.
   EXPECT_NE(EncodedImageCallback::Result::OK,
             test.router()
-                ->OnEncodedImage(encoded_image, &codec_info_1, nullptr)
+                ->OnEncodedImage(encoded_image_1, &codec_info, nullptr)
                 .error);
   EXPECT_NE(EncodedImageCallback::Result::OK,
             test.router()
-                ->OnEncodedImage(encoded_image, &codec_info_2, nullptr)
+                ->OnEncodedImage(encoded_image_1, &codec_info, nullptr)
                 .error);
 }
 
diff --git a/media/engine/simulcast_encoder_adapter.cc b/media/engine/simulcast_encoder_adapter.cc
index 4ab83a0..5827fc5 100644
--- a/media/engine/simulcast_encoder_adapter.cc
+++ b/media/engine/simulcast_encoder_adapter.cc
@@ -449,16 +449,14 @@
     const EncodedImage& encodedImage,
     const CodecSpecificInfo* codecSpecificInfo,
     const RTPFragmentationHeader* fragmentation) {
+  EncodedImage stream_image(encodedImage);
   CodecSpecificInfo stream_codec_specific = *codecSpecificInfo;
   stream_codec_specific.codec_name = implementation_name_.c_str();
-  if (stream_codec_specific.codecType == webrtc::kVideoCodecVP8) {
-    stream_codec_specific.codecSpecific.VP8.simulcastIdx = stream_idx;
-  } else if (stream_codec_specific.codecType == webrtc::kVideoCodecH264) {
-    stream_codec_specific.codecSpecific.H264.simulcast_idx = stream_idx;
-  }
+
+  stream_image.SetSpatialIndex(stream_idx);
 
   return encoded_complete_callback_->OnEncodedImage(
-      encodedImage, &stream_codec_specific, fragmentation);
+      stream_image, &stream_codec_specific, fragmentation);
 }
 
 void SimulcastEncoderAdapter::PopulateStreamCodec(
diff --git a/media/engine/simulcast_encoder_adapter_unittest.cc b/media/engine/simulcast_encoder_adapter_unittest.cc
index 410b85e..3999840 100644
--- a/media/engine/simulcast_encoder_adapter_unittest.cc
+++ b/media/engine/simulcast_encoder_adapter_unittest.cc
@@ -339,11 +339,10 @@
                         const RTPFragmentationHeader* fragmentation) override {
     last_encoded_image_width_ = encoded_image._encodedWidth;
     last_encoded_image_height_ = encoded_image._encodedHeight;
-    if (codec_specific_info) {
-      last_encoded_image_simulcast_index_ =
-          codec_specific_info->codecSpecific.VP8.simulcastIdx;
-    }
-    return Result(Result::OK, encoded_image.Timestamp());
+    last_encoded_image_simulcast_index_ =
+        encoded_image.SpatialIndex().value_or(-1);
+
+    return Result(Result::OK, encoded_image._timeStamp);
   }
 
   bool GetLastEncodedImageInfo(int* out_width,
diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
index 4fc9b4f..4effcdb 100644
--- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc
@@ -506,6 +506,7 @@
             : VideoContentType::UNSPECIFIED;
     encoded_images_[i].timing_.flags = VideoSendTiming::kInvalid;
     encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType);
+    encoded_images_[i].SetSpatialIndex(configurations_[i].simulcast_idx);
 
     // Split encoded image up into fragments. This also updates
     // |encoded_image_|.
@@ -526,8 +527,6 @@
       codec_specific.codecType = kVideoCodecH264;
       codec_specific.codecSpecific.H264.packetization_mode =
           packetization_mode_;
-      codec_specific.codecSpecific.H264.simulcast_idx =
-          configurations_[i].simulcast_idx;
       encoded_image_callback_->OnEncodedImage(encoded_images_[i],
                                               &codec_specific, &frag_header);
     }
diff --git a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
index 4733b3a..c3a7506 100644
--- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
@@ -285,7 +285,6 @@
 
       CodecSpecificInfo codec_info = *codecSpecificInfo;
       codec_info.codecType = kVideoCodecMultiplex;
-      codec_info.codecSpecific.generic.simulcast_idx = 0;
       encoded_complete_callback_->OnEncodedImage(combined_image_, &codec_info,
                                                  fragmentation);
     }
diff --git a/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc b/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
index 303914d..56de138 100644
--- a/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
+++ b/modules/video_coding/codecs/multiplex/test/multiplex_adapter_unittest.cc
@@ -232,7 +232,7 @@
   CodecSpecificInfo codec_specific_info;
   ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
   EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType);
-  EXPECT_EQ(0, codec_specific_info.codecSpecific.generic.simulcast_idx);
+  EXPECT_FALSE(encoded_frame.SpatialIndex());
 
   const MultiplexImage& unpacked_frame =
       MultiplexEncodedImagePacker::Unpack(encoded_frame);
@@ -252,7 +252,7 @@
   CodecSpecificInfo codec_specific_info;
   ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
   EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType);
-  EXPECT_EQ(0, codec_specific_info.codecSpecific.generic.simulcast_idx);
+  EXPECT_FALSE(encoded_frame.SpatialIndex());
 
   const MultiplexImage& unpacked_frame =
       MultiplexEncodedImagePacker::Unpack(encoded_frame);
diff --git a/modules/video_coding/codecs/test/videoprocessor.cc b/modules/video_coding/codecs/test/videoprocessor.cc
index ccf7053..75605ae 100644
--- a/modules/video_coding/codecs/test/videoprocessor.cc
+++ b/modules/video_coding/codecs/test/videoprocessor.cc
@@ -56,22 +56,17 @@
   return max_size;
 }
 
-void GetLayerIndices(const CodecSpecificInfo& codec_specific,
-                     size_t* spatial_idx,
-                     size_t* temporal_idx) {
+size_t GetTemporalLayerIndex(const CodecSpecificInfo& codec_specific) {
+  size_t temporal_idx = 0;
   if (codec_specific.codecType == kVideoCodecVP8) {
-    *spatial_idx = codec_specific.codecSpecific.VP8.simulcastIdx;
-    *temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
+    temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
   } else if (codec_specific.codecType == kVideoCodecVP9) {
-    *spatial_idx = codec_specific.codecSpecific.VP9.spatial_idx;
-    *temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
+    temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
   }
-  if (*spatial_idx == kNoSpatialIdx) {
-    *spatial_idx = 0;
+  if (temporal_idx == kNoTemporalIdx) {
+    temporal_idx = 0;
   }
-  if (*temporal_idx == kNoTemporalIdx) {
-    *temporal_idx = 0;
-  }
+  return temporal_idx;
 }
 
 int GetElapsedTimeMicroseconds(int64_t start_ns, int64_t stop_ns) {
@@ -347,9 +342,8 @@
   }
 
   // Layer metadata.
-  size_t spatial_idx = 0;
-  size_t temporal_idx = 0;
-  GetLayerIndices(codec_specific, &spatial_idx, &temporal_idx);
+  size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
+  size_t temporal_idx = GetTemporalLayerIndex(codec_specific);
 
   FrameStatistics* frame_stat =
       stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), spatial_idx);
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
index 3826e14..ef46fd2 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
@@ -817,7 +817,6 @@
   codec_specific->codecType = kVideoCodecVP8;
   codec_specific->codec_name = ImplementationName();
   CodecSpecificInfoVP8* vp8Info = &(codec_specific->codecSpecific.VP8);
-  vp8Info->simulcastIdx = stream_idx;
   vp8Info->keyIdx = kNoKeyIdx;  // TODO(hlundin) populate this
   vp8Info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;
   temporal_layers_[stream_idx]->PopulateCodecSpecific(
@@ -876,6 +875,7 @@
           encoded_images_[encoder_idx]._frameType = kVideoFrameKey;
           is_keyframe = true;
         }
+        encoded_images_[encoder_idx].SetSpatialIndex(stream_idx);
         PopulateCodecSpecific(&codec_specific, tl_configs[stream_idx], *pkt,
                               stream_idx, input_image.timestamp());
         break;
diff --git a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
index aaa10dd..dc55d17 100644
--- a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
@@ -70,7 +70,7 @@
     VerifyQpParser(*encoded_frame);
     EXPECT_STREQ("libvpx", codec_specific_info->codec_name);
     EXPECT_EQ(kVideoCodecVP8, codec_specific_info->codecType);
-    EXPECT_EQ(0u, codec_specific_info->codecSpecific.VP8.simulcastIdx);
+    EXPECT_EQ(0, encoded_frame->SpatialIndex());
   }
 
   void EncodeAndExpectFrameWith(const VideoFrame& input_frame,
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index f6594c1..0f5855f 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -356,7 +356,7 @@
             encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
 
   ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
-  EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, kNoSpatialIdx);
+  EXPECT_FALSE(frames[0].SpatialIndex());
   EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_picture);
 }
 
@@ -395,7 +395,7 @@
 
     // Key frame.
     EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
-    EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
+    EXPECT_EQ(frames[0].SpatialIndex(), 0);
     EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
               inter_layer_pred == InterLayerPredMode::kOff);
     EXPECT_TRUE(
@@ -408,7 +408,7 @@
 
     // Delta frame.
     EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
-    EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
+    EXPECT_EQ(frames[0].SpatialIndex(), 0);
     EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
               inter_layer_pred == InterLayerPredMode::kOff ||
                   inter_layer_pred == InterLayerPredMode::kOnKeyPic);
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 5805563..03c2c77 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -754,6 +754,7 @@
 }
 
 void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+                                           absl::optional<int>* spatial_idx,
                                            const vpx_codec_cx_pkt& pkt,
                                            uint32_t timestamp,
                                            bool first_frame_in_picture) {
@@ -780,9 +781,9 @@
   }
   if (num_active_spatial_layers_ == 1) {
     RTC_CHECK_EQ(layer_id.spatial_layer_id, 0);
-    vp9_info->spatial_idx = kNoSpatialIdx;
+    *spatial_idx = absl::nullopt;
   } else {
-    vp9_info->spatial_idx = layer_id.spatial_layer_id;
+    *spatial_idx = layer_id.spatial_layer_id;
   }
   if (layer_id.spatial_layer_id != 0) {
     vp9_info->ss_data_available = false;
@@ -1021,8 +1022,10 @@
   RTC_DCHECK_LE(encoded_image_._length, encoded_image_._size);
 
   memset(&codec_specific_, 0, sizeof(codec_specific_));
-  PopulateCodecSpecific(&codec_specific_, *pkt, input_image_->timestamp(),
-                        first_frame_in_picture);
+  absl::optional<int> spatial_index;
+  PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt,
+                        input_image_->timestamp(), first_frame_in_picture);
+  encoded_image_.SetSpatialIndex(spatial_index);
 
   if (is_flexible_mode_) {
     UpdateReferenceBuffers(*pkt, pics_since_key_);
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index e8fd606..887b560 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -61,6 +61,7 @@
   int InitAndSetControlSettings(const VideoCodec* inst);
 
   void PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+                             absl::optional<int>* spatial_idx,
                              const vpx_codec_cx_pkt& pkt,
                              uint32_t timestamp,
                              bool first_frame_in_picture);
diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc
index c53a737..37914e4 100644
--- a/modules/video_coding/encoded_frame.cc
+++ b/modules/video_coding/encoded_frame.cc
@@ -82,7 +82,6 @@
         if (_codecSpecificInfo.codecType != kVideoCodecVP9) {
           // This is the first packet for this frame.
           _codecSpecificInfo.codecSpecific.VP9.temporal_idx = 0;
-          _codecSpecificInfo.codecSpecific.VP9.spatial_idx = 0;
           _codecSpecificInfo.codecSpecific.VP9.gof_idx = 0;
           _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted = false;
           _codecSpecificInfo.codecType = kVideoCodecVP9;
@@ -106,8 +105,6 @@
               vp9_header.temporal_up_switch;
         }
         if (vp9_header.spatial_idx != kNoSpatialIdx) {
-          _codecSpecificInfo.codecSpecific.VP9.spatial_idx =
-              vp9_header.spatial_idx;
           _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted =
               vp9_header.inter_layer_predicted;
         }
diff --git a/modules/video_coding/generic_encoder.cc b/modules/video_coding/generic_encoder.cc
index a8999fc..b1d7c28 100644
--- a/modules/video_coding/generic_encoder.cc
+++ b/modules/video_coding/generic_encoder.cc
@@ -391,21 +391,10 @@
     const RTPFragmentationHeader* fragmentation_header) {
   TRACE_EVENT_INSTANT1("webrtc", "VCMEncodedFrameCallback::Encoded",
                        "timestamp", encoded_image.Timestamp());
-  size_t simulcast_svc_idx = 0;
-  if (codec_specific->codecType == kVideoCodecVP9) {
-    if (codec_specific->codecSpecific.VP9.num_spatial_layers > 1)
-      simulcast_svc_idx = codec_specific->codecSpecific.VP9.spatial_idx;
-  } else if (codec_specific->codecType == kVideoCodecVP8) {
-    simulcast_svc_idx = codec_specific->codecSpecific.VP8.simulcastIdx;
-  } else if (codec_specific->codecType == kVideoCodecGeneric) {
-    simulcast_svc_idx = codec_specific->codecSpecific.generic.simulcast_idx;
-  } else if (codec_specific->codecType == kVideoCodecH264) {
-    // TODO(ilnik): When h264 simulcast is landed, extract simulcast idx here.
-  }
-
+  const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
   EncodedImage image_copy(encoded_image);
 
-  FillTimingInfo(simulcast_svc_idx, &image_copy);
+  FillTimingInfo(spatial_idx, &image_copy);
 
   // Piggyback ALR experiment group id and simulcast id into the content type.
   uint8_t experiment_id =
@@ -421,7 +410,7 @@
   // id in content type to +1 of that is actual simulcast index. This is because
   // value 0 on the wire is reserved for 'no simulcast stream specified'.
   RTC_CHECK(videocontenttypehelpers::SetSimulcastId(
-      &image_copy.content_type_, static_cast<uint8_t>(simulcast_svc_idx + 1)));
+      &image_copy.content_type_, static_cast<uint8_t>(spatial_idx + 1)));
 
   Result result = post_encode_callback_->OnEncodedImage(
       image_copy, codec_specific, fragmentation_header);
diff --git a/modules/video_coding/generic_encoder_unittest.cc b/modules/video_coding/generic_encoder_unittest.cc
index c889769..2be6856 100644
--- a/modules/video_coding/generic_encoder_unittest.cc
+++ b/modules/video_coding/generic_encoder_unittest.cc
@@ -95,8 +95,8 @@
       image._length = FrameSize(min_frame_size, max_frame_size, s, i);
       image.capture_time_ms_ = current_timestamp;
       image.SetTimestamp(static_cast<uint32_t>(current_timestamp * 90));
+      image.SetSpatialIndex(s);
       codec_specific.codecType = kVideoCodecGeneric;
-      codec_specific.codecSpecific.generic.simulcast_idx = s;
       callback.OnEncodeStarted(static_cast<uint32_t>(current_timestamp * 90),
                                current_timestamp, s);
       if (dropped) {
@@ -189,7 +189,6 @@
   image.capture_time_ms_ = timestamp;
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
   codec_specific.codecType = kVideoCodecGeneric;
-  codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
   VCMEncodedFrameCallback callback(&sink, nullptr);
   VideoCodec::TimingFrameTriggerThresholds thresholds;
@@ -221,7 +220,6 @@
   image.capture_time_ms_ = timestamp;
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
   codec_specific.codecType = kVideoCodecGeneric;
-  codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
   VCMEncodedFrameCallback callback(&sink, nullptr);
   callback.SetInternalSource(true);
@@ -257,7 +255,6 @@
   const int64_t kTimestampMs3 = 47721860;
   const int64_t kTimestampMs4 = 47721870;
   codec_specific.codecType = kVideoCodecGeneric;
-  codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
   VCMEncodedFrameCallback callback(&sink, nullptr);
   // Any non-zero bitrate needed to be set before the first frame.
@@ -293,7 +290,6 @@
   CodecSpecificInfo codec_specific;
   const int64_t kTimestampMs = 123456;
   codec_specific.codecType = kVideoCodecGeneric;
-  codec_specific.codecSpecific.generic.simulcast_idx = 0;
   FakeEncodedImageCallback sink;
   VCMEncodedFrameCallback callback(&sink, nullptr);
   // Any non-zero bitrate needed to be set before the first frame.
diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h
index 9108625..94d4271 100644
--- a/modules/video_coding/include/video_codec_interface.h
+++ b/modules/video_coding/include/video_codec_interface.h
@@ -28,6 +28,8 @@
 // with a copy-constructor. See below.
 struct CodecSpecificInfoVP8 {
   bool nonReference;
+  // TODO(bugs.webrtc.org/9378): Delete simulcastIdx, replaced by spatial index
+  // member in EncodedImage. Unused, but assigned in downstream code.
   uint8_t simulcastIdx;
   uint8_t temporalIdx;
   bool layerSync;
@@ -43,6 +45,8 @@
   bool non_ref_for_inter_layer_pred;
 
   uint8_t temporal_idx;
+  // TODO(bugs.webrtc.org/9378): Delete spatial_idx, replaced by spatial index
+  // member in EncodedImage. Unused, but assigned in downstream code.
   uint8_t spatial_idx;
   bool temporal_up_switch;
   bool inter_layer_predicted;  // Frame is dependent on directly lower spatial
@@ -63,13 +67,14 @@
   bool end_of_picture;
 };
 
+// TODO(bugs.webrtc.org/9378): Delete this struct. Unused, except that
+// simulcast_idx is assigned in downstream code.
 struct CodecSpecificInfoGeneric {
   uint8_t simulcast_idx;
 };
 
 struct CodecSpecificInfoH264 {
   H264PacketizationMode packetization_mode;
-  uint8_t simulcast_idx;
 };
 
 union CodecSpecificInfoUnion {
diff --git a/modules/video_coding/utility/simulcast_test_fixture_impl.cc b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
index 03de176..4af526c 100644
--- a/modules/video_coding/utility/simulcast_test_fixture_impl.cc
+++ b/modules/video_coding/utility/simulcast_test_fixture_impl.cc
@@ -76,15 +76,9 @@
   virtual Result OnEncodedImage(const EncodedImage& encoded_image,
                                 const CodecSpecificInfo* codec_specific_info,
                                 const RTPFragmentationHeader* fragmentation) {
-    uint16_t simulcast_idx = 0;
     bool is_vp8 = (codec_specific_info->codecType == kVideoCodecVP8);
-    if (is_vp8) {
-      simulcast_idx = codec_specific_info->codecSpecific.VP8.simulcastIdx;
-    } else {
-      simulcast_idx = codec_specific_info->codecSpecific.H264.simulcast_idx;
-    }
     // Only store the base layer.
-    if (simulcast_idx) {
+    if (encoded_image.SpatialIndex().value_or(0) == 0) {
       if (encoded_image._frameType == kVideoFrameKey) {
         delete[] encoded_key_frame_._buffer;
         encoded_key_frame_._buffer = new uint8_t[encoded_image._size];
@@ -104,9 +98,9 @@
       }
     }
     if (is_vp8) {
-      layer_sync_[codec_specific_info->codecSpecific.VP8.simulcastIdx] =
+      layer_sync_[encoded_image.SpatialIndex().value_or(0)] =
           codec_specific_info->codecSpecific.VP8.layerSync;
-      temporal_layer_[codec_specific_info->codecSpecific.VP8.simulcastIdx] =
+      temporal_layer_[encoded_image.SpatialIndex().value_or(0)] =
           codec_specific_info->codecSpecific.VP8.temporalIdx;
     }
     return Result(Result::OK, encoded_image.Timestamp());
diff --git a/sdk/android/src/jni/androidmediaencoder.cc b/sdk/android/src/jni/androidmediaencoder.cc
index 35608c8..435bc71 100644
--- a/sdk/android/src/jni/androidmediaencoder.cc
+++ b/sdk/android/src/jni/androidmediaencoder.cc
@@ -1008,7 +1008,6 @@
       info.codecType = codec_type;
       if (codec_type == kVideoCodecVP8) {
         info.codecSpecific.VP8.nonReference = false;
-        info.codecSpecific.VP8.simulcastIdx = 0;
         info.codecSpecific.VP8.temporalIdx = kNoTemporalIdx;
         info.codecSpecific.VP8.layerSync = false;
         info.codecSpecific.VP8.keyIdx = kNoKeyIdx;
@@ -1020,7 +1019,6 @@
         info.codecSpecific.VP9.flexible_mode = false;
         info.codecSpecific.VP9.ss_data_available = key_frame ? true : false;
         info.codecSpecific.VP9.temporal_idx = kNoTemporalIdx;
-        info.codecSpecific.VP9.spatial_idx = kNoSpatialIdx;
         info.codecSpecific.VP9.temporal_up_switch = true;
         info.codecSpecific.VP9.inter_layer_predicted = false;
         info.codecSpecific.VP9.gof_idx =
diff --git a/sdk/android/src/jni/videoencoderwrapper.cc b/sdk/android/src/jni/videoencoderwrapper.cc
index ff8be05..c50e9f6 100644
--- a/sdk/android/src/jni/videoencoderwrapper.cc
+++ b/sdk/android/src/jni/videoencoderwrapper.cc
@@ -390,7 +390,6 @@
   switch (codec_settings_.codecType) {
     case kVideoCodecVP8:
       info.codecSpecific.VP8.nonReference = false;
-      info.codecSpecific.VP8.simulcastIdx = 0;
       info.codecSpecific.VP8.temporalIdx = kNoTemporalIdx;
       info.codecSpecific.VP8.layerSync = false;
       info.codecSpecific.VP8.keyIdx = kNoKeyIdx;
@@ -403,7 +402,6 @@
       info.codecSpecific.VP9.flexible_mode = false;
       info.codecSpecific.VP9.ss_data_available = key_frame ? true : false;
       info.codecSpecific.VP9.temporal_idx = kNoTemporalIdx;
-      info.codecSpecific.VP9.spatial_idx = kNoSpatialIdx;
       info.codecSpecific.VP9.temporal_up_switch = true;
       info.codecSpecific.VP9.inter_layer_predicted = false;
       info.codecSpecific.VP9.gof_idx =
diff --git a/test/fake_encoder.cc b/test/fake_encoder.cc
index b8d04e6..2075399 100644
--- a/test/fake_encoder.cc
+++ b/test/fake_encoder.cc
@@ -102,7 +102,6 @@
     CodecSpecificInfo specifics;
     memset(&specifics, 0, sizeof(specifics));
     specifics.codecType = kVideoCodecGeneric;
-    specifics.codecSpecific.generic.simulcast_idx = i;
     std::unique_ptr<uint8_t[]> encoded_buffer(
         new uint8_t[frame_info.layers[i].size]);
     memcpy(encoded_buffer.get(), encoded_buffer_, frame_info.layers[i].size);
@@ -118,6 +117,7 @@
     encoded.content_type_ = (mode == VideoCodecMode::kScreensharing)
                                 ? VideoContentType::SCREENSHARE
                                 : VideoContentType::UNSPECIFIED;
+    encoded.SetSpatialIndex(i);
     specifics.codec_name = ImplementationName();
     if (callback->OnEncodedImage(encoded, &specifics, nullptr).error !=
         EncodedImageCallback::Result::OK) {
diff --git a/test/fake_vp8_encoder.cc b/test/fake_vp8_encoder.cc
index 9d05297..04dff00 100644
--- a/test/fake_vp8_encoder.cc
+++ b/test/fake_vp8_encoder.cc
@@ -88,7 +88,6 @@
   codec_specific->codecType = kVideoCodecVP8;
   codec_specific->codec_name = ImplementationName();
   CodecSpecificInfoVP8* vp8Info = &(codec_specific->codecSpecific.VP8);
-  vp8Info->simulcastIdx = stream_idx;
   vp8Info->keyIdx = kNoKeyIdx;
   vp8Info->nonReference = false;
   temporal_layers_[stream_idx]->PopulateCodecSpecific(
@@ -100,7 +99,7 @@
     const CodecSpecificInfo* codec_specific_info,
     const RTPFragmentationHeader* fragments) {
   RTC_DCHECK_CALLED_SEQUENTIALLY(&sequence_checker_);
-  uint8_t stream_idx = codec_specific_info->codecSpecific.generic.simulcast_idx;
+  uint8_t stream_idx = encoded_image.SpatialIndex().value_or(0);
   CodecSpecificInfo overrided_specific_info;
   TemporalLayers::FrameConfig tl_config =
       temporal_layers_[stream_idx]->UpdateLayerConfig(encoded_image._timeStamp);
diff --git a/video/send_statistics_proxy.cc b/video/send_statistics_proxy.cc
index 2ab025a..0057933 100644
--- a/video/send_statistics_proxy.cc
+++ b/video/send_statistics_proxy.cc
@@ -81,9 +81,9 @@
                             kVideoMax);
 }
 
-bool IsForcedFallbackPossible(const CodecSpecificInfo* codec_info) {
-  return codec_info->codecType == kVideoCodecVP8 &&
-         codec_info->codecSpecific.VP8.simulcastIdx == 0 &&
+bool IsForcedFallbackPossible(const CodecSpecificInfo* codec_info,
+                              int simulcast_index) {
+  return codec_info->codecType == kVideoCodecVP8 && simulcast_index == 0 &&
          (codec_info->codecSpecific.VP8.temporalIdx == 0 ||
           codec_info->codecSpecific.VP8.temporalIdx == kNoTemporalIdx);
 }
@@ -219,7 +219,7 @@
     sent_height_counter_.Add(it->second.max_height);
 
     // Check number of encoded streams per timestamp.
-    if (num_streams_ > it->second.max_simulcast_idx) {
+    if (num_streams_ > static_cast<size_t>(it->second.max_simulcast_idx)) {
       *is_limited_in_resolution = false;
       if (num_streams_ > 1) {
         int disabled_streams =
@@ -241,7 +241,7 @@
 
 bool SendStatisticsProxy::UmaSamplesContainer::InsertEncodedFrame(
     const EncodedImage& encoded_frame,
-    size_t simulcast_idx,
+    int simulcast_idx,
     bool* is_limited_in_resolution) {
   int64_t now_ms = clock_->TimeInMilliseconds();
   RemoveOld(now_ms, is_limited_in_resolution);
@@ -806,14 +806,15 @@
 
 void SendStatisticsProxy::UpdateEncoderFallbackStats(
     const CodecSpecificInfo* codec_info,
-    int pixels) {
-  UpdateFallbackDisabledStats(codec_info, pixels);
+    int pixels,
+    int simulcast_index) {
+  UpdateFallbackDisabledStats(codec_info, pixels, simulcast_index);
 
   if (!fallback_max_pixels_ || !uma_container_->fallback_info_.is_possible) {
     return;
   }
 
-  if (!IsForcedFallbackPossible(codec_info)) {
+  if (!IsForcedFallbackPossible(codec_info, simulcast_index)) {
     uma_container_->fallback_info_.is_possible = false;
     return;
   }
@@ -855,14 +856,15 @@
 
 void SendStatisticsProxy::UpdateFallbackDisabledStats(
     const CodecSpecificInfo* codec_info,
-    int pixels) {
+    int pixels,
+    int simulcast_index) {
   if (!fallback_max_pixels_disabled_ ||
       !uma_container_->fallback_info_disabled_.is_possible ||
       stats_.has_entered_low_resolution) {
     return;
   }
 
-  if (!IsForcedFallbackPossible(codec_info) ||
+  if (!IsForcedFallbackPossible(codec_info, simulcast_index) ||
       strcmp(codec_info->codec_name, kVp8SwCodecName) == 0) {
     uma_container_->fallback_info_disabled_.is_possible = false;
     return;
@@ -882,26 +884,27 @@
 void SendStatisticsProxy::OnSendEncodedImage(
     const EncodedImage& encoded_image,
     const CodecSpecificInfo* codec_info) {
-  size_t simulcast_idx = 0;
+  // Simulcast is used for VP8, H264 and Generic.
+  int simulcast_idx =
+      (codec_info && (codec_info->codecType == kVideoCodecVP8 ||
+                      codec_info->codecType == kVideoCodecH264 ||
+                      codec_info->codecType == kVideoCodecGeneric))
+          ? encoded_image.SpatialIndex().value_or(0)
+          : 0;
 
   rtc::CritScope lock(&crit_);
   ++stats_.frames_encoded;
   if (codec_info) {
-    if (codec_info->codecType == kVideoCodecVP8) {
-      simulcast_idx = codec_info->codecSpecific.VP8.simulcastIdx;
-    } else if (codec_info->codecType == kVideoCodecH264) {
-      simulcast_idx = codec_info->codecSpecific.H264.simulcast_idx;
-    } else if (codec_info->codecType == kVideoCodecGeneric) {
-      simulcast_idx = codec_info->codecSpecific.generic.simulcast_idx;
-    }
     if (codec_info->codec_name) {
-      UpdateEncoderFallbackStats(codec_info, encoded_image._encodedWidth *
-                                                 encoded_image._encodedHeight);
+      UpdateEncoderFallbackStats(
+          codec_info,
+          encoded_image._encodedWidth * encoded_image._encodedHeight,
+          simulcast_idx);
       stats_.encoder_implementation_name = codec_info->codec_name;
     }
   }
 
-  if (simulcast_idx >= rtp_config_.ssrcs.size()) {
+  if (static_cast<size_t>(simulcast_idx) >= rtp_config_.ssrcs.size()) {
     RTC_LOG(LS_ERROR) << "Encoded image outside simulcast range ("
                       << simulcast_idx << " >= " << rtp_config_.ssrcs.size()
                       << ").";
@@ -928,20 +931,13 @@
 
     if (codec_info) {
       if (codec_info->codecType == kVideoCodecVP8) {
-        int spatial_idx = (rtp_config_.ssrcs.size() == 1)
-                              ? -1
-                              : static_cast<int>(simulcast_idx);
+        int spatial_idx = (rtp_config_.ssrcs.size() == 1) ? -1 : simulcast_idx;
         uma_container_->qp_counters_[spatial_idx].vp8.Add(encoded_image.qp_);
       } else if (codec_info->codecType == kVideoCodecVP9) {
-        int spatial_idx =
-            (codec_info->codecSpecific.VP9.num_spatial_layers == 1)
-                ? -1
-                : codec_info->codecSpecific.VP9.spatial_idx;
+        int spatial_idx = encoded_image.SpatialIndex().value_or(-1);
         uma_container_->qp_counters_[spatial_idx].vp9.Add(encoded_image.qp_);
       } else if (codec_info->codecType == kVideoCodecH264) {
-        int spatial_idx = (rtp_config_.ssrcs.size() == 1)
-                              ? -1
-                              : static_cast<int>(simulcast_idx);
+        int spatial_idx = (rtp_config_.ssrcs.size() == 1) ? -1 : simulcast_idx;
         uma_container_->qp_counters_[spatial_idx].h264.Add(encoded_image.qp_);
       }
     }
diff --git a/video/send_statistics_proxy.h b/video/send_statistics_proxy.h
index 875348f..b5868d0 100644
--- a/video/send_statistics_proxy.h
+++ b/video/send_statistics_proxy.h
@@ -188,10 +188,7 @@
     }
   };
   struct Frame {
-    Frame(int64_t send_ms,
-          uint32_t width,
-          uint32_t height,
-          size_t simulcast_idx)
+    Frame(int64_t send_ms, uint32_t width, uint32_t height, int simulcast_idx)
         : send_ms(send_ms),
           max_width(width),
           max_height(height),
@@ -200,7 +197,7 @@
         send_ms;          // Time when first frame with this timestamp is sent.
     uint32_t max_width;   // Max width with this timestamp.
     uint32_t max_height;  // Max height with this timestamp.
-    size_t max_simulcast_idx;  // Max simulcast index with this timestamp.
+    int max_simulcast_idx;  // Max simulcast index with this timestamp.
   };
   typedef std::map<uint32_t, Frame, TimestampOlderThan> EncodedFrameMap;
 
@@ -218,10 +215,12 @@
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
   void UpdateEncoderFallbackStats(const CodecSpecificInfo* codec_info,
-                                  int pixels)
+                                  int pixels,
+                                  int simulcast_index)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
   void UpdateFallbackDisabledStats(const CodecSpecificInfo* codec_info,
-                                   int pixels)
+                                   int pixels,
+                                   int simulcast_index)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
   Clock* const clock_;
@@ -257,7 +256,7 @@
     void InitializeBitrateCounters(const VideoSendStream::Stats& stats);
 
     bool InsertEncodedFrame(const EncodedImage& encoded_frame,
-                            size_t simulcast_idx,
+                            int simulcast_idx,
                             bool* is_limited_in_resolution);
     void RemoveOld(int64_t now_ms, bool* is_limited_in_resolution);
 
diff --git a/video/send_statistics_proxy_unittest.cc b/video/send_statistics_proxy_unittest.cc
index d4a7876..4dbfc03 100644
--- a/video/send_statistics_proxy_unittest.cc
+++ b/video/send_statistics_proxy_unittest.cc
@@ -36,7 +36,6 @@
 const CodecSpecificInfo kDefaultCodecInfo = []() {
   CodecSpecificInfo codec_info;
   codec_info.codecType = kVideoCodecVP8;
-  codec_info.codecSpecific.VP8.simulcastIdx = 0;
   return codec_info;
 }();
 }  // namespace
@@ -1313,10 +1312,10 @@
   codec_info.codecType = kVideoCodecVP8;
 
   for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) {
-    codec_info.codecSpecific.VP8.simulcastIdx = 0;
+    encoded_image.SetSpatialIndex(0);
     encoded_image.qp_ = kQpIdx0;
     statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
-    codec_info.codecSpecific.VP8.simulcastIdx = 1;
+    encoded_image.SetSpatialIndex(1);
     encoded_image.qp_ = kQpIdx1;
     statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
   }
@@ -1338,7 +1337,7 @@
   codec_info.codecType = kVideoCodecVP8;
 
   for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) {
-    codec_info.codecSpecific.VP8.simulcastIdx = 0;
+    encoded_image.SetSpatialIndex(0);
     encoded_image.qp_ = kQpIdx0;
     statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
   }
@@ -1355,10 +1354,10 @@
 
   for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) {
     encoded_image.qp_ = kQpIdx0;
-    codec_info.codecSpecific.VP9.spatial_idx = 0;
+    encoded_image.SetSpatialIndex(0);
     statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
     encoded_image.qp_ = kQpIdx1;
-    codec_info.codecSpecific.VP9.spatial_idx = 1;
+    encoded_image.SetSpatialIndex(1);
     statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
   }
   statistics_proxy_.reset();
@@ -1381,7 +1380,6 @@
 
   for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) {
     encoded_image.qp_ = kQpIdx0;
-    codec_info.codecSpecific.VP9.spatial_idx = 0;
     statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
   }
   statistics_proxy_.reset();
@@ -1395,10 +1393,10 @@
   codec_info.codecType = kVideoCodecH264;
 
   for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) {
-    codec_info.codecSpecific.H264.simulcast_idx = 0;
+    encoded_image.SetSpatialIndex(0);
     encoded_image.qp_ = kQpIdx0;
     statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
-    codec_info.codecSpecific.H264.simulcast_idx = 1;
+    encoded_image.SetSpatialIndex(1);
     encoded_image.qp_ = kQpIdx1;
     statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
   }
@@ -1540,6 +1538,7 @@
       VideoStreamEncoderObserver::AdaptationReason::kNone, cpu_counts,
       quality_counts);
   EncodedImage encoded_image;
+  encoded_image.SetSpatialIndex(0);
   for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i)
     statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo);
 
@@ -1560,6 +1559,7 @@
       VideoStreamEncoderObserver::AdaptationReason::kNone, cpu_counts,
       quality_counts);
   EncodedImage encoded_image;
+  encoded_image.SetSpatialIndex(0);
   for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i)
     statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo);
 
@@ -1584,6 +1584,7 @@
       VideoStreamEncoderObserver::AdaptationReason::kNone, cpu_counts,
       quality_counts);
   EncodedImage encoded_image;
+  encoded_image.SetSpatialIndex(0);
   for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i)
     statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo);
   // Histograms are updated when the statistics_proxy_ is deleted.
@@ -1717,13 +1718,13 @@
   EncodedImage encoded_image;
   encoded_image._encodedWidth = kEncodedWidth;
   encoded_image._encodedHeight = kEncodedHeight;
+  encoded_image.SetSpatialIndex(0);
 
   CodecSpecificInfo codec_info;
   codec_info.codecType = kVideoCodecVP8;
-  codec_info.codecSpecific.VP8.simulcastIdx = 0;
 
   statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
-  codec_info.codecSpecific.VP8.simulcastIdx = 1;
+  encoded_image.SetSpatialIndex(1);
   statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
 
   VideoSendStream::Stats stats = statistics_proxy_->GetStats();
@@ -1746,7 +1747,6 @@
 
   // Report stats for second SSRC to make sure it's not outdated along with the
   // first SSRC.
-  codec_info.codecSpecific.VP8.simulcastIdx = 1;
   statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
 
   // Forward 1 ms, reach timeout, substream 0 should have no resolution
@@ -1765,13 +1765,13 @@
   EncodedImage encoded_image;
   encoded_image._encodedWidth = kEncodedWidth;
   encoded_image._encodedHeight = kEncodedHeight;
+  encoded_image.SetSpatialIndex(0);
 
   CodecSpecificInfo codec_info;
   codec_info.codecType = kVideoCodecVP8;
-  codec_info.codecSpecific.VP8.simulcastIdx = 0;
 
   statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
-  codec_info.codecSpecific.VP8.simulcastIdx = 1;
+  encoded_image.SetSpatialIndex(1);
   statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
 
   statistics_proxy_->OnInactiveSsrc(config_.rtp.ssrcs[1]);
@@ -2182,11 +2182,11 @@
   explicit ForcedFallbackTest(const std::string& field_trials)
       : SendStatisticsProxyTest(field_trials) {
     codec_info_.codecType = kVideoCodecVP8;
-    codec_info_.codecSpecific.VP8.simulcastIdx = 0;
     codec_info_.codecSpecific.VP8.temporalIdx = 0;
     codec_info_.codec_name = "fake_codec";
     encoded_image_._encodedWidth = kWidth;
     encoded_image_._encodedHeight = kHeight;
+    encoded_image_.SetSpatialIndex(0);
   }
 
   ~ForcedFallbackTest() override {}
@@ -2260,7 +2260,7 @@
 }
 
 TEST_F(ForcedFallbackEnabled, StatsNotUpdatedForSimulcast) {
-  codec_info_.codecSpecific.VP8.simulcastIdx = 1;
+  encoded_image_.SetSpatialIndex(1);
   InsertEncodedFrames(kMinFrames, kFrameIntervalMs);
   statistics_proxy_.reset();
   EXPECT_EQ(0, metrics::NumSamples(kPrefix + "FallbackTimeInPercent.Vp8"));
diff --git a/video/video_receive_stream.cc b/video/video_receive_stream.cc
index 477bf40..20dc9fb 100644
--- a/video/video_receive_stream.cc
+++ b/video/video_receive_stream.cc
@@ -321,10 +321,6 @@
     const CodecSpecificInfo* codec_specific_info,
     const RTPFragmentationHeader* fragmentation) {
   stats_proxy_.OnPreDecode(encoded_image, codec_specific_info);
-  size_t simulcast_idx = 0;
-  if (codec_specific_info->codecType == kVideoCodecVP8) {
-    simulcast_idx = codec_specific_info->codecSpecific.VP8.simulcastIdx;
-  }
   {
     rtc::CritScope lock(&ivf_writer_lock_);
     if (ivf_writer_.get()) {
diff --git a/video/video_send_stream_impl.cc b/video/video_send_stream_impl.cc
index cf4bf87..822b951 100644
--- a/video/video_send_stream_impl.cc
+++ b/video/video_send_stream_impl.cc
@@ -522,11 +522,13 @@
   // Encoded is called on whatever thread the real encoder implementation run
   // on. In the case of hardware encoders, there might be several encoders
   // running in parallel on different threads.
-  size_t simulcast_idx = 0;
-  if (codec_specific_info->codecType == kVideoCodecVP8) {
-    simulcast_idx = codec_specific_info->codecSpecific.VP8.simulcastIdx;
-  }
+  const size_t simulcast_idx =
+      (codec_specific_info->codecType != kVideoCodecVP9)
+          ? encoded_image.SpatialIndex().value_or(0)
+          : 0;
   if (config_->post_encode_callback) {
+    // TODO(nisse): Delete webrtc::EncodedFrame class, pass EncodedImage
+    // instead.
     config_->post_encode_callback->EncodedFrameCallback(EncodedFrame(
         encoded_image._buffer, encoded_image._length, encoded_image._frameType,
         simulcast_idx, encoded_image.Timestamp()));
@@ -542,15 +544,10 @@
   EncodedImageCallback::Result result = rtp_video_sender_->OnEncodedImage(
       encoded_image, codec_specific_info, fragmentation);
 
-  RTC_DCHECK(codec_specific_info);
-
-  int layer = codec_specific_info->codecType == kVideoCodecVP8
-                  ? codec_specific_info->codecSpecific.VP8.simulcastIdx
-                  : 0;
   {
     rtc::CritScope lock(&ivf_writers_crit_);
-    if (file_writers_[layer].get()) {
-      bool ok = file_writers_[layer]->WriteFrame(
+    if (file_writers_[simulcast_idx].get()) {
+      bool ok = file_writers_[simulcast_idx]->WriteFrame(
           encoded_image, codec_specific_info->codecType);
       RTC_DCHECK(ok);
     }
diff --git a/video/video_send_stream_tests.cc b/video/video_send_stream_tests.cc
index 3351fec..88482e3 100644
--- a/video/video_send_stream_tests.cc
+++ b/video/video_send_stream_tests.cc
@@ -3055,10 +3055,10 @@
       encoded.capture_time_ms_ = input_image.render_time_ms();
 
       for (size_t i = 0; i < kNumStreams; ++i) {
-        specifics.codecSpecific.generic.simulcast_idx = static_cast<uint8_t>(i);
         encoded._frameType = (*frame_types)[i];
         encoded._encodedWidth = kEncodedResolution[i].width;
         encoded._encodedHeight = kEncodedResolution[i].height;
+        encoded.SetSpatialIndex(i);
         EncodedImageCallback* callback;
         {
           rtc::CritScope cs(&crit_sect_);