Reland "Copy video frames metadata between encoded and plain frames in one place"

Reland with fixes.

Currently, some video frame metadata, such as rotation and NTP
timestamps, is copied separately in every encoder and decoder. This CL
makes the copying happen in a single place on the send side and in a
single place on the receive side. This will make it easier to add new
metadata in the future.

Also, added some missing tests.

Original Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/133346

Bug: webrtc:10460
Change-Id: I98629589fa55ca1d74056033cf86faccfdf848cd
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/136582
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27930}
diff --git a/media/base/fake_video_renderer.cc b/media/base/fake_video_renderer.cc
index 64de624..4253ba4 100644
--- a/media/base/fake_video_renderer.cc
+++ b/media/base/fake_video_renderer.cc
@@ -28,6 +28,13 @@
   height_ = frame.height();
   rotation_ = frame.rotation();
   timestamp_us_ = frame.timestamp_us();
+  ntp_timestamp_ms_ = frame.ntp_time_ms();
+  color_space_ = frame.color_space();
+  frame_rendered_event_.Set();
+}
+
+bool FakeVideoRenderer::WaitForRenderedFrame(int64_t timeout_ms) {
+  return frame_rendered_event_.Wait(timeout_ms);
 }
 
 }  // namespace cricket
diff --git a/media/base/fake_video_renderer.h b/media/base/fake_video_renderer.h
index 171c0e3..e04bb3e 100644
--- a/media/base/fake_video_renderer.h
+++ b/media/base/fake_video_renderer.h
@@ -19,6 +19,7 @@
 #include "api/video/video_rotation.h"
 #include "api/video/video_sink_interface.h"
 #include "rtc_base/critical_section.h"
+#include "rtc_base/event.h"
 
 namespace cricket {
 
@@ -30,6 +31,7 @@
   void OnFrame(const webrtc::VideoFrame& frame) override;
 
   int errors() const { return errors_; }
+
   int width() const {
     rtc::CritScope cs(&crit_);
     return width_;
@@ -38,6 +40,7 @@
     rtc::CritScope cs(&crit_);
     return height_;
   }
+
   webrtc::VideoRotation rotation() const {
     rtc::CritScope cs(&crit_);
     return rotation_;
@@ -47,15 +50,29 @@
     rtc::CritScope cs(&crit_);
     return timestamp_us_;
   }
+
   int num_rendered_frames() const {
     rtc::CritScope cs(&crit_);
     return num_rendered_frames_;
   }
+
   bool black_frame() const {
     rtc::CritScope cs(&crit_);
     return black_frame_;
   }
 
+  int64_t ntp_time_ms() const {
+    rtc::CritScope cs(&crit_);
+    return ntp_timestamp_ms_;
+  }
+
+  absl::optional<webrtc::ColorSpace> color_space() const {
+    rtc::CritScope cs(&crit_);
+    return color_space_;
+  }
+
+  bool WaitForRenderedFrame(int64_t timeout_ms);
+
  private:
   static bool CheckFrameColorYuv(uint8_t y_min,
                                  uint8_t y_max,
@@ -116,8 +133,11 @@
   webrtc::VideoRotation rotation_ = webrtc::kVideoRotation_0;
   int64_t timestamp_us_ = 0;
   int num_rendered_frames_ = 0;
+  int64_t ntp_timestamp_ms_ = 0;
   bool black_frame_ = false;
   rtc::CriticalSection crit_;
+  rtc::Event frame_rendered_event_;
+  absl::optional<webrtc::ColorSpace> color_space_;
 };
 
 }  // namespace cricket
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index baa78f4..a6fa96c 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -781,6 +781,7 @@
       "decoding_state_unittest.cc",
       "fec_controller_unittest.cc",
       "frame_buffer2_unittest.cc",
+      "generic_decoder_unittest.cc",
       "h264_sprop_parameter_sets_unittest.cc",
       "h264_sps_pps_tracker_unittest.cc",
       "histogram_unittest.cc",
@@ -840,6 +841,7 @@
       "../../api:scoped_refptr",
       "../../api:simulcast_test_fixture_api",
       "../../api:videocodec_test_fixture_api",
+      "../../api/task_queue:default_task_queue_factory",
       "../../api/test/video:function_video_factory",
       "../../api/video:builtin_video_bitrate_allocator_factory",
       "../../api/video:video_bitrate_allocation",
@@ -850,6 +852,7 @@
       "../../api/video_codecs:video_codecs_api",
       "../../api/video_codecs:vp8_temporal_layers_factory",
       "../../common_video",
+      "../../common_video/test:utilities",
       "../../media:rtc_media_base",
       "../../rtc_base",
       "../../rtc_base:checks",
@@ -863,6 +866,7 @@
       "../../system_wrappers:event_wrapper",
       "../../system_wrappers:field_trial",
       "../../system_wrappers:metrics",
+      "../../test:fake_video_codecs",
       "../../test:field_trial",
       "../../test:fileutils",
       "../../test:test_common",
diff --git a/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc b/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc
index 3654ed5..4af0ebb 100644
--- a/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc
+++ b/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc
@@ -19,7 +19,6 @@
 #include "api/video_codecs/video_decoder.h"
 #include "api/video_codecs/video_encoder.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
-#include "common_video/test/utilities.h"
 #include "media/base/codec.h"
 #include "media/base/media_constants.h"
 #include "modules/video_coding/codecs/h264/include/h264.h"
@@ -49,17 +48,9 @@
 #ifdef WEBRTC_USE_H264
 #define MAYBE_EncodeDecode EncodeDecode
 #define MAYBE_DecodedQpEqualsEncodedQp DecodedQpEqualsEncodedQp
-#define MAYBE_EncodedColorSpaceEqualsInputColorSpace \
-  EncodedColorSpaceEqualsInputColorSpace
-#define MAYBE_DecodedColorSpaceEqualsEncodedColorSpace \
-  DecodedColorSpaceEqualsEncodedColorSpace
 #else
 #define MAYBE_EncodeDecode DISABLED_EncodeDecode
 #define MAYBE_DecodedQpEqualsEncodedQp DISABLED_DecodedQpEqualsEncodedQp
-#define MAYBE_EncodedColorSpaceEqualsInputColorSpace \
-  DISABLED_EncodedColorSpaceEqualsInputColorSpace
-#define MAYBE_DecodedColorSpaceEqualsEncodedColorSpace \
-  DISABLED_DecodedColorSpaceEqualsEncodedColorSpace
 #endif
 
 TEST_F(TestH264Impl, MAYBE_EncodeDecode) {
@@ -105,45 +96,4 @@
   EXPECT_EQ(encoded_frame.qp_, *decoded_qp);
 }
 
-TEST_F(TestH264Impl, MAYBE_EncodedColorSpaceEqualsInputColorSpace) {
-  VideoFrame* input_frame = NextInputFrame();
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_FALSE(encoded_frame.ColorSpace());
-
-  // Video frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
-  VideoFrame input_frame_w_color_space =
-      VideoFrame::Builder()
-          .set_video_frame_buffer(input_frame->video_frame_buffer())
-          .set_color_space(color_space)
-          .build();
-
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            encoder_->Encode(input_frame_w_color_space, nullptr));
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  ASSERT_TRUE(encoded_frame.ColorSpace());
-  EXPECT_EQ(*encoded_frame.ColorSpace(), color_space);
-}
-
-TEST_F(TestH264Impl, MAYBE_DecodedColorSpaceEqualsEncodedColorSpace) {
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            encoder_->Encode(*NextInputFrame(), nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  // Add color space to encoded frame.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
-  encoded_frame.SetColorSpace(color_space);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
-  std::unique_ptr<VideoFrame> decoded_frame;
-  absl::optional<uint8_t> decoded_qp;
-  ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
-  ASSERT_TRUE(decoded_frame);
-  ASSERT_TRUE(decoded_frame->color_space());
-  EXPECT_EQ(color_space, *decoded_frame->color_space());
-}
-
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
index a2597ef..eb6b89f 100644
--- a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
@@ -227,51 +227,10 @@
   EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
 
   EXPECT_EQ(kInitialTimestampRtp, encoded_frame.Timestamp());
-  EXPECT_EQ(kInitialTimestampMs, encoded_frame.capture_time_ms_);
   EXPECT_EQ(kWidth, static_cast<int>(encoded_frame._encodedWidth));
   EXPECT_EQ(kHeight, static_cast<int>(encoded_frame._encodedHeight));
 }
 
-// We only test the encoder here, since the decoded frame rotation is set based
-// on the CVO RTP header extension in VCMDecodedFrameCallback::Decoded.
-// TODO(brandtr): Consider passing through the rotation flag through the decoder
-// in the same way as done in the encoder.
-TEST_F(TestVp8Impl, EncodedRotationEqualsInputRotation) {
-  VideoFrame* input_frame = NextInputFrame();
-  input_frame->set_rotation(kVideoRotation_0);
-
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
-  EXPECT_EQ(kVideoRotation_0, encoded_frame.rotation_);
-
-  input_frame->set_rotation(kVideoRotation_90);
-  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
-  EXPECT_EQ(kVideoRotation_90, encoded_frame.rotation_);
-}
-
-TEST_F(TestVp8Impl, EncodedColorSpaceEqualsInputColorSpace) {
-  // Video frame without explicit color space information.
-  VideoFrame* input_frame = NextInputFrame();
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
-  EXPECT_FALSE(encoded_frame.ColorSpace());
-
-  // Video frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
-  VideoFrame input_frame_w_color_space =
-      VideoFrame::Builder()
-          .set_video_frame_buffer(input_frame->video_frame_buffer())
-          .set_color_space(color_space)
-          .build();
-
-  EncodeAndWaitForFrame(input_frame_w_color_space, &encoded_frame,
-                        &codec_specific_info);
-  ASSERT_TRUE(encoded_frame.ColorSpace());
-  EXPECT_EQ(*encoded_frame.ColorSpace(), color_space);
-}
-
 TEST_F(TestVp8Impl, DecodedQpEqualsEncodedQp) {
   VideoFrame* input_frame = NextInputFrame();
   EncodedImage encoded_frame;
@@ -290,24 +249,6 @@
   EXPECT_EQ(encoded_frame.qp_, *decoded_qp);
 }
 
-TEST_F(TestVp8Impl, DecodedColorSpaceEqualsEncodedColorSpace) {
-  VideoFrame* input_frame = NextInputFrame();
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
-
-  // Encoded frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
-  encoded_frame.SetColorSpace(color_space);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, -1));
-  std::unique_ptr<VideoFrame> decoded_frame;
-  absl::optional<uint8_t> decoded_qp;
-  ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
-  ASSERT_TRUE(decoded_frame);
-  ASSERT_TRUE(decoded_frame->color_space());
-  EXPECT_EQ(color_space, *decoded_frame->color_space());
-}
-
 TEST_F(TestVp8Impl, ChecksSimulcastSettings) {
   codec_settings_.numberOfSimulcastStreams = 2;
   // Resolutions are not in ascending order, temporal layers do not match.
@@ -402,7 +343,6 @@
   // Compute PSNR on all planes (faster than SSIM).
   EXPECT_GT(I420PSNR(input_frame, decoded_frame.get()), 36);
   EXPECT_EQ(kInitialTimestampRtp, decoded_frame->timestamp());
-  EXPECT_EQ(kTestNtpTimeMs, decoded_frame->ntp_time_ms());
 }
 
 #if defined(WEBRTC_ANDROID)
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 95ba266..309dac1 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -11,7 +11,6 @@
 #include "api/video/color_space.h"
 #include "api/video/i420_buffer.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
-#include "common_video/test/utilities.h"
 #include "media/base/vp9_profile.h"
 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
 #include "modules/video_coding/codecs/test/video_codec_unittest.h"
@@ -146,50 +145,7 @@
             color_space.chroma_siting_vertical());
 }
 
-// We only test the encoder here, since the decoded frame rotation is set based
-// on the CVO RTP header extension in VCMDecodedFrameCallback::Decoded.
-// TODO(brandtr): Consider passing through the rotation flag through the decoder
-// in the same way as done in the encoder.
-TEST_F(TestVp9Impl, EncodedRotationEqualsInputRotation) {
-  VideoFrame* input_frame = NextInputFrame();
-  input_frame->set_rotation(kVideoRotation_0);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_EQ(kVideoRotation_0, encoded_frame.rotation_);
-
-  input_frame = NextInputFrame();
-  input_frame->set_rotation(kVideoRotation_90);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_EQ(kVideoRotation_90, encoded_frame.rotation_);
-}
-
-TEST_F(TestVp9Impl, EncodedColorSpaceEqualsInputColorSpace) {
-  // Video frame without explicit color space information.
-  VideoFrame* input_frame = NextInputFrame();
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_FALSE(encoded_frame.ColorSpace());
-
-  // Video frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/true);
-  VideoFrame input_frame_w_hdr =
-      VideoFrame::Builder()
-          .set_video_frame_buffer(input_frame->video_frame_buffer())
-          .set_color_space(color_space)
-          .build();
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            encoder_->Encode(input_frame_w_hdr, nullptr));
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  ASSERT_TRUE(encoded_frame.ColorSpace());
-  EXPECT_EQ(*encoded_frame.ColorSpace(), color_space);
-}
-
-TEST_F(TestVp9Impl, DecodedColorSpaceEqualsEncodedColorSpace) {
+TEST_F(TestVp9Impl, DecodedColorSpaceFromBitstream) {
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->Encode(*NextInputFrame(), nullptr));
   EncodedImage encoded_frame;
@@ -206,15 +162,6 @@
   ASSERT_TRUE(decoded_frame->color_space());
   // No HDR metadata present.
   EXPECT_FALSE(decoded_frame->color_space()->hdr_metadata());
-
-  // Encoded frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/true);
-  encoded_frame.SetColorSpace(color_space);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
-  ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
-  ASSERT_TRUE(decoded_frame);
-  ASSERT_TRUE(decoded_frame->color_space());
-  EXPECT_EQ(color_space, *decoded_frame->color_space());
 }
 
 TEST_F(TestVp9Impl, DecodedQpEqualsEncodedQp) {
diff --git a/modules/video_coding/encoded_frame.h b/modules/video_coding/encoded_frame.h
index 94da40f..a6bb55b 100644
--- a/modules/video_coding/encoded_frame.h
+++ b/modules/video_coding/encoded_frame.h
@@ -52,8 +52,10 @@
     return static_cast<const webrtc::EncodedImage&>(*this);
   }
 
+  using EncodedImage::ColorSpace;
   using EncodedImage::data;
   using EncodedImage::set_size;
+  using EncodedImage::SetColorSpace;
   using EncodedImage::SetSpatialIndex;
   using EncodedImage::SetTimestamp;
   using EncodedImage::size;
diff --git a/modules/video_coding/generic_decoder.cc b/modules/video_coding/generic_decoder.cc
index cf986d6..cb6c819 100644
--- a/modules/video_coding/generic_decoder.cc
+++ b/modules/video_coding/generic_decoder.cc
@@ -80,6 +80,12 @@
     return;
   }
 
+  decodedImage.set_ntp_time_ms(frameInfo->ntp_time_ms);
+  if (frameInfo->color_space) {
+    decodedImage.set_color_space(frameInfo->color_space);
+  }
+  decodedImage.set_rotation(frameInfo->rotation);
+
   const int64_t now_ms = _clock->TimeInMilliseconds();
   if (!decode_time_ms) {
     decode_time_ms = now_ms - frameInfo->decodeStartTimeMs;
@@ -140,7 +146,6 @@
 
   decodedImage.set_timestamp_us(frameInfo->renderTimeMs *
                                 rtc::kNumMicrosecsPerMillisec);
-  decodedImage.set_rotation(frameInfo->rotation);
   _receiveCallback->FrameToRender(decodedImage, qp, frameInfo->content_type);
 }
 
@@ -199,6 +204,14 @@
   _frameInfos[_nextFrameInfoIdx].renderTimeMs = frame.RenderTimeMs();
   _frameInfos[_nextFrameInfoIdx].rotation = frame.rotation();
   _frameInfos[_nextFrameInfoIdx].timing = frame.video_timing();
+  _frameInfos[_nextFrameInfoIdx].ntp_time_ms =
+      frame.EncodedImage().ntp_time_ms_;
+  if (frame.ColorSpace()) {
+    _frameInfos[_nextFrameInfoIdx].color_space = *frame.ColorSpace();
+  } else {
+    _frameInfos[_nextFrameInfoIdx].color_space = absl::nullopt;
+  }
+
   // Set correctly only for key frames. Thus, use latest key frame
   // content type. If the corresponding key frame was lost, decode will fail
   // and content type will be ignored.
diff --git a/modules/video_coding/generic_decoder.h b/modules/video_coding/generic_decoder.h
index 36428fb..97336b1 100644
--- a/modules/video_coding/generic_decoder.h
+++ b/modules/video_coding/generic_decoder.h
@@ -34,6 +34,8 @@
   VideoRotation rotation;
   VideoContentType content_type;
   EncodedImage::Timing timing;
+  int64_t ntp_time_ms;
+  absl::optional<ColorSpace> color_space;
 };
 
 class VCMDecodedFrameCallback : public DecodedImageCallback {
diff --git a/modules/video_coding/generic_decoder_unittest.cc b/modules/video_coding/generic_decoder_unittest.cc
new file mode 100644
index 0000000..adc945c
--- /dev/null
+++ b/modules/video_coding/generic_decoder_unittest.cc
@@ -0,0 +1,126 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/generic_decoder.h"
+
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/task_queue/default_task_queue_factory.h"
+#include "common_video/test/utilities.h"
+#include "modules/video_coding/timing.h"
+#include "rtc_base/critical_section.h"
+#include "rtc_base/event.h"
+#include "system_wrappers/include/clock.h"
+#include "test/fake_decoder.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace video_coding {
+
+class ReceiveCallback : public VCMReceiveCallback {
+ public:
+  int32_t FrameToRender(VideoFrame& videoFrame,  // NOLINT
+                        absl::optional<uint8_t> qp,
+                        VideoContentType content_type) override {
+    {
+      rtc::CritScope cs(&lock_);
+      last_frame_ = videoFrame;
+    }
+    received_frame_event_.Set();
+    return 0;
+  }
+
+  absl::optional<VideoFrame> GetLastFrame() {
+    rtc::CritScope cs(&lock_);
+    return last_frame_;
+  }
+
+  absl::optional<VideoFrame> WaitForFrame(int64_t wait_ms) {
+    if (received_frame_event_.Wait(wait_ms)) {
+      rtc::CritScope cs(&lock_);
+      return last_frame_;
+    } else {
+      return absl::nullopt;
+    }
+  }
+
+ private:
+  rtc::CriticalSection lock_;
+  rtc::Event received_frame_event_;
+  absl::optional<VideoFrame> last_frame_ RTC_GUARDED_BY(lock_);
+};
+
+class GenericDecoderTest : public ::testing::Test {
+ protected:
+  GenericDecoderTest()
+      : clock_(0),
+        timing_(&clock_),
+        task_queue_factory_(CreateDefaultTaskQueueFactory()),
+        decoder_(task_queue_factory_.get()),
+        vcm_callback_(&timing_, &clock_),
+        generic_decoder_(&decoder_, /*isExternal=*/true) {}
+
+  void SetUp() override {
+    generic_decoder_.RegisterDecodeCompleteCallback(&vcm_callback_);
+    vcm_callback_.SetUserReceiveCallback(&user_callback_);
+    VideoCodec settings;
+    settings.codecType = kVideoCodecVP8;
+    settings.width = 10;
+    settings.height = 10;
+    generic_decoder_.InitDecode(&settings, /*numberOfCores=*/4);
+  }
+
+  SimulatedClock clock_;
+  VCMTiming timing_;
+  std::unique_ptr<TaskQueueFactory> task_queue_factory_;
+  webrtc::test::FakeDecoder decoder_;
+  VCMDecodedFrameCallback vcm_callback_;
+  VCMGenericDecoder generic_decoder_;
+  ReceiveCallback user_callback_;
+};
+
+TEST_F(GenericDecoderTest, PassesColorSpace) {
+  webrtc::ColorSpace color_space =
+      CreateTestColorSpace(/*with_hdr_metadata=*/true);
+  VCMEncodedFrame encoded_frame;
+  encoded_frame.SetColorSpace(color_space);
+  generic_decoder_.Decode(encoded_frame, clock_.TimeInMilliseconds());
+  absl::optional<VideoFrame> decoded_frame = user_callback_.WaitForFrame(10);
+  ASSERT_TRUE(decoded_frame.has_value());
+  absl::optional<webrtc::ColorSpace> decoded_color_space =
+      decoded_frame->color_space();
+  ASSERT_TRUE(decoded_color_space.has_value());
+  EXPECT_EQ(*decoded_color_space, color_space);
+}
+
+TEST_F(GenericDecoderTest, PassesColorSpaceForDelayedDecoders) {
+  webrtc::ColorSpace color_space =
+      CreateTestColorSpace(/*with_hdr_metadata=*/true);
+  decoder_.SetDelayedDecoding(100);
+
+  {
+    // Ensure the original frame is destroyed before the decoding is completed.
+    VCMEncodedFrame encoded_frame;
+    encoded_frame.SetColorSpace(color_space);
+    generic_decoder_.Decode(encoded_frame, clock_.TimeInMilliseconds());
+  }
+
+  absl::optional<VideoFrame> decoded_frame = user_callback_.WaitForFrame(200);
+  ASSERT_TRUE(decoded_frame.has_value());
+  absl::optional<webrtc::ColorSpace> decoded_color_space =
+      decoded_frame->color_space();
+  ASSERT_TRUE(decoded_color_space.has_value());
+  EXPECT_EQ(*decoded_color_space, color_space);
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/test/fake_decoder.cc b/test/fake_decoder.cc
index 10ac851..c5ba231 100644
--- a/test/fake_decoder.cc
+++ b/test/fake_decoder.cc
@@ -12,6 +12,7 @@
 
 #include <string.h>
 
+#include "absl/memory/memory.h"
 #include "api/scoped_refptr.h"
 #include "api/video/i420_buffer.h"
 #include "api/video/video_frame.h"
@@ -19,6 +20,7 @@
 #include "api/video/video_rotation.h"
 #include "modules/video_coding/include/video_error_codes.h"
 #include "rtc_base/checks.h"
+#include "rtc_base/task_queue.h"
 #include "rtc_base/time_utils.h"
 
 namespace webrtc {
@@ -29,8 +31,14 @@
 const int kDefaultHeight = 180;
 }  // namespace
 
-FakeDecoder::FakeDecoder()
-    : callback_(NULL), width_(kDefaultWidth), height_(kDefaultHeight) {}
+FakeDecoder::FakeDecoder() : FakeDecoder(nullptr) {}
+
+FakeDecoder::FakeDecoder(TaskQueueFactory* task_queue_factory)
+    : callback_(nullptr),
+      width_(kDefaultWidth),
+      height_(kDefaultHeight),
+      task_queue_factory_(task_queue_factory),
+      decode_delay_ms_(0) {}
 
 int32_t FakeDecoder::InitDecode(const VideoCodec* config,
                                 int32_t number_of_cores) {
@@ -45,20 +53,40 @@
     height_ = input._encodedHeight;
   }
 
-  VideoFrame frame =
-      VideoFrame::Builder()
-          .set_video_frame_buffer(I420Buffer::Create(width_, height_))
-          .set_rotation(webrtc::kVideoRotation_0)
-          .set_timestamp_ms(render_time_ms)
-          .build();
+  rtc::scoped_refptr<I420Buffer> buffer = I420Buffer::Create(width_, height_);
+  I420Buffer::SetBlack(buffer);
+  VideoFrame frame = VideoFrame::Builder()
+                         .set_video_frame_buffer(buffer)
+                         .set_rotation(webrtc::kVideoRotation_0)
+                         .set_timestamp_ms(render_time_ms)
+                         .build();
   frame.set_timestamp(input.Timestamp());
   frame.set_ntp_time_ms(input.ntp_time_ms_);
 
-  callback_->Decoded(frame);
+  if (decode_delay_ms_ == 0 || !task_queue_) {
+    callback_->Decoded(frame);
+  } else {
+    task_queue_->PostDelayedTask(
+        [frame, this]() {
+          VideoFrame copy = frame;
+          callback_->Decoded(copy);
+        },
+        decode_delay_ms_);
+  }
 
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
+void FakeDecoder::SetDelayedDecoding(int decode_delay_ms) {
+  RTC_CHECK(task_queue_factory_);
+  if (!task_queue_) {
+    task_queue_ =
+        absl::make_unique<rtc::TaskQueue>(task_queue_factory_->CreateTaskQueue(
+            "fake_decoder", TaskQueueFactory::Priority::NORMAL));
+  }
+  decode_delay_ms_ = decode_delay_ms;
+}
+
 int32_t FakeDecoder::RegisterDecodeCompleteCallback(
     DecodedImageCallback* callback) {
   callback_ = callback;
diff --git a/test/fake_decoder.h b/test/fake_decoder.h
index 8fe7427..055c55b 100644
--- a/test/fake_decoder.h
+++ b/test/fake_decoder.h
@@ -13,10 +13,12 @@
 
 #include <stdint.h>
 
+#include "api/task_queue/task_queue_factory.h"
 #include "api/video/encoded_image.h"
 #include "api/video_codecs/video_codec.h"
 #include "api/video_codecs/video_decoder.h"
 #include "modules/video_coding/include/video_codec_interface.h"
+#include "rtc_base/task_queue.h"
 
 namespace webrtc {
 namespace test {
@@ -24,6 +26,7 @@
 class FakeDecoder : public VideoDecoder {
  public:
   FakeDecoder();
+  explicit FakeDecoder(TaskQueueFactory* task_queue_factory);
   virtual ~FakeDecoder() {}
 
   int32_t InitDecode(const VideoCodec* config,
@@ -42,10 +45,15 @@
 
   static const char* kImplementationName;
 
+  void SetDelayedDecoding(int decode_delay_ms);
+
  private:
   DecodedImageCallback* callback_;
   int width_;
   int height_;
+  std::unique_ptr<rtc::TaskQueue> task_queue_;
+  TaskQueueFactory* task_queue_factory_;
+  int decode_delay_ms_;
 };
 
 class FakeH264Decoder : public FakeDecoder {
diff --git a/video/BUILD.gn b/video/BUILD.gn
index 02464d5..c29ae55 100644
--- a/video/BUILD.gn
+++ b/video/BUILD.gn
@@ -171,8 +171,8 @@
     "encoder_bitrate_adjuster.h",
     "encoder_overshoot_detector.cc",
     "encoder_overshoot_detector.h",
-    "frame_encode_timer.cc",
-    "frame_encode_timer.h",
+    "frame_encode_metadata_writer.cc",
+    "frame_encode_metadata_writer.h",
     "overuse_frame_detector.cc",
     "overuse_frame_detector.h",
     "video_stream_encoder.cc",
@@ -492,7 +492,7 @@
       "end_to_end_tests/ssrc_tests.cc",
       "end_to_end_tests/stats_tests.cc",
       "end_to_end_tests/transport_feedback_tests.cc",
-      "frame_encode_timer_unittest.cc",
+      "frame_encode_metadata_writer_unittest.cc",
       "overuse_frame_detector_unittest.cc",
       "picture_id_tests.cc",
       "quality_scaling_tests.cc",
diff --git a/video/frame_encode_timer.cc b/video/frame_encode_metadata_writer.cc
similarity index 72%
rename from video/frame_encode_timer.cc
rename to video/frame_encode_metadata_writer.cc
index 42002f7..4b5fabb 100644
--- a/video/frame_encode_timer.cc
+++ b/video/frame_encode_metadata_writer.cc
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "video/frame_encode_timer.h"
+#include "video/frame_encode_metadata_writer.h"
 
 #include <algorithm>
 
@@ -23,29 +23,31 @@
 const int kThrottleRatio = 100000;
 }  // namespace
 
-FrameEncodeTimer::TimingFramesLayerInfo::TimingFramesLayerInfo() = default;
-FrameEncodeTimer::TimingFramesLayerInfo::~TimingFramesLayerInfo() = default;
+FrameEncodeMetadataWriter::TimingFramesLayerInfo::TimingFramesLayerInfo() =
+    default;
+FrameEncodeMetadataWriter::TimingFramesLayerInfo::~TimingFramesLayerInfo() =
+    default;
 
-FrameEncodeTimer::FrameEncodeTimer(EncodedImageCallback* frame_drop_callback)
+FrameEncodeMetadataWriter::FrameEncodeMetadataWriter(
+    EncodedImageCallback* frame_drop_callback)
     : frame_drop_callback_(frame_drop_callback),
       internal_source_(false),
       framerate_fps_(0),
       last_timing_frame_time_ms_(-1),
-      incorrect_capture_time_logged_messages_(0),
       reordered_frames_logged_messages_(0),
       stalled_encoder_logged_messages_(0) {
   codec_settings_.timing_frame_thresholds = {-1, 0};
 }
-FrameEncodeTimer::~FrameEncodeTimer() {}
+FrameEncodeMetadataWriter::~FrameEncodeMetadataWriter() {}
 
-void FrameEncodeTimer::OnEncoderInit(const VideoCodec& codec,
-                                     bool internal_source) {
+void FrameEncodeMetadataWriter::OnEncoderInit(const VideoCodec& codec,
+                                              bool internal_source) {
   rtc::CritScope cs(&lock_);
   codec_settings_ = codec;
   internal_source_ = internal_source;
 }
 
-void FrameEncodeTimer::OnSetRates(
+void FrameEncodeMetadataWriter::OnSetRates(
     const VideoBitrateAllocation& bitrate_allocation,
     uint32_t framerate_fps) {
   rtc::CritScope cs(&lock_);
@@ -60,8 +62,7 @@
   }
 }
 
-void FrameEncodeTimer::OnEncodeStarted(uint32_t rtp_timestamp,
-                                       int64_t capture_time_ms) {
+void FrameEncodeMetadataWriter::OnEncodeStarted(const VideoFrame& frame) {
   rtc::CritScope cs(&lock_);
   if (internal_source_) {
     return;
@@ -69,19 +70,24 @@
 
   const size_t num_spatial_layers = NumSpatialLayers();
   timing_frames_info_.resize(num_spatial_layers);
+  FrameMetadata metadata;
+  metadata.rtp_timestamp = frame.timestamp();
+  metadata.encode_start_time_ms = rtc::TimeMillis();
+  metadata.ntp_time_ms = frame.ntp_time_ms();
+  metadata.timestamp_us = frame.timestamp_us();
+  metadata.rotation = frame.rotation();
+  metadata.color_space = frame.color_space();
   for (size_t si = 0; si < num_spatial_layers; ++si) {
-    RTC_DCHECK(
-        timing_frames_info_[si].encode_start_list.empty() ||
-        rtc::TimeDiff(
-            capture_time_ms,
-            timing_frames_info_[si].encode_start_list.back().capture_time_ms) >=
-            0);
+    RTC_DCHECK(timing_frames_info_[si].frames.empty() ||
+               rtc::TimeDiff(
+                   frame.render_time_ms(),
+                   timing_frames_info_[si].frames.back().timestamp_us / 1000) >=
+                   0);
     // If stream is disabled due to low bandwidth OnEncodeStarted still will be
     // called and have to be ignored.
     if (timing_frames_info_[si].target_bitrate_bytes_per_sec == 0)
       return;
-    if (timing_frames_info_[si].encode_start_list.size() ==
-        kMaxEncodeStartTimeListSize) {
+    if (timing_frames_info_[si].frames.size() == kMaxEncodeStartTimeListSize) {
       ++stalled_encoder_logged_messages_;
       if (stalled_encoder_logged_messages_ <= kMessagesThrottlingThreshold ||
           stalled_encoder_logged_messages_ % kThrottleRatio == 0) {
@@ -95,25 +101,26 @@
       }
       frame_drop_callback_->OnDroppedFrame(
           EncodedImageCallback::DropReason::kDroppedByEncoder);
-      timing_frames_info_[si].encode_start_list.pop_front();
+      timing_frames_info_[si].frames.pop_front();
     }
-    timing_frames_info_[si].encode_start_list.emplace_back(
-        rtp_timestamp, capture_time_ms, rtc::TimeMillis());
+    timing_frames_info_[si].frames.emplace_back(metadata);
   }
 }
 
-void FrameEncodeTimer::FillTimingInfo(size_t simulcast_svc_idx,
-                                      EncodedImage* encoded_image,
-                                      int64_t encode_done_ms) {
+void FrameEncodeMetadataWriter::FillTimingInfo(size_t simulcast_svc_idx,
+                                               EncodedImage* encoded_image) {
   rtc::CritScope cs(&lock_);
   absl::optional<size_t> outlier_frame_size;
   absl::optional<int64_t> encode_start_ms;
   uint8_t timing_flags = VideoSendTiming::kNotTriggered;
 
+  int64_t encode_done_ms = rtc::TimeMillis();
+
   // Encoders with internal sources do not call OnEncodeStarted
   // |timing_frames_info_| may be not filled here.
   if (!internal_source_) {
-    encode_start_ms = ExtractEncodeStartTime(simulcast_svc_idx, encoded_image);
+    encode_start_ms =
+        ExtractEncodeStartTimeAndFillMetadata(simulcast_svc_idx, encoded_image);
   }
 
   if (timing_frames_info_.size() > simulcast_svc_idx) {
@@ -176,7 +183,7 @@
   }
 }
 
-void FrameEncodeTimer::Reset() {
+void FrameEncodeMetadataWriter::Reset() {
   rtc::CritScope cs(&lock_);
   timing_frames_info_.clear();
   last_timing_frame_time_ms_ = -1;
@@ -184,48 +191,40 @@
   stalled_encoder_logged_messages_ = 0;
 }
 
-absl::optional<int64_t> FrameEncodeTimer::ExtractEncodeStartTime(
+absl::optional<int64_t>
+FrameEncodeMetadataWriter::ExtractEncodeStartTimeAndFillMetadata(
     size_t simulcast_svc_idx,
     EncodedImage* encoded_image) {
   absl::optional<int64_t> result;
   size_t num_simulcast_svc_streams = timing_frames_info_.size();
   if (simulcast_svc_idx < num_simulcast_svc_streams) {
-    auto encode_start_list =
-        &timing_frames_info_[simulcast_svc_idx].encode_start_list;
+    auto metadata_list = &timing_frames_info_[simulcast_svc_idx].frames;
     // Skip frames for which there was OnEncodeStarted but no OnEncodedImage
     // call. These are dropped by encoder internally.
     // Because some hardware encoders don't preserve capture timestamp we
     // use RTP timestamps here.
-    while (!encode_start_list->empty() &&
+    while (!metadata_list->empty() &&
            IsNewerTimestamp(encoded_image->Timestamp(),
-                            encode_start_list->front().rtp_timestamp)) {
+                            metadata_list->front().rtp_timestamp)) {
       frame_drop_callback_->OnDroppedFrame(
           EncodedImageCallback::DropReason::kDroppedByEncoder);
-      encode_start_list->pop_front();
+      metadata_list->pop_front();
     }
-    if (!encode_start_list->empty() &&
-        encode_start_list->front().rtp_timestamp ==
-            encoded_image->Timestamp()) {
-      result.emplace(encode_start_list->front().encode_start_time_ms);
-      if (encoded_image->capture_time_ms_ !=
-          encode_start_list->front().capture_time_ms) {
-        // Force correct capture timestamp.
-        encoded_image->capture_time_ms_ =
-            encode_start_list->front().capture_time_ms;
-        ++incorrect_capture_time_logged_messages_;
-        if (incorrect_capture_time_logged_messages_ <=
-                kMessagesThrottlingThreshold ||
-            incorrect_capture_time_logged_messages_ % kThrottleRatio == 0) {
-          RTC_LOG(LS_WARNING)
-              << "Encoder is not preserving capture timestamps.";
-          if (incorrect_capture_time_logged_messages_ ==
-              kMessagesThrottlingThreshold) {
-            RTC_LOG(LS_WARNING) << "Too many log messages. Further incorrect "
-                                   "timestamps warnings will be throttled.";
-          }
-        }
-      }
-      encode_start_list->pop_front();
+    if (!metadata_list->empty() &&
+        metadata_list->front().rtp_timestamp == encoded_image->Timestamp()) {
+      result.emplace(metadata_list->front().encode_start_time_ms);
+
+      encoded_image->capture_time_ms_ =
+          metadata_list->front().timestamp_us / 1000;
+      encoded_image->ntp_time_ms_ = metadata_list->front().ntp_time_ms;
+      encoded_image->rotation_ = metadata_list->front().rotation;
+      encoded_image->SetColorSpace(metadata_list->front().color_space);
+      encoded_image->content_type_ =
+          (codec_settings_.mode == VideoCodecMode::kScreensharing)
+              ? VideoContentType::SCREENSHARE
+              : VideoContentType::UNSPECIFIED;
+
+      metadata_list->pop_front();
     } else {
       ++reordered_frames_logged_messages_;
       if (reordered_frames_logged_messages_ <= kMessagesThrottlingThreshold ||
@@ -243,7 +242,7 @@
   return result;
 }
 
-size_t FrameEncodeTimer::NumSpatialLayers() const {
+size_t FrameEncodeMetadataWriter::NumSpatialLayers() const {
   size_t num_spatial_layers = codec_settings_.numberOfSimulcastStreams;
   if (codec_settings_.codecType == kVideoCodecVP9) {
     num_spatial_layers = std::max(
diff --git a/video/frame_encode_timer.h b/video/frame_encode_metadata_writer.h
similarity index 63%
rename from video/frame_encode_timer.h
rename to video/frame_encode_metadata_writer.h
index f92a33b..c1ffcd9 100644
--- a/video/frame_encode_timer.h
+++ b/video/frame_encode_metadata_writer.h
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef VIDEO_FRAME_ENCODE_TIMER_H_
-#define VIDEO_FRAME_ENCODE_TIMER_H_
+#ifndef VIDEO_FRAME_ENCODE_METADATA_WRITER_H_
+#define VIDEO_FRAME_ENCODE_METADATA_WRITER_H_
 
 #include <list>
 #include <vector>
@@ -22,20 +22,18 @@
 
 namespace webrtc {
 
-class FrameEncodeTimer {
+class FrameEncodeMetadataWriter {
  public:
-  explicit FrameEncodeTimer(EncodedImageCallback* frame_drop_callback);
-  ~FrameEncodeTimer();
+  explicit FrameEncodeMetadataWriter(EncodedImageCallback* frame_drop_callback);
+  ~FrameEncodeMetadataWriter();
 
   void OnEncoderInit(const VideoCodec& codec, bool internal_source);
   void OnSetRates(const VideoBitrateAllocation& bitrate_allocation,
                   uint32_t framerate_fps);
 
-  void OnEncodeStarted(uint32_t rtp_timestamp, int64_t capture_time_ms);
+  void OnEncodeStarted(const VideoFrame& frame);
 
-  void FillTimingInfo(size_t simulcast_svc_idx,
-                      EncodedImage* encoded_image,
-                      int64_t encode_done_ms);
+  void FillTimingInfo(size_t simulcast_svc_idx, EncodedImage* encoded_image);
   void Reset();
 
  private:
@@ -43,26 +41,23 @@
 
   // For non-internal-source encoders, returns encode started time and fixes
   // capture timestamp for the frame, if corrupted by the encoder.
-  absl::optional<int64_t> ExtractEncodeStartTime(size_t simulcast_svc_idx,
-                                                 EncodedImage* encoded_image)
-      RTC_EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  absl::optional<int64_t> ExtractEncodeStartTimeAndFillMetadata(
+      size_t simulcast_svc_idx,
+      EncodedImage* encoded_image) RTC_EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  struct EncodeStartTimeRecord {
-    EncodeStartTimeRecord(uint32_t timestamp,
-                          int64_t capture_time,
-                          int64_t encode_start_time)
-        : rtp_timestamp(timestamp),
-          capture_time_ms(capture_time),
-          encode_start_time_ms(encode_start_time) {}
+  struct FrameMetadata {
     uint32_t rtp_timestamp;
-    int64_t capture_time_ms;
     int64_t encode_start_time_ms;
+    int64_t ntp_time_ms = 0;
+    int64_t timestamp_us = 0;
+    VideoRotation rotation = kVideoRotation_0;
+    absl::optional<ColorSpace> color_space;
   };
   struct TimingFramesLayerInfo {
     TimingFramesLayerInfo();
     ~TimingFramesLayerInfo();
     size_t target_bitrate_bytes_per_sec = 0;
-    std::list<EncodeStartTimeRecord> encode_start_list;
+    std::list<FrameMetadata> frames;
   };
 
   rtc::CriticalSection lock_;
@@ -74,11 +69,10 @@
   // Separate instance for each simulcast stream or spatial layer.
   std::vector<TimingFramesLayerInfo> timing_frames_info_ RTC_GUARDED_BY(&lock_);
   int64_t last_timing_frame_time_ms_ RTC_GUARDED_BY(&lock_);
-  size_t incorrect_capture_time_logged_messages_ RTC_GUARDED_BY(&lock_);
   size_t reordered_frames_logged_messages_ RTC_GUARDED_BY(&lock_);
   size_t stalled_encoder_logged_messages_ RTC_GUARDED_BY(&lock_);
 };
 
 }  // namespace webrtc
 
-#endif  // VIDEO_FRAME_ENCODE_TIMER_H_
+#endif  // VIDEO_FRAME_ENCODE_METADATA_WRITER_H_
diff --git a/video/frame_encode_timer_unittest.cc b/video/frame_encode_metadata_writer_unittest.cc
similarity index 64%
rename from video/frame_encode_timer_unittest.cc
rename to video/frame_encode_metadata_writer_unittest.cc
index 12eb136..dcb870b 100644
--- a/video/frame_encode_timer_unittest.cc
+++ b/video/frame_encode_metadata_writer_unittest.cc
@@ -8,18 +8,25 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include "video/frame_encode_metadata_writer.h"
+
 #include <cstddef>
 #include <vector>
 
+#include "api/video/i420_buffer.h"
+#include "api/video/video_frame.h"
 #include "api/video/video_timing.h"
+#include "common_video/test/utilities.h"
 #include "modules/video_coding/include/video_coding_defines.h"
 #include "rtc_base/time_utils.h"
 #include "test/gtest.h"
-#include "video/frame_encode_timer.h"
 
 namespace webrtc {
 namespace test {
 namespace {
+
+const rtc::scoped_refptr<I420Buffer> kFrameBuffer = I420Buffer::Create(4, 4);
+
 inline size_t FrameSize(const size_t& min_frame_size,
                         const size_t& max_frame_size,
                         const int& s,
@@ -65,7 +72,7 @@
     const int num_streams,
     const int num_frames) {
   FakeEncodedImageCallback sink;
-  FrameEncodeTimer encode_timer(&sink);
+  FrameEncodeMetadataWriter encode_timer(&sink);
   VideoCodec codec_settings;
   codec_settings.numberOfSimulcastStreams = num_streams;
   codec_settings.timing_frame_thresholds = {delay_ms,
@@ -83,8 +90,12 @@
   int64_t current_timestamp = 0;
   for (int i = 0; i < num_frames; ++i) {
     current_timestamp += 1;
-    encode_timer.OnEncodeStarted(static_cast<uint32_t>(current_timestamp * 90),
-                                 current_timestamp);
+    VideoFrame frame = VideoFrame::Builder()
+                           .set_timestamp_rtp(current_timestamp * 90)
+                           .set_timestamp_ms(current_timestamp)
+                           .set_video_frame_buffer(kFrameBuffer)
+                           .build();
+    encode_timer.OnEncodeStarted(frame);
     for (int si = 0; si < num_streams; ++si) {
       // every (5+s)-th frame is dropped on s-th stream by design.
       bool dropped = i % (5 + si) == 0;
@@ -101,7 +112,7 @@
         continue;
       }
 
-      encode_timer.FillTimingInfo(si, &image, current_timestamp);
+      encode_timer.FillTimingInfo(si, &image);
 
       if (IsTimingFrame(image)) {
         result[si].push_back(FrameType::kTiming);
@@ -190,7 +201,7 @@
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
 
   FakeEncodedImageCallback sink;
-  FrameEncodeTimer encode_timer(&sink);
+  FrameEncodeMetadataWriter encode_timer(&sink);
   VideoCodec codec_settings;
   // Make all frames timing frames.
   codec_settings.timing_frame_thresholds.delay_ms = 1;
@@ -200,16 +211,20 @@
   encode_timer.OnSetRates(bitrate_allocation, 30);
 
   // Verify a single frame works with encode start time set.
-  encode_timer.OnEncodeStarted(static_cast<uint32_t>(timestamp * 90),
-                               timestamp);
-  encode_timer.FillTimingInfo(0, &image, timestamp);
+  VideoFrame frame = VideoFrame::Builder()
+                         .set_timestamp_ms(timestamp)
+                         .set_timestamp_rtp(timestamp * 90)
+                         .set_video_frame_buffer(kFrameBuffer)
+                         .build();
+  encode_timer.OnEncodeStarted(frame);
+  encode_timer.FillTimingInfo(0, &image);
   EXPECT_TRUE(IsTimingFrame(image));
 
   // New frame, now skip OnEncodeStarted. Should not result in timing frame.
   image.capture_time_ms_ = ++timestamp;
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
   image.timing_ = EncodedImage::Timing();
-  encode_timer.FillTimingInfo(0, &image, timestamp);
+  encode_timer.FillTimingInfo(0, &image);
   EXPECT_FALSE(IsTimingFrame(image));
 }
 
@@ -226,7 +241,7 @@
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
 
   FakeEncodedImageCallback sink;
-  FrameEncodeTimer encode_timer(&sink);
+  FrameEncodeMetadataWriter encode_timer(&sink);
 
   VideoCodec codec_settings;
   // Make all frames timing frames.
@@ -238,7 +253,7 @@
   encode_timer.OnSetRates(bitrate_allocation, 30);
 
   // Verify a single frame without encode timestamps isn't a timing frame.
-  encode_timer.FillTimingInfo(0, &image, timestamp);
+  encode_timer.FillTimingInfo(0, &image);
   EXPECT_FALSE(IsTimingFrame(image));
 
   // New frame, but this time with encode timestamps set in timing_.
@@ -248,14 +263,14 @@
   image.timing_ = EncodedImage::Timing();
   image.timing_.encode_start_ms = timestamp + kEncodeStartDelayMs;
   image.timing_.encode_finish_ms = timestamp + kEncodeFinishDelayMs;
-  const int64_t kEncodeDoneTimestamp = 1234567;
-  encode_timer.FillTimingInfo(0, &image, kEncodeDoneTimestamp);
+
+  encode_timer.FillTimingInfo(0, &image);
   EXPECT_TRUE(IsTimingFrame(image));
 
   // Frame is captured kEncodeFinishDelayMs before it's encoded, so restored
   // capture timestamp should be kEncodeFinishDelayMs in the past.
-  EXPECT_EQ(image.capture_time_ms_,
-            kEncodeDoneTimestamp - kEncodeFinishDelayMs);
+  EXPECT_NEAR(image.capture_time_ms_, rtc::TimeMillis() - kEncodeFinishDelayMs,
+              1);
 }
 
 TEST(FrameEncodeTimerTest, NotifiesAboutDroppedFrames) {
@@ -265,7 +280,7 @@
   const int64_t kTimestampMs4 = 47721870;
 
   FakeEncodedImageCallback sink;
-  FrameEncodeTimer encode_timer(&sink);
+  FrameEncodeMetadataWriter encode_timer(&sink);
   encode_timer.OnEncoderInit(VideoCodec(), false);
   // Any non-zero bitrate needed to be set before the first frame.
   VideoBitrateAllocation bitrate_allocation;
@@ -273,17 +288,27 @@
   encode_timer.OnSetRates(bitrate_allocation, 30);
 
   EncodedImage image;
+  VideoFrame frame = VideoFrame::Builder()
+                         .set_timestamp_rtp(kTimestampMs1 * 90)
+                         .set_timestamp_ms(kTimestampMs1)
+                         .set_video_frame_buffer(kFrameBuffer)
+                         .build();
+
   image.capture_time_ms_ = kTimestampMs1;
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
-  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
+  frame.set_timestamp(image.capture_time_ms_ * 90);
+  frame.set_timestamp_us(image.capture_time_ms_ * 1000);
+  encode_timer.OnEncodeStarted(frame);
 
   EXPECT_EQ(0u, sink.GetNumFramesDropped());
-  encode_timer.FillTimingInfo(0, &image, kTimestampMs1);
+  encode_timer.FillTimingInfo(0, &image);
 
   image.capture_time_ms_ = kTimestampMs2;
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
   image.timing_ = EncodedImage::Timing();
-  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
+  frame.set_timestamp(image.capture_time_ms_ * 90);
+  frame.set_timestamp_us(image.capture_time_ms_ * 1000);
+  encode_timer.OnEncodeStarted(frame);
   // No OnEncodedImageCall for timestamp2. Yet, at this moment it's not known
   // that frame with timestamp2 was dropped.
   EXPECT_EQ(0u, sink.GetNumFramesDropped());
@@ -291,15 +316,19 @@
   image.capture_time_ms_ = kTimestampMs3;
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
   image.timing_ = EncodedImage::Timing();
-  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
-  encode_timer.FillTimingInfo(0, &image, kTimestampMs3);
+  frame.set_timestamp(image.capture_time_ms_ * 90);
+  frame.set_timestamp_us(image.capture_time_ms_ * 1000);
+  encode_timer.OnEncodeStarted(frame);
+  encode_timer.FillTimingInfo(0, &image);
   EXPECT_EQ(1u, sink.GetNumFramesDropped());
 
   image.capture_time_ms_ = kTimestampMs4;
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
   image.timing_ = EncodedImage::Timing();
-  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
-  encode_timer.FillTimingInfo(0, &image, kTimestampMs4);
+  frame.set_timestamp(image.capture_time_ms_ * 90);
+  frame.set_timestamp_us(image.capture_time_ms_ * 1000);
+  encode_timer.OnEncodeStarted(frame);
+  encode_timer.FillTimingInfo(0, &image);
   EXPECT_EQ(1u, sink.GetNumFramesDropped());
 }
 
@@ -308,7 +337,7 @@
   const int64_t kTimestampMs = 123456;
   FakeEncodedImageCallback sink;
 
-  FrameEncodeTimer encode_timer(&sink);
+  FrameEncodeMetadataWriter encode_timer(&sink);
   encode_timer.OnEncoderInit(VideoCodec(), false);
   // Any non-zero bitrate needed to be set before the first frame.
   VideoBitrateAllocation bitrate_allocation;
@@ -317,11 +346,93 @@
 
   image.capture_time_ms_ = kTimestampMs;  // Correct timestamp.
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
-  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
+  VideoFrame frame = VideoFrame::Builder()
+                         .set_timestamp_ms(image.capture_time_ms_)
+                         .set_timestamp_rtp(image.capture_time_ms_ * 90)
+                         .set_video_frame_buffer(kFrameBuffer)
+                         .build();
+  encode_timer.OnEncodeStarted(frame);
   image.capture_time_ms_ = 0;  // Incorrect timestamp.
-  encode_timer.FillTimingInfo(0, &image, kTimestampMs);
+  encode_timer.FillTimingInfo(0, &image);
   EXPECT_EQ(kTimestampMs, image.capture_time_ms_);
 }
 
+TEST(FrameEncodeTimerTest, CopiesRotation) {
+  // Rotation set on the input frame must show up on the encoded image.
+  const int64_t kTimestampMs = 123456;
+  FakeEncodedImageCallback drop_sink;
+  FrameEncodeMetadataWriter metadata_writer(&drop_sink);
+  metadata_writer.OnEncoderInit(VideoCodec(), /*internal_source=*/false);
+  // A non-zero bitrate has to be configured before the first frame.
+  VideoBitrateAllocation allocation;
+  allocation.SetBitrate(0, 0, 500000);
+  metadata_writer.OnSetRates(allocation, 30);
+
+  const VideoFrame frame = VideoFrame::Builder()
+                               .set_timestamp_ms(kTimestampMs)
+                               .set_timestamp_rtp(kTimestampMs * 90)
+                               .set_rotation(kVideoRotation_180)
+                               .set_video_frame_buffer(kFrameBuffer)
+                               .build();
+  EncodedImage encoded_image;
+  encoded_image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
+  metadata_writer.OnEncodeStarted(frame);
+  metadata_writer.FillTimingInfo(0, &encoded_image);
+  EXPECT_EQ(kVideoRotation_180, encoded_image.rotation_);
+}
+
+TEST(FrameEncodeTimerTest, SetsContentType) {
+  EncodedImage image;
+  const int64_t kTimestampMs = 123456;
+  FakeEncodedImageCallback sink;
+
+  FrameEncodeMetadataWriter encode_timer(&sink);
+  // Content type is derived from the codec mode, not from the input frame.
+  VideoCodec codec;
+  codec.mode = VideoCodecMode::kScreensharing;
+  encode_timer.OnEncoderInit(codec, false);
+  // A non-zero bitrate must be set before the first frame.
+  VideoBitrateAllocation bitrate_allocation;
+  bitrate_allocation.SetBitrate(0, 0, 500000);
+  encode_timer.OnSetRates(bitrate_allocation, 30);
+
+  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
+  VideoFrame frame = VideoFrame::Builder()
+                         .set_timestamp_ms(kTimestampMs)
+                         .set_timestamp_rtp(kTimestampMs * 90)
+                         .set_video_frame_buffer(kFrameBuffer)
+                         .build();
+  encode_timer.OnEncodeStarted(frame);
+  encode_timer.FillTimingInfo(0, &image);
+  EXPECT_EQ(VideoContentType::SCREENSHARE, image.content_type_);
+}
+
+TEST(FrameEncodeTimerTest, CopiesColorSpace) {
+  // Color space set on the input frame must show up on the encoded image.
+  const int64_t kTimestampMs = 123456;
+  const webrtc::ColorSpace expected_color_space =
+      CreateTestColorSpace(/*with_hdr_metadata=*/true);
+  FakeEncodedImageCallback drop_sink;
+  FrameEncodeMetadataWriter metadata_writer(&drop_sink);
+  metadata_writer.OnEncoderInit(VideoCodec(), /*internal_source=*/false);
+  // A non-zero bitrate has to be configured before the first frame.
+  VideoBitrateAllocation allocation;
+  allocation.SetBitrate(0, 0, 500000);
+  metadata_writer.OnSetRates(allocation, 30);
+
+  const VideoFrame frame = VideoFrame::Builder()
+                               .set_timestamp_ms(kTimestampMs)
+                               .set_timestamp_rtp(kTimestampMs * 90)
+                               .set_color_space(expected_color_space)
+                               .set_video_frame_buffer(kFrameBuffer)
+                               .build();
+  EncodedImage encoded_image;
+  encoded_image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
+  metadata_writer.OnEncodeStarted(frame);
+  metadata_writer.FillTimingInfo(0, &encoded_image);
+  ASSERT_NE(encoded_image.ColorSpace(), nullptr);
+  EXPECT_EQ(expected_color_space, *encoded_image.ColorSpace());
+}
+
 }  // namespace test
 }  // namespace webrtc
diff --git a/video/video_receive_stream_unittest.cc b/video/video_receive_stream_unittest.cc
index 2d29ef7..fe4c292 100644
--- a/video/video_receive_stream_unittest.cc
+++ b/video/video_receive_stream_unittest.cc
@@ -14,9 +14,12 @@
 #include "test/gmock.h"
 #include "test/gtest.h"
 
+#include "absl/memory/memory.h"
 #include "api/task_queue/default_task_queue_factory.h"
+#include "api/test/video/function_video_decoder_factory.h"
 #include "api/video_codecs/video_decoder.h"
 #include "call/rtp_stream_receiver_controller.h"
+#include "common_video/test/utilities.h"
 #include "media/base/fake_video_renderer.h"
 #include "modules/pacing/packet_router.h"
 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
@@ -24,6 +27,7 @@
 #include "rtc_base/critical_section.h"
 #include "rtc_base/event.h"
 #include "system_wrappers/include/clock.h"
+#include "test/fake_decoder.h"
 #include "test/field_trial.h"
 #include "test/video_decoder_proxy_factory.h"
 #include "video/call_stats.h"
@@ -62,6 +66,12 @@
 
 class FrameObjectFake : public video_coding::EncodedFrame {
  public:
+  void SetPayloadType(uint8_t payload_type) { _payloadType = payload_type; }
+
+  void SetRotation(const VideoRotation& rotation) { rotation_ = rotation; }
+
+  void SetNtpTime(int64_t ntp_time_ms) { ntp_time_ms_ = ntp_time_ms; }
+
   int64_t ReceivedTime() const override { return 0; }
 
   int64_t RenderTime() const override { return _renderTimeMs; }
@@ -100,10 +110,11 @@
     Clock* clock = Clock::GetRealTimeClock();
     timing_ = new VCMTiming(clock);
 
-    video_receive_stream_.reset(new webrtc::internal::VideoReceiveStream(
-        task_queue_factory_.get(), &rtp_stream_receiver_controller_,
-        kDefaultNumCpuCores, &packet_router_, config_.Copy(),
-        process_thread_.get(), &call_stats_, clock, timing_));
+    video_receive_stream_ =
+        absl::make_unique<webrtc::internal::VideoReceiveStream>(
+            task_queue_factory_.get(), &rtp_stream_receiver_controller_,
+            kDefaultNumCpuCores, &packet_router_, config_.Copy(),
+            process_thread_.get(), &call_stats_, clock, timing_);
   }
 
  protected:
@@ -212,4 +223,90 @@
   EXPECT_EQ(default_min_playout_latency, timing_->min_playout_delay());
 }
 
+// Receive-stream fixture using test::FakeDecoder and a FakeVideoRenderer.
+class VideoReceiveStreamTestWithFakeDecoder : public ::testing::Test {
+ public:
+  VideoReceiveStreamTestWithFakeDecoder()
+      : fake_decoder_factory_(
+            []() { return absl::make_unique<test::FakeDecoder>(); }),
+        process_thread_(ProcessThread::Create("TestThread")),
+        task_queue_factory_(CreateDefaultTaskQueueFactory()),
+        config_(&mock_transport_),
+        call_stats_(Clock::GetRealTimeClock(), process_thread_.get()) {}
+  void SetUp() override {
+    constexpr int kDefaultNumCpuCores = 2;
+    config_.rtp.remote_ssrc = 1111;
+    config_.rtp.local_ssrc = 2222;
+    config_.renderer = &fake_renderer_;  // Rendered frames land here.
+    VideoReceiveStream::Decoder fake_decoder;
+    fake_decoder.payload_type = 99;  // Tests tag their frames with 99 too.
+    fake_decoder.video_format = SdpVideoFormat("VP8");
+    fake_decoder.decoder_factory = &fake_decoder_factory_;
+    config_.decoders.push_back(fake_decoder);
+    Clock* clock = Clock::GetRealTimeClock();
+    timing_ = new VCMTiming(clock);
+
+    video_receive_stream_.reset(new webrtc::internal::VideoReceiveStream(
+        task_queue_factory_.get(), &rtp_stream_receiver_controller_,
+        kDefaultNumCpuCores, &packet_router_, config_.Copy(),
+        process_thread_.get(), &call_stats_, clock, timing_));
+  }
+
+ protected:
+  test::FunctionVideoDecoderFactory fake_decoder_factory_;
+  std::unique_ptr<ProcessThread> process_thread_;
+  const std::unique_ptr<TaskQueueFactory> task_queue_factory_;
+  VideoReceiveStream::Config config_;
+  CallStats call_stats_;
+  cricket::FakeVideoRenderer fake_renderer_;
+  MockTransport mock_transport_;
+  PacketRouter packet_router_;
+  RtpStreamReceiverController rtp_stream_receiver_controller_;
+  std::unique_ptr<webrtc::internal::VideoReceiveStream> video_receive_stream_;
+  VCMTiming* timing_;  // NOTE(review): never deleted here; confirm owner.
+};
+
+TEST_F(VideoReceiveStreamTestWithFakeDecoder, PassesNtpTime) {
+  // NTP time set on the encoded frame must reach the rendered frame.
+  const int64_t kNtpTimestamp = 12345;
+  auto encoded_frame = absl::make_unique<FrameObjectFake>();
+  encoded_frame->id.picture_id = 0;
+  encoded_frame->SetPayloadType(99);
+  encoded_frame->SetNtpTime(kNtpTimestamp);
+  video_receive_stream_->Start();
+  video_receive_stream_->OnCompleteFrame(std::move(encoded_frame));
+  EXPECT_TRUE(fake_renderer_.WaitForRenderedFrame(kDefaultTimeOutMs));
+  EXPECT_EQ(kNtpTimestamp, fake_renderer_.ntp_time_ms());
+}
+
+TEST_F(VideoReceiveStreamTestWithFakeDecoder, PassesRotation) {
+  // Rotation set on the encoded frame must reach the rendered frame.
+  const webrtc::VideoRotation kRotation = webrtc::kVideoRotation_180;
+  auto encoded_frame = absl::make_unique<FrameObjectFake>();
+  encoded_frame->id.picture_id = 0;
+  encoded_frame->SetPayloadType(99);
+  encoded_frame->SetRotation(kRotation);
+
+  video_receive_stream_->Start();
+  video_receive_stream_->OnCompleteFrame(std::move(encoded_frame));
+  EXPECT_TRUE(fake_renderer_.WaitForRenderedFrame(kDefaultTimeOutMs));
+  EXPECT_EQ(kRotation, fake_renderer_.rotation());
+}
+
+TEST_F(VideoReceiveStreamTestWithFakeDecoder, PassesColorSpace) {
+  // Color space set on the encoded frame must reach the rendered frame.
+  const webrtc::ColorSpace expected_color_space =
+      CreateTestColorSpace(/*with_hdr_metadata=*/true);
+  auto encoded_frame = absl::make_unique<FrameObjectFake>();
+  encoded_frame->id.picture_id = 0;
+  encoded_frame->SetPayloadType(99);
+  encoded_frame->SetColorSpace(expected_color_space);
+
+  video_receive_stream_->Start();
+  video_receive_stream_->OnCompleteFrame(std::move(encoded_frame));
+  EXPECT_TRUE(fake_renderer_.WaitForRenderedFrame(kDefaultTimeOutMs));
+  ASSERT_TRUE(fake_renderer_.color_space().has_value());
+  EXPECT_EQ(expected_color_space, *fake_renderer_.color_space());
+}
+
 }  // namespace webrtc
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index d935ea0..289a7b9 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -490,7 +490,7 @@
       input_framerate_(kFrameRateAvergingWindowSizeMs, 1000),
       pending_frame_drops_(0),
       next_frame_types_(1, VideoFrameType::kVideoFrameDelta),
-      frame_encoder_timer_(this),
+      frame_encode_metadata_writer_(this),
       experiment_groups_(GetExperimentGroups()),
       next_frame_id_(0),
       encoder_queue_(task_queue_factory->CreateTaskQueue(
@@ -745,10 +745,11 @@
     } else {
       encoder_initialized_ = true;
       encoder_->RegisterEncodeCompleteCallback(this);
-      frame_encoder_timer_.OnEncoderInit(send_codec_, HasInternalSource());
+      frame_encode_metadata_writer_.OnEncoderInit(send_codec_,
+                                                  HasInternalSource());
     }
 
-    frame_encoder_timer_.Reset();
+    frame_encode_metadata_writer_.Reset();
     last_encode_info_ms_ = absl::nullopt;
   }
 
@@ -1074,7 +1075,7 @@
 
   if (settings_changes) {
     encoder_->SetRates(rate_settings);
-    frame_encoder_timer_.OnSetRates(
+    frame_encode_metadata_writer_.OnSetRates(
         rate_settings.bitrate,
         static_cast<uint32_t>(rate_settings.framerate_fps + 0.5));
   }
@@ -1334,8 +1335,7 @@
   TRACE_EVENT1("webrtc", "VCMGenericEncoder::Encode", "timestamp",
                out_frame.timestamp());
 
-  frame_encoder_timer_.OnEncodeStarted(out_frame.timestamp(),
-                                       out_frame.render_time_ms());
+  frame_encode_metadata_writer_.OnEncodeStarted(out_frame);
 
   const int32_t encode_status = encoder_->Encode(out_frame, &next_frame_types_);
 
@@ -1405,9 +1405,7 @@
   const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
   EncodedImage image_copy(encoded_image);
 
-  frame_encoder_timer_.FillTimingInfo(
-      spatial_idx, &image_copy,
-      rtc::TimeMicros() / rtc::kNumMicrosecsPerMillisec);
+  frame_encode_metadata_writer_.FillTimingInfo(spatial_idx, &image_copy);
 
   // Piggyback ALR experiment group id and simulcast id into the content type.
   const uint8_t experiment_id =
diff --git a/video/video_stream_encoder.h b/video/video_stream_encoder.h
index 6d8ad45..8bc3dc4 100644
--- a/video/video_stream_encoder.h
+++ b/video/video_stream_encoder.h
@@ -37,7 +37,7 @@
 #include "rtc_base/synchronization/sequence_checker.h"
 #include "rtc_base/task_queue.h"
 #include "video/encoder_bitrate_adjuster.h"
-#include "video/frame_encode_timer.h"
+#include "video/frame_encode_metadata_writer.h"
 #include "video/overuse_frame_detector.h"
 
 namespace webrtc {
@@ -345,7 +345,7 @@
   // turn this into a simple bool |pending_keyframe_request_|.
   std::vector<VideoFrameType> next_frame_types_ RTC_GUARDED_BY(&encoder_queue_);
 
-  FrameEncodeTimer frame_encoder_timer_;
+  FrameEncodeMetadataWriter frame_encode_metadata_writer_;
 
   // Experiment groups parsed from field trials for realtime video ([0]) and
   // screenshare ([1]). 0 means no group specified. Positive values are