Add passkey to TransformableFrameInterface to prevent external impls

This makes the downcasts currently used in eg
modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc
safer.

Bug: webrtc:339815768
Change-Id: Ie6c7ab84666d399dbca4c95846b850aac5327f1a
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/350361
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Tony Herre <herre@google.com>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42325}
diff --git a/api/BUILD.gn b/api/BUILD.gn
index 464e1be..64028ad 100644
--- a/api/BUILD.gn
+++ b/api/BUILD.gn
@@ -393,7 +393,10 @@
 
 rtc_source_set("frame_transformer_interface") {
   visibility = [ "*" ]
-  sources = [ "frame_transformer_interface.h" ]
+  sources = [
+    "frame_transformer_interface.cc",
+    "frame_transformer_interface.h",
+  ]
   deps = [
     ":make_ref_counted",
     ":ref_count",
diff --git a/api/frame_transformer_interface.cc b/api/frame_transformer_interface.cc
new file mode 100644
index 0000000..88d4d19
--- /dev/null
+++ b/api/frame_transformer_interface.cc
@@ -0,0 +1,26 @@
+/*
+ *  Copyright 2024 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "api/frame_transformer_interface.h"
+
+namespace webrtc {
+
+TransformableFrameInterface::TransformableFrameInterface(
+    TransformableFrameInterface::Passkey) {}
+
+TransformableVideoFrameInterface::TransformableVideoFrameInterface(
+    TransformableFrameInterface::Passkey passkey)
+    : TransformableFrameInterface(passkey) {}
+
+TransformableAudioFrameInterface::TransformableAudioFrameInterface(
+    TransformableFrameInterface::Passkey passkey)
+    : TransformableFrameInterface(passkey) {}
+
+}  // namespace webrtc
diff --git a/api/frame_transformer_interface.h b/api/frame_transformer_interface.h
index 89356df..03f7b48 100644
--- a/api/frame_transformer_interface.h
+++ b/api/frame_transformer_interface.h
@@ -24,6 +24,18 @@
 // Owns the frame payload data.
 class TransformableFrameInterface {
  public:
+  // Only a known list of internal implementations of transformable frames are
+  // permitted to allow internal downcasting. This is enforced via the
+  // internally-constructable Passkey.
+  // TODO: bugs.webrtc.org/339815768 - Remove this passkey once the
+  // downcasts are removed.
+  class Passkey;
+  RTC_EXPORT explicit TransformableFrameInterface(Passkey);
+
+  TransformableFrameInterface(TransformableFrameInterface&&) = default;
+  TransformableFrameInterface& operator=(TransformableFrameInterface&&) =
+      default;
+
   virtual ~TransformableFrameInterface() = default;
 
   // Returns the frame payload data. The data is valid until the next non-const
@@ -58,6 +70,7 @@
 
 class TransformableVideoFrameInterface : public TransformableFrameInterface {
  public:
+  RTC_EXPORT explicit TransformableVideoFrameInterface(Passkey passkey);
   virtual ~TransformableVideoFrameInterface() = default;
   virtual bool IsKeyFrame() const = 0;
 
@@ -69,6 +82,7 @@
 // Extends the TransformableFrameInterface to expose audio-specific information.
 class TransformableAudioFrameInterface : public TransformableFrameInterface {
  public:
+  RTC_EXPORT explicit TransformableAudioFrameInterface(Passkey passkey);
   virtual ~TransformableAudioFrameInterface() = default;
 
   virtual rtc::ArrayView<const uint32_t> GetContributingSources() const = 0;
@@ -137,6 +151,27 @@
   // virtual AddOutgoingMediaType(RtpCodec codec) = 0;
 };
 
+//------------------------------------------------------------------------------
+// Implementation details follow
+//------------------------------------------------------------------------------
+class TransformableFrameInterface::Passkey {
+ public:
+  ~Passkey() = default;
+
+ private:
+  // Explicit list of allowed internal implmentations of
+  // TransformableFrameInterface.
+  friend class TransformableOutgoingAudioFrame;
+  friend class TransformableIncomingAudioFrame;
+  friend class TransformableVideoSenderFrame;
+  friend class TransformableVideoReceiverFrame;
+
+  friend class MockTransformableFrame;
+  friend class MockTransformableAudioFrame;
+  friend class MockTransformableVideoFrame;
+  Passkey() = default;
+};
+
 }  // namespace webrtc
 
 #endif  // API_FRAME_TRANSFORMER_INTERFACE_H_
diff --git a/api/test/mock_transformable_audio_frame.h b/api/test/mock_transformable_audio_frame.h
index f243e38..d1f4044 100644
--- a/api/test/mock_transformable_audio_frame.h
+++ b/api/test/mock_transformable_audio_frame.h
@@ -20,6 +20,8 @@
 
 class MockTransformableAudioFrame : public TransformableAudioFrameInterface {
  public:
+  MockTransformableAudioFrame() : TransformableAudioFrameInterface(Passkey()) {}
+
   MOCK_METHOD(rtc::ArrayView<const uint8_t>, GetData, (), (const, override));
   MOCK_METHOD(void, SetData, (rtc::ArrayView<const uint8_t>), (override));
   MOCK_METHOD(void, SetRTPTimestamp, (uint32_t), (override));
diff --git a/api/test/mock_transformable_frame.h b/api/test/mock_transformable_frame.h
index df20b62..d6e0856 100644
--- a/api/test/mock_transformable_frame.h
+++ b/api/test/mock_transformable_frame.h
@@ -23,8 +23,10 @@
 
 namespace webrtc {
 
-class MockTransformableFrame : public webrtc::TransformableFrameInterface {
+class MockTransformableFrame : public TransformableFrameInterface {
  public:
+  MockTransformableFrame() : TransformableFrameInterface(Passkey()) {}
+
   MOCK_METHOD(rtc::ArrayView<const uint8_t>, GetData, (), (const, override));
   MOCK_METHOD(void, SetData, (rtc::ArrayView<const uint8_t>), (override));
   MOCK_METHOD(uint8_t, GetPayloadType, (), (const, override));
diff --git a/api/test/mock_transformable_video_frame.h b/api/test/mock_transformable_video_frame.h
index b3825dd..2cf7cb2 100644
--- a/api/test/mock_transformable_video_frame.h
+++ b/api/test/mock_transformable_video_frame.h
@@ -22,6 +22,7 @@
 class MockTransformableVideoFrame
     : public webrtc::TransformableVideoFrameInterface {
  public:
+  MockTransformableVideoFrame() : TransformableVideoFrameInterface(Passkey()) {}
   MOCK_METHOD(rtc::ArrayView<const uint8_t>, GetData, (), (const, override));
   MOCK_METHOD(void, SetData, (rtc::ArrayView<const uint8_t> data), (override));
   MOCK_METHOD(uint32_t, GetTimestamp, (), (const, override));
diff --git a/audio/channel_receive_frame_transformer_delegate.cc b/audio/channel_receive_frame_transformer_delegate.cc
index 953e27a..814127b 100644
--- a/audio/channel_receive_frame_transformer_delegate.cc
+++ b/audio/channel_receive_frame_transformer_delegate.cc
@@ -16,7 +16,6 @@
 #include "rtc_base/buffer.h"
 
 namespace webrtc {
-namespace {
 
 class TransformableIncomingAudioFrame
     : public TransformableAudioFrameInterface {
@@ -25,7 +24,8 @@
                                   const RTPHeader& header,
                                   uint32_t ssrc,
                                   const std::string& codec_mime_type)
-      : payload_(payload.data(), payload.size()),
+      : TransformableAudioFrameInterface(Passkey()),
+        payload_(payload.data(), payload.size()),
         header_(header),
         ssrc_(ssrc),
         codec_mime_type_(codec_mime_type) {}
@@ -83,7 +83,6 @@
   uint32_t ssrc_;
   std::string codec_mime_type_;
 };
-}  // namespace
 
 ChannelReceiveFrameTransformerDelegate::ChannelReceiveFrameTransformerDelegate(
     ReceiveFrameCallback receive_frame_callback,
diff --git a/audio/channel_send_frame_transformer_delegate.cc b/audio/channel_send_frame_transformer_delegate.cc
index 8bf1963..ef6ec26 100644
--- a/audio/channel_send_frame_transformer_delegate.cc
+++ b/audio/channel_send_frame_transformer_delegate.cc
@@ -45,6 +45,7 @@
   RTC_DCHECK_NOTREACHED();
   return AudioFrameType::kEmptyFrame;
 }
+}  // namespace
 
 class TransformableOutgoingAudioFrame
     : public TransformableAudioFrameInterface {
@@ -61,7 +62,8 @@
       const std::string& codec_mime_type,
       absl::optional<uint16_t> sequence_number,
       absl::optional<uint8_t> audio_level_dbov)
-      : frame_type_(frame_type),
+      : TransformableAudioFrameInterface(Passkey()),
+        frame_type_(frame_type),
         payload_type_(payload_type),
         rtp_timestamp_with_offset_(rtp_timestamp_with_offset),
         payload_(payload_data, payload_size),
@@ -119,7 +121,6 @@
   absl::optional<uint16_t> sequence_number_;
   absl::optional<uint8_t> audio_level_dbov_;
 };
-}  // namespace
 
 ChannelSendFrameTransformerDelegate::ChannelSendFrameTransformerDelegate(
     SendFrameCallback send_frame_callback,
diff --git a/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc b/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc
index 2bb7194..ae80f76 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc
@@ -28,6 +28,7 @@
 // estimate of the RTT of the link,so 10ms should be a reasonable estimate for
 // frames being re-transmitted to a peer, probably on the same network.
 const TimeDelta kDefaultRetransmissionsTime = TimeDelta::Millis(10);
+}  // namespace
 
 class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
  public:
@@ -39,7 +40,8 @@
                                 TimeDelta expected_retransmission_time,
                                 uint32_t ssrc,
                                 std::vector<uint32_t> csrcs)
-      : encoded_data_(encoded_image.GetEncodedData()),
+      : TransformableVideoFrameInterface(Passkey()),
+        encoded_data_(encoded_image.GetEncodedData()),
         pre_transform_payload_size_(encoded_image.size()),
         header_(video_header),
         frame_type_(encoded_image._frameType),
@@ -128,7 +130,6 @@
   uint32_t ssrc_;
   std::vector<uint32_t> csrcs_;
 };
-}  // namespace
 
 RTPSenderVideoFrameTransformerDelegate::RTPSenderVideoFrameTransformerDelegate(
     RTPVideoFrameSenderInterface* sender,
diff --git a/modules/rtp_rtcp/source/rtp_video_stream_receiver_frame_transformer_delegate.cc b/modules/rtp_rtcp/source/rtp_video_stream_receiver_frame_transformer_delegate.cc
index fbd10c4..5ef0a80 100644
--- a/modules/rtp_rtcp/source/rtp_video_stream_receiver_frame_transformer_delegate.cc
+++ b/modules/rtp_rtcp/source/rtp_video_stream_receiver_frame_transformer_delegate.cc
@@ -16,20 +16,21 @@
 
 #include "absl/memory/memory.h"
 #include "modules/rtp_rtcp/source/rtp_descriptor_authentication.h"
+#include "modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
 #include "rtc_base/thread.h"
 
 namespace webrtc {
 
-namespace {
 class TransformableVideoReceiverFrame
     : public TransformableVideoFrameInterface {
  public:
   TransformableVideoReceiverFrame(std::unique_ptr<RtpFrameObject> frame,
                                   uint32_t ssrc,
                                   RtpVideoFrameReceiver* receiver)
-      : frame_(std::move(frame)),
+      : TransformableVideoFrameInterface(Passkey()),
+        frame_(std::move(frame)),
         metadata_(frame_->GetRtpVideoHeader().GetAsMetadata()),
         receiver_(receiver) {
     metadata_.SetSsrc(ssrc);
@@ -89,7 +90,6 @@
   VideoFrameMetadata metadata_;
   RtpVideoFrameReceiver* receiver_;
 };
-}  // namespace
 
 RtpVideoStreamReceiverFrameTransformerDelegate::
     RtpVideoStreamReceiverFrameTransformerDelegate(