Expose audio mimeType for insertable streams

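This change is split from
  https://webrtc-review.googlesource.com/c/src/+/318283
to reduce CL size. It takes a different and (hopefully) simpler
approach.

The codec MIME type (e.g. "audio/opus", or "audio/x-unknown" when the
payload type is not mapped) is passed to the send and receive frame
transformer delegates and returned by the frames' GetMimeType().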

BUG=webrtc:15579

Change-Id: I8517ffbeb0f0a76db80e3e367de727fb6976211d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/325023
Commit-Queue: Philipp Hancke <phancke@microsoft.com>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Reviewed-by: Tony Herre <herre@google.com>
Cr-Commit-Position: refs/heads/main@{#41073}
diff --git a/audio/audio_send_stream.cc b/audio/audio_send_stream.cc
index 0caf59a..c9dc42c 100644
--- a/audio/audio_send_stream.cc
+++ b/audio/audio_send_stream.cc
@@ -636,12 +636,14 @@
   }
 
   // Wrap the encoder in a RED encoder, if RED is enabled.
+  SdpAudioFormat format = spec.format;
   if (spec.red_payload_type) {
     AudioEncoderCopyRed::Config red_config;
     red_config.payload_type = *spec.red_payload_type;
     red_config.speech_encoder = std::move(encoder);
     encoder = std::make_unique<AudioEncoderCopyRed>(std::move(red_config),
                                                     field_trials_);
+    format.name = cricket::kRedCodecName;
   }
 
   // Set currently known overhead (used in ANA, opus only).
@@ -655,7 +657,7 @@
   }
 
   StoreEncoderProperties(encoder->SampleRateHz(), encoder->NumChannels());
-  channel_send_->SetEncoder(new_config.send_codec_spec->payload_type,
+  channel_send_->SetEncoder(new_config.send_codec_spec->payload_type, format,
                             std::move(encoder));
 
   return true;
diff --git a/audio/audio_send_stream_unittest.cc b/audio/audio_send_stream_unittest.cc
index d842afd..c854f73 100644
--- a/audio/audio_send_stream_unittest.cc
+++ b/audio/audio_send_stream_unittest.cc
@@ -242,11 +242,11 @@
   void SetupMockForSetupSendCodec(bool expect_set_encoder_call) {
     if (expect_set_encoder_call) {
       EXPECT_CALL(*channel_send_, SetEncoder)
-          .WillOnce(
-              [this](int payload_type, std::unique_ptr<AudioEncoder> encoder) {
-                this->audio_encoder_ = std::move(encoder);
-                return true;
-              });
+          .WillOnce([this](int payload_type, const SdpAudioFormat& format,
+                           std::unique_ptr<AudioEncoder> encoder) {
+            this->audio_encoder_ = std::move(encoder);
+            return true;
+          });
     }
   }
 
@@ -595,6 +595,7 @@
     std::unique_ptr<AudioEncoder> stolen_encoder;
     EXPECT_CALL(*helper.channel_send(), SetEncoder)
         .WillOnce([&stolen_encoder](int payload_type,
+                                    const SdpAudioFormat& format,
                                     std::unique_ptr<AudioEncoder> encoder) {
           stolen_encoder = std::move(encoder);
           return true;
diff --git a/audio/channel_receive.cc b/audio/channel_receive.cc
index f5d214f..efd9668 100644
--- a/audio/channel_receive.cc
+++ b/audio/channel_receive.cc
@@ -47,6 +47,7 @@
 #include "rtc_base/numerics/safe_minmax.h"
 #include "rtc_base/numerics/sequence_number_unwrapper.h"
 #include "rtc_base/race_checker.h"
+#include "rtc_base/strings/string_builder.h"
 #include "rtc_base/synchronization/mutex.h"
 #include "rtc_base/system/no_unique_address.h"
 #include "rtc_base/time_utils.h"
@@ -312,6 +313,8 @@
   mutable Mutex rtcp_counter_mutex_;
   RtcpPacketTypeCounter rtcp_packet_type_counter_
       RTC_GUARDED_BY(rtcp_counter_mutex_);
+
+  std::map<int, SdpAudioFormat> payload_type_map_;
 };
 
 void ChannelReceive::OnReceivedPayloadData(
@@ -636,6 +639,7 @@
     RTC_DCHECK_GE(kv.second.clockrate_hz, 1000);
     payload_type_frequencies_[kv.first] = kv.second.clockrate_hz;
   }
+  payload_type_map_ = codecs;
   acm_receiver_.SetCodecs(codecs);
 }
 
@@ -722,7 +726,14 @@
   if (frame_transformer_delegate_) {
     // Asynchronously transform the received payload. After the payload is
     // transformed, the delegate will call OnReceivedPayloadData to handle it.
-    frame_transformer_delegate_->Transform(payload_data, header, remote_ssrc_);
+    char buf[1024];
+    rtc::SimpleStringBuilder mime_type(buf);
+    auto it = payload_type_map_.find(header.payloadType);
+    mime_type << MediaTypeToString(cricket::MEDIA_TYPE_AUDIO) << "/"
+              << (it != payload_type_map_.end() ? it->second.name
+                                                : "x-unknown");
+    frame_transformer_delegate_->Transform(payload_data, header, remote_ssrc_,
+                                           mime_type.str());
   } else {
     OnReceivedPayloadData(payload_data, header);
   }
diff --git a/audio/channel_receive_frame_transformer_delegate.cc b/audio/channel_receive_frame_transformer_delegate.cc
index 2d2893b..e87566b 100644
--- a/audio/channel_receive_frame_transformer_delegate.cc
+++ b/audio/channel_receive_frame_transformer_delegate.cc
@@ -10,6 +10,7 @@
 
 #include "audio/channel_receive_frame_transformer_delegate.h"
 
+#include <string>
 #include <utility>
 
 #include "rtc_base/buffer.h"
@@ -22,10 +23,12 @@
  public:
   TransformableIncomingAudioFrame(rtc::ArrayView<const uint8_t> payload,
                                   const RTPHeader& header,
-                                  uint32_t ssrc)
+                                  uint32_t ssrc,
+                                  const std::string& codec_mime_type)
       : payload_(payload.data(), payload.size()),
         header_(header),
-        ssrc_(ssrc) {}
+        ssrc_(ssrc),
+        codec_mime_type_(codec_mime_type) {}
   ~TransformableIncomingAudioFrame() override = default;
   rtc::ArrayView<const uint8_t> GetData() const override { return payload_; }
 
@@ -45,6 +48,7 @@
   }
   Direction GetDirection() const override { return Direction::kReceiver; }
 
+  std::string GetMimeType() const override { return codec_mime_type_; }
   const absl::optional<uint16_t> SequenceNumber() const override {
     return header_.sequenceNumber;
   }
@@ -65,6 +69,7 @@
   rtc::Buffer payload_;
   RTPHeader header_;
   uint32_t ssrc_;
+  std::string codec_mime_type_;
 };
 }  // namespace
 
@@ -92,10 +97,12 @@
 void ChannelReceiveFrameTransformerDelegate::Transform(
     rtc::ArrayView<const uint8_t> packet,
     const RTPHeader& header,
-    uint32_t ssrc) {
+    uint32_t ssrc,
+    const std::string& codec_mime_type) {
   RTC_DCHECK_RUN_ON(&sequence_checker_);
   frame_transformer_->Transform(
-      std::make_unique<TransformableIncomingAudioFrame>(packet, header, ssrc));
+      std::make_unique<TransformableIncomingAudioFrame>(packet, header, ssrc,
+                                                        codec_mime_type));
 }
 
 void ChannelReceiveFrameTransformerDelegate::OnTransformedFrame(
diff --git a/audio/channel_receive_frame_transformer_delegate.h b/audio/channel_receive_frame_transformer_delegate.h
index 04ad7c4..97bcacd 100644
--- a/audio/channel_receive_frame_transformer_delegate.h
+++ b/audio/channel_receive_frame_transformer_delegate.h
@@ -12,6 +12,7 @@
 #define AUDIO_CHANNEL_RECEIVE_FRAME_TRANSFORMER_DELEGATE_H_
 
 #include <memory>
+#include <string>
 
 #include "api/frame_transformer_interface.h"
 #include "api/sequence_checker.h"
@@ -48,7 +49,8 @@
   // the frame asynchronously.
   void Transform(rtc::ArrayView<const uint8_t> packet,
                  const RTPHeader& header,
-                 uint32_t ssrc);
+                 uint32_t ssrc,
+                 const std::string& codec_mime_type);
 
   // Implements TransformedFrameCallback. Can be called on any thread.
   void OnTransformedFrame(
diff --git a/audio/channel_receive_frame_transformer_delegate_unittest.cc b/audio/channel_receive_frame_transformer_delegate_unittest.cc
index 38ceb6d..9e655cc 100644
--- a/audio/channel_receive_frame_transformer_delegate_unittest.cc
+++ b/audio/channel_receive_frame_transformer_delegate_unittest.cc
@@ -93,7 +93,7 @@
           [&callback](std::unique_ptr<TransformableFrameInterface> frame) {
             callback->OnTransformedFrame(std::move(frame));
           });
-  delegate->Transform(packet, header, 1111 /*ssrc*/);
+  delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus");
   rtc::ThreadManager::ProcessAllMessageQueuesForTesting();
 }
 
@@ -126,7 +126,7 @@
             static_cast<TransformableAudioFrameInterface*>(frame.get());
         callback->OnTransformedFrame(CloneSenderAudioFrame(transformed_frame));
       });
-  delegate->Transform(packet, header, 1111 /*ssrc*/);
+  delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus");
   rtc::ThreadManager::ProcessAllMessageQueuesForTesting();
 }
 
diff --git a/audio/channel_send.cc b/audio/channel_send.cc
index 08dd745..b71f564 100644
--- a/audio/channel_send.cc
+++ b/audio/channel_send.cc
@@ -37,6 +37,7 @@
 #include "rtc_base/numerics/safe_conversions.h"
 #include "rtc_base/race_checker.h"
 #include "rtc_base/rate_limiter.h"
+#include "rtc_base/strings/string_builder.h"
 #include "rtc_base/synchronization/mutex.h"
 #include "rtc_base/task_queue.h"
 #include "rtc_base/time_utils.h"
@@ -79,6 +80,7 @@
 
   // Send using this encoder, with this payload type.
   void SetEncoder(int payload_type,
+                  const SdpAudioFormat& encoder_format,
                   std::unique_ptr<AudioEncoder> encoder) override;
   void ModifyEncoder(rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)>
                          modifier) override;
@@ -237,6 +239,8 @@
   // Defined last to ensure that there are no running tasks when the other
   // members are destroyed.
   rtc::TaskQueue encoder_queue_;
+
+  SdpAudioFormat encoder_format_;
 };
 
 const int kTelephoneEventAttenuationdB = 10;
@@ -279,10 +283,14 @@
   if (frame_transformer_delegate_) {
     // Asynchronously transform the payload before sending it. After the payload
     // is transformed, the delegate will call SendRtpAudio to send it.
+    char buf[1024];
+    rtc::SimpleStringBuilder mime_type(buf);
+    mime_type << MediaTypeToString(cricket::MEDIA_TYPE_AUDIO) << "/"
+              << encoder_format_.name;
     frame_transformer_delegate_->Transform(
         frameType, payloadType, rtp_timestamp + rtp_rtcp_->StartTimestamp(),
         payloadData, payloadSize, absolute_capture_timestamp_ms,
-        rtp_rtcp_->SSRC());
+        rtp_rtcp_->SSRC(), mime_type.str());
     return 0;
   }
   return SendRtpAudio(frameType, payloadType, rtp_timestamp, payload,
@@ -394,7 +402,8 @@
       crypto_options_(crypto_options),
       encoder_queue_(task_queue_factory->CreateTaskQueue(
           "AudioEncoder",
-          TaskQueueFactory::Priority::NORMAL)) {
+          TaskQueueFactory::Priority::NORMAL)),
+      encoder_format_("x-unknown", 0, 0) {
   audio_coding_ = AudioCodingModule::Create();
 
   RtpRtcpInterface::Configuration configuration;
@@ -495,6 +504,7 @@
 }
 
 void ChannelSend::SetEncoder(int payload_type,
+                             const SdpAudioFormat& encoder_format,
                              std::unique_ptr<AudioEncoder> encoder) {
   RTC_DCHECK_RUN_ON(&worker_thread_checker_);
   RTC_DCHECK_GE(payload_type, 0);
@@ -508,6 +518,7 @@
                                           encoder->RtpTimestampRateHz(),
                                           encoder->NumChannels(), 0);
 
+  encoder_format_ = encoder_format;
   audio_coding_->SetEncoder(std::move(encoder));
 }
 
diff --git a/audio/channel_send.h b/audio/channel_send.h
index 00d954c..b6a6a37 100644
--- a/audio/channel_send.h
+++ b/audio/channel_send.h
@@ -62,6 +62,7 @@
   virtual CallSendStatistics GetRTCPStatistics() const = 0;
 
   virtual void SetEncoder(int payload_type,
+                          const SdpAudioFormat& encoder_format,
                           std::unique_ptr<AudioEncoder> encoder) = 0;
   virtual void ModifyEncoder(
       rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) = 0;
diff --git a/audio/channel_send_frame_transformer_delegate.cc b/audio/channel_send_frame_transformer_delegate.cc
index 0f85216..4e8d9fe 100644
--- a/audio/channel_send_frame_transformer_delegate.cc
+++ b/audio/channel_send_frame_transformer_delegate.cc
@@ -55,13 +55,15 @@
       const uint8_t* payload_data,
       size_t payload_size,
       absl::optional<uint64_t> absolute_capture_timestamp_ms,
-      uint32_t ssrc)
+      uint32_t ssrc,
+      const std::string& codec_mime_type)
       : frame_type_(frame_type),
         payload_type_(payload_type),
         rtp_timestamp_with_offset_(rtp_timestamp_with_offset),
         payload_(payload_data, payload_size),
         absolute_capture_timestamp_ms_(absolute_capture_timestamp_ms),
-        ssrc_(ssrc) {}
+        ssrc_(ssrc),
+        codec_mime_type_(codec_mime_type) {}
   ~TransformableOutgoingAudioFrame() override = default;
   rtc::ArrayView<const uint8_t> GetData() const override { return payload_; }
   void SetData(rtc::ArrayView<const uint8_t> data) override {
@@ -76,6 +78,7 @@
 
   uint8_t GetPayloadType() const override { return payload_type_; }
   Direction GetDirection() const override { return Direction::kSender; }
+  std::string GetMimeType() const override { return codec_mime_type_; }
 
   rtc::ArrayView<const uint32_t> GetContributingSources() const override {
     return {};
@@ -100,6 +103,7 @@
   rtc::Buffer payload_;
   absl::optional<uint64_t> absolute_capture_timestamp_ms_;
   uint32_t ssrc_;
+  std::string codec_mime_type_;
 };
 }  // namespace
 
@@ -131,11 +135,12 @@
     const uint8_t* payload_data,
     size_t payload_size,
     int64_t absolute_capture_timestamp_ms,
-    uint32_t ssrc) {
+    uint32_t ssrc,
+    const std::string& codec_mimetype) {
   frame_transformer_->Transform(
       std::make_unique<TransformableOutgoingAudioFrame>(
           frame_type, payload_type, rtp_timestamp, payload_data, payload_size,
-          absolute_capture_timestamp_ms, ssrc));
+          absolute_capture_timestamp_ms, ssrc, codec_mimetype));
 }
 
 void ChannelSendFrameTransformerDelegate::OnTransformedFrame(
@@ -173,7 +178,8 @@
       InterfaceFrameTypeToInternalFrameType(original->Type()),
       original->GetPayloadType(), original->GetTimestamp(),
       original->GetData().data(), original->GetData().size(),
-      original->AbsoluteCaptureTimestamp(), original->GetSsrc());
+      original->AbsoluteCaptureTimestamp(), original->GetSsrc(),
+      original->GetMimeType());
 }
 
 }  // namespace webrtc
diff --git a/audio/channel_send_frame_transformer_delegate.h b/audio/channel_send_frame_transformer_delegate.h
index eb0027e..2306dfc 100644
--- a/audio/channel_send_frame_transformer_delegate.h
+++ b/audio/channel_send_frame_transformer_delegate.h
@@ -12,6 +12,7 @@
 #define AUDIO_CHANNEL_SEND_FRAME_TRANSFORMER_DELEGATE_H_
 
 #include <memory>
+#include <string>
 
 #include "api/frame_transformer_interface.h"
 #include "api/sequence_checker.h"
@@ -57,7 +58,8 @@
                  const uint8_t* payload_data,
                  size_t payload_size,
                  int64_t absolute_capture_timestamp_ms,
-                 uint32_t ssrc);
+                 uint32_t ssrc,
+                 const std::string& codec_mime_type);
 
   // Implements TransformedFrameCallback. Can be called on any thread.
   void OnTransformedFrame(
diff --git a/audio/channel_send_frame_transformer_delegate_unittest.cc b/audio/channel_send_frame_transformer_delegate_unittest.cc
index f75d4a8..0c8e387 100644
--- a/audio/channel_send_frame_transformer_delegate_unittest.cc
+++ b/audio/channel_send_frame_transformer_delegate_unittest.cc
@@ -115,7 +115,7 @@
             callback->OnTransformedFrame(std::move(frame));
           });
   delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0,
-                      0);
+                      /*ssrc=*/0, /*mimeType=*/"audio/opus");
   channel_queue.WaitForPreviouslyPostedTasks();
 }
 
@@ -145,7 +145,7 @@
             callback->OnTransformedFrame(CreateMockReceiverFrame());
           });
   delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0,
-                      0);
+                      /*ssrc=*/0, /*mimeType=*/"audio/opus");
   channel_queue.WaitForPreviouslyPostedTasks();
 }
 
diff --git a/audio/channel_send_unittest.cc b/audio/channel_send_unittest.cc
index b9406e1..58d7c93 100644
--- a/audio/channel_send_unittest.cc
+++ b/audio/channel_send_unittest.cc
@@ -66,9 +66,10 @@
         &transport_, nullptr, &event_log_, nullptr, crypto_options_, false,
         kRtcpIntervalMs, kSsrc, nullptr, &transport_controller_, field_trials_);
     encoder_factory_ = CreateBuiltinAudioEncoderFactory();
-    std::unique_ptr<AudioEncoder> encoder = encoder_factory_->MakeAudioEncoder(
-        kPayloadType, SdpAudioFormat("opus", kRtpRateHz, 2), {});
-    channel_->SetEncoder(kPayloadType, std::move(encoder));
+    SdpAudioFormat opus = SdpAudioFormat("opus", kRtpRateHz, 2);
+    std::unique_ptr<AudioEncoder> encoder =
+        encoder_factory_->MakeAudioEncoder(kPayloadType, opus, {});
+    channel_->SetEncoder(kPayloadType, opus, std::move(encoder));
     transport_controller_.EnsureStarted();
     channel_->RegisterSenderCongestionControlObjects(&transport_controller_);
     ON_CALL(transport_, SendRtcp).WillByDefault(Return(true));
diff --git a/audio/mock_voe_channel_proxy.h b/audio/mock_voe_channel_proxy.h
index 2900517..71ef5d1 100644
--- a/audio/mock_voe_channel_proxy.h
+++ b/audio/mock_voe_channel_proxy.h
@@ -113,7 +113,9 @@
  public:
   MOCK_METHOD(void,
               SetEncoder,
-              (int payload_type, std::unique_ptr<AudioEncoder> encoder),
+              (int payload_type,
+               const SdpAudioFormat& encoder_format,
+               std::unique_ptr<AudioEncoder> encoder),
               (override));
   MOCK_METHOD(
       void,