Calculate the audio level of audio packets before encoded transforms

Calculate the RMS audio level of audio packets being sent before
invoking an encoded frame transform, and pass it along with the encoded
frame object.

Before this, the audio level was calculated at send time by having
rms_level_ look at all audio samples encoded since the last send. This
is fine without a transform, as this is done synchronously after
encoding, but with an async transform which might take arbitrarily long,
we could end up marking older audio packets with newer audio levels, or
not marking them at all.

This also makes things work correctly if external encoded frames are
injected from elsewhere to be sent, and exposes the AudioLevel on the
TransformableFrame interface.

Bug: chromium:337193823, webrtc:42226202
Change-Id: If55d2c1d30dc03408ca9fb0193d791db44428316
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/349263
Reviewed-by: Jakob Ivarsson‎ <jakobi@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Tony Herre <herre@google.com>
Cr-Commit-Position: refs/heads/main@{#42193}
diff --git a/api/frame_transformer_interface.h b/api/frame_transformer_interface.h
index d3d1541..89356df 100644
--- a/api/frame_transformer_interface.h
+++ b/api/frame_transformer_interface.h
@@ -82,6 +82,11 @@
   // TODO(crbug.com/1456628): Change this to pure virtual after it
   // is implemented everywhere.
   virtual FrameType Type() const { return FrameType::kEmptyFrame; }
+
+  // Audio level in -dBov. Values range from 0 to 127, representing 0 to -127
+  // dBov. 127 represents digital silence. Only present on remote frames if
+  // the audio level header extension was included.
+  virtual absl::optional<uint8_t> AudioLevel() const = 0;
 };
 
 // Objects implement this interface to be notified with the transformed frame.
diff --git a/api/test/mock_transformable_audio_frame.h b/api/test/mock_transformable_audio_frame.h
index 584c77f..f243e38 100644
--- a/api/test/mock_transformable_audio_frame.h
+++ b/api/test/mock_transformable_audio_frame.h
@@ -47,6 +47,7 @@
               Type,
               (),
               (const, override));
+  MOCK_METHOD(absl::optional<uint8_t>, AudioLevel, (), (const, override));
 };
 
 }  // namespace webrtc
diff --git a/audio/channel_receive_frame_transformer_delegate.cc b/audio/channel_receive_frame_transformer_delegate.cc
index dbced02..953e27a 100644
--- a/audio/channel_receive_frame_transformer_delegate.cc
+++ b/audio/channel_receive_frame_transformer_delegate.cc
@@ -70,6 +70,13 @@
                : FrameType::kAudioFrameCN;
   }
 
+  absl::optional<uint8_t> AudioLevel() const override {
+    // Report a level only when the header carries the audio level extension.
+    const auto& level = header_.extension.audio_level();
+    if (!level) return absl::nullopt;
+    return level->level();
+  }
+
  private:
   rtc::Buffer payload_;
   RTPHeader header_;
diff --git a/audio/channel_receive_frame_transformer_delegate_unittest.cc b/audio/channel_receive_frame_transformer_delegate_unittest.cc
index a206a09..8b819f1 100644
--- a/audio/channel_receive_frame_transformer_delegate_unittest.cc
+++ b/audio/channel_receive_frame_transformer_delegate_unittest.cc
@@ -174,5 +174,76 @@
   delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus");
 }
 
+TEST(ChannelReceiveFrameTransformerDelegateTest,
+     AudioLevelAbsentWithoutExtension) {
+  rtc::AutoThread main_thread;
+  rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
+      rtc::make_ref_counted<NiceMock<MockFrameTransformer>>();
+  rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate =
+      rtc::make_ref_counted<ChannelReceiveFrameTransformerDelegate>(
+          /*receive_frame_callback=*/nullptr, mock_frame_transformer,
+          rtc::Thread::Current());
+  rtc::scoped_refptr<TransformedFrameCallback> callback;
+  EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback)
+      .WillOnce(SaveArg<0>(&callback));
+  delegate->Init();
+  ASSERT_TRUE(callback);
+
+  const uint8_t data[] = {1, 2, 3, 4};
+  rtc::ArrayView<const uint8_t> packet(data, sizeof(data));
+  RTPHeader header;  // No audio level extension is set on this header.
+  std::unique_ptr<TransformableFrameInterface> frame;
+  ON_CALL(*mock_frame_transformer, Transform)
+      .WillByDefault(
+          [&](std::unique_ptr<TransformableFrameInterface> transform_frame) {
+            frame = std::move(transform_frame);
+          });
+  delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus");
+
+  ASSERT_TRUE(frame);  // Fatal on failure: `frame` is dereferenced below.
+  auto* audio_frame =
+      static_cast<TransformableAudioFrameInterface*>(frame.get());
+  EXPECT_FALSE(audio_frame->AudioLevel());
+  EXPECT_EQ(audio_frame->Type(),
+            TransformableAudioFrameInterface::FrameType::kAudioFrameCN);
+}
+
+TEST(ChannelReceiveFrameTransformerDelegateTest,
+     AudioLevelPresentWithExtension) {
+  rtc::AutoThread main_thread;
+  rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
+      rtc::make_ref_counted<NiceMock<MockFrameTransformer>>();
+  rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate =
+      rtc::make_ref_counted<ChannelReceiveFrameTransformerDelegate>(
+          /*receive_frame_callback=*/nullptr, mock_frame_transformer,
+          rtc::Thread::Current());
+  rtc::scoped_refptr<TransformedFrameCallback> callback;
+  EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback)
+      .WillOnce(SaveArg<0>(&callback));
+  delegate->Init();
+  ASSERT_TRUE(callback);
+
+  const uint8_t data[] = {1, 2, 3, 4};
+  rtc::ArrayView<const uint8_t> packet(data, sizeof(data));
+  RTPHeader header;
+  uint8_t audio_level_dbov = 67;
+  AudioLevel audio_level(/*voice_activity=*/true, audio_level_dbov);
+  header.extension.set_audio_level(audio_level);
+  std::unique_ptr<TransformableFrameInterface> frame;
+  ON_CALL(*mock_frame_transformer, Transform)
+      .WillByDefault(
+          [&](std::unique_ptr<TransformableFrameInterface> transform_frame) {
+            frame = std::move(transform_frame);
+          });
+  delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus");
+
+  ASSERT_TRUE(frame);  // Fatal on failure: `frame` is dereferenced below.
+  auto* audio_frame =
+      static_cast<TransformableAudioFrameInterface*>(frame.get());
+  EXPECT_EQ(audio_frame->AudioLevel(), audio_level_dbov);  // No unchecked `*`.
+  EXPECT_EQ(audio_frame->Type(),
+            TransformableAudioFrameInterface::FrameType::kAudioFrameSpeech);
+}
+
 }  // namespace
 }  // namespace webrtc
diff --git a/audio/channel_send.cc b/audio/channel_send.cc
index e8eaa31..1e211ab 100644
--- a/audio/channel_send.cc
+++ b/audio/channel_send.cc
@@ -170,7 +170,8 @@
                        uint32_t rtp_timestamp_without_offset,
                        rtc::ArrayView<const uint8_t> payload,
                        int64_t absolute_capture_timestamp_ms,
-                       rtc::ArrayView<const uint32_t> csrcs)
+                       rtc::ArrayView<const uint32_t> csrcs,
+                       absl::optional<uint8_t> audio_level_dbov)
       RTC_RUN_ON(encoder_queue_checker_);
 
   void OnReceivedRtt(int64_t rtt_ms);
@@ -280,6 +281,14 @@
                               int64_t absolute_capture_timestamp_ms) {
   RTC_DCHECK_RUN_ON(&encoder_queue_checker_);
   rtc::ArrayView<const uint8_t> payload(payloadData, payloadSize);
+
+  absl::optional<uint8_t> audio_level_dbov;
+  if (include_audio_level_indication_.load()) {
+    // Take the averaged audio levels from rms_level_ and reset it before
+    // invoking any async transformer.
+    audio_level_dbov = rms_level_.Average();
+  }
+
   if (frame_transformer_delegate_) {
     // Asynchronously transform the payload before sending it. After the payload
     // is transformed, the delegate will call SendRtpAudio to send it.
@@ -290,11 +299,12 @@
     frame_transformer_delegate_->Transform(
         frameType, payloadType, rtp_timestamp + rtp_rtcp_->StartTimestamp(),
         payloadData, payloadSize, absolute_capture_timestamp_ms,
-        rtp_rtcp_->SSRC(), mime_type.str());
+        rtp_rtcp_->SSRC(), mime_type.str(), audio_level_dbov);
     return 0;
   }
   return SendRtpAudio(frameType, payloadType, rtp_timestamp, payload,
-                      absolute_capture_timestamp_ms, /*csrcs=*/{});
+                      absolute_capture_timestamp_ms, /*csrcs=*/{},
+                      audio_level_dbov);
 }
 
 int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType,
@@ -302,7 +312,8 @@
                                   uint32_t rtp_timestamp_without_offset,
                                   rtc::ArrayView<const uint8_t> payload,
                                   int64_t absolute_capture_timestamp_ms,
-                                  rtc::ArrayView<const uint32_t> csrcs) {
+                                  rtc::ArrayView<const uint32_t> csrcs,
+                                  absl::optional<uint8_t> audio_level_dbov) {
   // E2EE Custom Audio Frame Encryption (This is optional).
   // Keep this buffer around for the lifetime of the send call.
   rtc::Buffer encrypted_audio_payload;
@@ -369,8 +380,8 @@
   if (absolute_capture_timestamp_ms > 0) {
     frame.capture_time = Timestamp::Millis(absolute_capture_timestamp_ms);
   }
-  if (include_audio_level_indication_.load()) {
-    frame.audio_level_dbov = rms_level_.Average();
+  if (include_audio_level_indication_.load() && audio_level_dbov) {
+    frame.audio_level_dbov = *audio_level_dbov;
   }
   if (!rtp_sender_audio_->SendAudio(frame)) {
     RTC_DLOG(LS_ERROR)
@@ -866,12 +877,13 @@
              uint32_t rtp_timestamp_with_offset,
              rtc::ArrayView<const uint8_t> payload,
              int64_t absolute_capture_timestamp_ms,
-             rtc::ArrayView<const uint32_t> csrcs) {
+             rtc::ArrayView<const uint32_t> csrcs,
+             absl::optional<uint8_t> audio_level_dbov) {
         RTC_DCHECK_RUN_ON(&encoder_queue_checker_);
         return SendRtpAudio(
             frameType, payloadType,
             rtp_timestamp_with_offset - rtp_rtcp_->StartTimestamp(), payload,
-            absolute_capture_timestamp_ms, csrcs);
+            absolute_capture_timestamp_ms, csrcs, audio_level_dbov);
       };
   frame_transformer_delegate_ =
       rtc::make_ref_counted<ChannelSendFrameTransformerDelegate>(
diff --git a/audio/channel_send_frame_transformer_delegate.cc b/audio/channel_send_frame_transformer_delegate.cc
index 6d3c011..8bf1963 100644
--- a/audio/channel_send_frame_transformer_delegate.cc
+++ b/audio/channel_send_frame_transformer_delegate.cc
@@ -59,7 +59,8 @@
       uint32_t ssrc,
       std::vector<uint32_t> csrcs,
       const std::string& codec_mime_type,
-      absl::optional<uint16_t> sequence_number)
+      absl::optional<uint16_t> sequence_number,
+      absl::optional<uint8_t> audio_level_dbov)
       : frame_type_(frame_type),
         payload_type_(payload_type),
         rtp_timestamp_with_offset_(rtp_timestamp_with_offset),
@@ -68,7 +69,8 @@
         ssrc_(ssrc),
         csrcs_(std::move(csrcs)),
         codec_mime_type_(codec_mime_type),
-        sequence_number_(sequence_number) {}
+        sequence_number_(sequence_number),
+        audio_level_dbov_(audio_level_dbov) {}
   ~TransformableOutgoingAudioFrame() override = default;
   rtc::ArrayView<const uint8_t> GetData() const override { return payload_; }
   void SetData(rtc::ArrayView<const uint8_t> data) override {
@@ -101,6 +103,10 @@
     return absolute_capture_timestamp_ms_;
   }
 
+  absl::optional<uint8_t> AudioLevel() const override {
+    return audio_level_dbov_;
+  }
+
  private:
   AudioFrameType frame_type_;
   uint8_t payload_type_;
@@ -111,6 +117,7 @@
   std::vector<uint32_t> csrcs_;
   std::string codec_mime_type_;
   absl::optional<uint16_t> sequence_number_;
+  absl::optional<uint8_t> audio_level_dbov_;
 };
 }  // namespace
 
@@ -143,14 +150,15 @@
     size_t payload_size,
     int64_t absolute_capture_timestamp_ms,
     uint32_t ssrc,
-    const std::string& codec_mimetype) {
+    const std::string& codec_mimetype,
+    absl::optional<uint8_t> audio_level_dbov) {
   {
     MutexLock lock(&send_lock_);
     if (short_circuit_) {
       send_frame_callback_(
           frame_type, payload_type, rtp_timestamp,
           rtc::ArrayView<const uint8_t>(payload_data, payload_size),
-          absolute_capture_timestamp_ms, /*csrcs=*/{});
+          absolute_capture_timestamp_ms, /*csrcs=*/{}, audio_level_dbov);
       return;
     }
   }
@@ -159,7 +167,7 @@
           frame_type, payload_type, rtp_timestamp, payload_data, payload_size,
           absolute_capture_timestamp_ms, ssrc,
           /*csrcs=*/std::vector<uint32_t>(), codec_mimetype,
-          /*sequence_number=*/absl::nullopt));
+          /*sequence_number=*/absl::nullopt, audio_level_dbov));
 }
 
 void ChannelSendFrameTransformerDelegate::OnTransformedFrame(
@@ -194,7 +202,8 @@
       transformed_frame->AbsoluteCaptureTimestamp()
           ? *transformed_frame->AbsoluteCaptureTimestamp()
           : 0,
-      transformed_frame->GetContributingSources());
+      transformed_frame->GetContributingSources(),
+      transformed_frame->AudioLevel());
 }
 
 std::unique_ptr<TransformableAudioFrameInterface> CloneSenderAudioFrame(
@@ -207,7 +216,8 @@
       original->GetPayloadType(), original->GetTimestamp(),
       original->GetData().data(), original->GetData().size(),
       original->AbsoluteCaptureTimestamp(), original->GetSsrc(),
-      std::move(csrcs), original->GetMimeType(), original->SequenceNumber());
+      std::move(csrcs), original->GetMimeType(), original->SequenceNumber(),
+      original->AudioLevel());
 }
 
 }  // namespace webrtc
diff --git a/audio/channel_send_frame_transformer_delegate.h b/audio/channel_send_frame_transformer_delegate.h
index 30e63ff..5573052 100644
--- a/audio/channel_send_frame_transformer_delegate.h
+++ b/audio/channel_send_frame_transformer_delegate.h
@@ -36,7 +36,8 @@
                             uint32_t rtp_timestamp_with_offset,
                             rtc::ArrayView<const uint8_t> payload,
                             int64_t absolute_capture_timestamp_ms,
-                            rtc::ArrayView<const uint32_t> csrcs)>;
+                            rtc::ArrayView<const uint32_t> csrcs,
+                            absl::optional<uint8_t> audio_level_dbov)>;
   ChannelSendFrameTransformerDelegate(
       SendFrameCallback send_frame_callback,
       rtc::scoped_refptr<FrameTransformerInterface> frame_transformer,
@@ -60,7 +61,8 @@
                  size_t payload_size,
                  int64_t absolute_capture_timestamp_ms,
                  uint32_t ssrc,
-                 const std::string& codec_mime_type);
+                 const std::string& codec_mime_type,
+                 absl::optional<uint8_t> audio_level_dbov);
 
   // Implements TransformedFrameCallback. Can be called on any thread.
   void OnTransformedFrame(
diff --git a/audio/channel_send_frame_transformer_delegate_unittest.cc b/audio/channel_send_frame_transformer_delegate_unittest.cc
index 5c025bb..e8b7aef 100644
--- a/audio/channel_send_frame_transformer_delegate_unittest.cc
+++ b/audio/channel_send_frame_transformer_delegate_unittest.cc
@@ -28,6 +28,7 @@
 using ::testing::ElementsAre;
 using ::testing::ElementsAreArray;
 using ::testing::NiceMock;
+using ::testing::Optional;
 using ::testing::Return;
 using ::testing::SaveArg;
 
@@ -45,21 +46,24 @@
                uint32_t rtp_timestamp,
                rtc::ArrayView<const uint8_t> payload,
                int64_t absolute_capture_timestamp_ms,
-               rtc::ArrayView<const uint32_t> csrcs));
+               rtc::ArrayView<const uint32_t> csrcs,
+               absl::optional<uint8_t> audio_level_dbov));
 
   ChannelSendFrameTransformerDelegate::SendFrameCallback callback() {
     return [this](AudioFrameType frameType, uint8_t payloadType,
                   uint32_t rtp_timestamp, rtc::ArrayView<const uint8_t> payload,
                   int64_t absolute_capture_timestamp_ms,
-                  rtc::ArrayView<const uint32_t> csrcs) {
+                  rtc::ArrayView<const uint32_t> csrcs,
+                  absl::optional<uint8_t> audio_level_dbov) {
       return SendFrame(frameType, payloadType, rtp_timestamp, payload,
-                       absolute_capture_timestamp_ms, csrcs);
+                       absolute_capture_timestamp_ms, csrcs, audio_level_dbov);
     };
   }
 };
 
 std::unique_ptr<TransformableAudioFrameInterface> CreateMockReceiverFrame(
-    const std::vector<uint32_t>& csrcs) {
+    const std::vector<uint32_t>& csrcs,
+    absl::optional<uint8_t> audio_level_dbov) {
   std::unique_ptr<MockTransformableAudioFrame> mock_frame =
       std::make_unique<NiceMock<MockTransformableAudioFrame>>();
   rtc::ArrayView<const uint8_t> payload(mock_data);
@@ -69,6 +73,7 @@
       .WillByDefault(Return(TransformableFrameInterface::Direction::kReceiver));
   ON_CALL(*mock_frame, GetContributingSources).WillByDefault(Return(csrcs));
   ON_CALL(*mock_frame, SequenceNumber).WillByDefault(Return(987654321));
+  ON_CALL(*mock_frame, AudioLevel).WillByDefault(Return(audio_level_dbov));
   return mock_frame;
 }
 
@@ -88,9 +93,9 @@
               std::unique_ptr<TransformableFrameInterface> transform_frame) {
             frame = std::move(transform_frame);
           });
-  delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, mock_data,
-                      sizeof(mock_data), 0,
-                      /*ssrc=*/0, /*mimeType=*/"audio/opus");
+  delegate->Transform(
+      AudioFrameType::kEmptyFrame, 0, 0, mock_data, sizeof(mock_data), 0,
+      /*ssrc=*/0, /*mimeType=*/"audio/opus", /*audio_level_dbov=*/123);
   return absl::WrapUnique(
       static_cast<webrtc::TransformableAudioFrameInterface*>(frame.release()));
 }
@@ -147,7 +152,8 @@
             callback->OnTransformedFrame(std::move(frame));
           });
   delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0,
-                      /*ssrc=*/0, /*mimeType=*/"audio/opus");
+                      /*ssrc=*/0, /*mimeType=*/"audio/opus",
+                      /*audio_level_dbov=*/31);
   channel_queue.WaitForPreviouslyPostedTasks();
 }
 
@@ -169,16 +175,20 @@
   ASSERT_TRUE(callback);
 
   const std::vector<uint32_t> csrcs = {123, 234, 345, 456};
+  const uint8_t audio_level_dbov = 17;
   EXPECT_CALL(mock_channel, SendFrame).Times(0);
-  EXPECT_CALL(mock_channel, SendFrame(_, 0, 0, ElementsAreArray(mock_data), _,
-                                      ElementsAreArray(csrcs)));
+  EXPECT_CALL(mock_channel,
+              SendFrame(_, 0, 0, ElementsAreArray(mock_data), _,
+                        ElementsAreArray(csrcs), Optional(audio_level_dbov)));
   ON_CALL(*mock_frame_transformer, Transform)
       .WillByDefault([&](std::unique_ptr<TransformableFrameInterface> frame) {
-        callback->OnTransformedFrame(CreateMockReceiverFrame(csrcs));
+        callback->OnTransformedFrame(CreateMockReceiverFrame(
+            csrcs, absl::optional<uint8_t>(audio_level_dbov)));
       });
   delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, mock_data,
                       sizeof(mock_data), 0,
-                      /*ssrc=*/0, /*mimeType=*/"audio/opus");
+                      /*ssrc=*/0, /*mimeType=*/"audio/opus",
+                      /*audio_level_dbov=*/absl::nullopt);
   channel_queue.WaitForPreviouslyPostedTasks();
 }
 
@@ -218,7 +228,8 @@
   EXPECT_CALL(mock_channel, SendFrame);
   const uint8_t data[] = {1, 2, 3, 4};
   delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0,
-                      /*ssrc=*/0, /*mimeType=*/"audio/opus");
+                      /*ssrc=*/0, /*mimeType=*/"audio/opus",
+                      /*audio_level_dbov=*/absl::nullopt);
 }
 
 TEST(ChannelSendFrameTransformerDelegateTest,
@@ -234,11 +245,13 @@
   EXPECT_EQ(cloned_frame->GetMimeType(), frame->GetMimeType());
   EXPECT_THAT(cloned_frame->GetContributingSources(),
               ElementsAreArray(frame->GetContributingSources()));
+  EXPECT_EQ(cloned_frame->AudioLevel(), frame->AudioLevel());
 }
 
 TEST(ChannelSendFrameTransformerDelegateTest, CloningReceiverFrameWithCsrcs) {
   std::unique_ptr<TransformableAudioFrameInterface> frame =
-      CreateMockReceiverFrame(/*csrcs=*/{123, 234, 345});
+      CreateMockReceiverFrame(/*csrcs=*/{123, 234, 345},
+                              absl::optional<uint8_t>(72));
   std::unique_ptr<TransformableAudioFrameInterface> cloned_frame =
       CloneSenderAudioFrame(frame.get());
 
@@ -254,6 +267,7 @@
   EXPECT_THAT(cloned_frame->GetContributingSources(),
               ElementsAreArray(frame->GetContributingSources()));
   EXPECT_EQ(cloned_frame->SequenceNumber(), frame->SequenceNumber());
+  EXPECT_EQ(cloned_frame->AudioLevel(), frame->AudioLevel());
 }
 
 }  // namespace
diff --git a/audio/channel_send_unittest.cc b/audio/channel_send_unittest.cc
index 77d8479..523408e 100644
--- a/audio/channel_send_unittest.cc
+++ b/audio/channel_send_unittest.cc
@@ -18,6 +18,7 @@
 #include "api/environment/environment_factory.h"
 #include "api/scoped_refptr.h"
 #include "api/test/mock_frame_transformer.h"
+#include "api/test/mock_transformable_audio_frame.h"
 #include "api/units/time_delta.h"
 #include "api/units/timestamp.h"
 #include "call/rtp_transport_controller_send.h"
@@ -76,22 +77,29 @@
     ON_CALL(transport_, SendRtp).WillByDefault(Return(true));
   }
 
-  std::unique_ptr<AudioFrame> CreateAudioFrame() {
+  std::unique_ptr<AudioFrame> CreateAudioFrame(uint8_t data_init_value = 0) {
     auto frame = std::make_unique<AudioFrame>();
     frame->sample_rate_hz_ = kSampleRateHz;
     frame->samples_per_channel_ = kSampleRateHz / 100;
     frame->num_channels_ = 1;
     frame->set_absolute_capture_timestamp_ms(
         time_controller_.GetClock()->TimeInMilliseconds());
+    int16_t* dest = frame->mutable_data();
+    for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
+         i++, dest++) {
+      *dest = data_init_value;
+    }
     return frame;
   }
 
-  void ProcessNextFrame() {
-    channel_->ProcessAndEncodeAudio(CreateAudioFrame());
+  void ProcessNextFrame(std::unique_ptr<AudioFrame> audio_frame) {
+    channel_->ProcessAndEncodeAudio(std::move(audio_frame));
     // Advance time to process the task queue.
     time_controller_.AdvanceTime(TimeDelta::Millis(10));
   }
 
+  void ProcessNextFrame() { ProcessNextFrame(CreateAudioFrame()); }
+
   GlobalSimulatedTimeController time_controller_;
   webrtc::test::ScopedKeyValueConfig field_trials_;
   Environment env_;
@@ -189,6 +197,117 @@
   EXPECT_TRUE_WAIT(sent_timestamp, 1000);
   EXPECT_EQ(*sent_timestamp, transformable_frame_timestamp);
 }
+
+// Ensure that AudioLevel calculations are performed correctly per-packet even
+// if there's an async Encoded Frame Transform happening.
+TEST_F(ChannelSendTest, AudioLevelsAttachedToCorrectTransformedFrame) {
+  channel_->SetSendAudioLevelIndicationStatus(true, /*id=*/1);
+  RtpPacketReceived::ExtensionManager extension_manager;
+  extension_manager.RegisterByType(1, kRtpExtensionAudioLevel);
+
+  rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
+      rtc::make_ref_counted<MockFrameTransformer>();
+  channel_->SetEncoderToPacketizerFrameTransformer(mock_frame_transformer);
+  rtc::scoped_refptr<TransformedFrameCallback> callback;
+  EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback)
+      .WillOnce(SaveArg<0>(&callback));
+  EXPECT_CALL(*mock_frame_transformer, UnregisterTransformedFrameCallback);
+
+  std::vector<uint8_t> sent_audio_levels;
+  auto send_rtp = [&](rtc::ArrayView<const uint8_t> data,
+                      const PacketOptions& options) {
+    RtpPacketReceived packet(&extension_manager);
+    packet.Parse(data);
+    RTPHeader header;
+    packet.GetHeader(&header);
+    sent_audio_levels.push_back(header.extension.audio_level()->level());
+    return true;
+  };
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp));
+
+  channel_->StartSend();
+  std::vector<std::unique_ptr<TransformableFrameInterface>> frames;
+  EXPECT_CALL(*mock_frame_transformer, Transform)
+      .Times(2)
+      .WillRepeatedly([&](std::unique_ptr<TransformableFrameInterface> frame) {
+        frames.push_back(std::move(frame));
+      });
+
+  // Insert two frames of 7s which should trigger a new packet.
+  ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/7));
+  ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/7));
+
+  // Insert two more frames of 3s, meaning a second packet is
+  // prepared and sent to the transform before the first packet has
+  // been sent.
+  ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/3));
+  ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/3));
+
+  // Wait for both packets to be encoded and sent to the transform.
+  ASSERT_EQ_WAIT(frames.size(), 2ul, 1000);  // Fatal: indexed just below.
+  // Complete the transforms on both frames at the same time.
+  callback->OnTransformedFrame(std::move(frames[0]));
+  callback->OnTransformedFrame(std::move(frames[1]));
+
+  // Allow things posted back to the encoder queue to run.
+  time_controller_.AdvanceTime(TimeDelta::Millis(10));
+
+  // Ensure the audio levels on both sent packets are present and
+  // match their contents.
+  ASSERT_EQ_WAIT(sent_audio_levels.size(), 2ul, 1000);  // Fatal: indexed below.
+  // rms dbov of the packet with raw audio of 7s is 73.
+  EXPECT_EQ(sent_audio_levels[0], 73);
+  // rms dbov of the second packet with raw audio of 3s is 81.
+  EXPECT_EQ(sent_audio_levels[1], 81);
+}
+
+// Ensure that AudioLevels are attached to frames injected into the
+// Encoded Frame transform.
+TEST_F(ChannelSendTest, AudioLevelsAttachedToInsertedTransformedFrame) {
+  channel_->SetSendAudioLevelIndicationStatus(true, /*id=*/1);
+  RtpPacketReceived::ExtensionManager extension_manager;
+  extension_manager.RegisterByType(1, kRtpExtensionAudioLevel);
+
+  rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
+      rtc::make_ref_counted<MockFrameTransformer>();
+  channel_->SetEncoderToPacketizerFrameTransformer(mock_frame_transformer);
+  rtc::scoped_refptr<TransformedFrameCallback> callback;
+  EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback)
+      .WillOnce(SaveArg<0>(&callback));
+  EXPECT_CALL(*mock_frame_transformer, UnregisterTransformedFrameCallback);
+
+  absl::optional<uint8_t> sent_audio_level;
+  auto send_rtp = [&](rtc::ArrayView<const uint8_t> data,
+                      const PacketOptions& options) {
+    RtpPacketReceived packet(&extension_manager);
+    packet.Parse(data);
+    RTPHeader header;
+    packet.GetHeader(&header);
+    sent_audio_level = header.extension.audio_level()->level();
+    return true;
+  };
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp));
+
+  channel_->StartSend();
+
+  time_controller_.AdvanceTime(TimeDelta::Millis(10));
+  // Inject a frame encoded elsewhere.
+  auto mock_frame = std::make_unique<NiceMock<MockTransformableAudioFrame>>();
+  uint8_t audio_level = 67;
+  ON_CALL(*mock_frame, AudioLevel()).WillByDefault(Return(audio_level));
+  uint8_t payload[10] = {};  // Zero-filled: the bytes are read when sent.
+  ON_CALL(*mock_frame, GetData())
+      .WillByDefault(Return(rtc::ArrayView<uint8_t>(&payload[0], 10)));
+  ASSERT_TRUE_WAIT(callback, 1000);  // Fatal: `callback` is dereferenced next.
+  callback->OnTransformedFrame(std::move(mock_frame));
+
+  // Allow things posted back to the encoder queue to run.
+  time_controller_.AdvanceTime(TimeDelta::Millis(10));
+
+  // Ensure the audio level is set on the sent packet.
+  ASSERT_TRUE_WAIT(sent_audio_level, 1000);  // Fatal: dereferenced below.
+  EXPECT_EQ(*sent_audio_level, audio_level);
+}
 }  // namespace
 }  // namespace voe
 }  // namespace webrtc