Support RGB frames in RTCCVPixelBuffer

In addition to NV12 frames, also support cropping/scaling RGB frames and
converting RGB frames to I420.

This CL also removes the hardcoding of pixel format in
RTCCameraVideoCapturer. Instead, use the first available format for the
output device that our pipeline supports.

Bug: webrtc:8351
Change-Id: If479b4934c47cd2994936913f55e60fbbee3893b
Reviewed-on: https://webrtc-review.googlesource.com/8920
Commit-Queue: Anders Carlsson <andersc@webrtc.org>
Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
Reviewed-by: Daniela Jovanoska Petrenko <denicija@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20396}
diff --git a/sdk/objc/Framework/Classes/PeerConnection/RTCCameraVideoCapturer.m b/sdk/objc/Framework/Classes/PeerConnection/RTCCameraVideoCapturer.m
index 9237484..b1fc11c 100644
--- a/sdk/objc/Framework/Classes/PeerConnection/RTCCameraVideoCapturer.m
+++ b/sdk/objc/Framework/Classes/PeerConnection/RTCCameraVideoCapturer.m
@@ -23,11 +23,6 @@
 
 const int64_t kNanosecondsPerSecond = 1000000000;
 
-static inline BOOL IsMediaSubTypeSupported(FourCharCode mediaSubType) {
-  return (mediaSubType == kCVPixelFormatType_420YpCbCr8PlanarFullRange ||
-          mediaSubType == kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange);
-}
-
 @interface RTCCameraVideoCapturer ()<AVCaptureVideoDataOutputSampleBufferDelegate>
 @property(nonatomic, readonly) dispatch_queue_t frameQueue;
 @end
@@ -105,17 +100,9 @@
 }
 
 + (NSArray<AVCaptureDeviceFormat *> *)supportedFormatsForDevice:(AVCaptureDevice *)device {
-  NSMutableArray<AVCaptureDeviceFormat *> *eligibleDeviceFormats = [NSMutableArray array];
-
-  for (AVCaptureDeviceFormat *format in device.formats) {
-    // Filter out subTypes that we currently don't support in the stack
-    FourCharCode mediaSubType = CMFormatDescriptionGetMediaSubType(format.formatDescription);
-    if (IsMediaSubTypeSupported(mediaSubType)) {
-      [eligibleDeviceFormats addObject:format];
-    }
-  }
-
-  return eligibleDeviceFormats;
+  // Support opening the device in any format. We make sure it's converted to a format we
+  // can handle, if needed, in the method `-setupVideoDataOutput`.
+  return device.formats;
 }
 
 - (void)startCaptureWithDevice:(AVCaptureDevice *)device
@@ -387,14 +374,18 @@
 
 - (void)setupVideoDataOutput {
   NSAssert(_videoDataOutput == nil, @"Setup video data output called twice.");
-  // Make the capturer output NV12. Ideally we want I420 but that's not
-  // currently supported on iPhone / iPad.
   AVCaptureVideoDataOutput *videoDataOutput = [[AVCaptureVideoDataOutput alloc] init];
-  videoDataOutput.videoSettings = @{
-    (NSString *)
-    // TODO(denicija): Remove this color conversion and use the original capture format directly.
-    kCVPixelBufferPixelFormatTypeKey : @(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)
-  };
+
+  // `videoDataOutput.availableVideoCVPixelFormatTypes` returns the pixel formats supported by the
+  // device with the most efficient output format first. Find the first format that we support.
+  NSSet<NSNumber *> *supportedPixelFormats = [RTCCVPixelBuffer supportedPixelFormats];
+  NSMutableOrderedSet *availablePixelFormats =
+      [NSMutableOrderedSet orderedSetWithArray:videoDataOutput.availableVideoCVPixelFormatTypes];
+  [availablePixelFormats intersectSet:supportedPixelFormats];
+  NSNumber *pixelFormat = availablePixelFormats.firstObject;
+  NSAssert(pixelFormat, @"Output device has no supported formats.");
+
+  videoDataOutput.videoSettings = @{(NSString *)kCVPixelBufferPixelFormatTypeKey : pixelFormat};
   videoDataOutput.alwaysDiscardsLateVideoFrames = NO;
   [videoDataOutput setSampleBufferDelegate:self queue:self.frameQueue];
   _videoDataOutput = videoDataOutput;
diff --git a/sdk/objc/Framework/Classes/Video/RTCCVPixelBuffer.mm b/sdk/objc/Framework/Classes/Video/RTCCVPixelBuffer.mm
index 17f14e3..454f655 100644
--- a/sdk/objc/Framework/Classes/Video/RTCCVPixelBuffer.mm
+++ b/sdk/objc/Framework/Classes/Video/RTCCVPixelBuffer.mm
@@ -14,6 +14,8 @@
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
 
+#include "libyuv.h"
+
 @implementation RTCCVPixelBuffer {
   int _width;
   int _height;
@@ -27,6 +29,14 @@
 @synthesize cropX = _cropX;
 @synthesize cropY = _cropY;
 
++ (NSSet<NSNumber*>*)supportedPixelFormats {
+  return [NSSet setWithObjects:@(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange),
+                               @(kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange),
+                               @(kCVPixelFormatType_32BGRA),
+                               @(kCVPixelFormatType_32ARGB),
+                               nil];
+}
+
 - (instancetype)initWithPixelBuffer:(CVPixelBufferRef)pixelBuffer {
   return [self initWithPixelBuffer:pixelBuffer
                       adaptedWidth:CVPixelBufferGetWidth(pixelBuffer)
@@ -82,22 +92,128 @@
 }
 
 - (int)bufferSizeForCroppingAndScalingToWidth:(int)width height:(int)height {
-  int srcChromaWidth = (_cropWidth + 1) / 2;
-  int srcChromaHeight = (_cropHeight + 1) / 2;
-  int dstChromaWidth = (width + 1) / 2;
-  int dstChromaHeight = (height + 1) / 2;
+  const OSType srcPixelFormat = CVPixelBufferGetPixelFormatType(_pixelBuffer);
+  switch (srcPixelFormat) {
+    case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange:
+    case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: {
+      int srcChromaWidth = (_cropWidth + 1) / 2;
+      int srcChromaHeight = (_cropHeight + 1) / 2;
+      int dstChromaWidth = (width + 1) / 2;
+      int dstChromaHeight = (height + 1) / 2;
 
-  return srcChromaWidth * srcChromaHeight * 2 + dstChromaWidth * dstChromaHeight * 2;
+      return srcChromaWidth * srcChromaHeight * 2 + dstChromaWidth * dstChromaHeight * 2;
+    }
+    case kCVPixelFormatType_32BGRA:
+    case kCVPixelFormatType_32ARGB: {
+      return 0;  // Scaling RGBA frames does not require a temporary buffer.
+    }
+  }
+  RTC_NOTREACHED() << "Unsupported pixel format.";
+  return 0;
 }
 
 - (BOOL)cropAndScaleTo:(CVPixelBufferRef)outputPixelBuffer withTempBuffer:(uint8_t*)tmpBuffer {
+  const OSType srcPixelFormat = CVPixelBufferGetPixelFormatType(_pixelBuffer);
+  switch (srcPixelFormat) {
+    case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange:
+    case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: {
+      [self cropAndScaleNV12To:outputPixelBuffer withTempBuffer:tmpBuffer];
+      break;
+    }
+    case kCVPixelFormatType_32BGRA:
+    case kCVPixelFormatType_32ARGB: {
+      [self cropAndScaleARGBTo:outputPixelBuffer];
+      break;
+    }
+    default: { RTC_NOTREACHED() << "Unsupported pixel format."; }
+  }
+
+  return YES;
+}
+
+- (id<RTCI420Buffer>)toI420 {
+  const OSType pixelFormat = CVPixelBufferGetPixelFormatType(_pixelBuffer);
+
+  CVPixelBufferLockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);
+
+  RTCMutableI420Buffer* i420Buffer =
+      [[RTCMutableI420Buffer alloc] initWithWidth:[self width] height:[self height]];
+
+  switch (pixelFormat) {
+    case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange:
+    case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: {
+      const uint8_t* srcY =
+          static_cast<const uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 0));
+      const int srcYStride = CVPixelBufferGetBytesPerRowOfPlane(_pixelBuffer, 0);
+      const uint8_t* srcUV =
+          static_cast<const uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 1));
+      const int srcUVStride = CVPixelBufferGetBytesPerRowOfPlane(_pixelBuffer, 1);
+
+      // Crop just by modifying pointers.
+      srcY += srcYStride * _cropY + _cropX;
+      srcUV += srcUVStride * (_cropY / 2) + _cropX;
+
+      // TODO(magjed): Use a frame buffer pool.
+      webrtc::NV12ToI420Scaler nv12ToI420Scaler;
+      nv12ToI420Scaler.NV12ToI420Scale(srcY,
+                                       srcYStride,
+                                       srcUV,
+                                       srcUVStride,
+                                       _cropWidth,
+                                       _cropHeight,
+                                       i420Buffer.mutableDataY,
+                                       i420Buffer.strideY,
+                                       i420Buffer.mutableDataU,
+                                       i420Buffer.strideU,
+                                       i420Buffer.mutableDataV,
+                                       i420Buffer.strideV,
+                                       i420Buffer.width,
+                                       i420Buffer.height);
+      break;
+    }
+    case kCVPixelFormatType_32BGRA:
+    case kCVPixelFormatType_32ARGB: {
+      const uint8_t* src =
+          static_cast<const uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 0));
+
+      uint32 libyuvPixelFormat = 0;
+      if (pixelFormat == kCVPixelFormatType_32BGRA) {
+        libyuvPixelFormat = libyuv::FOURCC_ARGB;
+      } else if (pixelFormat == kCVPixelFormatType_32ARGB) {
+        libyuvPixelFormat = libyuv::FOURCC_ABGR;
+      }
+
+      libyuv::ConvertToI420(src,
+                            0,
+                            i420Buffer.mutableDataY,
+                            i420Buffer.strideY,
+                            i420Buffer.mutableDataU,
+                            i420Buffer.strideU,
+                            i420Buffer.mutableDataV,
+                            i420Buffer.strideV,
+                            _cropX,
+                            _cropY,
+                            _cropWidth,
+                            _cropHeight,
+                            i420Buffer.width,
+                            i420Buffer.height,
+                            libyuv::kRotate0,
+                            libyuvPixelFormat);
+      break;
+    }
+    default: { RTC_NOTREACHED() << "Unsupported pixel format."; }
+  }
+
+  CVPixelBufferUnlockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);
+
+  return i420Buffer;
+}
+
+- (void)cropAndScaleNV12To:(CVPixelBufferRef)outputPixelBuffer withTempBuffer:(uint8_t*)tmpBuffer {
   // Prepare output pointers.
-  RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(outputPixelBuffer),
-                kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
   CVReturn cvRet = CVPixelBufferLockBaseAddress(outputPixelBuffer, 0);
   if (cvRet != kCVReturnSuccess) {
     LOG(LS_ERROR) << "Failed to lock base address: " << cvRet;
-    return NO;
   }
   const int dstWidth = CVPixelBufferGetWidth(outputPixelBuffer);
   const int dstHeight = CVPixelBufferGetHeight(outputPixelBuffer);
@@ -109,9 +225,6 @@
   const int dstUVStride = CVPixelBufferGetBytesPerRowOfPlane(outputPixelBuffer, 1);
 
   // Prepare source pointers.
-  const OSType srcPixelFormat = CVPixelBufferGetPixelFormatType(_pixelBuffer);
-  RTC_DCHECK(srcPixelFormat == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange ||
-             srcPixelFormat == kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange);
   CVPixelBufferLockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);
   const uint8_t* srcY =
       static_cast<const uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 0));
@@ -140,49 +253,40 @@
 
   CVPixelBufferUnlockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);
   CVPixelBufferUnlockBaseAddress(outputPixelBuffer, 0);
-
-  return YES;
 }
 
-- (id<RTCI420Buffer>)toI420 {
-  const OSType pixelFormat = CVPixelBufferGetPixelFormatType(_pixelBuffer);
-  RTC_DCHECK(pixelFormat == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange ||
-             pixelFormat == kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange);
+- (void)cropAndScaleARGBTo:(CVPixelBufferRef)outputPixelBuffer {
+  // Prepare output pointers.
+  CVReturn cvRet = CVPixelBufferLockBaseAddress(outputPixelBuffer, 0);
+  if (cvRet != kCVReturnSuccess) {
+    LOG(LS_ERROR) << "Failed to lock base address: " << cvRet;
+  }
+  const int dstWidth = CVPixelBufferGetWidth(outputPixelBuffer);
+  const int dstHeight = CVPixelBufferGetHeight(outputPixelBuffer);
 
+  uint8_t* dst =
+      reinterpret_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(outputPixelBuffer, 0));
+  const int dstStride = CVPixelBufferGetBytesPerRowOfPlane(outputPixelBuffer, 0);
+
+  // Prepare source pointers.
   CVPixelBufferLockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);
-  const uint8_t* srcY =
-      static_cast<const uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 0));
-  const int srcYStride = CVPixelBufferGetBytesPerRowOfPlane(_pixelBuffer, 0);
-  const uint8_t* srcUV =
-      static_cast<const uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(_pixelBuffer, 1));
-  const int srcUVStride = CVPixelBufferGetBytesPerRowOfPlane(_pixelBuffer, 1);
+  const uint8_t* src = static_cast<const uint8_t*>(CVPixelBufferGetBaseAddress(_pixelBuffer));
+  const int srcStride = CVPixelBufferGetBytesPerRow(_pixelBuffer);
 
   // Crop just by modifying pointers.
-  srcY += srcYStride * _cropY + _cropX;
-  srcUV += srcUVStride * (_cropY / 2) + _cropX;
-
-  // TODO(magjed): Use a frame buffer pool.
-  webrtc::NV12ToI420Scaler nv12ToI420Scaler;
-  RTCMutableI420Buffer* i420Buffer =
-      [[RTCMutableI420Buffer alloc] initWithWidth:[self width] height:[self height]];
-  nv12ToI420Scaler.NV12ToI420Scale(srcY,
-                                   srcYStride,
-                                   srcUV,
-                                   srcUVStride,
-                                   _cropWidth,
-                                   _cropHeight,
-                                   i420Buffer.mutableDataY,
-                                   i420Buffer.strideY,
-                                   i420Buffer.mutableDataU,
-                                   i420Buffer.strideU,
-                                   i420Buffer.mutableDataV,
-                                   i420Buffer.strideV,
-                                   i420Buffer.width,
-                                   i420Buffer.height);
+  src += srcStride * _cropY + _cropX;
+  libyuv::ARGBScale(src,
+                    srcStride,
+                    _cropWidth,
+                    _cropHeight,
+                    dst,
+                    dstStride,
+                    dstWidth,
+                    dstHeight,
+                    libyuv::kFilterBox);
 
   CVPixelBufferUnlockBaseAddress(_pixelBuffer, kCVPixelBufferLock_ReadOnly);
-
-  return i420Buffer;
+  CVPixelBufferUnlockBaseAddress(outputPixelBuffer, 0);
 }
 
 @end
diff --git a/sdk/objc/Framework/Classes/Video/RTCI420Buffer+Private.h b/sdk/objc/Framework/Classes/Video/RTCI420Buffer+Private.h
index 6c942ce..d874925 100644
--- a/sdk/objc/Framework/Classes/Video/RTCI420Buffer+Private.h
+++ b/sdk/objc/Framework/Classes/Video/RTCI420Buffer+Private.h
@@ -18,6 +18,7 @@
 
 /** Initialize an RTCI420Buffer with its backing I420BufferInterface. */
 - (instancetype)initWithFrameBuffer:(rtc::scoped_refptr<webrtc::I420BufferInterface>)i420Buffer;
+- (rtc::scoped_refptr<webrtc::I420BufferInterface>)nativeI420Buffer;
 
 @end
 
diff --git a/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH264.mm b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH264.mm
index 5b08e74..3d4ae71 100644
--- a/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH264.mm
+++ b/sdk/objc/Framework/Classes/VideoToolbox/RTCVideoEncoderH264.mm
@@ -60,6 +60,8 @@
 const int kLowH264QpThreshold = 28;
 const int kHighH264QpThreshold = 39;
 
+const OSType kNV12PixelFormat = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
+
 // Struct that we pass to the encoder per frame to encode. We receive it again
 // in the encoder callback.
 struct RTCFrameEncodeParams {
@@ -90,10 +92,9 @@
 // We receive I420Frames as input, but we need to feed CVPixelBuffers into the
 // encoder. This performs the copy and format conversion.
 // TODO(tkchin): See if encoder will accept i420 frames and compare performance.
-bool CopyVideoFrameToPixelBuffer(id<RTCI420Buffer> frameBuffer, CVPixelBufferRef pixelBuffer) {
+bool CopyVideoFrameToNV12PixelBuffer(id<RTCI420Buffer> frameBuffer, CVPixelBufferRef pixelBuffer) {
   RTC_DCHECK(pixelBuffer);
-  RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer),
-                kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
+  RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer), kNV12PixelFormat);
   RTC_DCHECK_EQ(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0), frameBuffer.height);
   RTC_DCHECK_EQ(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0), frameBuffer.width);
 
@@ -286,7 +287,7 @@
   RTCVideoCodecMode _mode;
 
   webrtc::H264BitstreamParser _h264BitstreamParser;
-  std::vector<uint8_t> _nv12ScaleBuffer;
+  std::vector<uint8_t> _frameScaleBuffer;
 }
 
 // .5 is set as a mininum to prevent overcompensating for large temporary
@@ -333,7 +334,7 @@
   // TODO(tkchin): Try setting payload size via
   // kVTCompressionPropertyKey_MaxH264SliceBytes.
 
-  return [self resetCompressionSession];
+  return [self resetCompressionSessionWithPixelFormat:kNV12PixelFormat];
 }
 
 - (NSInteger)encode:(RTCVideoFrame *)frame
@@ -356,20 +357,10 @@
   // Get a pixel buffer from the pool and copy frame data over.
   CVPixelBufferPoolRef pixelBufferPool =
       VTCompressionSessionGetPixelBufferPool(_compressionSession);
-
-#if defined(WEBRTC_IOS)
-  if (!pixelBufferPool) {
-    // Kind of a hack. On backgrounding, the compression session seems to get
-    // invalidated, which causes this pool call to fail when the application
-    // is foregrounded and frames are being sent for encoding again.
-    // Resetting the session when this happens fixes the issue.
-    // In addition we request a keyframe so video can recover quickly.
-    [self resetCompressionSession];
+  if ([self resetCompressionSessionIfNeededForPool:pixelBufferPool withFrame:frame]) {
     pixelBufferPool = VTCompressionSessionGetPixelBufferPool(_compressionSession);
     isKeyframeRequired = YES;
-    LOG(LS_INFO) << "Resetting compression session due to invalid pool.";
   }
-#endif
 
   CVPixelBufferRef pixelBuffer = nullptr;
   if ([frame.buffer isKindOfClass:[RTCCVPixelBuffer class]]) {
@@ -393,12 +384,12 @@
       if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) {
         int size =
             [rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth height:dstHeight];
-        _nv12ScaleBuffer.resize(size);
+        _frameScaleBuffer.resize(size);
       } else {
-        _nv12ScaleBuffer.clear();
+        _frameScaleBuffer.clear();
       }
-      _nv12ScaleBuffer.shrink_to_fit();
-      if (![rtcPixelBuffer cropAndScaleTo:pixelBuffer withTempBuffer:_nv12ScaleBuffer.data()]) {
+      _frameScaleBuffer.shrink_to_fit();
+      if (![rtcPixelBuffer cropAndScaleTo:pixelBuffer withTempBuffer:_frameScaleBuffer.data()]) {
         return WEBRTC_VIDEO_CODEC_ERROR;
       }
     }
@@ -411,7 +402,7 @@
       return WEBRTC_VIDEO_CODEC_ERROR;
     }
     RTC_DCHECK(pixelBuffer);
-    if (!CopyVideoFrameToPixelBuffer([frame.buffer toI420], pixelBuffer)) {
+    if (!CopyVideoFrameToNV12PixelBuffer([frame.buffer toI420], pixelBuffer)) {
       LOG(LS_ERROR) << "Failed to copy frame data.";
       CVBufferRelease(pixelBuffer);
       return WEBRTC_VIDEO_CODEC_ERROR;
@@ -491,7 +482,56 @@
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
-- (int)resetCompressionSession {
+- (BOOL)resetCompressionSessionIfNeededForPool:(CVPixelBufferPoolRef)pixelBufferPool
+                                     withFrame:(RTCVideoFrame *)frame {
+  BOOL resetCompressionSession = NO;
+
+#if defined(WEBRTC_IOS)
+  if (!pixelBufferPool) {
+    // Kind of a hack. On backgrounding, the compression session seems to get
+    // invalidated, which causes this pool call to fail when the application
+    // is foregrounded and frames are being sent for encoding again.
+    // Resetting the session when this happens fixes the issue.
+    // In addition we request a keyframe so video can recover quickly.
+    resetCompressionSession = YES;
+    LOG(LS_INFO) << "Resetting compression session due to invalid pool.";
+  }
+#endif
+
+  // If we're capturing native frames in another pixel format than the compression session is
+  // configured with, make sure the compression session is reset using the correct pixel format.
+  OSType framePixelFormat = kNV12PixelFormat;
+  if (pixelBufferPool && [frame.buffer isKindOfClass:[RTCCVPixelBuffer class]]) {
+    RTCCVPixelBuffer *rtcPixelBuffer = (RTCCVPixelBuffer *)frame.buffer;
+    framePixelFormat = CVPixelBufferGetPixelFormatType(rtcPixelBuffer.pixelBuffer);
+
+    // The pool attribute `kCVPixelBufferPixelFormatTypeKey` can contain either an array of pixel
+    // formats or a single pixel format.
+    NSDictionary *poolAttributes =
+        (__bridge NSDictionary *)CVPixelBufferPoolGetPixelBufferAttributes(pixelBufferPool);
+    id pixelFormats =
+        [poolAttributes objectForKey:(__bridge NSString *)kCVPixelBufferPixelFormatTypeKey];
+    NSArray<NSNumber *> *compressionSessionPixelFormats = nil;
+    if ([pixelFormats isKindOfClass:[NSArray class]]) {
+      compressionSessionPixelFormats = (NSArray *)pixelFormats;
+    } else {
+      compressionSessionPixelFormats = @[ (NSNumber *)pixelFormats ];
+    }
+
+    if (![compressionSessionPixelFormats
+            containsObject:[NSNumber numberWithLong:framePixelFormat]]) {
+      resetCompressionSession = YES;
+      LOG(LS_INFO) << "Resetting compression session due to non-matching pixel format.";
+    }
+  }
+
+  if (resetCompressionSession) {
+    [self resetCompressionSessionWithPixelFormat:framePixelFormat];
+  }
+  return resetCompressionSession;
+}
+
+- (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat {
   [self destroyCompressionSession];
 
   // Set source image buffer attributes. These attributes will be present on
@@ -507,8 +547,8 @@
     kCVPixelBufferPixelFormatTypeKey
   };
   CFDictionaryRef ioSurfaceValue = CreateCFTypeDictionary(nullptr, nullptr, 0);
-  int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
-  CFNumberRef pixelFormat = CFNumberCreate(nullptr, kCFNumberLongType, &nv12type);
+  int64_t pixelFormatType = framePixelFormat;
+  CFNumberRef pixelFormat = CFNumberCreate(nullptr, kCFNumberLongType, &pixelFormatType);
   CFTypeRef values[attributesSize] = {kCFBooleanTrue, ioSurfaceValue, pixelFormat};
   CFDictionaryRef sourceAttributes = CreateCFTypeDictionary(keys, values, attributesSize);
   if (ioSurfaceValue) {
diff --git a/sdk/objc/Framework/Headers/WebRTC/RTCVideoFrameBuffer.h b/sdk/objc/Framework/Headers/WebRTC/RTCVideoFrameBuffer.h
index ffcdf26..4a683b0 100644
--- a/sdk/objc/Framework/Headers/WebRTC/RTCVideoFrameBuffer.h
+++ b/sdk/objc/Framework/Headers/WebRTC/RTCVideoFrameBuffer.h
@@ -72,6 +72,8 @@
 @property(nonatomic, readonly) int cropX;
 @property(nonatomic, readonly) int cropY;
 
++ (NSSet<NSNumber *> *)supportedPixelFormats;
+
 - (instancetype)initWithPixelBuffer:(CVPixelBufferRef)pixelBuffer;
 - (instancetype)initWithPixelBuffer:(CVPixelBufferRef)pixelBuffer
                        adaptedWidth:(int)adaptedWidth