Optimize Android NV12 capture

This CL optimizes the Android capture NV12 -> I420 + scaling code. For
example, when the input is 1280x720 and we adapt to 640x360, this CL:
 - Reduces conversion time from 3.37 ms to 1.46 ms.
 - Reduces memory footprint by 1 MB.

BUG=webrtc:6319

Review-Url: https://codereview.webrtc.org/2317443003
Cr-Commit-Position: refs/heads/master@{#14167}
diff --git a/webrtc/api/androidvideotracksource.cc b/webrtc/api/androidvideotracksource.cc
index 000337d..d232f5e 100644
--- a/webrtc/api/androidvideotracksource.cc
+++ b/webrtc/api/androidvideotracksource.cc
@@ -12,6 +12,9 @@
 
 #include <utility>
 
+#include "third_party/libyuv/include/libyuv/convert.h"
+#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
+
 namespace webrtc {
 
 AndroidVideoTrackSource::AndroidVideoTrackSource(rtc::Thread* signaling_thread,
@@ -106,42 +109,70 @@
     return;
   }
 
-  int rotated_width = crop_width;
-  int rotated_height = crop_height;
-
-  rtc::CritScope lock(&apply_rotation_crit_);
-  if (apply_rotation_ && (rotation == 90 || rotation == 270)) {
-    std::swap(adapted_width, adapted_height);
-    std::swap(rotated_width, rotated_height);
-  }
-
-  rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer =
-      pre_scale_pool_.CreateBuffer(rotated_width, rotated_height);
-
   const uint8_t* y_plane = static_cast<const uint8_t*>(frame_data);
   const uint8_t* uv_plane = y_plane + width * height;
-  int uv_width = (width + 1) / 2;
+  const int uv_width = (width + 1) / 2;
 
   RTC_CHECK_GE(length, width * height + 2 * uv_width * ((height + 1) / 2));
 
   // Can only crop at even pixels.
   crop_x &= ~1;
   crop_y &= ~1;
+  // Crop just by modifying pointers.
+  y_plane += width * crop_y + crop_x;
+  uv_plane += uv_width * crop_y + crop_x;
 
-  libyuv::NV12ToI420Rotate(
-      y_plane + width * crop_y + crop_x, width,
-      uv_plane + uv_width * crop_y + crop_x, width, buffer->MutableDataY(),
-      buffer->StrideY(),
-      // Swap U and V, since we have NV21, not NV12.
-      buffer->MutableDataV(), buffer->StrideV(), buffer->MutableDataU(),
-      buffer->StrideU(), crop_width, crop_height,
-      static_cast<libyuv::RotationMode>(apply_rotation_ ? rotation : 0));
+  rtc::scoped_refptr<webrtc::I420Buffer> buffer =
+      buffer_pool_.CreateBuffer(adapted_width, adapted_height);
 
-  if (adapted_width != buffer->width() || adapted_height != buffer->height()) {
-    rtc::scoped_refptr<webrtc::I420Buffer> scaled_buffer(
-        post_scale_pool_.CreateBuffer(adapted_width, adapted_height));
-    scaled_buffer->ScaleFrom(buffer);
-    buffer = scaled_buffer;
+  if (adapted_width == crop_width && adapted_height == crop_height) {
+    // No scaling.
+    libyuv::NV12ToI420(
+        y_plane, width,
+        uv_plane, uv_width * 2,
+        buffer->MutableDataY(), buffer->StrideY(),
+        // Swap U and V, since we have NV21, not NV12.
+        buffer->MutableDataV(), buffer->StrideV(),
+        buffer->MutableDataU(), buffer->StrideU(),
+        buffer->width(), buffer->height());
+
+  } else {
+    // Scaling.
+    const int crop_uv_width = (crop_width + 1) / 2;
+    const int crop_uv_height = (crop_height + 1) / 2;
+    unscaled_uv_planes_.resize(crop_uv_width * crop_uv_height * 2);
+
+    NV12ToI420Scale(
+        unscaled_uv_planes_.data(),
+        y_plane, width,
+        uv_plane, uv_width * 2,
+        crop_width, crop_height,
+        buffer->MutableDataY(), buffer->StrideY(),
+        // Swap U and V, since we have NV21, not NV12.
+        buffer->MutableDataV(), buffer->StrideV(),
+        buffer->MutableDataU(), buffer->StrideU(),
+        buffer->width(), buffer->height());
+  }
+
+  // Applying rotation is only supported for legacy reasons, and the performance
+  // for this path is not critical.
+  rtc::CritScope lock(&apply_rotation_crit_);
+  if (apply_rotation_ && rotation != 0) {
+    rtc::scoped_refptr<I420Buffer> rotated_buffer = I420Buffer::Create(
+        rotation == 180 ? buffer->width() : buffer->height(),
+        rotation == 180 ? buffer->height() : buffer->width());
+
+    libyuv::I420Rotate(
+        buffer->DataY(), buffer->StrideY(),
+        buffer->DataU(), buffer->StrideU(),
+        buffer->DataV(), buffer->StrideV(),
+        rotated_buffer->MutableDataY(), rotated_buffer->StrideY(),
+        rotated_buffer->MutableDataU(), rotated_buffer->StrideU(),
+        rotated_buffer->MutableDataV(), rotated_buffer->StrideV(),
+        buffer->width(), buffer->height(),
+        static_cast<libyuv::RotationMode>(rotation));
+
+    buffer = rotated_buffer;
   }
 
   OnFrame(cricket::WebRtcVideoFrame(
diff --git a/webrtc/api/androidvideotracksource.h b/webrtc/api/androidvideotracksource.h
index 2bbecc8..5cf52d0 100644
--- a/webrtc/api/androidvideotracksource.h
+++ b/webrtc/api/androidvideotracksource.h
@@ -23,7 +23,6 @@
 #include "webrtc/media/base/videoadapter.h"
 #include "webrtc/media/base/videobroadcaster.h"
 #include "webrtc/media/base/videosinkinterface.h"
-#include "third_party/libyuv/include/libyuv/convert.h"
 
 namespace webrtc {
 
@@ -92,8 +91,8 @@
   cricket::VideoAdapter video_adapter_;
   rtc::CriticalSection apply_rotation_crit_;
   bool apply_rotation_ GUARDED_BY(apply_rotation_crit_);
-  webrtc::I420BufferPool pre_scale_pool_;
-  webrtc::I420BufferPool post_scale_pool_;
+  std::vector<uint8_t> unscaled_uv_planes_;
+  webrtc::I420BufferPool buffer_pool_;
   rtc::scoped_refptr<webrtc_jni::SurfaceTextureHelper> surface_texture_helper_;
   const bool is_screencast_;
 
diff --git a/webrtc/common_video/libyuv/include/webrtc_libyuv.h b/webrtc/common_video/libyuv/include/webrtc_libyuv.h
index ec3720e..1ceca4d 100644
--- a/webrtc/common_video/libyuv/include/webrtc_libyuv.h
+++ b/webrtc/common_video/libyuv/include/webrtc_libyuv.h
@@ -123,6 +123,19 @@
 // Compute SSIM for an I420 frame (all planes).
 double I420SSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame);
 
+// Converts an NV12 frame to I420 and scales it to |dst_width| x |dst_height|.
+// |tmp_data| is scratch memory used to split the interleaved UV plane into
+// separate U and V planes before scaling; it must hold at least
+// 2 * ((src_width + 1) / 2) * ((src_height + 1) / 2) bytes.
+void NV12ToI420Scale(uint8_t* tmp_data,
+                     const uint8_t* src_y, int src_stride_y,
+                     const uint8_t* src_uv, int src_stride_uv,
+                     int src_width, int src_height,
+                     uint8_t* dst_y, int dst_stride_y,
+                     uint8_t* dst_u, int dst_stride_u,
+                     uint8_t* dst_v, int dst_stride_v,
+                     int dst_width, int dst_height);
+
 }  // namespace webrtc
 
 #endif  // WEBRTC_COMMON_VIDEO_LIBYUV_INCLUDE_WEBRTC_LIBYUV_H_
diff --git a/webrtc/common_video/libyuv/webrtc_libyuv.cc b/webrtc/common_video/libyuv/webrtc_libyuv.cc
index 44577e9..5d9ab57 100644
--- a/webrtc/common_video/libyuv/webrtc_libyuv.cc
+++ b/webrtc/common_video/libyuv/webrtc_libyuv.cc
@@ -341,4 +341,34 @@
                           test_frame->video_frame_buffer()->StrideV(),
                           test_frame->width(), test_frame->height());
 }
+
+void NV12ToI420Scale(uint8_t* tmp_data,
+                     const uint8_t* src_y, int src_stride_y,
+                     const uint8_t* src_uv, int src_stride_uv,
+                     int src_width, int src_height,
+                     uint8_t* dst_y, int dst_stride_y,
+                     uint8_t* dst_u, int dst_stride_u,
+                     uint8_t* dst_v, int dst_stride_v,
+                     int dst_width, int dst_height) {
+  // Split the source UV plane into U and V planes stored in |tmp_data|.
+  const int src_uv_width = (src_width + 1) / 2;  // Rounds up for odd widths.
+  const int src_uv_height = (src_height + 1) / 2;  // Rounds up likewise.
+  uint8_t* const src_u = tmp_data;  // U plane: start of |tmp_data|.
+  uint8_t* const src_v = tmp_data + src_uv_width * src_uv_height;  // After U.
+  libyuv::SplitUVPlane(src_uv, src_stride_uv,
+                       src_u, src_uv_width,  // Packed: stride == width.
+                       src_v, src_uv_width,
+                       src_uv_width, src_uv_height);
+  // Scale Y from the source and U/V from |tmp_data| into the destination.
+  libyuv::I420Scale(src_y, src_stride_y,
+                    src_u, src_uv_width,
+                    src_v, src_uv_width,
+                    src_width, src_height,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    dst_width, dst_height,
+                    libyuv::kFilterBox);
+}
+
 }  // namespace webrtc