Reland of Optimize Android NV12 capture (patchset #1 id:1 of https://codereview.webrtc.org/2327893002/ )

Reason for revert:
Import breakage has been fixed.

Original issue's description:
> Revert of Optimize Android NV12 capture (patchset #2 id:20001 of https://codereview.webrtc.org/2317443003/ )
>
> Reason for revert:
> Import breakage in g3.
>
> Original issue's description:
> > Optimize Android NV12 capture
> >
> > This CL optimizes the Android capture NV12 -> I420 + scaling code. For
> > example, when the input is 1280x720 and we adapt to 640x360, this CL:
> >  - Reduces conversion time from 3.37 ms to 1.46 ms.
> >  - Reduces memory footprint by 1 MB.
> >
> > BUG=webrtc:6319
> >
> > Committed: https://crrev.com/36d38cbb153e19bdc3c62a750aba6889da40aac2
> > Cr-Commit-Position: refs/heads/master@{#14167}
>
> TBR=sakal@webrtc.org
> # Not skipping CQ checks because original CL landed more than 1 day ago.
> BUG=webrtc:6319
TBR=sakal@webrtc.org
# Not skipping CQ checks because original CL landed more than 1 day ago.
BUG=webrtc:6319

Review-Url: https://codereview.webrtc.org/2332213011
Cr-Commit-Position: refs/heads/master@{#14273}
diff --git a/webrtc/api/androidvideotracksource.cc b/webrtc/api/androidvideotracksource.cc
index 7fd033e..f0bd26a 100644
--- a/webrtc/api/androidvideotracksource.cc
+++ b/webrtc/api/androidvideotracksource.cc
@@ -12,6 +12,8 @@
 
 #include <utility>
 
+#include "third_party/libyuv/include/libyuv/rotate.h"
+
 namespace webrtc {
 
 AndroidVideoTrackSource::AndroidVideoTrackSource(rtc::Thread* signaling_thread,
@@ -106,42 +108,51 @@
     return;
   }
 
-  int rotated_width = crop_width;
-  int rotated_height = crop_height;
-
-  rtc::CritScope lock(&apply_rotation_crit_);
-  if (apply_rotation_ && (rotation == 90 || rotation == 270)) {
-    std::swap(adapted_width, adapted_height);
-    std::swap(rotated_width, rotated_height);
-  }
-
-  rtc::scoped_refptr<webrtc::I420Buffer> buffer =
-      pre_scale_pool_.CreateBuffer(rotated_width, rotated_height);
-
   const uint8_t* y_plane = static_cast<const uint8_t*>(frame_data);
   const uint8_t* uv_plane = y_plane + width * height;
-  int uv_width = (width + 1) / 2;
+  const int uv_width = (width + 1) / 2;
 
   RTC_CHECK_GE(length, width * height + 2 * uv_width * ((height + 1) / 2));
 
   // Can only crop at even pixels.
   crop_x &= ~1;
   crop_y &= ~1;
+  // Crop just by modifying pointers.
+  y_plane += width * crop_y + crop_x;
+  uv_plane += uv_width * crop_y + crop_x;
 
-  libyuv::NV12ToI420Rotate(
-      y_plane + width * crop_y + crop_x, width,
-      uv_plane + uv_width * crop_y + crop_x, width, buffer->MutableDataY(),
-      buffer->StrideY(),
+  rtc::scoped_refptr<webrtc::I420Buffer> buffer =
+      buffer_pool_.CreateBuffer(adapted_width, adapted_height);
+
+  nv12toi420_scaler_.NV12ToI420Scale(
+      y_plane, width,
+      uv_plane, uv_width * 2,
+      crop_width, crop_height,
+      buffer->MutableDataY(), buffer->StrideY(),
       // Swap U and V, since we have NV21, not NV12.
-      buffer->MutableDataV(), buffer->StrideV(), buffer->MutableDataU(),
-      buffer->StrideU(), crop_width, crop_height,
-      static_cast<libyuv::RotationMode>(apply_rotation_ ? rotation : 0));
+      buffer->MutableDataV(), buffer->StrideV(),
+      buffer->MutableDataU(), buffer->StrideU(),
+      buffer->width(), buffer->height());
 
-  if (adapted_width != buffer->width() || adapted_height != buffer->height()) {
-    rtc::scoped_refptr<webrtc::I420Buffer> scaled_buffer(
-        post_scale_pool_.CreateBuffer(adapted_width, adapted_height));
-    scaled_buffer->ScaleFrom(buffer);
-    buffer = scaled_buffer;
+  // Applying rotation is only supported for legacy reasons, and the performance
+  // for this path is not critical.
+  rtc::CritScope lock(&apply_rotation_crit_);
+  if (apply_rotation_ && rotation != 0) {
+    rtc::scoped_refptr<I420Buffer> rotated_buffer =
+        rotation == 180 ? I420Buffer::Create(buffer->width(), buffer->height())
+                        : I420Buffer::Create(buffer->height(), buffer->width());
+
+    libyuv::I420Rotate(
+        buffer->DataY(), buffer->StrideY(),
+        buffer->DataU(), buffer->StrideU(),
+        buffer->DataV(), buffer->StrideV(),
+        rotated_buffer->MutableDataY(), rotated_buffer->StrideY(),
+        rotated_buffer->MutableDataU(), rotated_buffer->StrideU(),
+        rotated_buffer->MutableDataV(), rotated_buffer->StrideV(),
+        buffer->width(), buffer->height(),
+        static_cast<libyuv::RotationMode>(rotation));
+
+    buffer = rotated_buffer;
   }
 
   OnFrame(cricket::WebRtcVideoFrame(
diff --git a/webrtc/api/androidvideotracksource.h b/webrtc/api/androidvideotracksource.h
index 2bbecc8..1b2c4b2 100644
--- a/webrtc/api/androidvideotracksource.h
+++ b/webrtc/api/androidvideotracksource.h
@@ -20,10 +20,10 @@
 #include "webrtc/base/thread_checker.h"
 #include "webrtc/base/timestampaligner.h"
 #include "webrtc/common_video/include/i420_buffer_pool.h"
+#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
 #include "webrtc/media/base/videoadapter.h"
 #include "webrtc/media/base/videobroadcaster.h"
 #include "webrtc/media/base/videosinkinterface.h"
-#include "third_party/libyuv/include/libyuv/convert.h"
 
 namespace webrtc {
 
@@ -92,8 +92,8 @@
   cricket::VideoAdapter video_adapter_;
   rtc::CriticalSection apply_rotation_crit_;
   bool apply_rotation_ GUARDED_BY(apply_rotation_crit_);
-  webrtc::I420BufferPool pre_scale_pool_;
-  webrtc::I420BufferPool post_scale_pool_;
+  webrtc::NV12ToI420Scaler nv12toi420_scaler_;
+  webrtc::I420BufferPool buffer_pool_;
   rtc::scoped_refptr<webrtc_jni::SurfaceTextureHelper> surface_texture_helper_;
   const bool is_screencast_;
 
diff --git a/webrtc/common_video/libyuv/include/webrtc_libyuv.h b/webrtc/common_video/libyuv/include/webrtc_libyuv.h
index ec3720e..6c7fab1 100644
--- a/webrtc/common_video/libyuv/include/webrtc_libyuv.h
+++ b/webrtc/common_video/libyuv/include/webrtc_libyuv.h
@@ -16,6 +16,7 @@
 #define WEBRTC_COMMON_VIDEO_LIBYUV_INCLUDE_WEBRTC_LIBYUV_H_
 
 #include <stdio.h>
+#include <vector>
 
 #include "webrtc/common_types.h"  // RawVideoTypes.
 #include "webrtc/common_video/rotation.h"
@@ -123,6 +124,22 @@
 // Compute SSIM for an I420 frame (all planes).
 double I420SSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame);
 
+// Helper class for directly converting and scaling NV12 to I420. The Y-plane
+// will be scaled directly to the I420 destination, which makes this faster
+// than separate NV12->I420 + I420->I420 scaling.
+class NV12ToI420Scaler {
+ public:
+  void NV12ToI420Scale(const uint8_t* src_y, int src_stride_y,
+                       const uint8_t* src_uv, int src_stride_uv,
+                       int src_width, int src_height,
+                       uint8_t* dst_y, int dst_stride_y,
+                       uint8_t* dst_u, int dst_stride_u,
+                       uint8_t* dst_v, int dst_stride_v,
+                       int dst_width, int dst_height);
+ private:
+  std::vector<uint8_t> tmp_uv_planes_;
+};
+
 }  // namespace webrtc
 
 #endif  // WEBRTC_COMMON_VIDEO_LIBYUV_INCLUDE_WEBRTC_LIBYUV_H_
diff --git a/webrtc/common_video/libyuv/webrtc_libyuv.cc b/webrtc/common_video/libyuv/webrtc_libyuv.cc
index 44577e9..40fcf9b 100644
--- a/webrtc/common_video/libyuv/webrtc_libyuv.cc
+++ b/webrtc/common_video/libyuv/webrtc_libyuv.cc
@@ -341,4 +341,54 @@
                           test_frame->video_frame_buffer()->StrideV(),
                           test_frame->width(), test_frame->height());
 }
+
+void NV12ToI420Scaler::NV12ToI420Scale(
+    const uint8_t* src_y, int src_stride_y,
+    const uint8_t* src_uv, int src_stride_uv,
+    int src_width, int src_height,
+    uint8_t* dst_y, int dst_stride_y,
+    uint8_t* dst_u, int dst_stride_u,
+    uint8_t* dst_v, int dst_stride_v,
+    int dst_width, int dst_height) {
+  if (src_width == dst_width && src_height == dst_height) {
+    // No scaling.
+    tmp_uv_planes_.clear();
+    tmp_uv_planes_.shrink_to_fit();
+    libyuv::NV12ToI420(
+        src_y, src_stride_y,
+        src_uv, src_stride_uv,
+        dst_y, dst_stride_y,
+        dst_u, dst_stride_u,
+        dst_v, dst_stride_v,
+        src_width, src_height);
+    return;
+  }
+
+  // Scaling.
+  // Allocate temporary memory for splitting UV planes.
+  const int src_uv_width = (src_width + 1) / 2;
+  const int src_uv_height = (src_height + 1) / 2;
+  tmp_uv_planes_.resize(src_uv_width * src_uv_height * 2);
+  tmp_uv_planes_.shrink_to_fit();
+
+  // Split source UV plane into separate U and V plane using the temporary data.
+  uint8_t* const src_u = tmp_uv_planes_.data();
+  uint8_t* const src_v = tmp_uv_planes_.data() + src_uv_width * src_uv_height;
+  libyuv::SplitUVPlane(src_uv, src_stride_uv,
+                       src_u, src_uv_width,
+                       src_v, src_uv_width,
+                       src_uv_width, src_uv_height);
+
+  // Scale the planes into the destination.
+  libyuv::I420Scale(src_y, src_stride_y,
+                    src_u, src_uv_width,
+                    src_v, src_uv_width,
+                    src_width, src_height,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    dst_width, dst_height,
+                    libyuv::kFilterBox);
+}
+
 }  // namespace webrtc