Optimize Android NV12 capture
This CL optimizes the NV12 -> I420 conversion and scaling code used by Android
capture. For example, when the input is 1280x720 and we adapt to 640x360, this CL:
- Reduces conversion time from 3.37 ms to 1.46 ms.
- Reduces memory footprint by 1 MB.
BUG=webrtc:6319
Review-Url: https://codereview.webrtc.org/2317443003
Cr-Commit-Position: refs/heads/master@{#14167}
diff --git a/webrtc/api/androidvideotracksource.cc b/webrtc/api/androidvideotracksource.cc
index 000337d..d232f5e 100644
--- a/webrtc/api/androidvideotracksource.cc
+++ b/webrtc/api/androidvideotracksource.cc
@@ -12,6 +12,9 @@
#include <utility>
+#include "third_party/libyuv/include/libyuv/convert.h"
+#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
+
namespace webrtc {
AndroidVideoTrackSource::AndroidVideoTrackSource(rtc::Thread* signaling_thread,
@@ -106,42 +109,70 @@
return;
}
- int rotated_width = crop_width;
- int rotated_height = crop_height;
-
- rtc::CritScope lock(&apply_rotation_crit_);
- if (apply_rotation_ && (rotation == 90 || rotation == 270)) {
- std::swap(adapted_width, adapted_height);
- std::swap(rotated_width, rotated_height);
- }
-
- rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer =
- pre_scale_pool_.CreateBuffer(rotated_width, rotated_height);
-
const uint8_t* y_plane = static_cast<const uint8_t*>(frame_data);
const uint8_t* uv_plane = y_plane + width * height;
- int uv_width = (width + 1) / 2;
+ const int uv_width = (width + 1) / 2;
RTC_CHECK_GE(length, width * height + 2 * uv_width * ((height + 1) / 2));
// Can only crop at even pixels.
crop_x &= ~1;
crop_y &= ~1;
+ // Crop just by modifying pointers.
+ y_plane += width * crop_y + crop_x;
+ uv_plane += uv_width * crop_y + crop_x;
- libyuv::NV12ToI420Rotate(
- y_plane + width * crop_y + crop_x, width,
- uv_plane + uv_width * crop_y + crop_x, width, buffer->MutableDataY(),
- buffer->StrideY(),
- // Swap U and V, since we have NV21, not NV12.
- buffer->MutableDataV(), buffer->StrideV(), buffer->MutableDataU(),
- buffer->StrideU(), crop_width, crop_height,
- static_cast<libyuv::RotationMode>(apply_rotation_ ? rotation : 0));
+ rtc::scoped_refptr<webrtc::I420Buffer> buffer =
+ buffer_pool_.CreateBuffer(adapted_width, adapted_height);
- if (adapted_width != buffer->width() || adapted_height != buffer->height()) {
- rtc::scoped_refptr<webrtc::I420Buffer> scaled_buffer(
- post_scale_pool_.CreateBuffer(adapted_width, adapted_height));
- scaled_buffer->ScaleFrom(buffer);
- buffer = scaled_buffer;
+ if (adapted_width == crop_width && adapted_height == crop_height) {
+ // No scaling.
+ libyuv::NV12ToI420(
+ y_plane, width,
+ uv_plane, uv_width * 2,
+ buffer->MutableDataY(), buffer->StrideY(),
+ // Swap U and V, since we have NV21, not NV12.
+ buffer->MutableDataV(), buffer->StrideV(),
+ buffer->MutableDataU(), buffer->StrideU(),
+ buffer->width(), buffer->height());
+
+ } else {
+ // Scaling.
+ const int crop_uv_width = (crop_width + 1) / 2;
+ const int crop_uv_height = (crop_height + 1) / 2;
+ unscaled_uv_planes_.resize(crop_uv_width * crop_uv_height * 2);
+
+ NV12ToI420Scale(
+ unscaled_uv_planes_.data(),
+ y_plane, width,
+ uv_plane, uv_width * 2,
+ crop_width, crop_height,
+ buffer->MutableDataY(), buffer->StrideY(),
+ // Swap U and V, since we have NV21, not NV12.
+ buffer->MutableDataV(), buffer->StrideV(),
+ buffer->MutableDataU(), buffer->StrideU(),
+ buffer->width(), buffer->height());
+ }
+
+ // Applying rotation is only supported for legacy reasons, and the performance
+ // for this path is not critical.
+ rtc::CritScope lock(&apply_rotation_crit_);
+ if (apply_rotation_ && rotation != 0) {
+ rtc::scoped_refptr<I420Buffer> rotated_buffer = I420Buffer::Create(
+ rotation == 180 ? buffer->width() : buffer->height(),
+ rotation == 180 ? buffer->height() : buffer->width());
+
+ libyuv::I420Rotate(
+ buffer->DataY(), buffer->StrideY(),
+ buffer->DataU(), buffer->StrideU(),
+ buffer->DataV(), buffer->StrideV(),
+ rotated_buffer->MutableDataY(), rotated_buffer->StrideY(),
+ rotated_buffer->MutableDataU(), rotated_buffer->StrideU(),
+ rotated_buffer->MutableDataV(), rotated_buffer->StrideV(),
+ buffer->width(), buffer->height(),
+ static_cast<libyuv::RotationMode>(rotation));
+
+ buffer = rotated_buffer;
}
OnFrame(cricket::WebRtcVideoFrame(
diff --git a/webrtc/api/androidvideotracksource.h b/webrtc/api/androidvideotracksource.h
index 2bbecc8..5cf52d0 100644
--- a/webrtc/api/androidvideotracksource.h
+++ b/webrtc/api/androidvideotracksource.h
@@ -23,7 +23,6 @@
#include "webrtc/media/base/videoadapter.h"
#include "webrtc/media/base/videobroadcaster.h"
#include "webrtc/media/base/videosinkinterface.h"
-#include "third_party/libyuv/include/libyuv/convert.h"
namespace webrtc {
@@ -92,8 +91,8 @@
cricket::VideoAdapter video_adapter_;
rtc::CriticalSection apply_rotation_crit_;
bool apply_rotation_ GUARDED_BY(apply_rotation_crit_);
- webrtc::I420BufferPool pre_scale_pool_;
- webrtc::I420BufferPool post_scale_pool_;
+ std::vector<uint8_t> unscaled_uv_planes_;
+ webrtc::I420BufferPool buffer_pool_;
rtc::scoped_refptr<webrtc_jni::SurfaceTextureHelper> surface_texture_helper_;
const bool is_screencast_;
diff --git a/webrtc/common_video/libyuv/include/webrtc_libyuv.h b/webrtc/common_video/libyuv/include/webrtc_libyuv.h
index ec3720e..1ceca4d 100644
--- a/webrtc/common_video/libyuv/include/webrtc_libyuv.h
+++ b/webrtc/common_video/libyuv/include/webrtc_libyuv.h
@@ -123,6 +123,19 @@
// Compute SSIM for an I420 frame (all planes).
double I420SSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame);
+// Converts NV12 to I420 and scales it in one call. |tmp_data| is scratch space
+// for the intermediate step of splitting the interleaved UV plane into separate
+// U and V planes, so that memory region must hold at least
+// 2 * ((src_width + 1) / 2) * ((src_height + 1) / 2) bytes.
+void NV12ToI420Scale(uint8_t* tmp_data,
+ const uint8_t* src_y, int src_stride_y,
+ const uint8_t* src_uv, int src_stride_uv,
+ int src_width, int src_height,
+ uint8_t* dst_y, int dst_stride_y,
+ uint8_t* dst_u, int dst_stride_u,
+ uint8_t* dst_v, int dst_stride_v,
+ int dst_width, int dst_height);
+
} // namespace webrtc
#endif // WEBRTC_COMMON_VIDEO_LIBYUV_INCLUDE_WEBRTC_LIBYUV_H_
diff --git a/webrtc/common_video/libyuv/webrtc_libyuv.cc b/webrtc/common_video/libyuv/webrtc_libyuv.cc
index 44577e9..5d9ab57 100644
--- a/webrtc/common_video/libyuv/webrtc_libyuv.cc
+++ b/webrtc/common_video/libyuv/webrtc_libyuv.cc
@@ -341,4 +341,34 @@
test_frame->video_frame_buffer()->StrideV(),
test_frame->width(), test_frame->height());
}
+
+void NV12ToI420Scale(uint8_t* tmp_data,
+ const uint8_t* src_y, int src_stride_y,
+ const uint8_t* src_uv, int src_stride_uv,
+ int src_width, int src_height,
+ uint8_t* dst_y, int dst_stride_y,
+ uint8_t* dst_u, int dst_stride_u,
+ uint8_t* dst_v, int dst_stride_v,
+ int dst_width, int dst_height) {
+ // Split the source UV plane into separate U and V planes stored in |tmp_data|.
+ const int src_uv_width = (src_width + 1) / 2;  // Half width, rounded up.
+ const int src_uv_height = (src_height + 1) / 2;  // Half height, rounded up.
+ uint8_t* const src_u = tmp_data;  // U plane starts at the beginning of |tmp_data|.
+ uint8_t* const src_v = tmp_data + src_uv_width * src_uv_height;  // V plane follows the U plane.
+ libyuv::SplitUVPlane(src_uv, src_stride_uv,
+ src_u, src_uv_width,
+ src_v, src_uv_width,
+ src_uv_width, src_uv_height);
+ // Scale the Y plane and the split U/V planes into the destination buffers.
+ libyuv::I420Scale(src_y, src_stride_y,
+ src_u, src_uv_width,
+ src_v, src_uv_width,
+ src_width, src_height,
+ dst_y, dst_stride_y,
+ dst_u, dst_stride_u,
+ dst_v, dst_stride_v,
+ dst_width, dst_height,
+ libyuv::kFilterBox);
+}
+
} // namespace webrtc