Wayland screencast: use stream size to adjust source stride of DMA-BUFs

In commit a6ed749b12c63d252c6d893d5b5b62fcf35773d9 we used width of the
frame we copy into to calculate the source stride. This is a wrong
assumption as there might be implementations (e.g. GNOME) where we might
have to import a DMA-BUF with size of the whole screen and just having
information in SPA_META_VideoCrop metadata to get the real size of the
frame we will end up using. Given this, we always have to calculate
source stride using the size of the stream to not end up copying pixels
from the empty area of the imported DMA-BUF.

Also improve naming of variables to have names better describing what
they really represent and add some comments explaining why some things
are written the way they are.


Bug: chromium:1333304
Change-Id: I755a5139336c1da5abf95591a2b70a68659a255f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/267002
Commit-Queue: Jan Grulich <grulja@gmail.com>
Reviewed-by: Alexander Cooper <alcooper@chromium.org>
Cr-Commit-Position: refs/heads/main@{#37344}
diff --git a/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc b/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
index 74bf29d..3c4344f 100644
--- a/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
+++ b/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
@@ -99,8 +99,8 @@
   uint32_t pw_stream_node_id_ = 0;
   int pw_fd_ = -1;
 
-  DesktopSize desktop_size_ = {};
-  DesktopSize video_size_;
+  DesktopSize stream_size_ = {};
+  DesktopSize frame_size_;
 
   webrtc::Mutex queue_lock_;
   ScreenCaptureFrameQueue<SharedDesktopFrame> queue_
@@ -246,7 +246,7 @@
   auto stride = SPA_ROUND_UP_N(width * kBytesPerPixel, 4);
   auto size = height * stride;
 
-  that->desktop_size_ = DesktopSize(width, height);
+  that->stream_size_ = DesktopSize(width, height);
 
   uint8_t buffer[1024] = {};
   auto builder = spa_pod_builder{buffer, sizeof(buffer)};
@@ -658,7 +658,7 @@
     // to BYTES_PER_PIXEL x WIDTH as that's the size of the DesktopFrame we
     // allocate for each captured frame.
     src_unique_ptr = egl_dmabuf_->ImageFromDmaBuf(
-        desktop_size_, spa_video_format_.format, plane_datas, modifier_);
+        stream_size_, spa_video_format_.format, plane_datas, modifier_);
     if (src_unique_ptr) {
       src = src_unique_ptr.get();
     } else {
@@ -685,66 +685,88 @@
     return;
   }
 
-  struct spa_meta_region* video_metadata =
+  // Use SPA_META_VideoCrop metadata to get the frame size. KDE and GNOME do
+  // handle screen/window sharing differently. KDE/KWin doesn't use
+  // SPA_META_VideoCrop metadata and when sharing a window, it always sets
+  // stream size to size of the window. With that we just allocate the
+  // DesktopFrame using the size of the stream itself. GNOME/Mutter
+  // always sets stream size to the size of the whole screen, even when sharing
+  // a window. To get the real window size we have to use SPA_META_VideoCrop
+  // metadata. This gives us the size we need in order to allocate the
+  // DesktopFrame.
+
+  struct spa_meta_region* videocrop_metadata =
       static_cast<struct spa_meta_region*>(spa_buffer_find_meta_data(
-          spa_buffer, SPA_META_VideoCrop, sizeof(*video_metadata)));
+          spa_buffer, SPA_META_VideoCrop, sizeof(*videocrop_metadata)));
 
   // Video size from metadata is bigger than an actual video stream size.
   // The metadata are wrong or we should up-scale the video...in both cases
   // just quit now.
-  if (video_metadata && (video_metadata->region.size.width >
-                             static_cast<uint32_t>(desktop_size_.width()) ||
-                         video_metadata->region.size.height >
-                             static_cast<uint32_t>(desktop_size_.height()))) {
+  if (videocrop_metadata &&
+      (videocrop_metadata->region.size.width >
+           static_cast<uint32_t>(stream_size_.width()) ||
+       videocrop_metadata->region.size.height >
+           static_cast<uint32_t>(stream_size_.height()))) {
     RTC_LOG(LS_ERROR) << "Stream metadata sizes are wrong!";
     return;
   }
 
-  // Use video metadata when video size from metadata is set and smaller than
-  // video stream size, so we need to adjust it.
-  bool video_metadata_use = false;
-  const struct spa_rectangle* video_metadata_size =
-      video_metadata ? &video_metadata->region.size : nullptr;
+  // Use SPA_META_VideoCrop metadata to get the DesktopFrame size in case
+  // a windows is shared and it represents just a small portion of the
+  // stream itself. This will be for example used in case of GNOME (Mutter)
+  // where the stream will have the size of the screen itself, but we care
+  // only about smaller portion representing the window inside.
+  bool videocrop_metadata_use = false;
+  const struct spa_rectangle* videocrop_metadata_size =
+      videocrop_metadata ? &videocrop_metadata->region.size : nullptr;
 
-  if (video_metadata_size && video_metadata_size->width != 0 &&
-      video_metadata_size->height != 0 &&
-      (static_cast<int>(video_metadata_size->width) < desktop_size_.width() ||
-       static_cast<int>(video_metadata_size->height) <
-           desktop_size_.height())) {
-    video_metadata_use = true;
+  if (videocrop_metadata_size && videocrop_metadata_size->width != 0 &&
+      videocrop_metadata_size->height != 0 &&
+      (static_cast<int>(videocrop_metadata_size->width) <
+           stream_size_.width() ||
+       static_cast<int>(videocrop_metadata_size->height) <
+           stream_size_.height())) {
+    videocrop_metadata_use = true;
   }
 
-  if (video_metadata_use) {
-    video_size_ =
-        DesktopSize(video_metadata_size->width, video_metadata_size->height);
+  if (videocrop_metadata_use) {
+    frame_size_ = DesktopSize(videocrop_metadata_size->width,
+                              videocrop_metadata_size->height);
   } else {
-    video_size_ = desktop_size_;
+    frame_size_ = stream_size_;
   }
 
-  uint32_t y_offset = video_metadata_use && (video_metadata->region.position.y +
-                                                 video_size_.height() <=
-                                             desktop_size_.height())
-                          ? video_metadata->region.position.y
-                          : 0;
-  uint32_t x_offset = video_metadata_use && (video_metadata->region.position.x +
-                                                 video_size_.width() <=
-                                             desktop_size_.width())
-                          ? video_metadata->region.position.x
-                          : 0;
+  // Get the position of the video crop within the stream. Just double-check
+  // that the position doesn't exceed the size of the stream itself. NOTE:
+  // Currently it looks there is no implementation using this.
+  uint32_t y_offset =
+      videocrop_metadata_use &&
+              (videocrop_metadata->region.position.y + frame_size_.height() <=
+               stream_size_.height())
+          ? videocrop_metadata->region.position.y
+          : 0;
+  uint32_t x_offset =
+      videocrop_metadata_use &&
+              (videocrop_metadata->region.position.x + frame_size_.width() <=
+               stream_size_.width())
+          ? videocrop_metadata->region.position.x
+          : 0;
 
-  const uint32_t frame_stride = kBytesPerPixel * video_size_.width();
-  uint32_t stride = spa_buffer->datas[0].chunk->stride;
+  const uint32_t stream_stride = kBytesPerPixel * stream_size_.width();
+  uint32_t buffer_stride = spa_buffer->datas[0].chunk->stride;
+  uint32_t src_stride = buffer_stride;
 
-  if (spa_buffer->datas[0].type == SPA_DATA_DmaBuf && stride > frame_stride) {
+  if (spa_buffer->datas[0].type == SPA_DATA_DmaBuf &&
+      buffer_stride > stream_stride) {
     // When DMA-BUFs are used, sometimes spa_buffer->stride we get might
     // contain additional padding, but after we import the buffer, the stride
     // we used is no longer relevant and we should just calculate it based on
-    // width. For more context see https://crbug.com/1333304.
-    stride = frame_stride;
+    // the stream width. For more context see https://crbug.com/1333304.
+    src_stride = stream_stride;
   }
 
   uint8_t* updated_src =
-      src + (stride * y_offset) + (kBytesPerPixel * x_offset);
+      src + (src_stride * y_offset) + (kBytesPerPixel * x_offset);
 
   webrtc::MutexLock lock(&queue_lock_);
 
@@ -759,20 +781,20 @@
   }
 
   if (!queue_.current_frame() ||
-      !queue_.current_frame()->size().equals(video_size_)) {
+      !queue_.current_frame()->size().equals(frame_size_)) {
     std::unique_ptr<DesktopFrame> frame(new BasicDesktopFrame(
-        DesktopSize(video_size_.width(), video_size_.height())));
+        DesktopSize(frame_size_.width(), frame_size_.height())));
     queue_.ReplaceCurrentFrame(SharedDesktopFrame::Wrap(std::move(frame)));
   }
 
   queue_.current_frame()->CopyPixelsFrom(
-      updated_src, (stride - (kBytesPerPixel * x_offset)),
-      DesktopRect::MakeWH(video_size_.width(), video_size_.height()));
+      updated_src, (src_stride - (kBytesPerPixel * x_offset)),
+      DesktopRect::MakeWH(frame_size_.width(), frame_size_.height()));
 
   if (spa_video_format_.format == SPA_VIDEO_FORMAT_RGBx ||
       spa_video_format_.format == SPA_VIDEO_FORMAT_RGBA) {
     uint8_t* tmp_src = queue_.current_frame()->data();
-    for (int i = 0; i < video_size_.height(); ++i) {
+    for (int i = 0; i < frame_size_.height(); ++i) {
       // If both sides decided to go with the RGBx format we need to convert
       // it to BGRx to match color format expected by WebRTC.
       ConvertRGBxToBGRx(tmp_src, queue_.current_frame()->stride());