pipewire capturer: Reduce the amount of copying

Improves capture latency by reducing the amount of
copying needed for each frame. We keep track of the
damaged region of the previous frame, union it with
the damaged region of the current frame, and copy over
only that union. The X11 capturer already has such
synchronization in place.

The change is especially beneficial when there are only
small changes on the screen (e.g. a clock ticking).
On a 128-core machine with a 4k screen, I observed
capture latencies drop from 5-8 ms to 0 ms when the
system is left idle. This is in line with the X11
capturer.

Bug: chromium:1291247
Change-Id: Iffb441f9e1902d2658031f5f35b5372ee8e94073
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/299720
Reviewed-by: Alexander Cooper <alcooper@chromium.org>
Commit-Queue: Salman Malik <salmanmalik@chromium.org>
Cr-Commit-Position: refs/heads/main@{#39968}
diff --git a/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc b/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
index 1eea8bf..5878180 100644
--- a/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
+++ b/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
@@ -144,6 +144,7 @@
   uint32_t frame_rate_ = 60;
 
   bool use_damage_region_ = true;
+  DesktopRegion last_damage_region_;
 
   // Specifies whether the pipewire stream has been initialized with a request
   // to embed cursor into the captured frames.
@@ -158,7 +159,9 @@
   void ProcessBuffer(pw_buffer* buffer);
   bool ProcessMemFDBuffer(pw_buffer* buffer,
                           DesktopFrame& frame,
-                          const DesktopVector& offset);
+                          const DesktopFrame* previous_frame,
+                          const DesktopVector& offset,
+                          bool effectively_new_frame);
   bool ProcessDMABuffer(pw_buffer* buffer,
                         DesktopFrame& frame,
                         const DesktopVector& offset);
@@ -825,17 +828,22 @@
     }
   }
 
+  bool effectively_new_frame = false;
   if (!queue_.current_frame() ||
       !queue_.current_frame()->size().equals(frame_size_)) {
     std::unique_ptr<DesktopFrame> frame(new BasicDesktopFrame(
         DesktopSize(frame_size_.width(), frame_size_.height())));
     queue_.ReplaceCurrentFrame(SharedDesktopFrame::Wrap(std::move(frame)));
+    effectively_new_frame = true;
   }
 
+  UpdateFrameUpdatedRegions(spa_buffer, *queue_.current_frame());
+
   bool bufferProcessed = false;
   if (spa_buffer->datas[0].type == SPA_DATA_MemFd) {
-    bufferProcessed =
-        ProcessMemFDBuffer(buffer, *queue_.current_frame(), offset);
+    bufferProcessed = ProcessMemFDBuffer(buffer, *queue_.current_frame(),
+                                         queue_.previous_frame(), offset,
+                                         effectively_new_frame);
   } else if (spa_buffer->datas[0].type == SPA_DATA_DmaBuf) {
     bufferProcessed = ProcessDMABuffer(buffer, *queue_.current_frame(), offset);
   }
@@ -862,7 +870,6 @@
     observer_->OnDesktopFrameChanged();
   }
 
-  UpdateFrameUpdatedRegions(spa_buffer, *queue_.current_frame());
   queue_.current_frame()->set_may_contain_cursor(is_cursor_embedded_);
 
   if (callback_) {
@@ -878,7 +885,9 @@
 bool SharedScreenCastStreamPrivate::ProcessMemFDBuffer(
     pw_buffer* buffer,
     DesktopFrame& frame,
-    const DesktopVector& offset) {
+    const DesktopFrame* previous_frame,
+    const DesktopVector& offset,
+    bool effectively_new_frame) {
   spa_buffer* spa_buffer = buffer->buffer;
   ScopedBuf map;
   uint8_t* src = nullptr;
@@ -904,9 +913,31 @@
   uint8_t* updated_src =
       src + (src_stride * offset.y()) + (kBytesPerPixel * offset.x());
 
-  frame.CopyPixelsFrom(
-      updated_src, (src_stride - (kBytesPerPixel * offset.x())),
-      DesktopRect::MakeWH(frame.size().width(), frame.size().height()));
+  const int stride = src_stride - (kBytesPerPixel * offset.x());
+
+  if (effectively_new_frame || !previous_frame || !use_damage_region_ ||
+      damage_region_.is_empty()) {
+    frame.CopyPixelsFrom(
+        updated_src, stride,
+        DesktopRect::MakeWH(frame.size().width(), frame.size().height()));
+  } else {
+    for (DesktopRegion::Iterator it(last_damage_region_); !it.IsAtEnd();
+         it.Advance()) {
+      const DesktopRect& r = it.rect();
+      frame.CopyPixelsFrom(*previous_frame, r.top_left(), r);
+    }
+
+    for (DesktopRegion::Iterator it(damage_region_); !it.IsAtEnd();
+         it.Advance()) {
+      const auto& rect = it.rect();
+      frame.CopyPixelsFrom(src + rect.top() * stride +
+                               rect.left() * DesktopFrame::kBytesPerPixel,
+                           stride,
+                           DesktopRect::MakeXYWH(rect.left(), rect.top(),
+                                                 rect.width(), rect.height()));
+    }
+  }
+  last_damage_region_ = damage_region_;
 
   return true;
 }