| /* |
| * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "rtc_tools/frame_analyzer/video_temporal_aligner.h" |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <deque> |
| #include <limits> |
| #include <vector> |
| |
| #include "api/video/i420_buffer.h" |
| #include "rtc_base/checks.h" |
| #include "rtc_base/refcountedobject.h" |
| #include "rtc_tools/frame_analyzer/video_quality_analysis.h" |
| #include "third_party/libyuv/include/libyuv/compare.h" |
| |
| namespace webrtc { |
| namespace test { |
| |
| namespace { |
| |
// Size of the look-ahead window used while searching for the reference frame
// that matches the next test frame. The search stops once no better match is
// found within kNumberOfFramesLookAhead frames of the best candidate so far.
// Because the window restarts at every improvement, the total gap that is
// spanned can be larger than this number. Typically, the SSIM improves as the
// search gets closer and closer to the real match.
constexpr int kNumberOfFramesLookAhead = 60;
| |
| // Helper class that takes a video and generates an infinite looping video. |
| class LoopingVideo : public rtc::RefCountedObject<Video> { |
| public: |
| explicit LoopingVideo(const rtc::scoped_refptr<Video>& video) |
| : video_(video) {} |
| |
| size_t number_of_frames() const override { |
| return std::numeric_limits<size_t>::max(); |
| } |
| |
| rtc::scoped_refptr<I420BufferInterface> GetFrame( |
| size_t index) const override { |
| return video_->GetFrame(index % video_->number_of_frames()); |
| } |
| |
| private: |
| const rtc::scoped_refptr<Video> video_; |
| }; |
| |
| // Helper class that take a vector of frame indices and a video and produces a |
| // new video where the frames have been reshuffled. |
| class ReorderedVideo : public rtc::RefCountedObject<Video> { |
| public: |
| ReorderedVideo(const rtc::scoped_refptr<Video>& video, |
| const std::vector<size_t>& indices) |
| : video_(video), indices_(indices) {} |
| |
| size_t number_of_frames() const override { return indices_.size(); } |
| |
| rtc::scoped_refptr<I420BufferInterface> GetFrame( |
| size_t index) const override { |
| return video_->GetFrame(indices_.at(index)); |
| } |
| |
| private: |
| const rtc::scoped_refptr<Video> video_; |
| const std::vector<size_t> indices_; |
| }; |
| |
| // Helper class that takes a video and produces a downscaled video. |
| class DownscaledVideo : public rtc::RefCountedObject<Video> { |
| public: |
| DownscaledVideo(float scale_factor, const rtc::scoped_refptr<Video>& video) |
| : scale_factor_(scale_factor), video_(video) {} |
| |
| size_t number_of_frames() const override { |
| return video_->number_of_frames(); |
| } |
| |
| rtc::scoped_refptr<I420BufferInterface> GetFrame( |
| size_t index) const override { |
| const rtc::scoped_refptr<I420BufferInterface> frame = |
| video_->GetFrame(index); |
| rtc::scoped_refptr<I420Buffer> downscaled_frame = I420Buffer::Create( |
| static_cast<int>(round(scale_factor_ * frame->width())), |
| static_cast<int>(round(scale_factor_ * frame->height()))); |
| downscaled_frame->ScaleFrom(*frame); |
| return downscaled_frame; |
| } |
| |
| private: |
| const float scale_factor_; |
| const rtc::scoped_refptr<Video> video_; |
| }; |
| |
| // Helper class that takes a video and caches the latest frame access. This |
| // improves performance a lot since the original source is often from a file. |
| class CachedVideo : public rtc::RefCountedObject<Video> { |
| public: |
| CachedVideo(int max_cache_size, const rtc::scoped_refptr<Video>& video) |
| : max_cache_size_(max_cache_size), video_(video) {} |
| |
| size_t number_of_frames() const override { |
| return video_->number_of_frames(); |
| } |
| |
| rtc::scoped_refptr<I420BufferInterface> GetFrame( |
| size_t index) const override { |
| for (const CachedFrame& cached_frame : cache_) { |
| if (cached_frame.index == index) |
| return cached_frame.frame; |
| } |
| |
| rtc::scoped_refptr<I420BufferInterface> frame = video_->GetFrame(index); |
| cache_.push_front({index, frame}); |
| if (cache_.size() > max_cache_size_) |
| cache_.pop_back(); |
| |
| return frame; |
| } |
| |
| private: |
| struct CachedFrame { |
| size_t index; |
| rtc::scoped_refptr<I420BufferInterface> frame; |
| }; |
| |
| const size_t max_cache_size_; |
| const rtc::scoped_refptr<Video> video_; |
| mutable std::deque<CachedFrame> cache_; |
| }; |
| |
| // Try matching the test frame against all frames in the reference video and |
| // return the index of the best matching frame. |
| size_t FindBestMatch(const rtc::scoped_refptr<I420BufferInterface>& test_frame, |
| const Video& reference_video) { |
| std::vector<double> ssim; |
| for (const auto& ref_frame : reference_video) |
| ssim.push_back(Ssim(test_frame, ref_frame)); |
| return std::distance(ssim.begin(), |
| std::max_element(ssim.begin(), ssim.end())); |
| } |
| |
| // Find and return the index of the frame matching the test frame. The search |
| // starts at the starting index and continues until there is no better match |
| // within the next kNumberOfFramesLookAhead frames. |
| size_t FindNextMatch(const rtc::scoped_refptr<I420BufferInterface>& test_frame, |
| const Video& reference_video, |
| size_t start_index) { |
| const double start_ssim = |
| Ssim(test_frame, reference_video.GetFrame(start_index)); |
| for (int i = 1; i < kNumberOfFramesLookAhead; ++i) { |
| const size_t next_index = start_index + i; |
| // If we find a better match, restart the search at that point. |
| if (start_ssim < Ssim(test_frame, reference_video.GetFrame(next_index))) |
| return FindNextMatch(test_frame, reference_video, next_index); |
| } |
| // The starting index was the best match. |
| return start_index; |
| } |
| |
| } // namespace |
| |
| std::vector<size_t> FindMatchingFrameIndices( |
| const rtc::scoped_refptr<Video>& reference_video, |
| const rtc::scoped_refptr<Video>& test_video) { |
| // This is done to get a 10x speedup. We don't need the full resolution in |
| // order to match frames, and we should limit file access and not read the |
| // same memory tens of times. |
| const float kScaleFactor = 0.25f; |
| const rtc::scoped_refptr<Video> cached_downscaled_reference_video = |
| new CachedVideo(kNumberOfFramesLookAhead, |
| new DownscaledVideo(kScaleFactor, reference_video)); |
| const rtc::scoped_refptr<Video> downscaled_test_video = |
| new DownscaledVideo(kScaleFactor, test_video); |
| |
| // Assume the video is looping around. |
| const rtc::scoped_refptr<Video> looping_reference_video = |
| new LoopingVideo(cached_downscaled_reference_video); |
| |
| std::vector<size_t> match_indices; |
| for (const rtc::scoped_refptr<I420BufferInterface>& test_frame : |
| *downscaled_test_video) { |
| if (match_indices.empty()) { |
| // First frame. |
| match_indices.push_back( |
| FindBestMatch(test_frame, *cached_downscaled_reference_video)); |
| } else { |
| match_indices.push_back(FindNextMatch( |
| test_frame, *looping_reference_video, match_indices.back())); |
| } |
| } |
| |
| return match_indices; |
| } |
| |
| rtc::scoped_refptr<Video> ReorderVideo(const rtc::scoped_refptr<Video>& video, |
| const std::vector<size_t>& indices) { |
| return new ReorderedVideo(video, indices); |
| } |
| |
| rtc::scoped_refptr<Video> GenerateAlignedReferenceVideo( |
| const rtc::scoped_refptr<Video>& reference_video, |
| const rtc::scoped_refptr<Video>& test_video) { |
| return ReorderVideo(new LoopingVideo(reference_video), |
| FindMatchingFrameIndices(reference_video, test_video)); |
| } |
| |
| } // namespace test |
| } // namespace webrtc |