| /* |
| * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h" |
| |
| #include <stddef.h> |
| #include <stdint.h> |
| |
| #include <optional> |
| #include <utility> |
| |
| #include "modules/rtp_rtcp/source/leb128.h" |
| #include "modules/rtp_rtcp/source/rtp_video_header.h" |
| #include "rtc_base/byte_buffer.h" |
| #include "rtc_base/checks.h" |
| #include "rtc_base/logging.h" |
| #include "rtc_base/numerics/safe_conversions.h" |
| |
| namespace webrtc { |
| namespace { |
| // AV1 format: |
| // |
| // RTP payload syntax: |
| // 0 1 2 3 4 5 6 7 |
| // +-+-+-+-+-+-+-+-+ |
| // |Z|Y| W |N|-|-|-| (REQUIRED) |
| // +=+=+=+=+=+=+=+=+ (REPEATED W-1 times, or any times if W = 0) |
| // |1| | |
| // +-+ OBU fragment| |
| // |1| | (REQUIRED, leb128 encoded) |
| // +-+ size | |
| // |0| | |
| // +-+-+-+-+-+-+-+-+ |
| // | OBU fragment | |
| // | ... | |
| // +=+=+=+=+=+=+=+=+ |
| // | ... | |
| // +=+=+=+=+=+=+=+=+ if W > 0, last fragment MUST NOT have size field |
| // | OBU fragment | |
| // | ... | |
| // +=+=+=+=+=+=+=+=+ |
| // |
| // |
| // OBU syntax: |
| // 0 1 2 3 4 5 6 7 |
| // +-+-+-+-+-+-+-+-+ |
| // |0| type |X|S|-| (REQUIRED) |
| // +-+-+-+-+-+-+-+-+ |
| // X: | TID |SID|-|-|-| (OPTIONAL) |
| // +-+-+-+-+-+-+-+-+ |
| // |1| | |
| // +-+ OBU payload | |
| // S: |1| | (OPTIONAL, variable length leb128 encoded) |
| // +-+ size | |
| // |0| | |
| // +-+-+-+-+-+-+-+-+ |
| // | OBU payload | |
| // | ... | |
| class ArrayOfArrayViews { |
| public: |
| class const_iterator; |
| ArrayOfArrayViews() = default; |
| ArrayOfArrayViews(const ArrayOfArrayViews&) = default; |
| ArrayOfArrayViews& operator=(const ArrayOfArrayViews&) = default; |
| ~ArrayOfArrayViews() = default; |
| |
| const_iterator begin() const; |
| const_iterator end() const; |
| bool empty() const { return data_.empty(); } |
| size_t size() const { return size_; } |
| void CopyTo(uint8_t* destination, const_iterator first) const; |
| |
| void Append(const uint8_t* data, size_t size) { |
| data_.emplace_back(data, size); |
| size_ += size; |
| } |
| |
| private: |
| using Storage = absl::InlinedVector<rtc::ArrayView<const uint8_t>, 2>; |
| |
| size_t size_ = 0; |
| Storage data_; |
| }; |
| |
| class ArrayOfArrayViews::const_iterator { |
| public: |
| const_iterator() = default; |
| const_iterator(const const_iterator&) = default; |
| const_iterator& operator=(const const_iterator&) = default; |
| |
| const_iterator& operator++() { |
| if (++inner_ == outer_->size()) { |
| ++outer_; |
| inner_ = 0; |
| } |
| return *this; |
| } |
| uint8_t operator*() const { return (*outer_)[inner_]; } |
| |
| friend bool operator==(const const_iterator& lhs, const const_iterator& rhs) { |
| return lhs.outer_ == rhs.outer_ && lhs.inner_ == rhs.inner_; |
| } |
| |
| private: |
| friend ArrayOfArrayViews; |
| const_iterator(ArrayOfArrayViews::Storage::const_iterator outer, size_t inner) |
| : outer_(outer), inner_(inner) {} |
| |
| Storage::const_iterator outer_; |
| size_t inner_; |
| }; |
| |
| ArrayOfArrayViews::const_iterator ArrayOfArrayViews::begin() const { |
| return const_iterator(data_.begin(), 0); |
| } |
| |
| ArrayOfArrayViews::const_iterator ArrayOfArrayViews::end() const { |
| return const_iterator(data_.end(), 0); |
| } |
| |
| void ArrayOfArrayViews::CopyTo(uint8_t* destination, |
| const_iterator first) const { |
| if (first == end()) { |
| // Empty OBU payload. E.g. Temporal Delimiters are always empty. |
| return; |
| } |
| size_t first_chunk_size = first.outer_->size() - first.inner_; |
| memcpy(destination, first.outer_->data() + first.inner_, first_chunk_size); |
| destination += first_chunk_size; |
| for (auto it = std::next(first.outer_); it != data_.end(); ++it) { |
| memcpy(destination, it->data(), it->size()); |
| destination += it->size(); |
| } |
| } |
| |
| struct ObuInfo { |
| // Size of the obu_header and obu_size fields in the ouput frame. |
| size_t prefix_size = 0; |
| // obu_header() and obu_size (leb128 encoded payload_size). |
| // obu_header can be up to 2 bytes, obu_size - up to 5. |
| std::array<uint8_t, 7> prefix; |
| // Size of the obu payload in the output frame, i.e. excluding header |
| size_t payload_size = 0; |
| // iterator pointing to the beginning of the obu payload. |
| ArrayOfArrayViews::const_iterator payload_offset; |
| // OBU payloads as written in the rtp packet payloads. |
| ArrayOfArrayViews data; |
| }; |
| // Expect that majority of the frame won't use more than 4 obus. |
| // In a simple stream delta frame consist of single Frame OBU, while key frame |
| // also has Sequence Header OBU. |
| using VectorObuInfo = absl::InlinedVector<ObuInfo, 4>; |
| |
// Mask of the S bit (bit 1, counting from the least significant bit) of the
// first obu_header byte.
constexpr uint8_t kObuSizePresentBit = uint8_t{1} << 1;
| |
// True when the X bit of the obu_header byte is set.
bool ObuHasExtension(uint8_t obu_header) {
  constexpr unsigned kExtensionBit = 0b0'0000'100u;
  return (obu_header & kExtensionBit) != 0;
}
| |
| bool ObuHasSize(uint8_t obu_header) { |
| return obu_header & kObuSizePresentBit; |
| } |
| |
// Z bit (most significant bit) of the aggregation header.
bool RtpStartsWithFragment(uint8_t aggregation_header) {
  constexpr unsigned kZBit = 0b1000'0000u;
  return (aggregation_header & kZBit) != 0;
}
// Y bit (second most significant bit) of the aggregation header.
bool RtpEndsWithFragment(uint8_t aggregation_header) {
  constexpr unsigned kYBit = 0b0100'0000u;
  return (aggregation_header & kYBit) != 0;
}
// Value of the two-bit W field of the aggregation header.
// 0 stands for any number of obus.
int RtpNumObus(uint8_t aggregation_header) {
  constexpr unsigned kWFieldShift = 4;
  constexpr unsigned kWFieldMask = 0b11u;
  return (aggregation_header >> kWFieldShift) & kWFieldMask;
}
// N bit of the aggregation header: true when it is set.
// Returns bool, like the other aggregation header flag predicates, instead of
// leaking the raw masked integer to the callers.
bool RtpStartsNewCodedVideoSequence(uint8_t aggregation_header) {
  return (aggregation_header & 0b0000'1000u) != 0;
}
| |
| // Reorgonizes array of rtp payloads into array of obus: |
| // fills ObuInfo::data field. |
| // Returns empty vector on error. |
| VectorObuInfo ParseObus( |
| rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads) { |
| VectorObuInfo obu_infos; |
| bool expect_continues_obu = false; |
| for (rtc::ArrayView<const uint8_t> rtp_payload : rtp_payloads) { |
| rtc::ByteBufferReader payload(rtp_payload); |
| uint8_t aggregation_header; |
| if (!payload.ReadUInt8(&aggregation_header)) { |
| RTC_DLOG(LS_WARNING) |
| << "Failed to find aggregation header in the packet."; |
| return {}; |
| } |
| // Z-bit: 1 if the first OBU contained in the packet is a continuation of a |
| // previous OBU. |
| bool continues_obu = RtpStartsWithFragment(aggregation_header); |
| if (continues_obu != expect_continues_obu) { |
| RTC_DLOG(LS_WARNING) << "Unexpected Z-bit " << continues_obu; |
| return {}; |
| } |
| int num_expected_obus = RtpNumObus(aggregation_header); |
| if (payload.Length() == 0) { |
| // rtp packet has just the aggregation header. That may be valid only when |
| // there is exactly one fragment in the packet of size 0. |
| if (num_expected_obus != 1) { |
| RTC_DLOG(LS_WARNING) |
| << "Invalid packet with just an aggregation header."; |
| return {}; |
| } |
| if (!continues_obu) { |
| // Empty packet just to notify there is a new OBU. |
| obu_infos.emplace_back(); |
| } |
| expect_continues_obu = RtpEndsWithFragment(aggregation_header); |
| continue; |
| } |
| |
| for (int obu_index = 1; payload.Length() > 0; ++obu_index) { |
| ObuInfo& obu_info = (obu_index == 1 && continues_obu) |
| ? obu_infos.back() |
| : obu_infos.emplace_back(); |
| uint64_t fragment_size; |
| // When num_expected_obus > 0, last OBU (fragment) is not preceeded by |
| // the size field. See W field in |
| // https://aomediacodec.github.io/av1-rtp-spec/#43-av1-aggregation-header |
| bool has_fragment_size = (obu_index != num_expected_obus); |
| if (has_fragment_size) { |
| if (!payload.ReadUVarint(&fragment_size)) { |
| RTC_DLOG(LS_WARNING) << "Failed to read fragment size for obu #" |
| << obu_index << "/" << num_expected_obus; |
| return {}; |
| } |
| if (fragment_size > payload.Length()) { |
| // Malformed input: written size is larger than remaining buffer. |
| RTC_DLOG(LS_WARNING) << "Malformed fragment size " << fragment_size |
| << " is larger than remaining size " |
| << payload.Length() << " while reading obu #" |
| << obu_index << "/" << num_expected_obus; |
| return {}; |
| } |
| } else { |
| fragment_size = payload.Length(); |
| } |
| // While it is in-practical to pass empty fragments, it is still possible. |
| if (fragment_size > 0) { |
| obu_info.data.Append(reinterpret_cast<const uint8_t*>(payload.Data()), |
| fragment_size); |
| payload.Consume(fragment_size); |
| } |
| } |
| // Z flag should be same as Y flag of the next packet. |
| expect_continues_obu = RtpEndsWithFragment(aggregation_header); |
| } |
| if (expect_continues_obu) { |
| RTC_DLOG(LS_WARNING) << "Last packet shouldn't have last obu fragmented."; |
| return {}; |
| } |
| return obu_infos; |
| } |
| |
| // Calculates sizes for the Obu, i.e. base on ObuInfo::data field calculates |
| // all other fields in the ObuInfo structure. |
| // Returns false if obu found to be misformed. |
| bool CalculateObuSizes(ObuInfo* obu_info) { |
| if (obu_info->data.empty()) { |
| RTC_DLOG(LS_WARNING) << "Invalid bitstream: empty obu provided."; |
| return false; |
| } |
| auto it = obu_info->data.begin(); |
| uint8_t obu_header = *it; |
| obu_info->prefix[0] = obu_header | kObuSizePresentBit; |
| obu_info->prefix_size = 1; |
| ++it; |
| if (ObuHasExtension(obu_header)) { |
| if (it == obu_info->data.end()) { |
| return false; |
| } |
| obu_info->prefix[1] = *it; // obu_extension_header |
| obu_info->prefix_size = 2; |
| ++it; |
| } |
| // Read, validate, and skip size, if present. |
| if (!ObuHasSize(obu_header)) { |
| obu_info->payload_size = obu_info->data.size() - obu_info->prefix_size; |
| } else { |
| // Read leb128 encoded field obu_size. |
| uint64_t obu_size_bytes = 0; |
| // Number of bytes obu_size field occupy in the bitstream. |
| int size_of_obu_size_bytes = 0; |
| uint8_t leb128_byte; |
| do { |
| if (it == obu_info->data.end() || size_of_obu_size_bytes >= 8) { |
| RTC_DLOG(LS_WARNING) |
| << "Failed to read obu_size. obu_size field is too long: " |
| << size_of_obu_size_bytes << " bytes processed."; |
| return false; |
| } |
| leb128_byte = *it; |
| obu_size_bytes |= uint64_t{leb128_byte & 0x7Fu} |
| << (size_of_obu_size_bytes * 7); |
| ++size_of_obu_size_bytes; |
| ++it; |
| } while ((leb128_byte & 0x80) != 0); |
| |
| obu_info->payload_size = |
| obu_info->data.size() - obu_info->prefix_size - size_of_obu_size_bytes; |
| if (obu_size_bytes != obu_info->payload_size) { |
| // obu_size was present in the bitstream and mismatches calculated size. |
| RTC_DLOG(LS_WARNING) << "Mismatch in obu_size. signaled: " |
| << obu_size_bytes |
| << ", actual: " << obu_info->payload_size; |
| return false; |
| } |
| } |
| obu_info->payload_offset = it; |
| obu_info->prefix_size += |
| WriteLeb128(rtc::dchecked_cast<uint64_t>(obu_info->payload_size), |
| obu_info->prefix.data() + obu_info->prefix_size); |
| return true; |
| } |
| |
| } // namespace |
| |
| rtc::scoped_refptr<EncodedImageBuffer> VideoRtpDepacketizerAv1::AssembleFrame( |
| rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads) { |
| VectorObuInfo obu_infos = ParseObus(rtp_payloads); |
| if (obu_infos.empty()) { |
| return nullptr; |
| } |
| |
| size_t frame_size = 0; |
| for (ObuInfo& obu_info : obu_infos) { |
| if (!CalculateObuSizes(&obu_info)) { |
| return nullptr; |
| } |
| frame_size += (obu_info.prefix_size + obu_info.payload_size); |
| } |
| |
| rtc::scoped_refptr<EncodedImageBuffer> bitstream = |
| EncodedImageBuffer::Create(frame_size); |
| uint8_t* write_at = bitstream->data(); |
| for (const ObuInfo& obu_info : obu_infos) { |
| // Copy the obu_header and obu_size fields. |
| memcpy(write_at, obu_info.prefix.data(), obu_info.prefix_size); |
| write_at += obu_info.prefix_size; |
| // Copy the obu payload. |
| obu_info.data.CopyTo(write_at, obu_info.payload_offset); |
| write_at += obu_info.payload_size; |
| } |
| RTC_CHECK_EQ(write_at - bitstream->data(), bitstream->size()); |
| return bitstream; |
| } |
| |
| std::optional<VideoRtpDepacketizer::ParsedRtpPayload> |
| VideoRtpDepacketizerAv1::Parse(rtc::CopyOnWriteBuffer rtp_payload) { |
| if (rtp_payload.size() == 0) { |
| RTC_DLOG(LS_ERROR) << "Empty rtp payload."; |
| return std::nullopt; |
| } |
| uint8_t aggregation_header = rtp_payload.cdata()[0]; |
| if (RtpStartsNewCodedVideoSequence(aggregation_header) && |
| RtpStartsWithFragment(aggregation_header)) { |
| // new coded video sequence can't start from an OBU fragment. |
| return std::nullopt; |
| } |
| std::optional<ParsedRtpPayload> parsed(std::in_place); |
| |
| // To assemble frame, all of the rtp payload is required, including |
| // aggregation header. |
| parsed->video_payload = std::move(rtp_payload); |
| |
| parsed->video_header.codec = VideoCodecType::kVideoCodecAV1; |
| // These are not accurate since frame may consist of several packet aligned |
| // chunks of obus, but should be good enough for most cases. It might produce |
| // frame that do not map to any real frame, but av1 decoder should be able to |
| // handle it since it promise to handle individual obus rather than full |
| // frames. |
| parsed->video_header.is_first_packet_in_frame = |
| !RtpStartsWithFragment(aggregation_header); |
| parsed->video_header.is_last_packet_in_frame = |
| !RtpEndsWithFragment(aggregation_header); |
| |
| parsed->video_header.frame_type = |
| RtpStartsNewCodedVideoSequence(aggregation_header) |
| ? VideoFrameType::kVideoFrameKey |
| : VideoFrameType::kVideoFrameDelta; |
| return parsed; |
| } |
| |
| } // namespace webrtc |