Add OptionalBlobEncoder for RTC event logs.

Bug: webrtc:14801
Change-Id: I7c14597e39b312c26573f034dca444cc1d90e332
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/295480
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Björn Terelius <terelius@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#39449}
diff --git a/logging/BUILD.gn b/logging/BUILD.gn
index 408d4ec..05c1f53 100644
--- a/logging/BUILD.gn
+++ b/logging/BUILD.gn
@@ -292,6 +292,8 @@
     "rtc_event_log/encoder/blob_encoding.h",
     "rtc_event_log/encoder/delta_encoding.cc",
     "rtc_event_log/encoder/delta_encoding.h",
+    "rtc_event_log/encoder/optional_blob_encoding.cc",
+    "rtc_event_log/encoder/optional_blob_encoding.h",
   ]
 
   defines = []
@@ -482,6 +484,7 @@
       sources = [
         "rtc_event_log/encoder/blob_encoding_unittest.cc",
         "rtc_event_log/encoder/delta_encoding_unittest.cc",
+        "rtc_event_log/encoder/optional_blob_encoding_unittest.cc",
         "rtc_event_log/encoder/rtc_event_log_encoder_common_unittest.cc",
         "rtc_event_log/encoder/rtc_event_log_encoder_unittest.cc",
         "rtc_event_log/events/rtc_event_field_encoding_unittest.cc",
diff --git a/logging/rtc_event_log/encoder/optional_blob_encoding.cc b/logging/rtc_event_log/encoder/optional_blob_encoding.cc
new file mode 100644
index 0000000..358d4e6
--- /dev/null
+++ b/logging/rtc_event_log/encoder/optional_blob_encoding.cc
@@ -0,0 +1,140 @@
+/*
+ *  Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "logging/rtc_event_log/encoder/optional_blob_encoding.h"
+
+#include <cstdint>
+#include <limits>
+
+#include "rtc_base/bit_buffer.h"
+#include "rtc_base/bitstream_reader.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+// Layout of the encoded string:
+//   1 bit: set if every blob is present.
+//   `blobs.size()` bits: one presence bit per blob; only written when the
+//     first bit is 0.
+//   zero padding up to the next byte boundary.
+//   for each present blob, in order: LEB128 encoded length, then the bytes.
+std::string EncodeOptionalBlobs(
+    const std::vector<absl::optional<std::string>>& blobs) {
+  if (blobs.empty()) {
+    return {};
+  }
+
+  // Upper bound of the output size: the leading bit plus, for each present
+  // blob, its bytes and a maximum-length LEB128 size prefix.
+  size_t reserve_size_bits = 1;
+  size_t num_blobs_present = 0;
+  for (const auto& blob : blobs) {
+    if (blob.has_value()) {
+      ++num_blobs_present;
+      reserve_size_bits +=
+          (rtc::BitBufferWriter::kMaxLeb128Length.bytes() + blob->size()) * 8;
+    }
+  }
+
+  const bool all_blobs_present = num_blobs_present == blobs.size();
+  if (!all_blobs_present) {
+    reserve_size_bits += blobs.size();
+  }
+
+  std::vector<uint8_t> buffer((reserve_size_bits + 7) / 8);
+  rtc::BitBufferWriter writer(buffer.data(), buffer.size());
+
+  // Write the per-blob presence bits, but only if some blob is missing.
+  writer.WriteBits(all_blobs_present, 1);
+  if (!all_blobs_present) {
+    for (const auto& blob : blobs) {
+      writer.WriteBits(blob.has_value(), 1);
+    }
+  }
+
+  // Byte align the writer.
+  writer.ConsumeBits(writer.RemainingBitCount() % 8);
+
+  // Write blobs.
+  for (const auto& blob : blobs) {
+    if (blob.has_value()) {
+      writer.WriteLeb128(blob->length());
+      writer.WriteString(*blob);
+    }
+  }
+
+  size_t bytes_written;
+  size_t bits_written;
+  writer.GetCurrentOffset(&bytes_written, &bits_written);
+  RTC_CHECK_EQ(bits_written, 0);
+  RTC_CHECK_LE(bytes_written, buffer.size());
+
+  return std::string(buffer.data(), buffer.data() + bytes_written);
+}
+
+// Inverse of EncodeOptionalBlobs(). Returns an empty vector if the input is
+// empty, `num_of_blobs` is 0, or `encoded_blobs` can't be parsed as exactly
+// `num_of_blobs` optional blobs.
+std::vector<absl::optional<std::string>> DecodeOptionalBlobs(
+    absl::string_view encoded_blobs,
+    size_t num_of_blobs) {
+  if (encoded_blobs.empty() || num_of_blobs == 0) {
+    return {};
+  }
+
+  std::vector<absl::optional<std::string>> res(num_of_blobs);
+  BitstreamReader reader(encoded_blobs);
+  const bool all_blobs_present = reader.ReadBit();
+
+  // Read the per-blob presence bits, stored only if some blob is missing.
+  std::vector<uint8_t> present;
+  if (!all_blobs_present) {
+    present.resize(num_of_blobs);
+    for (size_t i = 0; i < num_of_blobs; ++i) {
+      present[i] = reader.ReadBit();
+    }
+  }
+
+  // A truncated input may already have exhausted the reader. Stop here rather
+  // than compute the alignment from the bit count of a failed reader.
+  if (!reader.Ok()) {
+    return {};
+  }
+
+  // Byte align the reader.
+  reader.ConsumeBits(reader.RemainingBitCount() % 8);
+
+  // Read the blobs.
+  for (size_t i = 0; i < num_of_blobs; ++i) {
+    if (!all_blobs_present && !present[i]) {
+      continue;
+    }
+
+    // The length comes from untrusted input. ReadString() takes an int, so
+    // reject lengths that would be narrowed on the way in, as well as lengths
+    // that exceed the whole input and therefore can't possibly be satisfied.
+    const uint64_t blob_size = reader.ReadLeb128();
+    if (!reader.Ok() || blob_size > encoded_blobs.size() ||
+        blob_size > static_cast<uint64_t>(std::numeric_limits<int>::max())) {
+      return {};
+    }
+    res[i] = reader.ReadString(static_cast<int>(blob_size));
+  }
+
+  // The result is only valid if exactly all bits were consumed during decoding.
+  if (!reader.Ok() || reader.RemainingBitCount() > 0) {
+    return {};
+  }
+
+  return res;
+}
+
+}  // namespace webrtc
diff --git a/logging/rtc_event_log/encoder/optional_blob_encoding.h b/logging/rtc_event_log/encoder/optional_blob_encoding.h
new file mode 100644
index 0000000..32f5278
--- /dev/null
+++ b/logging/rtc_event_log/encoder/optional_blob_encoding.h
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef LOGGING_RTC_EVENT_LOG_ENCODER_OPTIONAL_BLOB_ENCODING_H_
+#define LOGGING_RTC_EVENT_LOG_ENCODER_OPTIONAL_BLOB_ENCODING_H_
+
+#include <stddef.h>
+
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+
+namespace webrtc {
+
+// Encodes a sequence of optional strings into one string. Which entries are
+// present and how long each of them is gets stored in the output, so that
+// DecodeOptionalBlobs() can separate them again given only the entry count.
+// Never fails. Returns an empty string if `blobs` is empty.
+std::string EncodeOptionalBlobs(
+    const std::vector<absl::optional<std::string>>& blobs);
+
+// Decodes the output of EncodeOptionalBlobs(). `num_of_blobs` is the number of
+// entries that were encoded. Returns an empty vector on failure, i.e. if
+// `encoded_blobs` is empty or corrupt, or if `num_of_blobs` is 0.
+std::vector<absl::optional<std::string>> DecodeOptionalBlobs(
+    absl::string_view encoded_blobs,
+    size_t num_of_blobs);
+
+}  // namespace webrtc
+
+#endif  // LOGGING_RTC_EVENT_LOG_ENCODER_OPTIONAL_BLOB_ENCODING_H_
diff --git a/logging/rtc_event_log/encoder/optional_blob_encoding_unittest.cc b/logging/rtc_event_log/encoder/optional_blob_encoding_unittest.cc
new file mode 100644
index 0000000..b4d87d7
--- /dev/null
+++ b/logging/rtc_event_log/encoder/optional_blob_encoding_unittest.cc
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "logging/rtc_event_log/encoder/optional_blob_encoding.h"
+
+#include <string>
+#include <vector>
+
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+namespace webrtc {
+namespace {
+
+// Test helper assembling a string bit by bit, MSB of each byte first.
+class BitBuilder {
+ public:
+  BitBuilder& Bit(uint8_t bit) {
+    const uint64_t offset_in_byte = total_bits_ % 8;
+    if (offset_in_byte == 0) {
+      bits_.push_back(0);  // Previous byte is full, or there is none yet.
+    }
+    bits_.back() |= bit << (7 - offset_in_byte);
+    ++total_bits_;
+    return *this;
+  }
+
+  // Appends the bytes bit by bit, so the builder need not be byte aligned.
+  BitBuilder& Bytes(const std::vector<uint8_t>& bytes) {
+    for (const uint8_t byte : bytes) {
+      for (int shift = 7; shift >= 0; --shift) {
+        Bit((byte >> shift) & 1);
+      }
+    }
+    return *this;
+  }
+
+  BitBuilder& ByteAlign() {
+    while (total_bits_ % 8 != 0) Bit(0);  // Pad with zero bits.
+    return *this;
+  }
+
+  std::string AsString() { return std::string(bits_.begin(), bits_.end()); }
+
+ private:
+  std::vector<uint8_t> bits_;
+  uint64_t total_bits_ = 0;
+};
+
+TEST(OptionalBlobEncoding, AllBlobsPresent) {
+  std::string encoded = EncodeOptionalBlobs({"a", "b", "c"});
+  std::string expected = BitBuilder()
+                             .Bit(1)
+                             .ByteAlign()
+                             .Bytes({0x01, 'a'})
+                             .Bytes({0x01, 'b'})
+                             .Bytes({0x01, 'c'})
+                             .AsString();
+  EXPECT_EQ(encoded, expected);
+}
+
+TEST(OptionalBlobEncoding, SomeBlobsPresent) {
+  std::string encoded = EncodeOptionalBlobs({"a", absl::nullopt, "c"});
+  std::string expected = BitBuilder()
+                             .Bit(0)
+                             .Bit(1)
+                             .Bit(0)
+                             .Bit(1)
+                             .ByteAlign()
+                             .Bytes({0x01, 'a'})
+                             .Bytes({0x01, 'c'})
+                             .AsString();
+  EXPECT_EQ(encoded, expected);
+}
+
+TEST(OptionalBlobEncoding, NoBlobsPresent) {
+  std::string encoded =
+      EncodeOptionalBlobs({absl::nullopt, absl::nullopt, absl::nullopt});
+  std::string expected = BitBuilder().Bit(0).Bit(0).Bit(0).Bit(0).AsString();
+  EXPECT_EQ(encoded, expected);
+}
+
+TEST(OptionalBlobEncoding, ZeroBlobs) {
+  std::string encoded = EncodeOptionalBlobs({});
+  EXPECT_EQ(encoded, std::string());
+}
+
+TEST(OptionalBlobEncoding, LongBlobs) {
+  std::string medium_string(100, 'a');
+  std::string long_string(200, 'b');
+  std::string encoded = EncodeOptionalBlobs({medium_string, long_string});
+  std::string expected =
+      BitBuilder()
+          .Bit(1)
+          .ByteAlign()
+          .Bytes({0x64})
+          .Bytes({medium_string.begin(), medium_string.end()})
+          .Bytes({0xC8, 0x01})
+          .Bytes({long_string.begin(), long_string.end()})
+          .AsString();
+  EXPECT_EQ(encoded, expected);
+}
+
+TEST(OptionalBlobDecoding, AllBlobsPresent) {
+  std::string encoded = BitBuilder()
+                            .Bit(1)
+                            .ByteAlign()
+                            .Bytes({0x01, 'a'})
+                            .Bytes({0x01, 'b'})
+                            .Bytes({0x01, 'c'})
+                            .AsString();
+  auto decoded = DecodeOptionalBlobs(encoded, 3);
+  EXPECT_THAT(decoded, ElementsAre("a", "b", "c"));
+}
+
+TEST(OptionalBlobDecoding, SomeBlobsPresent) {
+  std::string encoded = BitBuilder()
+                            .Bit(0)
+                            .Bit(1)
+                            .Bit(0)
+                            .Bit(1)
+                            .ByteAlign()
+                            .Bytes({0x01, 'a'})
+                            .Bytes({0x01, 'c'})
+                            .AsString();
+  auto decoded = DecodeOptionalBlobs(encoded, 3);
+  EXPECT_THAT(decoded, ElementsAre("a", absl::nullopt, "c"));
+}
+
+TEST(OptionalBlobDecoding, NoBlobsPresent) {
+  std::string encoded =
+      BitBuilder().Bit(0).Bit(0).Bit(0).Bit(0).ByteAlign().AsString();
+  auto decoded = DecodeOptionalBlobs(encoded, 3);
+  EXPECT_THAT(decoded,
+              ElementsAre(absl::nullopt, absl::nullopt, absl::nullopt));
+}
+
+TEST(OptionalBlobDecoding, ZeroBlobs) {
+  std::string encoded;
+  auto decoded = DecodeOptionalBlobs(encoded, 0);
+  EXPECT_THAT(decoded, IsEmpty());
+}
+
+TEST(OptionalBlobDecoding, LongBlobs) {
+  std::string medium_string(100, 'a');
+  std::string long_string(200, 'b');
+  std::string encoded = BitBuilder()
+                            .Bit(1)
+                            .ByteAlign()
+                            .Bytes({0x64})
+                            .Bytes({medium_string.begin(), medium_string.end()})
+                            .Bytes({0xC8, 0x01})
+                            .Bytes({long_string.begin(), long_string.end()})
+                            .AsString();
+  auto decoded = DecodeOptionalBlobs(encoded, 2);
+  EXPECT_THAT(decoded, ElementsAre(medium_string, long_string));
+}
+
+TEST(OptionalBlobDecoding, TooShortEncodedBlobLength) {
+  std::string encoded =
+      BitBuilder().Bit(1).ByteAlign().Bytes({0x01, 'a', 'b'}).AsString();
+  auto decoded = DecodeOptionalBlobs(encoded, 1);
+  EXPECT_THAT(decoded, IsEmpty());
+}
+
+TEST(OptionalBlobDecoding, TooLongEncodedBlobLength) {
+  std::string encoded =
+      BitBuilder().Bit(1).ByteAlign().Bytes({0x03, 'a', 'b'}).AsString();
+  auto decoded = DecodeOptionalBlobs(encoded, 1);
+  EXPECT_THAT(decoded, IsEmpty());
+}
+
+TEST(OptionalBlobDecoding, TooLongEncodedBufferLength) {
+  std::string encoded = BitBuilder().Bytes({0x00, 0x00, 0x00}).AsString();
+  auto decoded = DecodeOptionalBlobs(encoded, 8);
+  EXPECT_THAT(decoded, IsEmpty());
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/rtc_base/BUILD.gn b/rtc_base/BUILD.gn
index fc92b01..a657a44 100644
--- a/rtc_base/BUILD.gn
+++ b/rtc_base/BUILD.gn
@@ -138,8 +138,14 @@
     "bit_buffer.cc",
     "bit_buffer.h",
   ]
-  deps = [ ":checks" ]
-  absl_deps = [ "//third_party/abseil-cpp/absl/numeric:bits" ]
+  deps = [
+    ":checks",
+    "../api/units:data_size",
+  ]
+  absl_deps = [
+    "//third_party/abseil-cpp/absl/numeric:bits",
+    "//third_party/abseil-cpp/absl/strings:strings",
+  ]
 }
 
 rtc_library("byte_buffer") {
diff --git a/rtc_base/bit_buffer.cc b/rtc_base/bit_buffer.cc
index 7dc7428..fd57e13 100644
--- a/rtc_base/bit_buffer.cc
+++ b/rtc_base/bit_buffer.cc
@@ -14,6 +14,7 @@
 #include <limits>
 
 #include "absl/numeric/bits.h"
+#include "absl/strings/string_view.h"
 #include "rtc_base/checks.h"
 
 namespace {
@@ -205,4 +206,25 @@
   }
 }
 
+bool BitBufferWriter::WriteLeb128(uint64_t val) {
+  // Emits 7 bits per byte, low bits first; bit 7 flags that more bytes follow.
+  bool all_written = true;
+  for (bool more = true; more;) {
+    uint8_t group = static_cast<uint8_t>(val & 0x7f);
+    val >>= 7;
+    more = val > 0;
+    group |= more ? 0x80 : 0x00;
+    all_written &= WriteUInt8(group);
+  }
+  return all_written;
+}
+
+bool BitBufferWriter::WriteString(absl::string_view data) {
+  bool all_written = true;  // Cleared as soon as one byte can't be written.
+  for (size_t pos = 0; pos < data.size(); ++pos) {
+    all_written &= WriteUInt8(data[pos]);
+  }
+  return all_written;
+}
+
 }  // namespace rtc
diff --git a/rtc_base/bit_buffer.h b/rtc_base/bit_buffer.h
index b4991bc..fe50b2b 100644
--- a/rtc_base/bit_buffer.h
+++ b/rtc_base/bit_buffer.h
@@ -14,6 +14,9 @@
 #include <stddef.h>  // For size_t.
 #include <stdint.h>  // For integer types.
 
+#include "absl/strings/string_view.h"
+#include "api/units/data_size.h"
+
 namespace rtc {
 
 // A BitBuffer API for write operations. Supports symmetric write APIs to the
@@ -22,6 +25,9 @@
 // Byte order is assumed big-endian/network.
 class BitBufferWriter {
  public:
+  static constexpr webrtc::DataSize kMaxLeb128Length =
+      webrtc::DataSize::Bytes(10);
+
   // Constructs a bit buffer for the writable buffer of `bytes`.
   BitBufferWriter(uint8_t* bytes, size_t byte_count);
 
@@ -72,6 +78,12 @@
   // sequence 0, 1, -1, 2, -2, etc. in order.
   bool WriteSignedExponentialGolomb(int32_t val);
 
+  // Writes `val` LEB128 encoded. Returns false if it does not fit the buffer.
+  bool WriteLeb128(uint64_t val);
+
+  // Writes the bytes of `data`. Returns false if they do not fit the buffer.
+  bool WriteString(absl::string_view data);
+
  private:
   // The buffer, as a writable array.
   uint8_t* const writable_bytes_;
diff --git a/rtc_base/bit_buffer_unittest.cc b/rtc_base/bit_buffer_unittest.cc
index 198be50..7dfe0c8 100644
--- a/rtc_base/bit_buffer_unittest.cc
+++ b/rtc_base/bit_buffer_unittest.cc
@@ -221,4 +221,36 @@
   EXPECT_EQ(0x7F, bytes[1]);
 }
 
+TEST(BitBufferWriterTest, WriteLeb128) {
+  uint8_t small_number[2];
+  BitBufferWriter small_buffer(small_number, sizeof(small_number));
+  EXPECT_TRUE(small_buffer.WriteLeb128(129));
+  EXPECT_THAT(small_number, ElementsAre(0x81, 0x01));
+
+  uint8_t large_number[10];
+  BitBufferWriter large_buffer(large_number, sizeof(large_number));
+  EXPECT_TRUE(large_buffer.WriteLeb128(std::numeric_limits<uint64_t>::max()));
+  EXPECT_THAT(large_number, ElementsAre(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+                                        0xFF, 0xFF, 0xFF, 0x01));
+}
+
+TEST(BitBufferWriterTest, WriteLeb128TooSmallBuffer) {
+  uint8_t bytes[1];
+  BitBufferWriter buffer(bytes, sizeof(bytes));
+  EXPECT_FALSE(buffer.WriteLeb128(12345));
+}
+
+TEST(BitBufferWriterTest, WriteString) {
+  uint8_t buffer[2];
+  BitBufferWriter writer(buffer, sizeof(buffer));
+  EXPECT_TRUE(writer.WriteString("ab"));
+  EXPECT_THAT(buffer, ElementsAre('a', 'b'));
+}
+
+TEST(BitBufferWriterTest, WriteStringTooSmallBuffer) {
+  uint8_t buffer[2];
+  BitBufferWriter writer(buffer, sizeof(buffer));
+  EXPECT_FALSE(writer.WriteString("abc"));
+}
+
 }  // namespace rtc
diff --git a/rtc_base/bitstream_reader.cc b/rtc_base/bitstream_reader.cc
index d2c622d..3e1b94d 100644
--- a/rtc_base/bitstream_reader.cc
+++ b/rtc_base/bitstream_reader.cc
@@ -132,4 +132,43 @@
   }
 }
 
+uint64_t BitstreamReader::ReadLeb128() {
+  uint64_t decoded = 0;
+  size_t i = 0;
+  uint8_t byte;
+  // A LEB128 value can in theory be arbitrarily large, but for convenience's
+  // sake consider it invalid if it can't fit in a uint64_t.
+  do {
+    byte = Read<uint8_t>();
+    decoded +=
+        (static_cast<uint64_t>(byte & 0x7f) << static_cast<uint64_t>(7 * i));
+    ++i;
+  } while (i < 10 && (byte & 0x80));
+
+  // The first 9 bytes represent the first 63 bits. The tenth byte can therefore
+  // not be larger than 1 as it would overflow a uint64_t.
+  if (i == 10 && byte > 1) {
+    Invalidate();
+  }
+
+  return Ok() ? decoded : 0;
+}
+
+std::string BitstreamReader::ReadString(int num_bytes) {
+  std::string res;
+  // Reject a negative count, which would turn into a huge size_t in reserve(),
+  // and a count exceeding what is left to read, before allocating anything.
+  // Reading past the end would leave the reader invalid anyway.
+  if (num_bytes < 0 || int64_t{num_bytes} * 8 > RemainingBitCount()) {
+    Invalidate();
+  } else {
+    res.reserve(num_bytes);
+    for (int i = 0; i < num_bytes; ++i) {
+      res += Read<uint8_t>();
+    }
+  }
+
+  return Ok() ? res : std::string();
+}
+
 }  // namespace webrtc
diff --git a/rtc_base/bitstream_reader.h b/rtc_base/bitstream_reader.h
index 51c7914..c367b9d 100644
--- a/rtc_base/bitstream_reader.h
+++ b/rtc_base/bitstream_reader.h
@@ -104,6 +104,15 @@
   // unspecified value.
   int ReadSignedExponentialGolomb();
 
+  // Reads a LEB128 encoded value. The value will be considered invalid if it
+  // can't fit into a uint64_t.
+  uint64_t ReadLeb128();
+
+  // Reads `num_bytes` bytes and returns them as a string. `num_bytes` must not
+  // be negative. If the bytes can't be read the reader is no longer Ok() and an
+  // empty string is returned.
+  std::string ReadString(int num_bytes);
+
  private:
   void set_last_read_is_verified(bool value) const;
 
diff --git a/rtc_base/bitstream_reader_unittest.cc b/rtc_base/bitstream_reader_unittest.cc
index 997abdf..46309b2 100644
--- a/rtc_base/bitstream_reader_unittest.cc
+++ b/rtc_base/bitstream_reader_unittest.cc
@@ -341,5 +341,33 @@
   EXPECT_TRUE(reader3.Ok());
 }
 
+TEST(BitstreamReaderTest, ReadLeb128) {
+  const uint8_t bytes[] = {0xFF, 0x7F};
+  BitstreamReader reader(bytes);
+  EXPECT_EQ(reader.ReadLeb128(), 0x3FFFu);
+  EXPECT_TRUE(reader.Ok());
+}
+
+TEST(BitstreamReaderTest, ReadLeb128Large) {
+  const uint8_t max_uint64[] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+                                0xFF, 0xFF, 0xFF, 0xFF, 0x1};
+  BitstreamReader max_reader(max_uint64);
+  EXPECT_EQ(max_reader.ReadLeb128(), std::numeric_limits<uint64_t>::max());
+  EXPECT_TRUE(max_reader.Ok());
+
+  const uint8_t overflow_unit64_t[] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+                                       0xFF, 0xFF, 0xFF, 0xFF, 0x2};
+  BitstreamReader overflow_reader(overflow_unit64_t);
+  EXPECT_EQ(overflow_reader.ReadLeb128(), uint64_t{0});
+  EXPECT_FALSE(overflow_reader.Ok());
+}
+
+TEST(BitstreamReaderTest, ReadLeb128NoEndByte) {
+  const uint8_t bytes[] = {0xFF, 0xFF};
+  BitstreamReader reader(bytes);
+  EXPECT_EQ(reader.ReadLeb128(), uint64_t{0});
+  EXPECT_FALSE(reader.Ok());
+}
+
 }  // namespace
 }  // namespace webrtc