Rewrite 'generate_sslroots' w/o OpenSSL.

OpenSSL removed the ability to generate C code:
https://github.com/openssl/openssl/commit/a18cf8fc634a8834e505e60ebb7f947d4c0c2552

This CL rewrites the generation script to use the pure-Python asn1crypto library.

The changes in generated code lead to a huge diff in the generated file:
- Certificate array names are based on certificate fingerprints instead
of semi-human-readable names, which were not referenced externally;
- The order of arrays in the generated file matches the order in which
certificates appear in the source PEM file. Previously, re-ordering
happened because temporary files were written to disk.


Bug: webrtc:11710
Change-Id: Ie7a97b3658f6ccb397f0fd0c21d341934a2cc12e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/304642
Commit-Queue: Yury Yarashevich <yura.yaroshevich@gmail.com>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#40039}
diff --git a/.vpython3 b/.vpython3
index 31a2c59..96feab5 100644
--- a/.vpython3
+++ b/.vpython3
@@ -86,3 +86,10 @@
   name: "infra/python/wheels/requests-py2_py3"
   version: "version:2.13.0"
 >
+
+# Used by:
+#   tools_webrtc/sslroots
+wheel: <
+  name: "infra/python/wheels/asn1crypto-py2_py3"
+  version: "version:1.0.1"
+>
diff --git a/tools_webrtc/sslroots/generate_sslroots.py b/tools_webrtc/sslroots/generate_sslroots.py
index 291c3ce..14acff9 100644
--- a/tools_webrtc/sslroots/generate_sslroots.py
+++ b/tools_webrtc/sslroots/generate_sslroots.py
@@ -1,195 +1,238 @@
 #!/usr/bin/env vpython3
 
 # -*- coding:utf-8 -*-
-# Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+# Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
 #
 # Use of this source code is governed by a BSD-style license
 # that can be found in the LICENSE file in the root of the source
 # tree. An additional intellectual property rights grant can be found
 # in the file PATENTS.  All contributing project authors may
 # be found in the AUTHORS file in the root of the source tree.
-"""This is a tool to transform a crt file into a C/C++ header.
-
-Usage:
-python3 generate_sslroots.py certfile.pem [--verbose | -v] [--full_cert | -f]
-
-Arguments:
-  -v  Print output while running.
-  -f  Add public key and certificate name.  Default is to skip and reduce
-      generated file size.
-
-The supported cert files are:
-  - Google: https://pki.goog/roots.pem
-  - Mozilla: https://curl.se/docs/caextract.html
-"""
-
-import subprocess
-from optparse import OptionParser
-import os
-import re
+import argparse
+import logging
+from pathlib import Path
+import tempfile
+from typing import Tuple, Any, List, ByteString
+from datetime import datetime, timezone
+from hashlib import sha256
+from urllib.request import urlopen
+from asn1crypto import pem, x509
 
 _GENERATED_FILE = 'ssl_roots.h'
-_PREFIX = '__generated__'
-_EXTENSION = '.crt'
-_SUBJECT_NAME_ARRAY = 'subject_name'
-_SUBJECT_NAME_VARIABLE = 'SubjectName'
-_PUBLIC_KEY_ARRAY = 'public_key'
-_PUBLIC_KEY_VARIABLE = 'PublicKey'
-_CERTIFICATE_ARRAY = 'certificate'
-_CERTIFICATE_VARIABLE = 'Certificate'
-_CERTIFICATE_SIZE_VARIABLE = 'CertificateSize'
-_INT_TYPE = 'size_t'
-_CHAR_TYPE = 'unsigned char* const'
-_VERBOSE = 'verbose'
-_MOZILLA_BUNDLE_CHECK = '## Certificate data from Mozilla as of:'
-
 
 def main():
-  """The main entrypoint."""
-  parser = OptionParser('usage %prog FILE')
-  parser.add_option('-v', '--verbose', dest='verbose', action='store_true')
-  parser.add_option('-f', '--full_cert', dest='full_cert', action='store_true')
-  options, args = parser.parse_args()
-  if len(args) < 1:
-    parser.error('No crt file specified.')
-    return
-  root_dir, bundle_type = _SplitCrt(args[0], options)
-  _GenCFiles(root_dir, options, bundle_type)
-  _Cleanup(root_dir)
+  parser = argparse.ArgumentParser(
+      description='This is a tool to transform a crt file '
+      f'into a C/C++ header: {_GENERATED_FILE}.')
 
+  parser.add_argument('source_path_or_url',
+                      help='File path or URL to PEM storage file. '
+                      'The supported cert files are: '
+                      '- Google: https://pki.goog/roots.pem; '
+                      '- Mozilla: https://curl.se/ca/cacert.pem')
+  parser.add_argument('-v',
+                      '--verbose',
+                      dest='verbose',
+                      action='store_true',
+                      help='Print output while running')
+  parser.add_argument('-f',
+                      '--full_cert',
+                      dest='full_cert',
+                      action='store_true',
+                      help='Add public key and certificate name. '
+                      'Default is to skip and reduce generated file size.')
+  args = parser.parse_args()
+  logging.basicConfig(level=logging.DEBUG if args.verbose else logging.WARNING)
 
-def _SplitCrt(source_file, options):
-  sub_file_blocks = []
-  label_name = ''
-  prev_line = None
-  root_dir = os.path.dirname(os.path.abspath(source_file)) + '/'
-  _PrintOutput(root_dir, options)
-  lines = None
-  with open(source_file) as f:
-    lines = f.readlines()
-  mozilla_bundle = any(l.startswith(_MOZILLA_BUNDLE_CHECK) for l in lines)
-  for line in lines:
-    if line.startswith('#'):
-      if mozilla_bundle:
-        continue
-      if line.startswith('# Label: '):
-        sub_file_blocks.append(line)
-        label = re.search(r'\".*\"', line)
-        temp_label = label.group(0)
-        end = len(temp_label) - 1
-        label_name = _SafeName(temp_label[1:end])
-    if mozilla_bundle and line.startswith('==='):
-      sub_file_blocks.append(line)
-      label_name = _SafeName(prev_line)
-    elif line.startswith('-----END CERTIFICATE-----'):
-      sub_file_blocks.append(line)
-      new_file_name = root_dir + _PREFIX + label_name + _EXTENSION
-      _PrintOutput('Generating: ' + new_file_name, options)
-      new_file = open(new_file_name, 'w')
-      for out_line in sub_file_blocks:
-        new_file.write(out_line)
-      new_file.close()
-      sub_file_blocks = []
+  with tempfile.TemporaryDirectory() as temp_dir:
+    cert_file = Path(temp_dir) / "cacert.pem"
+
+    if args.source_path_or_url.startswith(
+        'https://') or args.source_path_or_url.startswith('http://'):
+      _DownloadCertificatesStore(args.source_path_or_url, cert_file)
+      destination_dir = Path.cwd()
     else:
-      sub_file_blocks.append(line)
-    prev_line = line
-  return root_dir, 'Mozilla' if mozilla_bundle else 'Google'
+      source_path = Path(args.source_path_or_url)
+      cert_file.write_bytes(source_path.read_bytes())
+      destination_dir = source_path.parent
+
+    logging.debug('Stored certificate from %s into %s', args.source_path_or_url,
+                  cert_file)
+
+    header_file = destination_dir / _GENERATED_FILE
+
+    digest, certificates = _LoadCertificatesStore(cert_file)
+    _GenerateCHeader(header_file, args.source_path_or_url, digest, certificates,
+                     args.full_cert)
+
+    logging.debug('Did generate %s from %s [%s]', header_file,
+                  args.source_path_or_url, digest)
 
 
-def _GenCFiles(root_dir, options, bundle_type):
-  output_header_file = open(root_dir + _GENERATED_FILE, 'w')
-  output_header_file.write(_CreateOutputHeader(bundle_type))
-  if options.full_cert:
-    subject_name_list = _CreateArraySectionHeader(_SUBJECT_NAME_VARIABLE,
-                                                  _CHAR_TYPE, options)
-    public_key_list = _CreateArraySectionHeader(_PUBLIC_KEY_VARIABLE,
-                                                _CHAR_TYPE, options)
-  certificate_list = _CreateArraySectionHeader(_CERTIFICATE_VARIABLE,
-                                               _CHAR_TYPE, options)
-  certificate_size_list = _CreateArraySectionHeader(_CERTIFICATE_SIZE_VARIABLE,
-                                                    _INT_TYPE, options)
+def _DownloadCertificatesStore(pem_url: str, destination_file: Path):
+  with urlopen(pem_url) as response:
+    pem_file = response.read()
+    logging.info('Got response with status [%d]: %s', response.status, pem_url)
 
-  for _, _, files in os.walk(root_dir):
-    for current_file in files:
-      if current_file.startswith(_PREFIX):
-        prefix_length = len(_PREFIX)
-        length = len(current_file) - len(_EXTENSION)
-        label = current_file[prefix_length:length]
-        filtered_output, cert_size = _CreateCertSection(root_dir, current_file,
-                                                        label, options)
-        output_header_file.write(filtered_output + '\n\n\n')
-        if options.full_cert:
-          subject_name_list += _AddLabelToArray(label, _SUBJECT_NAME_ARRAY)
-          public_key_list += _AddLabelToArray(label, _PUBLIC_KEY_ARRAY)
-        certificate_list += _AddLabelToArray(label, _CERTIFICATE_ARRAY)
-        certificate_size_list += ('  %s,\n') % (cert_size)
+  if destination_file.parent.exists():
+    logging.debug('Creating directory and it\'s parents %s',
+                  destination_file.parent)
+    destination_file.parent.mkdir(parents=True, exist_ok=True)
+  if destination_file.exists():
+    logging.debug('Unlink existing file %s', destination_file)
+    destination_file.unlink(missing_ok=True)
 
-  if options.full_cert:
-    subject_name_list += _CreateArraySectionFooter()
-    output_header_file.write(subject_name_list)
-    public_key_list += _CreateArraySectionFooter()
-    output_header_file.write(public_key_list)
-  certificate_list += _CreateArraySectionFooter()
-  output_header_file.write(certificate_list)
-  certificate_size_list += _CreateArraySectionFooter()
-  output_header_file.write(certificate_size_list)
-  output_header_file.write(_CreateOutputFooter())
-  output_header_file.close()
+  destination_file.write_bytes(pem_file)
+  logging.info('Stored downloaded %d bytes pem file to `%s`', len(pem_file),
+               destination_file)
 
 
-def _Cleanup(root_dir):
-  for f in os.listdir(root_dir):
-    if f.startswith(_PREFIX):
-      os.remove(root_dir + f)
+def _LoadCertificatesStore(
+    source_file: Path) -> Tuple[str, List[x509.Certificate]]:
+  pem_bytes = source_file.read_bytes()
+
+  certificates = [
+      x509.Certificate.load(der)
+      for type, _, der in pem.unarmor(pem_bytes, True) if type == 'CERTIFICATE'
+  ]
+  digest = f'sha256:{sha256(pem_bytes).hexdigest()}'
+  logging.debug('Loaded %d certificates from %s [%s] ', len(certificates),
+                source_file, digest)
+  return digest, certificates
 
 
-def _CreateCertSection(root_dir, source_file, label, options):
-  command = 'openssl x509 -in %s%s -noout -C' % (root_dir, source_file)
-  _PrintOutput(command, options)
-  output = subprocess.getstatusoutput(command)[1]
-  decl_block = 'unsigned char .*_(%s|%s|%s)' %\
-    (_SUBJECT_NAME_ARRAY, _PUBLIC_KEY_ARRAY, _CERTIFICATE_ARRAY)
-  prog = re.compile(decl_block, re.IGNORECASE)
-  renamed_output = prog.sub('const unsigned char ' + label + r'_\1', output)
+def _GenerateCHeader(header_file: Path, source: str, source_digest: str,
+                     certificates: List[x509.Certificate], full_cert: bool):
+  header_file.parent.mkdir(parents=True, exist_ok=True)
+  with header_file.open('w') as output:
+    output.write(_CreateOutputHeader(source, source_digest))
 
-  filtered_output = ''
-  cert_block = '^const unsigned char.*?};$'
-  prog2 = re.compile(cert_block, re.IGNORECASE | re.MULTILINE | re.DOTALL)
-  if not options.full_cert:
-    filtered_output = prog2.sub('', renamed_output, count=2)
-  else:
-    filtered_output = renamed_output
+    named_certificates = [(cert,
+                           f'kCertificateWithFingerprint_{cert.sha256.hex()}')
+                          for cert in certificates]
 
-  cert_size_block = r'\d\d\d+'
-  prog3 = re.compile(cert_size_block, re.MULTILINE | re.VERBOSE)
-  result = prog3.findall(renamed_output)
-  cert_size = result[len(result) - 1]
+    names = list(map(lambda x: x[1], named_certificates))
+    unique_names = list(set(names))
+    if len(names) != len(unique_names):
+      raise RuntimeError(
+          f'There are {len(names) - len(unique_names)} non-unique '
+          'certificate names generated. Generator script must be '
+          'fixed to handle collision.')
 
-  return filtered_output, cert_size
+    for cert, name in named_certificates:
+
+      output.write(_CreateCertificateMetadataHeader(cert))
+
+      if full_cert:
+        output.write(
+            _CArrayConstantDefinition('unsigned char',
+                                      f'{name}_subject_name',
+                                      _CreateHexList(cert.subject.dump()),
+                                      max_items_per_line=16))
+        output.write('\n')
+        output.write(
+            _CArrayConstantDefinition('unsigned char',
+                                      f'{name}_public_key',
+                                      _CreateHexList(cert.public_key.dump()),
+                                      max_items_per_line=16))
+        output.write('\n')
+
+      output.write(
+          _CArrayConstantDefinition('unsigned char',
+                                    f'{name}_certificate',
+                                    _CreateHexList(cert.dump()),
+                                    max_items_per_line=16))
+      output.write('\n\n')
+
+    if full_cert:
+      output.write(
+          _CArrayConstantDefinition('unsigned char* const',
+                                    'kSSLCertSubjectNameList',
+                                    [f'{name}_subject_name' for name in names]))
+      output.write('\n\n')
+
+      output.write(
+          _CArrayConstantDefinition('unsigned char* const',
+                                    'kSSLCertPublicKeyList',
+                                    [f'{name}_public_key' for name in names]))
+      output.write('\n\n')
+
+    output.write(
+        _CArrayConstantDefinition('unsigned char* const',
+                                  'kSSLCertCertificateList',
+                                  [f'{name}_certificate' for name in names]))
+    output.write('\n\n')
+
+    output.write(
+        _CArrayConstantDefinition(
+            'size_t', 'kSSLCertCertificateSizeList',
+            [f'{len(cert.dump())}' for cert, _ in named_certificates]))
+    output.write('\n\n')
+
+    output.write(_CreateOutputFooter())
 
 
-def _CreateOutputHeader(bundle_type):
-  output = ('/*\n'
-            ' *  Copyright 2004 The WebRTC Project Authors. All rights '
-            'reserved.\n'
-            ' *\n'
-            ' *  Use of this source code is governed by a BSD-style license\n'
-            ' *  that can be found in the LICENSE file in the root of the '
-            'source\n'
-            ' *  tree. An additional intellectual property rights grant can be '
-            'found\n'
-            ' *  in the file PATENTS.  All contributing project authors may\n'
-            ' *  be found in the AUTHORS file in the root of the source tree.\n'
-            ' */\n\n'
-            '#ifndef RTC_BASE_SSL_ROOTS_H_\n'
-            '#define RTC_BASE_SSL_ROOTS_H_\n\n'
-            '// This file is the root certificates in C form.\n\n'
-            '// It was generated with the following script:\n'
-            '// tools_webrtc/sslroots/generate_sslroots.py'
-            ' %s_CA_bundle.pem\n\n'
-            '// clang-format off\n'
-            '// Don\'t bother formatting generated code,\n'
-            '// also it would breaks subject/issuer lines.\n\n' % bundle_type)
+def _CreateHexList(items: ByteString) -> List[str]:
+  """
+  Produces a list of hex literals, one string per byte of the source sequence.
+  """
+  return [f'0x{item:02X}' for item in items]
+
+
+def _CArrayConstantDefinition(type_name: str,
+                              array_name: str,
+                              items: List[Any],
+                              max_items_per_line: int = 1) -> str:
+  """
+  Produces C array definition like: `const type_name array_name[N]={ items };`
+  """
+  return (f'const {type_name} {array_name}[{len(items)}]='
+          f'{_CArrayInitializerList(items, max_items_per_line)};')
+
+
+def _CArrayInitializerList(items: List[Any],
+                           max_items_per_line: int = 1) -> str:
+  """
+  Produces C initializer list like: `{\\nitems[0], \\n ...}`
+  """
+  return '{\n' + '\n'.join([
+      ','.join(items[i:i + max_items_per_line]) + ','
+      for i in range(0, len(items), max_items_per_line)
+  ]) + '\n}'
+
+
+def _CreateCertificateMetadataHeader(cert: x509.Certificate) -> str:
+  return (f'/* subject: {cert.subject.human_friendly} */\n'
+          f'/* issuer: {cert.issuer.human_friendly} */\n'
+          f'/* link: https://crt.sh/?q={cert.sha256.hex()} */\n')
+
+
+def _CreateOutputHeader(source_path_or_url: str, source_digest: str) -> str:
+  now_utc = datetime.now(timezone.utc).replace(microsecond=0)
+  output = (
+      '/*\n'
+      f' *  Copyright {now_utc.year} The WebRTC Project Authors. All rights '
+      'reserved.\n'
+      ' *\n'
+      ' *  Use of this source code is governed by a BSD-style license\n'
+      ' *  that can be found in the LICENSE file in the root of the '
+      'source\n'
+      ' *  tree. An additional intellectual property rights grant can be '
+      'found\n'
+      ' *  in the file PATENTS.  All contributing project authors may\n'
+      ' *  be found in the AUTHORS file in the root of the source tree.\n'
+      ' */\n\n'
+      '#ifndef RTC_BASE_SSL_ROOTS_H_\n'
+      '#define RTC_BASE_SSL_ROOTS_H_\n\n'
+      '// This file is the root certificates in C form.\n\n'
+      f'// It was generated at {now_utc.isoformat()} by the following script:\n'
+      '// `tools_webrtc/sslroots/generate_sslroots.py '
+      f'{source_path_or_url}`\n\n'
+      '// clang-format off\n'
+      '// Don\'t bother formatting generated code,\n'
+      '// also it would breaks subject/issuer lines.\n\n'
+      f'// Source bundle `{source_path_or_url}` digest is [{source_digest}]\n\n'
+  )
   return output
 
 
@@ -197,33 +240,5 @@
   return '// clang-format on\n\n#endif  // RTC_BASE_SSL_ROOTS_H_\n'
 
 
-def _CreateArraySectionHeader(type_name, type_type, options):
-  output = ('const %s kSSLCert%sList[] = {\n') % (type_type, type_name)
-  _PrintOutput(output, options)
-  return output
-
-
-def _AddLabelToArray(label, type_name):
-  return ' %s_%s,\n' % (label, type_name)
-
-
-def _CreateArraySectionFooter():
-  return '};\n\n'
-
-
-def _SafeName(original_file_name):
-  bad_chars = ' -./\\()áéíőú\r\n'
-  replacement_chars = ''
-  for _ in bad_chars:
-    replacement_chars += '_'
-  translation_table = str.maketrans(bad_chars, replacement_chars)
-  return original_file_name.translate(translation_table)
-
-
-def _PrintOutput(output, options):
-  if options.verbose:
-    print(output)
-
-
 if __name__ == '__main__':
   main()