blob: 395a34b53df65d60ec78ab636a33de9e51690e1e [file]
#!/usr/bin/env python3
# Copyright (c) 2026 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
"""Script to summarize external contributors to the libwebrtc codebase."""
import subprocess
from collections import defaultdict
import operator
from dataclasses import dataclass
@dataclass
class CommitSummaries:
    """Bundle of the commit tallies produced from one pass over the git log.

    The fields are filled positionally in the order declared below, so the
    order is part of the interface. Each field is expected to be a
    defaultdict mapping a string key to a commit count.
    """
    # Commits per external author, keyed by lowercased author email.
    author_counts: defaultdict
    # Commits per external email domain.
    domain_counts: defaultdict
    # Like domain_counts, but skipping emails that contain "hancke".
    domain_non_hancke_counts: defaultdict
    # External commits per month, keyed by "YYYY-MM".
    monthly_summary: defaultdict
    # Like monthly_summary, but skipping emails that contain "hancke".
    monthly_non_hancke_summary: defaultdict
    # Every non-bot commit per month, corporate authors included.
    monthly_all_summary: defaultdict
def get_external_commits() -> 'CommitSummaries':
    """Collect per-author, per-domain and per-month commit tallies.

    Runs ``git log`` on ``origin/main`` for the last three years (in the
    process's current working directory) and tallies the author email and
    commit date of every commit it reports.

    Returns:
        A CommitSummaries. All of its counters are empty when ``git`` could
        not be started or exited with an error; in that case the error is
        printed to stdout.
    """
    # Every argv element reaches git verbatim (no shell is involved), so the
    # --since value must not carry shell-style quotes.
    # %ae is the author email, %cs the committer date as YYYY-MM-DD.
    cmd = [
        'git', 'log', 'origin/main', '--since=3 years ago',
        '--format=%ae %cs'
    ]
    try:
        # Keep stderr out of stdout: if the two were merged, any warning git
        # prints would be parsed below as if it were a commit line.
        raw = subprocess.check_output(cmd, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as err:
        detail = (err.stderr or err.output or b'').decode('utf-8',
                                                          errors='replace')
        print(f"Error running git log: {detail}")
        return _summarize_log('')
    except OSError as err:
        # Raised when git cannot be started at all (e.g. not installed).
        print(f"Error running git log: {err}")
        return _summarize_log('')
    # errors='replace' keeps one oddly encoded author from aborting the run.
    return _summarize_log(raw.decode('utf-8', errors='replace'))


def _summarize_log(log_text: str) -> 'CommitSummaries':
    """Tally ``<author email> <YYYY-MM-DD>`` lines into a CommitSummaries."""
    # Corporate and service-account domains; commits from these (or any of
    # their subdomains) are not counted as external.
    corporate_domains = ('chromium.org', 'webrtc.org', 'google.com',
                         'gserviceaccount.com')
    # Bot accounts are dropped from every total, including the "all" column.
    bot_prefixes = ('webrtc-version-updater', 'chromium-webrtc-autoroll')

    author_counts = defaultdict(int)
    domain_counts = defaultdict(int)
    domain_non_hancke_counts = defaultdict(int)
    monthly_summary = defaultdict(int)
    monthly_non_hancke_summary = defaultdict(int)
    monthly_all_summary = defaultdict(int)

    for line in log_text.split('\n'):
        # The date is the last space-separated field; splitting from the
        # right keeps an email that itself contains a space in one piece.
        email, sep, date_str = line.rpartition(' ')
        if not sep or not date_str:
            continue  # Blank or malformed line.
        email = email.lower()  # Normalize to lowercase
        month_key = date_str[:7]  # YYYY-MM-DD -> YYYY-MM

        if email.startswith(bot_prefixes):
            continue

        # Track all commits per month (before the corporate filter).
        monthly_all_summary[month_key] += 1

        domain = email.rpartition('@')[2] if '@' in email else ''
        if any(domain == d or domain.endswith('.' + d)
               for d in corporate_domains):
            continue

        author_counts[email] += 1
        domain_counts[domain] += 1
        monthly_summary[month_key] += 1

        # Track commits not authored by "hancke".
        if 'hancke' not in email:
            monthly_non_hancke_summary[month_key] += 1
            domain_non_hancke_counts[domain] += 1

    return CommitSummaries(author_counts, domain_counts,
                           domain_non_hancke_counts, monthly_summary,
                           monthly_non_hancke_summary, monthly_all_summary)
def main():
    """Print the external-contribution report to stdout.

    Emits three tables built from get_external_commits(): the 20 busiest
    external author emails, the 20 busiest external domains, and a
    month-by-month breakdown (newest month first). Prints a single notice
    and returns early when there are no external author counts.
    """
    report = get_external_commits()
    if not report.author_counts:
        print("No external commits found in the last 3 years.")
        return

    # Both rankings order (key, count) pairs by the count, largest first.
    by_count = operator.itemgetter(1)

    print("Top 20 External Committers (Last 3 Years):")
    print(f"{'Author Email':<40} | {'Commits':<8}")
    print("-" * 52)
    ranked_authors = sorted(report.author_counts.items(),
                            key=by_count,
                            reverse=True)
    for email, count in ranked_authors[:20]:
        print(f"{email:<40} | {count:<8}")

    print("\nTop 20 External Domains (Last 3 Years):")
    print(f"{'Domain':<40} | {'External':<10} | {'Non-Hancke':<12}")
    print("-" * 68)
    ranked_domains = sorted(report.domain_counts.items(),
                            key=by_count,
                            reverse=True)
    for domain, count in ranked_domains[:20]:
        non_hancke = report.domain_non_hancke_counts.get(domain, 0)
        print(f"{domain:<40} | {count:<10} | {non_hancke:<12}")

    print("\nMonthly Summary of Commits:")
    print(f"{'Month':<10} | {'All':<8} | {'External':<8} | {'Non-Hancke':<12}")
    print("-" * 49)
    # Drive the rows from the "all commits" tally so that months without any
    # external commit still get a row.
    months_newest_first = sorted(report.monthly_all_summary.items(),
                                 reverse=True)
    for month, all_commits in months_newest_first:
        external = report.monthly_summary.get(month, 0)
        non_hancke = report.monthly_non_hancke_summary.get(month, 0)
        print(f"{month:<10} | {all_commits:<8} | {external:<8} | "
              f"{non_hancke:<12}")
# Produce the report only when this file is executed as a script; importing
# it as a module has no side effects.
if __name__ == "__main__":
    main()