#!/usr/bin/env python3
# Copyright (c) 2026 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS.  All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
"""Script to summarize external contributors to the libwebrtc codebase.

Reads `git log origin/main` for the last three years and prints the top
external author emails, the top external email domains and a per-month
commit breakdown.
"""

import operator
import subprocess
from collections import defaultdict
from dataclasses import dataclass, field

# Email domains (including their subdomains) treated as corporate or
# service accounts; commits from them are not counted as external.
CORPORATE_DOMAINS = ('chromium.org', 'webrtc.org', 'google.com',
                     'gserviceaccount.com')

# Author-email prefixes of bots that are excluded from every total,
# including the "all commits" monthly count.
BOT_PREFIXES = ('webrtc-version-updater', 'chromium-webrtc-autoroll')


def _new_counter():
    """Return an empty counter mapping (missing keys read as 0)."""
    return defaultdict(int)


@dataclass
class CommitSummaries:
    """Holds various summaries of commits.

    Every field maps a string key to a commit count and defaults to an
    empty counter, so `CommitSummaries()` is the "no data" value.
    """
    # Commits per external author email.
    author_counts: defaultdict = field(default_factory=_new_counter)
    # Commits per external email domain.
    domain_counts: defaultdict = field(default_factory=_new_counter)
    # As domain_counts, minus commits whose email contains "hancke".
    domain_non_hancke_counts: defaultdict = field(
        default_factory=_new_counter)
    # External commits per month (YYYY-MM).
    monthly_summary: defaultdict = field(default_factory=_new_counter)
    # As monthly_summary, minus commits whose email contains "hancke".
    monthly_non_hancke_summary: defaultdict = field(
        default_factory=_new_counter)
    # All non-bot commits per month, corporate ones included.
    monthly_all_summary: defaultdict = field(default_factory=_new_counter)


def _is_corporate(domain):
    """Return True if `domain` is a corporate domain or a subdomain of one."""
    return any(domain == d or domain.endswith('.' + d)
               for d in CORPORATE_DOMAINS)


def summarize_log(output):
    """Aggregate `git log --format='%ae %cs'` output into CommitSummaries.

    Args:
        output: Text with one "<author email> <YYYY-MM-DD>" entry per line.
            Lines without a space separator are ignored.

    Returns:
        A CommitSummaries instance holding the aggregated counts.
    """
    summaries = CommitSummaries()
    for line in output.splitlines():
        # The date never contains a space, so split on the last one; this
        # stays correct even if an author email itself contains spaces.
        email, sep, date_str = line.rpartition(' ')
        if not sep:
            continue
        email = email.lower()  # Normalize to lowercase
        # Convert date_str (YYYY-MM-DD) to YYYY-MM for the monthly summaries
        month_key = date_str[:7]

        # Exclude common bot/service account prefixes from all totals
        if email.startswith(BOT_PREFIXES):
            continue

        # Track all commits per month (before the corporate filter)
        summaries.monthly_all_summary[month_key] += 1

        # Skip corporate and service-account domains
        domain = email.split('@')[-1] if '@' in email else ''
        if _is_corporate(domain):
            continue

        summaries.author_counts[email] += 1
        summaries.domain_counts[domain] += 1
        summaries.monthly_summary[month_key] += 1

        # Track commits not authored by "hancke"
        if 'hancke' not in email:
            summaries.monthly_non_hancke_summary[month_key] += 1
            summaries.domain_non_hancke_counts[domain] += 1
    return summaries


def get_external_commits():
    """Run `git log` on origin/main and summarize external commits.

    Returns:
        A CommitSummaries instance. If git cannot be started or exits with
        an error, a message is printed and empty summaries are returned.
    """
    # Use origin/main as the reference for the main branch. The format
    # yields the author email and the committer date (YYYY-MM-DD).
    # No shell is involved, so the --since value must not carry quotes.
    cmd = [
        'git', 'log', 'origin/main', '--since=3 years ago',
        '--format=%ae %cs'
    ]
    try:
        # stderr is captured separately so that git warnings are never
        # parsed as log entries; undecodable bytes are replaced rather
        # than aborting the whole run.
        result = subprocess.run(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                check=True,
                                encoding='utf-8',
                                errors='replace')
    except subprocess.CalledProcessError as err:
        print(f"Error running git log: {err.stderr}")
        return CommitSummaries()
    except OSError as err:
        # E.g. the git executable is not installed or not on PATH.
        print(f"Error running git log: {err}")
        return CommitSummaries()
    return summarize_log(result.stdout)


def main():
    """Print the external-contributor report to stdout."""
    summaries = get_external_commits()

    if not summaries.author_counts:
        print("No external commits found in the last 3 years.")
        return

    # Sort authors by commit count descending
    top_20_authors = sorted(summaries.author_counts.items(),
                            key=operator.itemgetter(1),
                            reverse=True)[:20]

    print("Top 20 External Committers (Last 3 Years):")
    print(f"{'Author Email':<40} | {'Commits':<8}")
    print("-" * 52)
    for email, count in top_20_authors:
        print(f"{email:<40} | {count:<8}")

    # Sort domains by commit count descending
    top_20_domains = sorted(summaries.domain_counts.items(),
                            key=operator.itemgetter(1),
                            reverse=True)[:20]

    print("\nTop 20 External Domains (Last 3 Years):")
    print(f"{'Domain':<40} | {'External':<10} | {'Non-Hancke':<12}")
    print("-" * 68)
    for domain, count in top_20_domains:
        non_hancke = summaries.domain_non_hancke_counts.get(domain, 0)
        print(f"{domain:<40} | {count:<10} | {non_hancke:<12}")

    print("\nMonthly Summary of Commits:")
    print(f"{'Month':<10} | {'All':<8} | {'External':<8} | {'Non-Hancke':<12}")
    print("-" * 49)
    # We use monthly_all_summary keys to ensure we cover all months with
    # activity
    for month in sorted(summaries.monthly_all_summary, reverse=True):
        all_commits = summaries.monthly_all_summary[month]
        external = summaries.monthly_summary.get(month, 0)
        non_hancke = summaries.monthly_non_hancke_summary.get(month, 0)
        print(f"{month:<10} | {all_commits:<8} | {external:<8} | "
              f"{non_hancke:<12}")


if __name__ == "__main__":
    main()