#!/usr/bin/env python3
r"""
 _    _                    _               _
| |__| |__ _ _ __  ___ ___| |_ _ _ __ _ __| |_____ _ _
| '_ \ / _` | '  \/ -_)___|  _| '_/ _` / _| / / -_) '_|
|_.__/_\__,_|_|_|_\___|    \__|_| \__,_\__|_\_\___|_|

Author: Adam McDaniel
File: blame-tracker.py
Date: 01/25/2022
Description:
    This script accuses the author of commits of all files
    between two dates in ISO 8601 format.
License: GNU General Public License v3.0
"""

##################################################
# Import libraries
##################################################

import subprocess  # For running git commands
import glob  # For finding files
import argparse  # For reading command line arguments
import re  # For extracting the information from the `git blame` output
from sys import stdout, stderr  # For printing to stderr
from os.path import basename, abspath, isfile  # For getting the filename from a path
from datetime import datetime, timedelta  # For parsing dates

##################################################
# Constants
##################################################

# The name of this program.
PROGRAM_NAME = basename(__file__)
# The default format string for printing accusations.
DEFAULT_FORMAT = '{name:12} ({author} on {date} at {time}): {content}'

# First line of every `git blame --line-porcelain` section:
#   <commit id> <original line number> <final line number> [<group size>]
# Anchored to the start of a line so that file content (always prefixed with
# a TAB in porcelain output) can never be mistaken for a section header.
# The commit id is 40 hex digits (SHA-1) or 64 hex digits (SHA-256).
_BLAME_HEADER_RE = re.compile(
    r'^[0-9a-fA-F]{40}(?:[0-9a-fA-F]{24})? \d+ \d+.*$', re.MULTILINE)
# The `author <name>` header line of a section.
_AUTHOR_RE = re.compile(r'^author (.*)$', re.MULTILINE)
# The `author-time <unix timestamp>` header line of a section.
_AUTHOR_TIME_RE = re.compile(r'^author-time (\d+)$', re.MULTILINE)

##################################################
# Main
##################################################

def main():
    '''
    Main function. Parses command line arguments, collects the accusations,
    and prints them to the output file.
    '''

    ##################################################
    # Parse command line arguments
    ##################################################

    args = _build_parser().parse_args()

    ##################################################
    # Validate arguments
    ##################################################

    # Convert the author names to lowercase
    args.author = [author.lower() for author in args.author]

    if args.verbose >= 3:
        # Print debug information if verbose level is 3 or higher
        info("Calculating start date...")
    start_date = _resolve_start_date(args)

    if args.verbose >= 3:
        # Print debug information if verbose level is 3 or higher
        info("Parsing end date...")
    end_date = parse_date(args.end_date)

    # Check that end date is after start date
    if end_date < start_date:
        error('end date must be after start date.')

    ##################################################
    # Begin accusing
    ##################################################

    if args.verbose >= 3:
        # Print debug information if verbose level is 3 or higher
        info("Collecting accusations...")

    # Get the list of files to search
    files = get_files(args.repo, args.include, args.exclude)

    if args.verbose >= 1:
        # Print info messages if verbose level is 1 or higher
        info("searching for blame in:")
        for i, file in enumerate(files):
            info(f" {i+1}. {file}")

    # Collect the accusations
    accusations_by_file = accuse_files(args.repo, files, args.verbose, args.silence_warnings)

    if args.verbose >= 3:
        # Print debug information if verbose level is 3 or higher
        info(f"writing accusations for {len(accusations_by_file)} files...")

    ##################################################
    # Write the accusations to the output file
    ##################################################

    if args.output:
        try:
            # Blame output is decoded as text, so write it back as UTF-8
            # instead of whatever the platform's locale encoding happens to be.
            output_file = open(args.output, 'w', encoding='utf-8')
        except OSError as exc:
            error(f'could not open output file {args.output}: {exc}.')
    else:
        output_file = stdout

    try:
        _write_accusations(output_file, accusations_by_file, args, start_date, end_date)
    finally:
        # Close the output file even if writing failed (never close stdout)
        if args.output:
            output_file.close()

    ##################################################
    # Print statistics
    ##################################################

    # Collect all the accusations by files into a single list
    all_accusations = [accusation
                       for file_accusations in accusations_by_file.values()
                       for accusation in file_accusations]

    # Filter the accusations by the date range
    accusations_within_timeframe = [a for a in all_accusations
                                    if start_date <= a[1] <= end_date]

    if args.verbose:
        _print_statistics(accusations_within_timeframe, start_date)

    if not all_accusations and not args.silence_warnings:
        # Print a message if no authors were found
        warn('No accusations found. Your filters might have excluded all the available files. There also might not be any commits within the specified date range, or at all. Try using the --verbose flag to see more information. Additionally, you might try adjusting the date range using the --since and --until flags.')

    ##################################################
    # End
    ##################################################

    if args.verbose and args.output:
        info(f'Accusations written to {args.output}.')


##################################################
# Helper functions
##################################################

def _build_parser() -> argparse.ArgumentParser:
    '''
    Creates the command line argument parser for this program.

    Returns
    -------
    argparse.ArgumentParser
        The parser with every supported flag registered.
    '''
    parser = argparse.ArgumentParser(
        prog=PROGRAM_NAME,
        description='Adam McDaniel -- This script accuses an author of commits of all files between two dates in ISO 8601 format.')

    parser.add_argument('-r', '--repo', type=str,
        help='The directory📁 to search for files in (default is ".").', default='./')
    parser.add_argument('-by', '--author', type=str,
        help='The name📛 of the author to blame', nargs='+', default=[])
    parser.add_argument('-t0', '--since', dest='start_date', type=str,
        help='The start date📅 in ISO 8601 format.')
    parser.add_argument('-t1', '--until', dest='end_date', type=str,
        help='The end date📅 in ISO 8601 format (default is now).', default=datetime.now().isoformat())
    parser.add_argument('-d', '--days-ago', type=int,
        help='The number of days ago⏱️ to shift the search.', default=0)
    parser.add_argument('-w', '--weeks-ago', type=int,
        help='The number of weeks ago⏱️ to shift the search.', default=0)
    parser.add_argument('-m', '--minutes-ago', type=int,
        help='The number of minutes ago⏱️ to shift the search.', default=0)
    parser.add_argument('-in', '--include', dest='include', type=str,
        help='The file patterns📂 to search for (default is "**/*").', default=['**/*'], nargs='+')
    parser.add_argument('-ex', '--exclude', type=str,
        help='The file patterns📂 to exclude (default is none).', default=[], nargs='+')
    parser.add_argument('-f', '--format', type=str,
        help=f'The format string🧶 to print each accusation (default is "{DEFAULT_FORMAT}")', default=DEFAULT_FORMAT)
    parser.add_argument('-o', '--output', type=str,
        help='The output file📝 to write the accusations to.')
    # `--info` is a shorthand for `--verbose 1`; both write to `args.verbose`.
    parser.add_argument('-i', '--info', action='store_const', const=1, dest='verbose',
        help='Print info messagesℹ️ (default is disabled).', default=0)
    parser.add_argument('-v', '--verbose', type=int, choices=range(1, 4),
        help='Level of verbose output📢 (default is 0).', default=0)
    parser.add_argument('-ws', '--keep-whitespace', action="store_true",
        help='Keep whitespace in blame output📜 (default is false).', default=False)
    parser.add_argument('-s', '--silence-warnings', action='store_true',
        help='Silence warnings🔇 (default is false).', default=False)
    return parser


def _resolve_start_date(args: argparse.Namespace) -> datetime:
    '''
    Calculates the start of the search window from the parsed arguments.

    Parameters
    ----------
    args : argparse.Namespace
        The parsed command line arguments. Reads `start_date`, `days_ago`,
        `weeks_ago` and `minutes_ago`.

    Returns
    -------
    datetime
        The explicit start date (or now, if none was given) shifted back by
        the requested days, weeks and minutes. If neither a start date nor a
        shift was given, the beginning of time (`datetime.min`).
    '''
    if not (args.start_date or args.days_ago or args.weeks_ago or args.minutes_ago):
        # Nothing was specified, so search from the beginning of time.
        return datetime.min

    # If a start date is specified, use that. Otherwise shift the current date.
    base = parse_date(args.start_date) if args.start_date else datetime.now()
    return base - timedelta(days=args.days_ago,
                            weeks=args.weeks_ago,
                            minutes=args.minutes_ago)


def _write_accusations(output_file, accusations_by_file: dict[str, list[tuple[str, datetime, str]]],
                       args: argparse.Namespace, start_date: datetime, end_date: datetime):
    '''
    Writes every accusation that passes the user's filters to `output_file`.

    Parameters
    ----------
    output_file : file object
        The writable text stream to print the formatted accusations to.
    accusations_by_file : dict[str, list[tuple[str, datetime, str]]]
        The accusations to print, keyed by file path.
    args : argparse.Namespace
        The parsed command line arguments. Reads `author`, `keep_whitespace`,
        `format` and `verbose`.
    start_date : datetime
        The beginning of the date range (inclusive).
    end_date : datetime
        The end of the date range (inclusive).
    '''
    for file_path, file_lines in accusations_by_file.items():
        # Unpack each accusation
        for author, date, content in file_lines:
            # If the user specified an author, filter out accusations by
            # other authors. If the user did not specify an author, include
            # all accusations.
            if args.author and author.lower() not in args.author:
                if args.verbose >= 3:
                    # Print debug information if verbose level is 3 or higher
                    info(f'ignoring accusation by {author} in {file_path}')
                continue

            # Unless the user asked to keep whitespace, filter out
            # accusations that are only whitespace.
            if not args.keep_whitespace and content.strip() == '':
                if args.verbose >= 3:
                    # Print debug information if verbose level is 3 or higher
                    info(f'ignoring whitespace accusation by {author} in {file_path}')
                continue

            # Only accusations within the date range are printed
            if not (start_date <= date <= end_date):
                continue

            try:
                # Put the accusation into the format specified by the user
                formatted_line = args.format.format(
                    name=basename(file_path),
                    path=file_path,
                    author=author,
                    content=content,
                    date=f'{date.month:02d}/{date.day:02d}/{date.year}',
                    time=f'{date.hour:02d}:{date.minute:02d}')
            except (KeyError, IndexError, ValueError) as exc:
                # The format string is user input: report unknown fields or
                # bad format specs instead of crashing with a traceback.
                error(f'invalid format string {args.format!r} ({exc!r}).')

            # Print the formatted accusation to the output file
            output_file.write(formatted_line + '\n')


def _print_statistics(accusations: list[tuple[str, datetime, str]], start_date: datetime):
    '''
    Prints per-author statistics for a list of accusations as info messages.

    Parameters
    ----------
    accusations : list[tuple[str, datetime, str]]
        The accusations to summarise (already filtered by date range).
    start_date : datetime
        The beginning of the date range, shown in the summary line.
    '''
    authors_stats = analyze_authors(accusations)
    # Calculate the total number of non-blank characters
    total_non_blank = sum(stats['non-blank'] for stats in authors_stats.values())

    for author, stats in authors_stats.items():
        # Print the statistics for each author
        info(f'{author}:')
        info(f' {stats["chars"]} characters')
        info(f' {stats["lines"]} lines')
        info(f' {stats["non-blank-lines"]} non-whitespace lines')
        info(f' {stats["two-or-more-char-lines"]} two-or-more-char lines')
        info(f' {stats["non-blank"]} non-whitespace characters')
        # Percentage of non-blank characters written by the author. When
        # every line in range is blank the total is 0, so report 0% rather
        # than dividing by zero.
        share = stats["non-blank"] / float(total_non_blank) * 100 if total_non_blank else 0.0
        info(f' Composes {share:2.0f}% of changes since {start_date.month:02d}/{start_date.day:02d}/{start_date.year}')


def analyze_author(author: str, accusations: list[tuple[str, datetime, str]]) -> dict[str, int]:
    '''
    Analyzes a list of accusations and returns a dictionary of statistics
    for a single author.

    Parameters
    ----------
    author : str
        The author to filter the accusations by (case-insensitive).
    accusations : list[tuple[str, datetime, str]]
        The list of accusations to analyze. Each accusation is a tuple of the
        author, date, and the line content commited.

    Returns
    -------
    dict[str, int]
        A dictionary of statistics for the author. Every statistic is 0 if
        the author has no accusations.
    '''
    # Convert the author name to lowercase
    author = author.lower()

    # Keep the whole accusation tuples (not just the author names), because
    # `analyze_authors` unpacks each entry as (author, date, content).
    accusations_by_author = [accusation for accusation in accusations
                             if accusation[0].lower() == author]

    # Analyze the accusations
    stats = analyze_authors(accusations_by_author)

    if author in stats:
        return stats[author]

    # If the author has no accusations, return 0 for all statistics
    return {'lines': 0,
            'chars': 0,
            'non-blank': 0,
            'two-or-more-char-lines': 0,
            'non-blank-lines': 0,
            'avg-line-len': 0}


def analyze_authors(accusations: list[tuple[str, datetime, str]]) -> dict[str, dict[str, int]]:
    '''
    Analyzes a list of accusations and returns a dictionary of users to
    dictionaries of statistics.

    Parameters
    ----------
    accusations : list[tuple[str, datetime, str]]
        The list of accusations to analyze. Each accusation is a tuple of the
        author, date, and the line content commited.

    Returns
    -------
    dict[str, dict[str, int]]
        A dictionary of statistics. The key of the outer dictionary is the
        lowercased author. The value of the outer dictionary is a dictionary
        of statistics for the author (`avg-line-len` is a float).
    '''
    # The dictionary of author statistics
    result = {}

    # Calculate the statistics for each author
    for author, _, content in accusations:
        # Get (or create) the author's statistics
        stats = result.setdefault(author.lower(), {
            'lines': 0,
            'chars': 0,
            'two-or-more-char-lines': 0,
            'non-blank': 0,
            'non-blank-lines': 0,
            'avg-line-len': 0})

        stripped = content.strip()
        stats['lines'] += 1
        if stripped != '':
            # The line is not blank
            stats['non-blank-lines'] += 1
        if len(stripped) >= 2:
            # The line has at least two characters besides surrounding whitespace
            stats['two-or-more-char-lines'] += 1
        stats['chars'] += len(content)
        # Characters that are neither spaces nor tabs
        stats['non-blank'] += len(content.replace(' ', '').replace('\t', ''))

    # Calculate the average line length for each author
    for stats in result.values():
        stats['avg-line-len'] = stats['chars'] / stats['lines']

    # Return the author statistics
    return result


def parse_date(date: str) -> datetime:
    '''
    Parses a date string into a datetime object. The date can be formatted
    either in ISO 8601 format or American date format `m/d/y`.

    Parameters
    ----------
    date : str
        The date string to parse.

    Returns
    -------
    datetime
        The parsed date as a naive datetime. If the input carries a UTC
        offset it is converted to local time first, so the result can be
        compared with the naive local datetimes used for commit times.
        Exits the program with an error if the date cannot be parsed.
    '''
    try:
        # Try to parse the date as an ISO 8601 date
        parsed = datetime.fromisoformat(date)
    except ValueError:
        # Try to parse the date as an American date `m/d/y`
        try:
            parsed = datetime.strptime(date, '%m/%d/%Y')
        except ValueError:
            # If the date cannot be parsed, exit with an error
            error('date must be in ISO 8601 format or American date format `m/d/y`.')

    if parsed.tzinfo is not None:
        # Comparing aware and naive datetimes raises TypeError, so
        # normalise aware input to naive local time.
        parsed = parsed.astimezone().replace(tzinfo=None)
    return parsed


def get_files(directory = "./", include = ('**/*',), exclude = ()) -> list[str]:
    '''
    Gets a list of file paths from a list of file patterns, excluding files
    matching other file patterns.

    Parameters
    ----------
    directory : str, optional
        The directory to search (default is the current directory)
    include : list[str], optional
        The file patterns to include (default is "**/*")
    exclude : list[str], optional
        The file patterns to exclude (default is none)

    Returns
    -------
    list[str]
        The sorted absolute paths of the matching regular files.
    '''
    directory = abspath(directory)
    matches = set()

    # Add files matching file patterns
    for file_pattern in include:
        matches.update(glob.glob(directory + "/" + file_pattern, recursive=True))
        # Add files in subdirectories
        matches.update(glob.glob(directory + "/" + file_pattern + "/**/*", recursive=True))

    # Remove files matching exclude patterns
    for file_pattern in exclude:
        matches.difference_update(glob.glob(directory + "/" + file_pattern, recursive=True))
        # Remove files in subdirectories
        matches.difference_update(glob.glob(directory + "/" + file_pattern + "/**/*", recursive=True))

    # Glob patterns also match directories, which cannot be blamed, so keep
    # only regular files. Sort so that the output order is reproducible.
    return sorted(path for path in matches if isfile(path))


def extract_accusations_from_line_porcelain_output(line_porcelain_output: str) -> list[tuple[str, datetime, str]]:
    '''
    Extracts the author, date, and the line content commited from
    each line of the `git blame --line-porcelain` output using regex.

    The format of the `git blame --line-porcelain` output can be found here:
    https://git-scm.com/docs/git-blame#_the_porcelain_format

    Parameters
    ----------
    line_porcelain_output : str
        The output of `git blame --line-porcelain`.

    Returns
    -------
    list[tuple[str, datetime, str]]
        A list of tuples containing the lowercased author, the author date
        (as a naive local datetime), and the commited content for each line.
        Sections without an author or author time are skipped.
    '''
    # The list of accusations to return
    accusations = []

    # Split the `git blame` output by each starting line of a section.
    #
    # The first line for each section is composed of:
    # 1. The hash of the commit the line is attributed to.
    # 2. The line number of the line in the original file.
    # 3. The line number of the line in the final file.
    sections = _BLAME_HEADER_RE.split(line_porcelain_output)

    # Skip the first piece: it is whatever precedes the first header (empty
    # for well-formed output).
    for section in sections[1:]:
        author_match = _AUTHOR_RE.search(section)
        time_match = _AUTHOR_TIME_RE.search(section)
        section_lines = section.split('\n')
        if author_match is None or time_match is None or len(section_lines) < 2:
            # Truncated or malformed section: nothing reliable to extract
            continue

        author = author_match.group(1)
        date = datetime.fromtimestamp(int(time_match.group(1)))
        # The content is the last line of the section (followed only by the
        # empty string after the final newline), prefixed with a TAB.
        content = section_lines[-2].removeprefix('\t')

        # Add the accusation to the list
        accusations.append((author.lower(), date, content))

    # Return the list of accusations
    return accusations


def _decode_blame_output(raw_output: bytes, file: str, verbose: int, warnings_disabled: bool) -> str:
    '''
    Decodes the raw bytes printed by `git blame` into text.

    Parameters
    ----------
    raw_output : bytes
        The bytes captured from the standard output of `git blame`.
    file : str
        The blamed file (only used in messages).
    verbose : int
        The verbosity level.
    warnings_disabled : bool
        Whether warnings are silenced.

    Returns
    -------
    str
        The decoded output with `\\r\\n` and `\\r` normalised to `\\n`.
    '''
    try:
        # First attempt UTF-8 to retain unicode characters
        text = raw_output.decode('utf-8')
        if verbose >= 3:
            info("Valid UTF-8 file.")
    except UnicodeDecodeError:
        # If the file is not UTF-8, use latin1 as a fallback (it can decode
        # any byte sequence).
        if not warnings_disabled:
            warn(f'File {file} is not UTF-8. Falling back to latin1 encoding. (You might not want to include this file in your search.)')
        text = raw_output.decode('latin1')

    # Same newline normalisation that subprocess applies in text mode
    return text.replace('\r\n', '\n').replace('\r', '\n')


def accuse_files(directory: str, files: list[str], verbose: int=0, warnings_disabled=False) -> dict[str, list[tuple[str, datetime, str]]]:
    '''
    Accuses the author of every line of each of the given files.

    Parameters
    ----------
    directory : str
        The directory to run `git blame` in.
    files : list[str]
        The files to search through for commits.
    verbose : int, optional
        The verbosity level (default is 0).
    warnings_disabled : bool, optional
        Disable warnings (default is false).

    Returns
    -------
    dict[str, list[tuple[str, datetime, str]]]
        A dictionary of accusations. The key is the file path, and the value
        is a list of accusations. Each accusation is a tuple of the author,
        date, and content. Files that `git blame` cannot process map to an
        empty list.
    '''
    directory = abspath(directory)

    # Run git blame on each file
    all_accusations = {}
    for file in files:
        if verbose >= 2:
            # Print file information if verbose level is 2 or higher
            info(f'Checking {file}...')

        try:
            # Capture raw bytes and decode afterwards, so a non-UTF-8 file
            # does not require running git a second time.
            completed = subprocess.run(
                ['git', 'blame', '--line-porcelain', '--', file],
                capture_output=True, cwd=directory)
        except OSError as exc:
            # git is not installed, or the directory does not exist
            error(f'could not run `git blame` in {directory}: {exc}.')

        if completed.returncode != 0 and verbose >= 2:
            # Typically an untracked file or a directory outside a repository
            info(f'git blame failed for {file} (exit code {completed.returncode}).')

        line_porcelain_output = _decode_blame_output(
            completed.stdout, file, verbose, warnings_disabled)

        if verbose >= 3:
            # Print debug information if verbose level is 3 or higher
            info("Extracting accusations from line porcelain output...")

        # Extract author, date, and content from each line using regex.
        # Then, add the file accusations to the list of all accusations
        all_accusations[file] = extract_accusations_from_line_porcelain_output(line_porcelain_output)

    # Return the list of all accusations
    return all_accusations


def info(*messages: str):
    '''
    Prints an info message to stderr.

    Parameters
    ----------
    messages : str
        The info messages to print.
    '''
    print(f'{PROGRAM_NAME}: info:', *messages, file=stderr, flush=True)


def warn(*messages: str):
    '''
    Prints a warning message to stderr, wrapped in ANSI escape codes that
    colour it yellow.

    Parameters
    ----------
    messages : str
        The warning messages to print.
    '''
    print(f'{PROGRAM_NAME}: warning:\x1b[33m', *messages, '\x1b[0m', file=stderr, flush=True)


def error(*messages: str):
    '''
    Prints an error message to stderr and exits with status 1.

    Parameters
    ----------
    messages : str
        The error messages to print.

    Raises
    ------
    SystemExit
        Always, with exit code 1.
    '''
    print(f'{PROGRAM_NAME}: error:', *messages, 'see `--help` for more information.', file=stderr)
    # Raised directly because the `exit` builtin is only injected by the
    # `site` module and is not guaranteed to exist.
    raise SystemExit(1)


##################################################
# Main execution
##################################################

if __name__ == '__main__':
    # Run the main function if this file is run as a script
    main()