|
#!/usr/bin/env python3 |
|
|
|
# /// script |
|
# dependencies = [ |
|
# "tqdm", |
|
# ] |
|
# /// |
|
|
|
# Run as follows: |
|
# uvx typos --format json --no-check-filenames | uv run ./blame_counter.py |
|
# or |
|
# uvx typos --format json --no-check-filenames | uv run https://gist.githubusercontent.com/jvacek/888f840c8acd703429e21d8dec70f531/raw/blame_counter.py |
|
# |
|
# Command-line options: |
|
# --skip-authors Skip the authors leaderboard |
|
# --skip-words Skip the misspelled words leaderboard |
|
# --max-words N Maximum number of words to display (0 = all) |
|
|
|
import argparse |
|
import json |
|
import re |
|
import subprocess |
|
import sys |
|
from collections import Counter |
|
|
|
from tqdm import tqdm |
|
|
|
|
|
def get_author_email_for_line(filepath, line_number):
    """
    Look up the author email of a single line via `git blame`.

    Runs `git blame --line-porcelain -L<n>,<n> -- <filepath>` in the current
    working directory and extracts the address from the `author-mail` header
    of the output.

    Args:
        filepath (str): The path to the file, as git should resolve it from
            the current working directory.
        line_number (str | int): The line number to blame; it is
            interpolated into the `-L` range as both start and end.

    Returns:
        str | None: The author's email without the surrounding angle
        brackets, or None if `git blame` failed or its output contained no
        `author-mail` header. A warning is printed to stderr whenever None
        is returned.

    Raises:
        SystemExit: With status 1 if the `git` executable cannot be found.
    """
    # The --line-porcelain format is stable and easy to parse
    command = [
        "git",
        "blame",
        "--line-porcelain",
        f"-L{line_number},{line_number}",
        # "--" ends option parsing, so a path that starts with "-" is
        # always treated as a file and never as a git option.
        "--",
        filepath,
    ]

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            encoding="utf-8",
            # The porcelain output embeds the blamed source line verbatim,
            # and that line is not guaranteed to be valid UTF-8. Replace
            # undecodable bytes instead of failing: only the ASCII header
            # lines matter here.
            errors="replace",
        )
    except FileNotFoundError:
        # This error occurs if the 'git' command is not found
        print(
            "Error: 'git' command not found. Is Git installed and in your PATH?",
            file=sys.stderr,
        )
        # Exit the script if git isn't available
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        # This error occurs if git blame fails (e.g., file not in git, line number invalid)
        print(
            f"Warning: `git blame` failed for {filepath}:{line_number}. Error: {(e.stderr or '').strip()}",
            file=sys.stderr,
        )
        return None
    except Exception as e:
        # Best effort: one unreadable entry must not abort the whole run.
        print(
            f"An unexpected error occurred while processing {filepath}:{line_number}: {e}",
            file=sys.stderr,
        )
        return None

    # Find the line starting with 'author-mail' and extract the email
    for line in result.stdout.splitlines():
        if line.startswith("author-mail "):
            # The email is enclosed in <...>
            email_match = re.search(r"<(.*)>", line)
            if email_match:
                return email_match.group(1).strip()

    print(
        f"Warning: Could not determine author for {filepath}:{line_number}",
        file=sys.stderr,
    )
    return None
|
|
|
|
|
def main():
    """
    Read spellchecker output from stdin and print mistake leaderboards.

    Every non-empty line on stdin is parsed as JSON. Lines that are not valid
    JSON, or that are not a JSON object containing the keys "path",
    "line_num" and "typo", are skipped with a warning on stderr. For each
    accepted record the misspelled word is counted and, unless
    --skip-authors is given, the line is attributed to an author email via
    `get_author_email_for_line`. The leaderboards are printed to stdout.

    Command-line arguments:
        --skip-authors: Skip displaying the authors leaderboard (this also
            skips the `git blame` lookups)
        --skip-words: Skip displaying the misspelled words leaderboard
        --max-words <int>: Maximum number of words to display
            (default 10; 0 or a negative value = all)
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description="Process spelling mistakes and attribute them to authors."
    )
    parser.add_argument(
        "--skip-authors", action="store_true", help="Skip the authors leaderboard"
    )
    parser.add_argument(
        "--skip-words",
        action="store_true",
        help="Skip the misspelled words leaderboard",
    )
    parser.add_argument(
        "--max-words",
        type=int,
        default=10,
        help="Maximum number of words to display (0 = all)",
    )
    args = parser.parse_args()

    author_counts = Counter()
    word_counts = Counter()
    required_keys = ("path", "line_num", "typo")

    print("Processing spellchecker output from stdin...", file=sys.stderr)
    print("This may take a moment depending on the number of errors.", file=sys.stderr)

    # Read each line from the standard input
    for line in tqdm(sys.stdin):
        record = line.strip()
        if not record:  # Skip empty lines
            continue

        # Keep the try body minimal: only json.loads can raise JSONDecodeError.
        try:
            data = json.loads(record)
        except json.JSONDecodeError:
            print(
                f"Warning: Skipping malformed JSON line: {record}",
                file=sys.stderr,
            )
            continue

        # Valid JSON is not necessarily an object: `null`, numbers, strings
        # and arrays all parse successfully. Reject those before the key
        # lookup, which would otherwise raise TypeError or silently do
        # substring/element matching.
        if not isinstance(data, dict) or any(
            key not in data for key in required_keys
        ):
            print(
                f"Warning: Skipping JSON object with missing keys: {record}",
                file=sys.stderr,
            )
            continue

        filepath = data["path"]
        # The git blame function expects a string for the line number
        line_number = str(data["line_num"])
        misspelled_word = data["typo"]

        # Track the count of each misspelled word
        word_counts[misspelled_word] += 1

        if not args.skip_authors:
            author_email = get_author_email_for_line(filepath, line_number)
            if author_email:
                author_counts[author_email] += 1

    # --- Print the final results ---

    # Print author leaderboard if not skipped
    if not args.skip_authors:
        if not author_counts:
            print("\nNo mistakes were successfully attributed to an author.")
        else:
            print("\n--- Spelling Mistake Leaderboard (by Email) ---")
            for email, count in author_counts.most_common():
                plural = "s" if count > 1 else ""
                print(f"{email}: {count} mistake{plural}")

    # Print word leaderboard if not skipped
    if not args.skip_words:
        if not word_counts:
            print("\nNo misspelled words were found.")
        else:
            print("\n--- Most Common Misspellings ---")
            # most_common(None) returns every entry; a positive limit
            # returns only the top N, in the same order.
            limit = args.max_words if args.max_words > 0 else None
            for word, count in word_counts.most_common(limit):
                plural = "s" if count > 1 else ""
                print(f"'{word}': {count} time{plural}")
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()