Last active
October 20, 2023 09:09
-
-
Save ShaneTsui/59dfde9a1014c7c1702e3a71c5c281ec to your computer and use it in GitHub Desktop.
Personal git repo contribution statistics based on git log
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import re | |
| import math | |
| import subprocess | |
| import matplotlib.pyplot as plt | |
| from collections import defaultdict | |
| import numpy as np | |
| import logging | |
| logging.basicConfig(level=logging.INFO) | |
def execute_command(cmd):
    """Run a shell command and return its standard output, stripped.

    Args:
        cmd: Command line handed to the shell as a single string.

    Returns:
        The command's stdout with surrounding whitespace removed. An empty
        string is returned when the command could not be started or its
        output could not be decoded.
    """
    try:
        # Capture stderr separately: callers parse the returned text as data,
        # so diagnostics (e.g. git's "fatal: ..." lines) must not be mixed in.
        completed = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, check=False
        )
    except (OSError, ValueError) as e:
        logging.error(f"Failed to execute command: {cmd}, Error: {e}")
        return ''

    if completed.returncode != 0:
        # Still return whatever reached stdout (tools such as diff exit
        # non-zero with useful output), but make the failure visible.
        logging.error(
            "Command exited with status %d: %s\n%s",
            completed.returncode,
            cmd,
            completed.stderr.strip(),
        )
    return completed.stdout.strip()
def process_time_args(since, until):
    """Build the ``--since``/``--until`` fragment appended to a git command.

    Each truthy bound contributes one option, preceded by a space and with
    its value wrapped in single quotes. Falsy bounds are left out, so the
    result is an empty string when neither is given.
    """
    fragments = []
    if since:
        fragments.append(f" --since='{since}'")
    if until:
        fragments.append(f" --until='{until}'")
    return "".join(fragments)
def analyze_repo(repo, author, line_stats, daily_commits, since=None, until=None):
    """Collect line totals and per-day commit counts for one repository.

    Args:
        repo: Path of the local git working tree. Also used as the key
            written into ``line_stats``.
        author: Value passed to git's ``--author`` filter.
        line_stats: Mapping updated in place. When git reports at least one
            changed file, ``line_stats[repo]`` is set to a dict with the keys
            ``'added'``, ``'removed'`` and ``'total'`` (added minus removed).
        daily_commits: Mapping of ``'YYYY-MM-DD'`` to commit count, updated
            in place.
        since: Optional lower bound forwarded as ``--since``.
        until: Optional upper bound forwarded as ``--until``.

    The process working directory is never changed, and nothing is recorded
    when git cannot be run or exits with an error.
    """
    # An argument list plus ``cwd`` replaces the shell string plus
    # os.chdir(repo)/os.chdir('..'): values need no shell quoting, and nested
    # or absolute repo paths no longer leave the process in the wrong place.
    git_log = ["git", "log", f"--author={author}"]
    if since:
        git_log.append(f"--since={since}")
    if until:
        git_log.append(f"--until={until}")

    def run_git_log(*extra_args):
        """Return the stdout lines of ``git log`` in *repo*, or None on failure."""
        try:
            completed = subprocess.run(
                [*git_log, *extra_args],
                cwd=repo,
                capture_output=True,
                text=True,
                errors="replace",
                check=False,
            )
        except OSError as e:
            logging.error("Could not run git in %s: %s", repo, e)
            return None
        if completed.returncode != 0:
            logging.error("git log failed in %s: %s", repo, completed.stderr.strip())
            return None
        return completed.stdout.splitlines()

    numstat_lines = run_git_log("--pretty=tformat:", "--numstat")
    if numstat_lines is None:
        return

    added = removed = 0
    saw_change = False
    for line in numstat_lines:
        fields = line.split("\t", 2)
        if len(fields) != 3:
            continue  # blank separator lines between commits
        saw_change = True
        # Binary files are reported as "-" in both columns; count them as 0.
        if fields[0].isdigit():
            added += int(fields[0])
        if fields[1].isdigit():
            removed += int(fields[1])

    if saw_change:
        line_stats[repo] = {'added': added, 'removed': removed, 'total': added - removed}
    else:
        print(f"No commits found in the time range for repo {repo}")

    # The same since/until window applies here so both statistics describe
    # the same set of commits.
    date_lines = run_git_log("--pretty=%ad", "--date=format:%Y-%m-%d")
    for date in date_lines or ():
        if date:  # empty output must not create a phantom '' date
            daily_commits[date] = daily_commits.get(date, 0) + 1
def add_text_labels(ax, values, bar_positions, bar_width, vertical_offset=0.1):
    """Write each value as a text label positioned relative to its bar.

    The label's x coordinate is the bar position shifted by half the bar
    width minus 0.15. Its y coordinate is ``10 ** (log10(value) +
    vertical_offset)``, where values that are not positive use 0 in place
    of the logarithm.
    """
    half_width = bar_width / 2
    for slot, count in enumerate(values):
        label_x = bar_positions[slot] + half_width - 0.15
        # log10 is undefined for zero and negative counts, so fall back to 0.
        exponent = math.log10(count) if count > 0 else 0
        label_y = 10 ** (exponent + vertical_offset)
        ax.text(label_x, label_y, str(count), ha='center')
def plot_line_stats(author, stats):
    """Show grouped, log-scaled bars of added/removed/total lines per repo.

    ``stats`` maps a repository name to a dict with the keys ``'added'``,
    ``'removed'`` and ``'total'``. The sums of each column over all
    repositories are written to the right of the axes.
    """
    repo_names = list(stats)
    bar_width = 0.3
    series = (('added', 'g'), ('removed', 'r'), ('total', 'b'))
    counts = {
        key: [stats[name][key] for name in repo_names]
        for key, _ in series
    }
    sum_added = sum(counts['added'])
    sum_removed = sum(counts['removed'])
    sum_total = sum(counts['total'])

    # One x-position sequence per series, each shifted one bar width to the
    # right of the previous one.
    positions = [np.arange(len(repo_names))]
    for _ in series[1:]:
        positions.append([x + bar_width for x in positions[-1]])

    _, ax = plt.subplots(figsize=(15, 7))
    plt.subplots_adjust(right=0.8)

    for (key, colour), xs in zip(series, positions):
        ax.bar(xs, counts[key], color=colour, width=bar_width, label=key, log=True)
    for (key, _), xs in zip(series, positions):
        add_text_labels(ax, counts[key], xs, bar_width)

    plt.xlabel('Repositories', fontsize=15)
    plt.ylabel('Line counts (Log scale)', fontsize=15)
    plt.title(f'Line Stats by {author}', fontsize=15)
    tick_positions = [slot + bar_width for slot in range(len(repo_names))]
    plt.xticks(tick_positions, repo_names, rotation=45, ha="right", rotation_mode="anchor")

    legend_text = f"Total Added: {sum_added}\nTotal of Removed: {sum_removed}\nTotal of Total: {sum_total}"
    ax.text(1.05, 1, legend_text, transform=ax.transAxes, fontsize=12,
            verticalalignment='top', horizontalalignment='left')
    plt.legend()
    plt.show()
def plot_daily_commits(author, daily_commit_count):
    """Show a bar chart of commit counts per day.

    Args:
        author: Name used in the chart title.
        daily_commit_count: Mapping of date string to number of commits.
            Bars are drawn in sorted key order.

    Returns:
        None. When the mapping is empty a warning is logged and no figure
        is created.
    """
    if not daily_commit_count:
        # zip(*[]) produces nothing, so the two-name unpacking below would
        # raise ValueError for an empty mapping.
        logging.warning("No commits to plot for %s", author)
        return

    dates, commit_counts = zip(*sorted(daily_commit_count.items()))  # Sort based on dates
    total_commits = sum(commit_counts)

    _, ax = plt.subplots()
    plt.subplots_adjust(right=0.8)  # Make space for the external legend
    ax.bar(dates, commit_counts, color='b')
    ax.set(xlabel='Dates', ylabel='Commit Counts',
           title=f'Daily Commit Counts by {author}')
    ax.grid()

    # Total commit count, written just outside the top-right corner of the axes
    legend_text = f"Total Commits: {total_commits}"
    ax.text(1.05, 1, legend_text, transform=ax.transAxes, fontsize=12,
            verticalalignment='top', horizontalalignment='left')
    plt.xticks(rotation=45, ha="right", rotation_mode="anchor")  # Rotate labels for better visibility
    plt.show()
def get_daily_line_stats_for_repo(repo, author, daily_line_stats, since=None, until=None):
    """Accumulate per-day added/removed/total line counts for one repository.

    Args:
        repo: Path of the local git working tree.
        author: Value passed to git's ``--author`` filter.
        daily_line_stats: Mapping of ``'YYYY-MM-DD'`` to a dict with the keys
            ``'added'``, ``'removed'`` and ``'total'``, updated in place.
            Missing dates are created with zeroed counters.
        since: Optional lower bound forwarded as ``--since``.
        until: Optional upper bound forwarded as ``--until``.

    The process working directory is never changed, and nothing is recorded
    when git cannot be run or exits with an error.
    """
    # An argument list plus ``cwd`` replaces the shell string plus
    # os.chdir(repo)/os.chdir(".."): values need no shell quoting, and nested
    # or absolute repo paths no longer leave the process in the wrong place.
    git_log = [
        "git", "log", f"--author={author}",
        "--pretty=%ad", "--date=format:%Y-%m-%d", "--numstat",
    ]
    if since:
        git_log.append(f"--since={since}")
    if until:
        git_log.append(f"--until={until}")

    try:
        completed = subprocess.run(
            git_log,
            cwd=repo,
            capture_output=True,
            text=True,
            errors="replace",
            check=False,
        )
    except OSError as e:
        logging.error("Could not run git in %s: %s", repo, e)
        return
    if completed.returncode != 0:
        # Git's error text must not be parsed as if it were log output.
        logging.error("git log failed in %s: %s", repo, completed.stderr.strip())
        return

    date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}")
    current_date = None
    for line in completed.stdout.splitlines():
        if date_pattern.fullmatch(line):
            current_date = line
            continue

        fields = line.split("\t", 2)
        if len(fields) != 3 or current_date is None:
            continue  # not a numstat row, or no commit date seen yet

        added, removed = fields[0].strip(), fields[1].strip()
        if not (added.isdigit() and removed.isdigit()):
            continue  # binary files are reported as "-" and carry no counts

        day = daily_line_stats.setdefault(
            current_date, {'added': 0, 'removed': 0, 'total': 0}
        )
        day['added'] += int(added)
        day['removed'] += int(removed)
        day['total'] += int(added) - int(removed)
def plot_line_stats_per_day(author, daily_line_stats):
    """Show a log-scaled stack plot of added/removed/total lines per day.

    ``daily_line_stats`` maps a date string to a dict with the keys
    ``'added'``, ``'removed'`` and ``'total'``. Days are plotted in sorted
    key order.
    """
    ordered_days = sorted(daily_line_stats)
    added, removed, total = (
        [daily_line_stats[day][key] for day in ordered_days]
        for key in ('added', 'removed', 'total')
    )

    plt.figure(figsize=(15, 7))
    plt.stackplot(ordered_days, added, removed, total,
                  labels=['Added', 'Removed', 'Total'])
    plt.xlabel('Dates')
    plt.ylabel('Line counts (Log scale)')
    plt.yscale('log')  # logarithmic y-axis
    plt.title(f'Line Stats Per Day Across All Repositories by {author}')
    plt.legend()
    # Slanted tick labels keep dense date axes readable
    plt.xticks(rotation=45, ha="right", rotation_mode="anchor")
    plt.show()
def main():
    """Gather statistics for the configured repositories and plot them.

    Configured paths that are not directories are reported and skipped. A
    chart is only drawn when the corresponding statistics are non-empty.
    """
    # [IMPORTANT] Change the config below before executing the script
    repos = ['hello-world']  # Modify this line to specify the local repo path you want to analyze
    author = "ShaneTsui"  # Modify this line to specify a different author
    since_date = '2023-09-01'  # Optional: specify the start date
    until_date = '2023-12-31'  # Optional: specify the end date

    line_stats = {}
    daily_commits = defaultdict(int)
    daily_line_stats = defaultdict(lambda: {'added': 0, 'removed': 0, 'total': 0})

    for repo in repos:
        if not os.path.isdir(repo):
            # Make a mistyped path visible instead of silently dropping it.
            logging.warning("Skipping %s: not a directory", repo)
            continue
        analyze_repo(repo, author, line_stats, daily_commits, since=since_date, until=until_date)
        get_daily_line_stats_for_repo(repo, author, daily_line_stats, since=since_date, until=until_date)

    # Plotting: skip charts with no data, since plot_daily_commits cannot
    # unpack an empty mapping and empty log-scale charts carry no information.
    charts = (
        ("line statistics", plot_line_stats, line_stats),
        ("daily commits", plot_daily_commits, daily_commits),
        ("daily line statistics", plot_line_stats_per_day, daily_line_stats),
    )
    for label, plot, data in charts:
        if data:
            plot(author, data)
        else:
            logging.warning("No %s collected for %s; skipping chart", label, author)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment