Skip to content

Instantly share code, notes, and snippets.

@ShaneTsui
Last active October 20, 2023 09:09
Show Gist options
  • Select an option

  • Save ShaneTsui/59dfde9a1014c7c1702e3a71c5c281ec to your computer and use it in GitHub Desktop.

Select an option

Save ShaneTsui/59dfde9a1014c7c1702e3a71c5c281ec to your computer and use it in GitHub Desktop.
Personal git repo contribution statistics based on git log
import logging
import math
import os
import re
import shlex
import subprocess
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
# Configure the root logger when the module is loaded so that INFO-level
# (and more severe) messages are emitted.
logging.basicConfig(level=logging.INFO)
def execute_command(cmd):
    """Run a shell command and return its standard output, stripped.

    The command goes through the shell because callers pass pipelines
    (for example ``git log ... | awk ...``). Standard error is captured
    separately and only logged, so diagnostics such as
    ``fatal: not a git repository`` never end up in the text that callers
    go on to parse as statistics.

    Args:
        cmd: Shell command line to execute.

    Returns:
        The command's stdout with leading/trailing whitespace removed, or an
        empty string if the command could not be run at all.
    """
    try:
        completed = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
            errors="replace",  # undecodable bytes must not abort the whole run
        )
    except (OSError, subprocess.SubprocessError) as e:
        logging.error(f"Failed to execute command: {cmd}, Error: {e}")
        return ''

    # In a pipeline the exit status belongs to the last stage only, so the
    # presence of stderr output is the more reliable sign of trouble.
    stderr_text = completed.stderr.strip()
    if stderr_text:
        logging.warning("Command %r wrote to stderr: %s", cmd, stderr_text)
    return completed.stdout.strip()
def process_time_args(since, until):
    """Build the ``--since``/``--until`` fragment of a ``git log`` command line.

    Each value is escaped with ``shlex.quote`` so that dates containing
    spaces or quote characters (e.g. ``2 weeks ago``) reach git as a single
    argument instead of breaking the shell command.

    Args:
        since: Lower date bound, or a falsy value to omit ``--since``.
        until: Upper date bound, or a falsy value to omit ``--until``.

    Returns:
        A string that is either empty or starts with a space, ready to be
        appended to a shell command.
    """
    options = (("--since", since), ("--until", until))
    return "".join(
        f" {flag}={shlex.quote(str(value))}"
        for flag, value in options
        if value
    )
def analyze_repo(repo, author, line_stats, daily_commits, since=None, until=None):
    """Collect line totals and per-day commit counts for one author in a repo.

    Runs ``git log`` inside ``repo`` twice: once with ``--numstat`` piped
    through awk to sum added/removed/net lines, and once to list the commit
    dates. Both queries honour the ``since``/``until`` bounds.

    Args:
        repo: Path of the local git repository; also used as the key in
            ``line_stats``.
        author: Value passed to ``git log --author``.
        line_stats: Mapping updated in place with
            ``{'added', 'removed', 'total'}`` for ``repo`` when stats exist.
        daily_commits: Counter-like mapping (e.g. ``defaultdict(int)``) whose
            ``YYYY-MM-DD`` keys are incremented once per commit.
        since: Optional lower date bound for the commits considered.
        until: Optional upper date bound for the commits considered.
    """
    time_args = process_time_args(since, until)
    # Quote the author so names containing spaces survive the shell.
    log_base = f"git log --author={shlex.quote(str(author))}{time_args}"
    cmd_line_stats = (
        f"{log_base} --pretty=tformat: --numstat | "
        "awk '{ add += $1; subs += $2; loc += $1 - $2 } END { printf \"%s,%s,%s\", add, subs, loc }'"
    )
    cmd_daily_commits = f"{log_base} --pretty='%ad' --date=format:'%Y-%m-%d'"

    # Remember the starting directory: chdir('..') would land in the wrong
    # place for nested, absolute or symlinked repo paths.
    original_cwd = os.getcwd()
    os.chdir(repo)
    try:
        fields = execute_command(cmd_line_stats).split(',')
        if len(fields) == 3 and '' not in fields:
            try:
                added, removed, total = (int(field) for field in fields)
            except ValueError:
                logging.warning("Unexpected line-stat output for repo %s: %r", repo, fields)
            else:
                line_stats[repo] = {'added': added, 'removed': removed, 'total': total}
        else:
            print(f"No commits found in the time range for repo {repo}")

        for raw_date in execute_command(cmd_daily_commits).splitlines():
            date = raw_date.strip()
            # Empty output must not be counted as a commit on date ''.
            if date:
                daily_commits[date] += 1
    finally:
        os.chdir(original_cwd)
def add_text_labels(ax, values, bar_positions, bar_width, vertical_offset=0.1):
    """Write each value as a text label just above its bar.

    The label height is computed in log10 space: the value's exponent is
    raised by ``vertical_offset`` decades. Non-positive values, which have
    no logarithm, are treated as exponent 0.

    Args:
        ax: Object exposing ``text(x, y, s, ha=...)`` (a matplotlib Axes).
        values: Bar heights, one per bar.
        bar_positions: X position of each bar, indexable like ``values``.
        bar_width: Width of the bars.
        vertical_offset: Gap above the bar top, in decades.
    """
    for idx, amount in enumerate(values):
        label_x = bar_positions[idx] + bar_width / 2 - 0.15
        exponent = math.log10(amount) if amount > 0 else 0
        label_y = 10 ** (exponent + vertical_offset)
        ax.text(label_x, label_y, str(amount), ha='center')
def plot_line_stats(author, stats):
    """Show a grouped, log-scaled bar chart of line counts per repository.

    For every repository in ``stats`` three bars are drawn side by side
    (added, removed, total), each annotated with its value. The sums across
    all repositories are written to the right of the axes.

    Args:
        author: Name shown in the chart title.
        stats: Mapping of repository name to a dict with the keys
            ``'added'``, ``'removed'`` and ``'total'``.
    """
    repo_names = list(stats.keys())
    added_counts = [stats[name]['added'] for name in repo_names]
    removed_counts = [stats[name]['removed'] for name in repo_names]
    total_counts = [stats[name]['total'] for name in repo_names]

    # Bar positions: each series is shifted one bar width to the right.
    bar_width = 0.3
    added_x = np.arange(len(repo_names))
    removed_x = [x + bar_width for x in added_x]
    total_x = [x + bar_width for x in removed_x]
    series = (
        (added_x, added_counts, 'g', 'added'),
        (removed_x, removed_counts, 'r', 'removed'),
        (total_x, total_counts, 'b', 'total'),
    )

    _, ax = plt.subplots(figsize=(15, 7))
    plt.subplots_adjust(right=0.8)

    for positions, counts, colour, label in series:
        ax.bar(positions, counts, color=colour, width=bar_width, label=label, log=True)
    for positions, counts, _colour, _label in series:
        add_text_labels(ax, counts, positions, bar_width)

    plt.xlabel('Repositories', fontsize=15)
    plt.ylabel('Line counts (Log scale)', fontsize=15)
    plt.title(f'Line Stats by {author}', fontsize=15)
    plt.xticks(
        [i + bar_width for i in range(len(repo_names))],
        repo_names,
        rotation=45,
        ha="right",
        rotation_mode="anchor",
    )

    # Overall sums, placed outside the axes on the right-hand side.
    sum_added = sum(added_counts)
    sum_removed = sum(removed_counts)
    sum_total = sum(total_counts)
    legend_text = f"Total Added: {sum_added}\nTotal of Removed: {sum_removed}\nTotal of Total: {sum_total}"
    ax.text(1.05, 1, legend_text, transform=ax.transAxes, fontsize=12,
            verticalalignment='top', horizontalalignment='left')

    plt.legend()
    plt.show()
def plot_daily_commits(author, daily_commit_count):
    """Show a bar chart of the number of commits per day.

    Args:
        author: Name shown in the chart title.
        daily_commit_count: Mapping of date string to commit count. Keys are
            sorted before plotting.

    Returns:
        None. When the mapping is empty nothing is plotted; a warning is
        logged instead (unpacking ``zip(*[])`` would raise ``ValueError``).
    """
    if not daily_commit_count:
        logging.warning("No daily commit data to plot for %s", author)
        return

    dates, commit_counts = zip(*sorted(daily_commit_count.items()))  # Sort based on dates
    total_commits = sum(commit_counts)

    _, ax = plt.subplots()
    plt.subplots_adjust(right=0.8)  # Make space for the external legend
    ax.bar(dates, commit_counts, color='b')
    ax.set(xlabel='Dates', ylabel='Commit Counts',
           title=f'Daily Commit Counts by {author}')
    ax.grid()

    # Total commit count, placed outside the axes in the top right corner
    legend_text = f"Total Commits: {total_commits}"
    ax.text(1.05, 1, legend_text, transform=ax.transAxes, fontsize=12,
            verticalalignment='top', horizontalalignment='left')
    plt.xticks(rotation=45, ha="right", rotation_mode="anchor")  # Rotate labels for better visibility
    plt.show()
def get_daily_line_stats_for_repo(repo, author, daily_line_stats, since=None, until=None):
    """Accumulate per-day added/removed/net line counts for one author in a repo.

    Runs ``git log --numstat`` inside ``repo`` with a ``YYYY-MM-DD`` date line
    per commit, then attributes every following numstat line to that date.

    Args:
        repo: Path of the local git repository.
        author: Value passed to ``git log --author``.
        daily_line_stats: Mapping updated in place; indexing it with a date
            must yield a dict with ``'added'``, ``'removed'`` and ``'total'``
            counters (e.g. a ``defaultdict``).
        since: Optional lower date bound for the commits considered.
        until: Optional upper date bound for the commits considered.
    """
    time_args = process_time_args(since, until)
    # Quote the author so names containing spaces survive the shell.
    cmd = (
        f"git log --author={shlex.quote(str(author))}{time_args} "
        "--pretty='%ad' --date=format:'%Y-%m-%d' --numstat"
    )

    # Restore the exact starting directory; chdir('..') is wrong for nested,
    # absolute or symlinked repo paths and would be skipped on an exception.
    original_cwd = os.getcwd()
    os.chdir(repo)
    try:
        output = execute_command(cmd)
    finally:
        os.chdir(original_cwd)

    date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}")
    current_date = ""
    for line in output.splitlines():
        stripped = line.strip()
        if date_pattern.fullmatch(stripped):
            current_date = stripped
            continue

        fields = line.split('\t')
        if len(fields) != 3 or not current_date:
            continue
        added, removed = fields[0].strip(), fields[1].strip()
        # Binary files are reported as "-\t-\t<path>" and carry no line counts.
        if not (added.isdigit() and removed.isdigit()):
            continue

        day = daily_line_stats[current_date]
        day['added'] += int(added)
        day['removed'] += int(removed)
        day['total'] += int(added) - int(removed)
def plot_line_stats_per_day(author, daily_line_stats):
    """Show a log-scaled stack plot of added/removed/total lines per day.

    Args:
        author: Name shown in the chart title.
        daily_line_stats: Mapping of date string to a dict with the keys
            ``'added'``, ``'removed'`` and ``'total'``. Dates are plotted in
            sorted order.
    """
    ordered_days = sorted(daily_line_stats.keys())
    per_metric = {
        metric: [daily_line_stats[day][metric] for day in ordered_days]
        for metric in ('added', 'removed', 'total')
    }

    plt.figure(figsize=(15, 7))
    plt.stackplot(
        ordered_days,
        per_metric['added'],
        per_metric['removed'],
        per_metric['total'],
        labels=['Added', 'Removed', 'Total'],
    )
    plt.xlabel('Dates')
    plt.ylabel('Line counts (Log scale)')
    plt.yscale('log')  # Logarithmic y-axis
    plt.title(f'Line Stats Per Day Across All Repositories by {author}')
    plt.legend()
    # Slanted tick labels keep the dates readable
    plt.xticks(rotation=45, ha="right", rotation_mode="anchor")
    plt.show()
def main():
    """Gather statistics for the configured repositories and plot them.

    Edit the configuration at the top of this function before running. Each
    entry in ``repos`` that is an existing directory is analysed; entries
    that are not are skipped with a warning. A plot is only drawn when data
    was collected for it.
    """
    # [IMPORTANT] Change the config below before executing the script
    repos = ['hello-world']  # Modify this line to specify the local repo path you want to analyze
    author = "ShaneTsui"  # Modify this line to specify a different author
    since_date = '2023-09-01'  # Optional: specify the start date
    until_date = '2023-12-31'  # Optional: specify the end date

    line_stats = {}
    daily_commits = defaultdict(int)
    daily_line_stats = defaultdict(lambda: {'added': 0, 'removed': 0, 'total': 0})

    for repo in repos:
        if not os.path.isdir(repo):
            # Surface mistyped paths instead of silently ignoring them.
            logging.warning("Skipping %r: not a directory", repo)
            continue
        analyze_repo(repo, author, line_stats, daily_commits, since=since_date, until=until_date)
        get_daily_line_stats_for_repo(repo, author, daily_line_stats, since=since_date, until=until_date)

    # Plotting: skip charts that would have no data to show.
    if line_stats:
        plot_line_stats(author, line_stats)
    else:
        logging.warning("No line statistics collected; skipping line stats plot")
    if daily_commits:
        plot_daily_commits(author, daily_commits)
    else:
        logging.warning("No commits collected; skipping daily commits plot")
    if daily_line_stats:
        plot_line_stats_per_day(author, daily_line_stats)
    else:
        logging.warning("No daily line statistics collected; skipping per-day plot")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment