Created
December 19, 2025 05:16
-
-
Save innomatics/c6951ab275cfd2900cb666579b59b459 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# GitHub Repository Statistics Script (GraphQL Version)
#
# Gathers merged pull requests created in the current calendar year for each
# repository named on the command line and accumulates per-PR and
# per-contributor figures in JSON files inside a temporary directory.
#
# Usage: ./github_stats.sh owner/repo1 owner/repo2 ...
# Requires: GITHUB_TOKEN environment variable for authentication
#           curl and jq available on PATH

set -euo pipefail

# Colors for output. These are literal backslash sequences; they only turn
# into escape codes when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Check if GITHUB_TOKEN is set
if [ -z "${GITHUB_TOKEN:-}" ]; then
  echo -e "${RED}Error: GITHUB_TOKEN environment variable is not set${NC}"
  echo "Please set it with: export GITHUB_TOKEN='your_github_token'"
  exit 1
fi

# Check if repositories are provided
if [ $# -eq 0 ]; then
  echo -e "${RED}Error: No repositories provided${NC}"
  echo "Usage: $0 owner/repo1 owner/repo2 ..."
  exit 1
fi

# Fail early with a readable message when an external tool is missing,
# instead of a "command not found" abort somewhere mid-run.
for required_tool in curl jq; do
  if ! command -v "$required_tool" > /dev/null 2>&1; then
    echo -e "${RED}Error: required command '${required_tool}' not found in PATH${NC}"
    exit 1
  fi
done

# Get current year. The boundary below is expressed in UTC ("Z"), so the
# year is taken in UTC as well to keep the two consistent.
CURRENT_YEAR=$(date -u +%Y)
START_DATE="${CURRENT_YEAR}-01-01T00:00:00Z"

# Temporary directory for data
if ! TEMP_DIR=$(mktemp -d); then
  echo -e "${RED}Error: could not create a temporary directory${NC}"
  exit 1
fi
# Single quotes: expand TEMP_DIR when the trap fires (not when it is set),
# and quote it so a TMPDIR containing spaces is removed correctly.
trap 'rm -rf -- "$TEMP_DIR"' EXIT

echo -e "${BLUE}=== GitHub Repository Statistics for ${CURRENT_YEAR} ===${NC}"
echo -e "${BLUE}=== (PRs created in ${CURRENT_YEAR} and merged) ===${NC}\n"

# Initialize data files:
#   PR_FILE           - JSON array, one object per collected PR
#   CONTRIBUTORS_FILE - JSON object keyed by author login
PR_FILE="$TEMP_DIR/prs.json"
CONTRIBUTORS_FILE="$TEMP_DIR/contributors.json"
echo "[]" > "$PR_FILE"
echo "{}" > "$CONTRIBUTORS_FILE"
#######################################
# Call the GitHub GraphQL API with a single query document.
#
# Results are handed back through files rather than stdout:
#   $TEMP_DIR/graphql_response.json - response body (empty if none received)
#   $TEMP_DIR/http_code.txt         - HTTP status, or 000 on transport failure
#
# Globals:   TEMP_DIR (read), GITHUB_TOKEN (read)
# Arguments: $1 - GraphQL query text
# Returns:   0 even when the request fails; callers inspect http_code.txt
#######################################
graphql_query() {
  local query="$1"
  local temp_response="$TEMP_DIR/graphql_response.json"
  local http_code payload

  # Start from an empty response file so a failed request can never leave
  # the body of a previous call behind for the caller to read.
  : > "$temp_response"

  # JSON-encode the query text (same encoding as `echo "$query" | jq -Rs .`).
  payload="{\"query\": $(jq -Rs . <<< "$query")}"

  # Use -w to get HTTP status code, save response to file.
  # Without the `||` fallback a curl transport error (DNS, timeout, reset)
  # would abort the whole script under `set -e` before any retry could run.
  http_code=$(curl -s -w "%{http_code}" -o "$temp_response" \
    -X POST \
    -H "Authorization: bearer $GITHUB_TOKEN" \
    -H "Content-Type: application/json" \
    -d "$payload" \
    https://api.github.com/graphql) || http_code="000"

  # Write HTTP code to a separate file
  printf '%s\n' "$http_code" > "$TEMP_DIR/http_code.txt"
}
# Process each repository.
#
# For every owner/repo argument this pages through the repository's merged
# pull requests (newest-created first, 100 per page), keeps those created on
# or after START_DATE, appends one record per PR to PR_FILE and folds the
# same records into the per-author totals in CONTRIBUTORS_FILE.
for REPO in "$@"; do
  echo -e "${GREEN}Processing repository: ${REPO}${NC}"

  # Validate repo format
  if [[ ! "$REPO" =~ ^([^/]+)/([^/]+)$ ]]; then
    echo -e "${RED}Invalid repository format: $REPO (expected: owner/repo)${NC}"
    continue
  fi
  OWNER="${BASH_REMATCH[1]}"
  REPO_NAME="${BASH_REMATCH[2]}"

  echo " Fetching pull requests created since ${START_DATE}..."
  has_next_page=true
  after_cursor="null" # GraphQL literal: null for the first page, then a quoted cursor
  pr_count=0

  while [ "$has_next_page" = true ]; do
    # GraphQL query for one page of merged PRs, newest-created first.
    # The API call only filters on state; the creation-date cut-off is
    # applied locally after the page has been fetched.
    query="query {
  repository(owner: \"$OWNER\", name: \"$REPO_NAME\") {
    pullRequests(
      first: 100
      states: MERGED
      orderBy: {field: CREATED_AT, direction: DESC}
      after: $after_cursor
    ) {
      pageInfo {
        hasNextPage
        endCursor
      }
      nodes {
        number
        title
        createdAt
        mergedAt
        additions
        deletions
        bodyText
        author {
          login
        }
        reviews(first: 100) {
          totalCount
        }
        comments(first: 1) {
          totalCount
        }
      }
    }
  }
}"

    # Retry logic with exponential backoff. The loop is left either with
    # success=true or through `break 2`, which also ends paging for this
    # repository, so no extra failure check is needed after it.
    max_retries=3
    retry_count=0
    success=false
    while [ "$retry_count" -lt "$max_retries" ] && [ "$success" = false ]; do
      # Call API (writes to files)
      graphql_query "$query"

      # Read HTTP code and response from files; a missing response file is
      # treated as an empty body rather than aborting the script.
      http_code=$(cat "$TEMP_DIR/http_code.txt")
      response=$(cat "$TEMP_DIR/graphql_response.json" 2> /dev/null || true)

      # Check HTTP status code
      if [ "$http_code" != "200" ]; then
        retry_count=$((retry_count + 1))
        if [ "$retry_count" -lt "$max_retries" ]; then
          wait_time=$((2 ** retry_count)) # Exponential backoff: 2, then 4 seconds
          echo -e "${YELLOW} HTTP Error: Status $http_code${NC}"
          echo " Retrying in ${wait_time} seconds (attempt $((retry_count + 1))/$max_retries)..."
          sleep "$wait_time"
        else
          echo -e "${RED}HTTP Error after $max_retries attempts: Status $http_code${NC}"
          if [ -n "$response" ]; then
            echo "$response"
          fi
          has_next_page=false
          break 2 # Break out of both retry loop and main while loop
        fi
        continue
      fi

      # Check for GraphQL errors in response (HTTP 200 with an "errors" key)
      if jq -e '.errors' <<< "$response" > /dev/null 2>&1; then
        error_msg=$(jq -r '.errors[0].message // "Unknown error"' <<< "$response")
        retry_count=$((retry_count + 1))
        if [ "$retry_count" -lt "$max_retries" ]; then
          wait_time=$((2 ** retry_count)) # Exponential backoff: 2, then 4 seconds
          echo -e "${YELLOW} API Error: $error_msg${NC}"
          echo " Retrying in ${wait_time} seconds (attempt $((retry_count + 1))/$max_retries)..."
          sleep "$wait_time"
        else
          echo -e "${RED}GraphQL Error after $max_retries attempts:${NC}"
          jq '.errors' <<< "$response"
          has_next_page=false
          break 2 # Break out of both retry loop and main while loop
        fi
      else
        success=true
      fi
    done

    # Check if repository data exists
    if ! jq -e '.data.repository' <<< "$response" > /dev/null 2>&1; then
      echo -e "${RED}Error: Repository not found or no access${NC}"
      break
    fi

    # Extract page info with safe defaults
    has_next_page=$(jq -r '.data.repository.pullRequests.pageInfo.hasNextPage // false' <<< "$response")
    end_cursor=$(jq -r '.data.repository.pullRequests.pageInfo.endCursor // ""' <<< "$response")

    # Build the records for this page in a single jq pass: drop null nodes,
    # keep PRs created on/after START_DATE, default missing fields and
    # force the numeric fields to non-negative numbers.
    # body_size is the UTF-8 byte length of bodyText, matching the "bytes"
    # label used when the figure is reported.
    page_records="$TEMP_DIR/page_records.json"
    jq --arg start "$START_DATE" --arg repo "$REPO" '
      def nonneg: if type == "number" and . >= 0 then . else 0 end;
      (.data.repository.pullRequests.nodes // [])
      | map(select(. != null and .createdAt >= $start))
      | map(
          (.additions | nonneg) as $add
          | (.deletions | nonneg) as $del
          | {
              repo: $repo,
              number: ((.number // 0) | tostring),
              user: (.author.login // "unknown"),
              title: (.title // "No title"),
              additions: $add,
              deletions: $del,
              diff_size: ($add + $del),
              body_size: ((.bodyText // "") | utf8bytelength),
              review_comments: ((.reviews.totalCount | nonneg) + (.comments.totalCount | nonneg))
            }
        )
    ' <<< "$response" > "$page_records"

    page_count=$(jq 'length' "$page_records")
    pr_count=$((pr_count + page_count))

    # Oldest creation date on this page (before the date filter); prints
    # "null" when the page has no dated nodes. `// []` keeps a null node
    # list from aborting the script.
    oldest_created=$(jq -r '
      [(.data.repository.pullRequests.nodes // [])[] | select(. != null) | .createdAt | select(. != null)]
      | min
    ' <<< "$response")

    # Append this page's records to the PR file
    jq -s '.[0] + .[1]' "$PR_FILE" "$page_records" > "$TEMP_DIR/prs_tmp.json"
    mv "$TEMP_DIR/prs_tmp.json" "$PR_FILE"

    # Fold this page's records into the per-contributor totals
    jq -s '
      reduce .[1][] as $pr (.[0];
        .[$pr.user] |= {
          deletions: ((.deletions // 0) + $pr.deletions),
          total_diff: ((.total_diff // 0) + $pr.diff_size),
          pr_count: ((.pr_count // 0) + 1),
          review_comments: ((.review_comments // 0) + $pr.review_comments)
        }
      )
    ' "$CONTRIBUTORS_FILE" "$page_records" > "$TEMP_DIR/contributors_tmp.json"
    mv "$TEMP_DIR/contributors_tmp.json" "$CONTRIBUTORS_FILE"

    echo " Processed page (found $page_count PRs created in ${CURRENT_YEAR})"

    # Stop if we've gone past our start date (based on creation date).
    # Pages arrive newest-created first, so everything after is older still.
    if [ "$oldest_created" != "null" ] && [ "$oldest_created" \< "$START_DATE" ]; then
      echo " Reached PRs created before ${CURRENT_YEAR}, stopping..."
      break
    fi

    # Update cursor for next page
    if [ "$has_next_page" = "true" ]; then
      after_cursor="\"$end_cursor\""
      sleep 0.5 # Rate limiting
    fi
  done

  echo " Found $pr_count merged PRs created in ${CURRENT_YEAR}"
  echo ""
done
# Generate statistics
#
# Everything below only reads the two JSON files built earlier:
#   PR_FILE           - array of PR records
#   CONTRIBUTORS_FILE - object keyed by author login
# Each ranking is produced by one jq filter and numbered by `nl`.
echo -e "${BLUE}=== Compiling Statistics ===${NC}\n"

# jq definition shared by the contributor rankings: turn the contributors
# object into key/value entries, leaving out the GitHub Actions accounts.
contributor_defs='def human_entries:
  to_entries
  | map(select((.key == "github-actions[bot]" or .key == "github-actions") | not));
'

# Total merged PRs
TOTAL_MERGED=$(jq 'length' "$PR_FILE")
echo -e "${YELLOW}Total merged pull requests created in ${CURRENT_YEAR}:${NC} $TOTAL_MERGED"
echo

# Top 10 contributors by deleted lines (exclude github-actions)
echo -e "${YELLOW}Top 10 contributors by deleted lines of code:${NC}"
jq -r "${contributor_defs}"'
  human_entries
  | map({user: .key, deletions: .value.deletions})
  | sort_by(-.deletions)
  | .[:10][]
  | "\(.deletions) lines - \(.user)"
' "$CONTRIBUTORS_FILE" | nl
echo

# Top 10 smallest PRs by diff size (exclude zero-diff PRs)
echo -e "${YELLOW}Top 10 smallest PRs by diff size (excluding zero diffs):${NC}"
jq -r '
  map(select(.diff_size > 0))
  | sort_by(.diff_size)
  | .[:10][]
  | "\(.diff_size) lines - \(.repo)#\(.number): \(.title)"
' "$PR_FILE" | nl
echo

# Top 10 PRs by description size
echo -e "${YELLOW}Top 10 PRs by description size (bytes):${NC}"
jq -r '
  sort_by(-.body_size)
  | .[:10][]
  | "\(.body_size) bytes - \(.repo)#\(.number): \(.title)"
' "$PR_FILE" | nl
echo

# Top 10 contributors by smallest average PR size (exclude github-actions)
echo -e "${YELLOW}Top 10 contributors by smallest average PR size:${NC}"
jq -r "${contributor_defs}"'
  human_entries
  | map({
      user: .key,
      avg_size: ((.value.total_diff / .value.pr_count) | floor),
      pr_count: .value.pr_count
    })
  | sort_by(.avg_size)
  | .[:10][]
  | "\(.avg_size) avg lines (\(.pr_count) PRs) - \(.user)"
' "$CONTRIBUTORS_FILE" | nl
echo

# Top 10 contributors by PR review comment count (exclude github-actions)
echo -e "${YELLOW}Top 10 contributors by PR review comment count:${NC}"
jq -r "${contributor_defs}"'
  human_entries
  | map({user: .key, review_comments: .value.review_comments})
  | sort_by(-.review_comments)
  | .[:10][]
  | "\(.review_comments) review comments - \(.user)"
' "$CONTRIBUTORS_FILE" | nl
echo

echo -e "${GREEN}Statistics compilation complete!${NC}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment