yvbbrjdr/gh_star_utils.py

## gh_star_utils.py
#!/usr/bin/env python3

import argparse
from collections import Counter
from datetime import datetime
import json
import os

import requests
from tqdm import tqdm


# GraphQL document used to page through a repository's stargazers.
# Each request asks for 100 edges ordered by STARRED_AT ascending, starting
# after `$cursor` (the `endCursor` of the previous page; null for the first
# page). Every edge carries `starredAt` and the stargazer's `login`.
GRAPHQL_QUERY = """
query($owner: String!, $name: String!, $cursor: String) {
  repository(owner: $owner, name: $name) {
    stargazers(first: 100, after: $cursor, orderBy: { field: STARRED_AT, direction: ASC }) {
      pageInfo {
        hasNextPage
        endCursor
      }
      edges {
        starredAt
        node {
          login
        }
      }
    }
  }
}
"""


def get_total_stars(owner: str, name: str, github_token: str) -> int:
    """Return the repository's current star count from the GitHub REST API.

    Args:
        owner: Repository owner (user or organization login).
        name: Repository name.
        github_token: Token sent as a ``Bearer`` credential.

    Returns:
        The ``stargazers_count`` field of the repository resource.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    response = requests.get(
        f"https://api.github.com/repos/{owner}/{name}",
        headers={
            "Accept": "application/vnd.github.v3+json",
            "Authorization": f"Bearer {github_token}",
        },
        # requests waits indefinitely unless a timeout is given; bound the
        # wait so a stalled connection cannot hang the command.
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["stargazers_count"]


def fetch_stars(
    owner: str,
    name: str,
    cursor: str | None,
    github_token: str,
) -> tuple[list[dict], bool, str | None]:
    """Fetch one page (up to 100 entries) of stargazers via the GraphQL API.

    Args:
        owner: Repository owner (user or organization login).
        name: Repository name.
        cursor: ``endCursor`` of the previous page, or ``None`` for the first.
        github_token: Token sent as a ``Bearer`` credential.

    Returns:
        ``(edges, has_next_page, end_cursor)`` taken from the ``stargazers``
        connection of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
        RuntimeError: If the response carries no repository data, with a
            dedicated message when the first error is a rate-limit error.
    """
    response = requests.post(
        "https://api.github.com/graphql",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {github_token}",
        },
        json={
            "query": GRAPHQL_QUERY,
            "variables": {"owner": owner, "name": name, "cursor": cursor},
        },
        # requests waits indefinitely unless a timeout is given.
        timeout=30,
    )
    response.raise_for_status()

    data = response.json()
    errors = data.get("errors") or []
    # GraphQL failures arrive with HTTP 200. Besides a missing "data" key,
    # "data" or "data.repository" may be null (e.g. unknown repository), so
    # treat all three as "no usable payload" instead of crashing further down.
    repository = (data.get("data") or {}).get("repository")
    if repository is None:
        # NOTE(review): GitHub's GraphQL rate-limit errors appear to use the
        # type "RATE_LIMITED"; the original "RATE_LIMIT" spelling is accepted
        # as well -- confirm against the API documentation.
        if errors and errors[0].get("type") in ("RATE_LIMIT", "RATE_LIMITED"):
            limit = response.headers.get("X-RateLimit-Limit")
            remaining = response.headers.get("X-RateLimit-Remaining")
            used = response.headers.get("X-RateLimit-Used")
            reset = response.headers.get("X-RateLimit-Reset")
            # The reset header is an epoch timestamp; it may be absent, and a
            # formatting failure must not mask the rate-limit error itself.
            if reset is not None and reset.isdigit():
                reset_text = datetime.fromtimestamp(int(reset)).strftime(
                    "%Y-%m-%d %H:%M:%S"
                )
            else:
                reset_text = "unknown"
            raise RuntimeError(
                f"Rate limit exceeded: limit={limit} remaining={remaining} used={used} reset={reset_text}"
            )
        raise RuntimeError(f"Error fetching stars: {data}")
    stargazers = repository["stargazers"]
    return (
        stargazers["edges"],
        stargazers["pageInfo"]["hasNextPage"],
        stargazers["pageInfo"]["endCursor"],
    )


def fetch_handler(args: argparse.Namespace):
    """Download a repository's stargazers into ``stars_<owner>_<name>.json``.

    The token is read from the ``GITHUB_TOKEN`` environment variable. When the
    output file already exists, fetching resumes from the cursor stored in it.
    The file holds ``repo``, ``stars`` (the raw GraphQL edges) and ``cursor``.
    It is rewritten every 20 pages, after the last page, and when the loop is
    aborted by an exception, so pages fetched so far are not lost.

    Args:
        args: Parsed CLI arguments; ``owner`` and ``name`` are read.

    Raises:
        ValueError: If ``GITHUB_TOKEN`` is unset or empty.
        RuntimeError: Propagated from ``fetch_stars`` on API errors.
    """
    github_token = os.getenv("GITHUB_TOKEN")
    if not github_token:
        raise ValueError("GITHUB_TOKEN environment variable is not set")

    total_stars = get_total_stars(args.owner, args.name, github_token)

    output_filename = f"stars_{args.owner}_{args.name}.json"
    if os.path.exists(output_filename):
        with open(output_filename, "r") as f:
            data = json.load(f)
        all_stars = data["stars"]
        cursor = data["cursor"]
        # NOTE(review): if stars were removed since the last run, the stored
        # count can reach the current total while newer stars exist; confirm
        # whether resuming should always attempt one more page.
        has_next_page = len(all_stars) < total_stars
    else:
        all_stars = []
        cursor = None
        has_next_page = True

    def save_checkpoint(resume_cursor):
        # Write to a sibling temp file and rename it over the target so an
        # interrupted write can never leave a truncated resume file behind.
        tmp_filename = f"{output_filename}.tmp"
        with open(tmp_filename, "w") as f:
            json.dump(
                {
                    "repo": f"{args.owner}/{args.name}",
                    "stars": all_stars,
                    "cursor": resume_cursor,
                },
                f,
                separators=(",", ":"),
            )
        os.replace(tmp_filename, output_filename)

    pages_per_checkpoint = 20
    pages_since_save = 0
    with tqdm(
        total=total_stars,
        desc=f"Fetching stargazers for {args.owner}/{args.name}",
        initial=len(all_stars),
        unit="stars",
        dynamic_ncols=True,
    ) as progress:
        try:
            while has_next_page:
                stars, has_next_page, next_cursor = fetch_stars(
                    args.owner, args.name, cursor, github_token
                )
                # On the last page endCursor may come back as None; keep the
                # previous cursor in that case so a later run can resume.
                if has_next_page or next_cursor is not None:
                    cursor = next_cursor
                all_stars.extend(stars)
                pages_since_save += 1
                if pages_since_save >= pages_per_checkpoint or not has_next_page:
                    save_checkpoint(cursor)
                    pages_since_save = 0
                progress.update(len(stars))
        finally:
            # Persist pages fetched since the last checkpoint even when the
            # loop is aborted (rate limit, network error, Ctrl-C).
            if pages_since_save:
                save_checkpoint(cursor)
    print(f"Stargazers saved to {output_filename}")


def load_series(
    input_file: str,
) -> tuple[str, list[tuple[datetime, int]], list[tuple[datetime, int]]]:
    """Read a saved stargazer file and bucket its stars by hour.

    Args:
        input_file: Path to a JSON file with ``repo`` and ``stars`` keys, where
            each star has an ISO-8601 ``starredAt`` timestamp.

    Returns:
        ``(repo, cumulative_points, hourly_points)``. Both point lists are
        sorted by hour; ``hourly_points`` pairs each hour with the number of
        stars added in it and ``cumulative_points`` with the running total.
    """
    with open(input_file, "r") as fh:
        payload = json.load(fh)
    repo_name = payload["repo"]
    entries = payload["stars"]

    per_hour: Counter = Counter()
    for entry in entries:
        # fromisoformat does not accept a trailing "Z" on older Pythons, so
        # spell the UTC offset out before parsing.
        stamp = datetime.fromisoformat(entry["starredAt"].replace("Z", "+00:00"))
        per_hour[stamp.replace(minute=0, second=0, microsecond=0)] += 1

    hourly_points = sorted(per_hour.items())

    cumulative_points = []
    running_total = 0
    for bucket, added in hourly_points:
        running_total += added
        cumulative_points.append((bucket, running_total))

    return repo_name, cumulative_points, hourly_points


def plot_handler(args: argparse.Namespace):
    """Plot cumulative (and optionally hourly) star counts from saved files.

    With ``--interactive`` the figure is shown in a window. Otherwise it is
    written as a PNG: next to the single input file with its extension
    replaced by ``.png``, or to ``stars_combined.png`` for several inputs.

    Args:
        args: Parsed CLI arguments; ``input_files``, ``interactive`` and
            ``hourly`` are read.
    """
    # matplotlib is imported only when plotting is requested.
    import matplotlib

    # The backend has to be chosen before pyplot is imported; Agg renders to
    # files without needing a display.
    if not args.interactive:
        matplotlib.use("Agg")
    from matplotlib import pyplot as plt
    from matplotlib.dates import DateFormatter

    series: list[
        tuple[str, list[tuple[datetime, int]], list[tuple[datetime, int]]]
    ] = []
    for input_file in tqdm(args.input_files, desc="Loading series", dynamic_ncols=True):
        series.append(load_series(input_file))

    fig, ax1 = plt.subplots(figsize=(12, 6))
    # Hourly counts live on a second y-axis sharing the time axis.
    ax2 = ax1.twinx() if args.hourly else None

    for i, (repo, cumulative_points, hourly_points) in enumerate(series):
        color = f"C{i % 10}"
        if not cumulative_points:
            # Keep a legend entry for repositories without stars, in the same
            # per-series colour the non-empty branch would have used.
            ax1.plot([], [], label=repo, color=color)
            continue
        x_dates, cumulative = zip(*cumulative_points)
        ax1.plot(x_dates, cumulative, label=repo, color=color, alpha=0.8)
        if args.hourly and hourly_points:
            x_hourly, hourly = zip(*hourly_points)
            ax2.step(x_hourly, hourly, where="mid", alpha=0.5, linewidth=1, color=color)

    ax1.set_xlabel("Date & Time (UTC)")
    ax1.set_ylabel("Total Stars")
    if args.hourly:
        ax2.set_ylabel("Hourly Added Stars")
    ax1.grid(True, linestyle="--", alpha=0.6)
    if len(series) == 1:
        ax1.set_title(f"GitHub Stars Over Time for {series[0][0]}")
    else:
        ax1.set_title("GitHub Stars Over Time")
    ax1.legend(loc="upper left")
    ax1.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d %H:%M"))
    fig.autofmt_xdate()
    plt.tight_layout()

    if args.interactive:
        plt.show()
    else:
        if len(args.input_files) == 1:
            # Swap only the final extension. A plain str.replace would also
            # rewrite ".json" inside directory names and would leave a name
            # without ".json" unchanged, pointing the output at the input.
            output_file = os.path.splitext(args.input_files[0])[0] + ".png"
        else:
            output_file = "stars_combined.png"
        plt.savefig(output_file)
        print(f"Plot saved to {output_file}")

if __name__ == "__main__":
    # Command-line entry point with two sub-commands:
    #   fetch <owner> <name>                          -> fetch_handler
    #   plot <files...> [--interactive] [--hourly]    -> plot_handler
    cli = argparse.ArgumentParser(
        description="Fetch and plot GitHub stargazer growth over time."
    )
    commands = cli.add_subparsers(dest="command")

    fetch_cmd = commands.add_parser(
        "fetch", help="Fetch stargazers from a GitHub repository"
    )
    fetch_cmd.set_defaults(func=fetch_handler)
    fetch_cmd.add_argument(
        "owner", type=str, help="Repository owner (username or organization)"
    )
    fetch_cmd.add_argument("name", type=str, help="Repository name")

    plot_cmd = commands.add_parser(
        "plot", help="Plot stargazer growth from saved JSON data"
    )
    plot_cmd.set_defaults(func=plot_handler)
    plot_cmd.add_argument(
        "input_files",
        type=str,
        nargs="+",
        help="JSON file(s) from fetch command (e.g., stars_owner_repo.json)",
    )
    plot_cmd.add_argument(
        "--interactive",
        action="store_true",
        help="Show interactive matplotlib plot",
    )
    plot_cmd.add_argument(
        "--hourly",
        action="store_true",
        help="Also plot hourly added stars on a secondary axis",
    )

    parsed = cli.parse_args()
    # "func" is only set when a sub-command was given; otherwise show usage.
    handler = getattr(parsed, "func", None)
    if handler is None:
        cli.print_help()
    else:
        handler(parsed)
	#!/usr/bin/env python3

	import argparse
	from collections import Counter
	from datetime import datetime
	import json
	import os

	import requests
	from tqdm import tqdm


	# GraphQL document used to page through a repository's stargazers:
	# 100 edges per request, ordered by STARRED_AT ascending, starting after
	# `$cursor`. Every edge carries `starredAt` and the stargazer's `login`.
	GRAPHQL_QUERY = """
	query($owner: String!, $name: String!, $cursor: String) {
	repository(owner: $owner, name: $name) {
	stargazers(first: 100, after: $cursor, orderBy: { field: STARRED_AT, direction: ASC }) {
	pageInfo {
	hasNextPage
	endCursor
	}
	edges {
	starredAt
	node {
	login
	}
	}
	}
	}
	}
	"""


	def get_total_stars(owner: str, name: str, github_token: str) -> int:
	    """Return the repository's current star count from the GitHub REST API.

	    Args:
	        owner: Repository owner (user or organization login).
	        name: Repository name.
	        github_token: Token sent as a ``Bearer`` credential.

	    Returns:
	        The ``stargazers_count`` field of the repository resource.

	    Raises:
	        requests.HTTPError: If the API answers with an error status.
	        requests.Timeout: If the server does not answer within the timeout.
	    """
	    response = requests.get(
	        f"https://api.github.com/repos/{owner}/{name}",
	        headers={
	            "Accept": "application/vnd.github.v3+json",
	            "Authorization": f"Bearer {github_token}",
	        },
	        # requests waits indefinitely unless a timeout is given.
	        timeout=30,
	    )
	    response.raise_for_status()
	    return response.json()["stargazers_count"]


	def fetch_stars(
	    owner: str,
	    name: str,
	    cursor: str | None,
	    github_token: str,
	) -> tuple[list[dict], bool, str | None]:
	    """Fetch one page (up to 100 entries) of stargazers via the GraphQL API.

	    Args:
	        owner: Repository owner (user or organization login).
	        name: Repository name.
	        cursor: ``endCursor`` of the previous page, or ``None`` for the first.
	        github_token: Token sent as a ``Bearer`` credential.

	    Returns:
	        ``(edges, has_next_page, end_cursor)`` taken from the ``stargazers``
	        connection of the response.

	    Raises:
	        requests.HTTPError: If the API answers with an error status.
	        requests.Timeout: If the server does not answer within the timeout.
	        RuntimeError: If the response carries no repository data, with a
	            dedicated message when the first error is a rate-limit error.
	    """
	    response = requests.post(
	        "https://api.github.com/graphql",
	        headers={
	            "Content-Type": "application/json",
	            "Authorization": f"Bearer {github_token}",
	        },
	        json={
	            "query": GRAPHQL_QUERY,
	            "variables": {"owner": owner, "name": name, "cursor": cursor},
	        },
	        # requests waits indefinitely unless a timeout is given.
	        timeout=30,
	    )
	    response.raise_for_status()

	    data = response.json()
	    errors = data.get("errors") or []
	    # "data" may be missing, null, or hold a null repository; all three
	    # mean there is no page to return.
	    repository = (data.get("data") or {}).get("repository")
	    if repository is None:
	        # NOTE(review): GitHub's GraphQL rate-limit errors appear to use the
	        # type "RATE_LIMITED"; the original "RATE_LIMIT" spelling is accepted
	        # as well -- confirm against the API documentation.
	        if errors and errors[0].get("type") in ("RATE_LIMIT", "RATE_LIMITED"):
	            limit = response.headers.get("X-RateLimit-Limit")
	            remaining = response.headers.get("X-RateLimit-Remaining")
	            used = response.headers.get("X-RateLimit-Used")
	            reset = response.headers.get("X-RateLimit-Reset")
	            # A missing or malformed reset header must not mask the
	            # rate-limit error itself.
	            if reset is not None and reset.isdigit():
	                reset_text = datetime.fromtimestamp(int(reset)).strftime(
	                    "%Y-%m-%d %H:%M:%S"
	                )
	            else:
	                reset_text = "unknown"
	            raise RuntimeError(
	                f"Rate limit exceeded: limit={limit} remaining={remaining} used={used} reset={reset_text}"
	            )
	        raise RuntimeError(f"Error fetching stars: {data}")
	    stargazers = repository["stargazers"]
	    return (
	        stargazers["edges"],
	        stargazers["pageInfo"]["hasNextPage"],
	        stargazers["pageInfo"]["endCursor"],
	    )


	def fetch_handler(args: argparse.Namespace):
	    """Download a repository's stargazers into ``stars_<owner>_<name>.json``.

	    The token is read from the ``GITHUB_TOKEN`` environment variable. When
	    the output file already exists, fetching resumes from the cursor stored
	    in it. The file holds ``repo``, ``stars`` and ``cursor`` and is rewritten
	    every 20 pages, after the last page, and when the loop is aborted by an
	    exception, so pages fetched so far are not lost.

	    Args:
	        args: Parsed CLI arguments; ``owner`` and ``name`` are read.

	    Raises:
	        ValueError: If ``GITHUB_TOKEN`` is unset or empty.
	        RuntimeError: Propagated from ``fetch_stars`` on API errors.
	    """
	    github_token = os.getenv("GITHUB_TOKEN")
	    if not github_token:
	        raise ValueError("GITHUB_TOKEN environment variable is not set")

	    total_stars = get_total_stars(args.owner, args.name, github_token)

	    output_filename = f"stars_{args.owner}_{args.name}.json"
	    if os.path.exists(output_filename):
	        with open(output_filename, "r") as f:
	            data = json.load(f)
	        all_stars = data["stars"]
	        cursor = data["cursor"]
	        has_next_page = len(all_stars) < total_stars
	    else:
	        all_stars = []
	        cursor = None
	        has_next_page = True

	    def save_checkpoint(resume_cursor):
	        # Write to a temp file and rename it over the target so an
	        # interrupted write cannot leave a truncated resume file behind.
	        tmp_filename = f"{output_filename}.tmp"
	        with open(tmp_filename, "w") as f:
	            json.dump(
	                {
	                    "repo": f"{args.owner}/{args.name}",
	                    "stars": all_stars,
	                    "cursor": resume_cursor,
	                },
	                f,
	                separators=(",", ":"),
	            )
	        os.replace(tmp_filename, output_filename)

	    pages_per_checkpoint = 20
	    pages_since_save = 0
	    with tqdm(
	        total=total_stars,
	        desc=f"Fetching stargazers for {args.owner}/{args.name}",
	        initial=len(all_stars),
	        unit="stars",
	        dynamic_ncols=True,
	    ) as progress:
	        try:
	            while has_next_page:
	                stars, has_next_page, next_cursor = fetch_stars(
	                    args.owner, args.name, cursor, github_token
	                )
	                # On the last page endCursor may come back as None; keep
	                # the previous cursor so a later run can resume from it.
	                if has_next_page or next_cursor is not None:
	                    cursor = next_cursor
	                all_stars.extend(stars)
	                pages_since_save += 1
	                if pages_since_save >= pages_per_checkpoint or not has_next_page:
	                    save_checkpoint(cursor)
	                    pages_since_save = 0
	                progress.update(len(stars))
	        finally:
	            # Persist pages fetched since the last checkpoint even when
	            # the loop is aborted.
	            if pages_since_save:
	                save_checkpoint(cursor)
	    print(f"Stargazers saved to {output_filename}")


	def load_series(
	    input_file: str,
	) -> tuple[str, list[tuple[datetime, int]], list[tuple[datetime, int]]]:
	    """Read a saved stargazer file and bucket its stars by hour.

	    Args:
	        input_file: Path to a JSON file with ``repo`` and ``stars`` keys,
	            where each star has an ISO-8601 ``starredAt`` timestamp.

	    Returns:
	        ``(repo, cumulative_points, hourly_points)``; both lists are sorted
	        by hour and pair each hour with the running total, respectively the
	        number of stars added in that hour.
	    """
	    with open(input_file, "r") as f:
	        data = json.load(f)
	    repo = data["repo"]
	    stars = data["stars"]

	    # Truncate every timestamp to the start of its hour.
	    hours = [
	        datetime.fromisoformat(star["starredAt"].replace("Z", "+00:00")).replace(
	            minute=0, second=0, microsecond=0
	        )
	        for star in stars
	    ]
	    hourly_counts = sorted(Counter(hours).items())
	    x_datetimes = [dt for dt, _ in hourly_counts]
	    cumulative = []
	    total = 0
	    for _, count in hourly_counts:
	        total += count
	        cumulative.append(total)
	    cumulative_points = list(zip(x_datetimes, cumulative))
	    hourly_points = [(dt, count) for dt, count in hourly_counts]
	    return repo, cumulative_points, hourly_points


	def plot_handler(args: argparse.Namespace):
	    """Plot cumulative (and optionally hourly) star counts from saved files.

	    With ``--interactive`` the figure is shown in a window. Otherwise it is
	    written as a PNG: next to the single input file with its extension
	    replaced by ``.png``, or to ``stars_combined.png`` for several inputs.

	    Args:
	        args: Parsed CLI arguments; ``input_files``, ``interactive`` and
	            ``hourly`` are read.
	    """
	    import matplotlib

	    # The backend has to be chosen before pyplot is imported.
	    if not args.interactive:
	        matplotlib.use("Agg")
	    from matplotlib import pyplot as plt
	    from matplotlib.dates import DateFormatter

	    series: list[
	        tuple[str, list[tuple[datetime, int]], list[tuple[datetime, int]]]
	    ] = []
	    for input_file in tqdm(args.input_files, desc="Loading series", dynamic_ncols=True):
	        series.append(load_series(input_file))

	    fig, ax1 = plt.subplots(figsize=(12, 6))
	    ax2 = ax1.twinx() if args.hourly else None

	    for i, (repo, cumulative_points, hourly_points) in enumerate(series):
	        color = f"C{i % 10}"
	        if not cumulative_points:
	            # Keep a legend entry for repositories without stars.
	            ax1.plot([], [], label=repo, color=color)
	            continue
	        x_dates, cumulative = zip(*cumulative_points)
	        ax1.plot(x_dates, cumulative, label=repo, color=color, alpha=0.8)
	        if args.hourly and hourly_points:
	            x_hourly, hourly = zip(*hourly_points)
	            ax2.step(x_hourly, hourly, where="mid", alpha=0.5, linewidth=1, color=color)

	    ax1.set_xlabel("Date & Time (UTC)")
	    ax1.set_ylabel("Total Stars")
	    if args.hourly:
	        ax2.set_ylabel("Hourly Added Stars")
	    ax1.grid(True, linestyle="--", alpha=0.6)
	    if len(series) == 1:
	        ax1.set_title(f"GitHub Stars Over Time for {series[0][0]}")
	    else:
	        ax1.set_title("GitHub Stars Over Time")
	    ax1.legend(loc="upper left")
	    ax1.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d %H:%M"))
	    fig.autofmt_xdate()
	    plt.tight_layout()

	    if args.interactive:
	        plt.show()
	    else:
	        if len(args.input_files) == 1:
	            # Swap only the final extension so the output path can never
	            # equal the input path or mangle directory names.
	            output_file = os.path.splitext(args.input_files[0])[0] + ".png"
	        else:
	            output_file = "stars_combined.png"
	        plt.savefig(output_file)
	        print(f"Plot saved to {output_file}")


	if __name__ == "__main__":
	    # Command-line entry point: "fetch <owner> <name>" runs fetch_handler,
	    # "plot <files...>" runs plot_handler; without a sub-command the help
	    # text is printed.
	    parser = argparse.ArgumentParser(
	        description="Fetch and plot GitHub stargazer growth over time."
	    )
	    subparsers = parser.add_subparsers(dest="command")
	    fetch_parser = subparsers.add_parser(
	        "fetch", help="Fetch stargazers from a GitHub repository"
	    )
	    fetch_parser.add_argument(
	        "owner", type=str, help="Repository owner (username or organization)"
	    )
	    fetch_parser.add_argument("name", type=str, help="Repository name")
	    fetch_parser.set_defaults(func=fetch_handler)
	    plot_parser = subparsers.add_parser(
	        "plot", help="Plot stargazer growth from saved JSON data"
	    )
	    plot_parser.add_argument(
	        "input_files",
	        type=str,
	        nargs="+",
	        help="JSON file(s) from fetch command (e.g., stars_owner_repo.json)",
	    )
	    plot_parser.add_argument(
	        "--interactive",
	        action="store_true",
	        help="Show interactive matplotlib plot",
	    )
	    plot_parser.add_argument(
	        "--hourly",
	        action="store_true",
	        help="Also plot hourly added stars on a secondary axis",
	    )
	    plot_parser.set_defaults(func=plot_handler)
	    args = parser.parse_args()
	    if hasattr(args, "func"):
	        args.func(args)
	    else:
	        parser.print_help()
No results found