Skip to content

Instantly share code, notes, and snippets.

@jsstevenson
Last active January 22, 2026 04:41
Show Gist options
  • Select an option

  • Save jsstevenson/2d82bab16f6ae537fbafe3d7ab50e833 to your computer and use it in GitHub Desktop.

Select an option

Save jsstevenson/2d82bab16f6ae537fbafe3d7ab50e833 to your computer and use it in GitHub Desktop.
Simple script to grab a snapshot of basic usage stats for specific libraries of interest.
# /// script
# dependencies = [
# "requests",
# ]
# ///
import os
from pathlib import Path
from datetime import datetime
import csv
import requests
# Projects to collect usage statistics for. Every entry carries the same keys
# so consumers can rely on a uniform schema:
#   name            - display name written to the report
#   github_org      - GitHub organization that owns the repository
#   github_repo     - GitHub repository name
#   ghcr            - True if GHCR download counts should be fetched
#   dockerhub_org   - Docker Hub namespace, or None if there is no image
#   dockerhub_name  - Docker Hub repository name, or None if there is no image
#   pypi_name       - PyPI package name, or None if not published to PyPI
projects = [
    # specs
    {
        "name": "VRS",
        "github_org": "ga4gh",
        "github_repo": "vrs",
        "ghcr": False,
        "dockerhub_org": None,
        "dockerhub_name": None,
        "pypi_name": None,
    },
    {
        "name": "VA-Spec",
        "github_org": "ga4gh",
        "github_repo": "va-spec",
        "ghcr": False,
        "dockerhub_org": None,
        "dockerhub_name": None,
        "pypi_name": None,
    },
    {
        "name": "Cat-VRS",
        "github_org": "ga4gh",
        "github_repo": "cat-vrs",
        "ghcr": False,
        "dockerhub_org": None,
        "dockerhub_name": None,
        "pypi_name": None,
    },
    {
        "name": "Metaschema Processor",
        "github_org": "ga4gh",
        "github_repo": "gks-metaschema",
        "ghcr": False,
        "dockerhub_org": None,
        "dockerhub_name": None,
        "pypi_name": "ga4gh.gks.metaschema",
    },
    # spec implementations
    {
        "name": "vrs-python",
        "github_org": "ga4gh",
        "github_repo": "vrs-python",
        "ghcr": False,
        "dockerhub_org": "ga4gh",
        "dockerhub_name": "vrs-python",
        "pypi_name": "ga4gh.vrs",
    },
    {
        "name": "cat-vrs-python",
        "github_org": "ga4gh",
        "github_repo": "cat-vrs-python",
        "ghcr": False,
        "dockerhub_org": None,
        "dockerhub_name": None,
        "pypi_name": "ga4gh.cat-vrs",
    },
    {
        "name": "va-spec-python",
        "github_org": "ga4gh",
        "github_repo": "va-spec-python",
        "ghcr": False,
        "dockerhub_org": None,
        "dockerhub_name": None,
        "pypi_name": "ga4gh.va-spec",
    },
    # biocommons
    {
        "name": "seqrepo",
        "github_org": "biocommons",
        "github_repo": "biocommons.seqrepo",
        "ghcr": False,
        "dockerhub_org": "biocommons",
        "dockerhub_name": "seqrepo",
        "pypi_name": "biocommons.seqrepo",
    },
    {
        "name": "hgvs",
        "github_org": "biocommons",
        "github_repo": "hgvs",
        "ghcr": False,
        "dockerhub_org": "biocommons",
        "dockerhub_name": "hgvs",
        "pypi_name": "hgvs",
    },
    {
        "name": "bioutils",
        "github_org": "biocommons",
        "github_repo": "bioutils",
        "ghcr": False,
        "dockerhub_org": None,
        "dockerhub_name": None,
        "pypi_name": "bioutils",
    },
    {
        "name": "anyvar",
        "github_org": "biocommons",
        "github_repo": "anyvar",
        "ghcr": True,
        "dockerhub_org": None,
        "dockerhub_name": None,
        "pypi_name": "biocommons.anyvar",
    },
    {
        "name": "uta",
        "github_org": "biocommons",
        "github_repo": "uta",
        "ghcr": False,
        "dockerhub_org": "biocommons",
        "dockerhub_name": "uta",
        "pypi_name": None,
    },
]
# Root URL that every GitHub REST API endpoint path is appended to.
GITHUB_API = "https://api.github.com"
# Optional access token read from the environment; None when the variable is unset.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
def gh_get(
    url: str, params: dict | None = None, timeout: float = 30.0
) -> requests.Response:
    """Send a GET request to the GitHub API and return the successful response.

    When ``GITHUB_TOKEN`` is set, it is sent as a bearer token in the
    ``Authorization`` header; otherwise the request is sent without one.

    :param url: full URL to request
    :param params: optional query-string parameters
    :param timeout: seconds to wait for the server before giving up
    :return: the response object, already checked for an error status
    :raise requests.HTTPError: if the server answers with a 4xx/5xx status
    :raise requests.Timeout: if the server does not answer within ``timeout``
    """
    headers = {"Accept": "application/vnd.github+json"}
    if GITHUB_TOKEN:
        headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"
    # requests has no default timeout; without one a stalled connection would
    # block the whole script indefinitely.
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response
def get_gh_repo_stats(org: str, repo: str) -> dict:
    """Look up the star count, fork count, and web URL of a GitHub repository.

    :param org: GitHub organization (or user) owning the repository
    :param repo: repository name
    :return: dict with the keys ``stars``, ``forks`` and ``html_url``
    """
    repo_info = gh_get(f"{GITHUB_API}/repos/{org}/{repo}").json()
    # (key in the returned dict, key in the GitHub API payload)
    field_map = (
        ("stars", "stargazers_count"),
        ("forks", "forks_count"),
        ("html_url", "html_url"),
    )
    return {out_key: repo_info[api_key] for out_key, api_key in field_map}
# def get_gh_contributor_count(org: str, repo: str) -> int:
# url = f"{GITHUB_API}/repos/{org}/{repo}/contributors"
# params = {"per_page": 100}
# count = 0
#
# while url:
# r = gh_get(url, params)
# contributors = r.json()
# count += len(contributors)
#
# link = r.headers.get("Link")
# if link and 'rel="next"' in link:
# url = link.split(";")[0].strip("<>")
# params = None
# else:
# url = None
#
# return count
def _paginate(url: str, params: dict | None = None):
seen = set()
while url:
if url in seen:
raise RuntimeError(f"Pagination loop detected at {url}")
seen.add(url)
r = gh_get(url, params)
data = r.json()
if not isinstance(data, list):
raise TypeError(f"Expected list response from {url}, got {type(data)}")
yield from data
url = r.links.get("next", {}).get("url")
params = None
def get_pr_creators_and_issue_participants(org: str, repo: str, state: str = "all"):
    """Collect the users who opened PRs or took part in issues of a repository.

    :param org: GitHub organization (or user) owning the repository
    :param repo: repository name
    :param state: issue/PR state filter passed to the issues endpoint
    :return: a 5-tuple of (set of PR creator logins, set of issue participant
        logins, number of PR creators, number of issue participants, number of
        distinct users across both sets)
    """

    def _login(record):
        # Tolerates records whose "user" entry is missing or None.
        return (record.get("user") or {}).get("login")

    pr_authors: set[str] = set()
    issue_authors: set[str] = set()

    # The issues endpoint returns PRs as well; those carry a "pull_request" key.
    issues_url = f"{GITHUB_API}/repos/{org}/{repo}/issues"
    for entry in _paginate(issues_url, params={"state": state, "per_page": 100}):
        login = _login(entry)
        if login:
            bucket = pr_authors if "pull_request" in entry else issue_authors
            bucket.add(login)

    # Repo-wide comments, covering comments on both issues and PRs.
    comments_url = f"{GITHUB_API}/repos/{org}/{repo}/issues/comments"
    commenters: set[str] = {
        login
        for comment in _paginate(comments_url, params={"per_page": 100})
        if (login := _login(comment))
    }

    issue_side = issue_authors | commenters
    return (
        pr_authors,
        issue_side,
        len(pr_authors),  # unique PR creators
        len(issue_side),  # unique issue creators + discussants
        len(pr_authors | issue_side),  # unique PR creators + issue participants
    )
def get_pypi_month_download_count(package_name: str, timeout: float = 30.0) -> int:
    """Fetch a package's download count for the last month from pypistats.org.

    :param package_name: package name as used in the pypistats URL
    :param timeout: seconds to wait for the server before giving up
    :return: the ``last_month`` value from the "recent" statistics
    :raise requests.HTTPError: if the server answers with a 4xx/5xx status
    :raise requests.Timeout: if the server does not answer within ``timeout``
    """
    url = f"https://pypistats.org/api/packages/{package_name}/recent"
    # Explicit timeout: requests would otherwise wait forever on a stalled server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()["data"]["last_month"]
def get_dh_pull_count(org: str, name: str, timeout: float = 30.0) -> int:
    """Fetch the pull count of a Docker Hub repository.

    :param org: Docker Hub namespace
    :param name: Docker Hub repository name
    :param timeout: seconds to wait for the server before giving up
    :return: the ``pull_count`` value reported by Docker Hub
    :raise requests.HTTPError: if the server answers with a 4xx/5xx status
    :raise requests.Timeout: if the server does not answer within ``timeout``
    """
    url = f"https://hub.docker.com/v2/repositories/{org}/{name}/"
    # Explicit timeout: requests would otherwise wait forever on a stalled server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()["pull_count"]
def get_ghcr_total_downloads(org: str, name: str, timeout: float = 30.0) -> int:
    """Fetch the download count of a GitHub Container Registry package.

    NOTE: the number comes from the third-party service at
    ghcr-badge.elias.eu.org, so this may stop working if that service changes
    or goes away.

    :param org: GitHub organization owning the package
    :param name: used for both name segments of the URL, so presumably the
        package must be named like its repository (verify for new projects)
    :param timeout: seconds to wait for the server before giving up
    :return: the ``downloadCountRaw`` value reported by the service
    :raise requests.HTTPError: if the server answers with a 4xx/5xx status
    :raise requests.Timeout: if the server does not answer within ``timeout``
    """
    url = f"https://ghcr-badge.elias.eu.org/api/{org}/{name}/{name}"
    # Explicit timeout: requests would otherwise wait forever on a stalled server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()["downloadCountRaw"]
def main():
    """Gather usage stats for every configured project and write them to a CSV.

    For each entry in ``projects`` this fetches GitHub stars/forks and the
    participant count, plus PyPI, GHCR and Docker Hub numbers where the entry
    configures them (``None`` otherwise). The rows are written to
    ``lib_stats_<YYYYmmdd_HHMMSS>.csv`` in the current working directory, named
    after the local time. Nothing is written if there are no projects.
    """
    results = []
    for project in projects:
        org = project["github_org"]
        name = project["github_repo"]
        gh_stats = get_gh_repo_stats(org, name)
        # gh_contributors = get_gh_contributor_count(org, name)
        # Only the last element (distinct PR creators + issue participants)
        # goes into the report.
        *_, gh_participants = get_pr_creators_and_issue_participants(org, name)

        pypi_name = project.get("pypi_name")
        pypi_downloads = get_pypi_month_download_count(pypi_name) if pypi_name else None

        ghcr_downloads = (
            get_ghcr_total_downloads(org, name) if project.get("ghcr") else None
        )

        dh_org = project.get("dockerhub_org")
        dh_name = project.get("dockerhub_name")
        dh_pull_count = get_dh_pull_count(dh_org, dh_name) if dh_org and dh_name else None

        results.append(
            {
                "name": project["name"],
                "github_url": gh_stats["html_url"],
                "gh_stars": gh_stats["stars"],
                "gh_forks": gh_stats["forks"],
                # "gh_contributors": gh_contributors,
                "gh_participants": gh_participants,
                "pypi_name": pypi_name,
                "last_month_pypi_downloads": pypi_downloads,
                "dh_pull_count": dh_pull_count,
                "total_ghcr_downloads": ghcr_downloads,
            }
        )

    if not results:
        # The header is derived from the first row, so there is nothing to
        # write for an empty project list (results[0] would raise IndexError).
        return

    out_path = Path(f"lib_stats_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
    # newline="" is required by the csv module; without it, rows are separated
    # by blank lines on platforms that translate "\n" to "\r\n".
    with out_path.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(results[0]))
        writer.writeheader()
        writer.writerows(results)
if __name__ == "__main__":
    # Run the collection only when executed as a script and hand main()'s
    # return value to the interpreter as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment