Last active
January 22, 2026 04:41
-
-
Save jsstevenson/2d82bab16f6ae537fbafe3d7ab50e833 to your computer and use it in GitHub Desktop.
Simple script that grabs a point-in-time snapshot of basic usage stats (GitHub stars/forks/participants, PyPI downloads, Docker Hub/GHCR pulls) for specific libraries of interest.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # /// script | |
| # dependencies = [ | |
| # "requests", | |
| # ] | |
| # /// | |
| import os | |
| from pathlib import Path | |
| from datetime import datetime | |
| import csv | |
| import requests | |
| projects = [ | |
| # specs | |
| { | |
| "name": "VRS", | |
| "github_org": "ga4gh", | |
| "github_repo": "vrs", | |
| }, | |
| { | |
| "name": "VA-Spec", | |
| "github_org": "ga4gh", | |
| "github_repo": "va-spec", | |
| }, | |
| { | |
| "name": "Cat-VRS", | |
| "github_org": "ga4gh", | |
| "github_repo": "cat-vrs", | |
| }, | |
| { | |
| "name": "Metaschema Processor", | |
| "github_org": "ga4gh", | |
| "github_repo": "gks-metaschema", | |
| "pypi_name": "ga4gh.gks.metaschema", | |
| }, | |
| # spec implementations | |
| { | |
| "name": "vrs-python", | |
| "github_org": "ga4gh", | |
| "github_repo": "vrs-python", | |
| "ghcr": False, | |
| "dockerhub_org": "ga4gh", | |
| "dockerhub_name": "vrs-python", | |
| "pypi_name": "ga4gh.vrs", | |
| }, | |
| { | |
| "name": "cat-vrs-python", | |
| "github_org": "ga4gh", | |
| "github_repo": "cat-vrs-python", | |
| "ghcr": False, | |
| "dockerhub_org": None, | |
| "dockerhub_name": None, | |
| "pypi_name": "ga4gh.cat-vrs", | |
| }, | |
| { | |
| "name": "va-spec-python", | |
| "github_org": "ga4gh", | |
| "github_repo": "va-spec-python", | |
| "ghcr": False, | |
| "dockerhub_org": None, | |
| "dockerhub_name": None, | |
| "pypi_name": "ga4gh.va-spec", | |
| }, | |
| # biocommons | |
| { | |
| "name": "seqrepo", | |
| "github_org": "biocommons", | |
| "github_repo": "biocommons.seqrepo", | |
| "ghcr": False, | |
| "dockerhub_org": "biocommons", | |
| "dockerhub_name": "seqrepo", | |
| "pypi_name": "biocommons.seqrepo", | |
| }, | |
| { | |
| "name": "hgvs", | |
| "github_org": "biocommons", | |
| "github_repo": "hgvs", | |
| "ghcr": False, | |
| "dockerhub_org": "biocommons", | |
| "dockerhub_name": "hgvs", | |
| "pypi_name": "hgvs", | |
| }, | |
| { | |
| "name": "bioutils", | |
| "github_org": "biocommons", | |
| "github_repo": "bioutils", | |
| "ghcr": False, | |
| "dockerhub_org": None, | |
| "dockerhub_name": None, | |
| "pypi_name": "bioutils", | |
| }, | |
| { | |
| "name": "anyvar", | |
| "github_org": "biocommons", | |
| "github_repo": "anyvar", | |
| "ghcr": True, | |
| "dockerhub_org": None, | |
| "dockerhub_name": None, | |
| "pypi_name": "biocommons.anyvar", | |
| }, | |
| { | |
| "name": "uta", | |
| "github_org": "biocommons", | |
| "github_repo": "uta", | |
| "ghcr": False, | |
| "dockerhub_org": "biocommons", | |
| "dockerhub_name": "uta", | |
| "pypi_name": None, | |
| }, | |
| ] | |
| GITHUB_API = "https://api.github.com" | |
| GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN") | |
| def gh_get(url: str, params: dict | None = None) -> requests.Response: | |
| headers = {"Accept": "application/vnd.github+json"} | |
| if GITHUB_TOKEN: | |
| headers["Authorization"] = f"Bearer {GITHUB_TOKEN}" | |
| r = requests.get(url, params=params, headers=headers) | |
| r.raise_for_status() | |
| return r | |
| def get_gh_repo_stats(org: str, repo: str) -> dict: | |
| """Fetch stars and forks for a GitHub repo.""" | |
| url = f"{GITHUB_API}/repos/{org}/{repo}" | |
| r = gh_get(url) | |
| data = r.json() | |
| return { | |
| "stars": data["stargazers_count"], | |
| "forks": data["forks_count"], | |
| "html_url": data["html_url"], | |
| } | |
| # def get_gh_contributor_count(org: str, repo: str) -> int: | |
| # url = f"{GITHUB_API}/repos/{org}/{repo}/contributors" | |
| # params = {"per_page": 100} | |
| # count = 0 | |
| # | |
| # while url: | |
| # r = gh_get(url, params) | |
| # contributors = r.json() | |
| # count += len(contributors) | |
| # | |
| # link = r.headers.get("Link") | |
| # if link and 'rel="next"' in link: | |
| # url = link.split(";")[0].strip("<>") | |
| # params = None | |
| # else: | |
| # url = None | |
| # | |
| # return count | |
| def _paginate(url: str, params: dict | None = None): | |
| seen = set() | |
| while url: | |
| if url in seen: | |
| raise RuntimeError(f"Pagination loop detected at {url}") | |
| seen.add(url) | |
| r = gh_get(url, params) | |
| data = r.json() | |
| if not isinstance(data, list): | |
| raise TypeError(f"Expected list response from {url}, got {type(data)}") | |
| yield from data | |
| url = r.links.get("next", {}).get("url") | |
| params = None | |
| def get_pr_creators_and_issue_participants(org: str, repo: str, state: str = "all"): | |
| pr_creators: set[str] = set() | |
| issue_creators: set[str] = set() | |
| issue_commenters: set[str] = set() | |
| # 1) Creators of issues and PRs (same endpoint, differentiate via pull_request key) | |
| issues_url = f"{GITHUB_API}/repos/{org}/{repo}/issues" | |
| for item in _paginate(issues_url, params={"state": state, "per_page": 100}): | |
| user = (item.get("user") or {}).get("login") | |
| if not user: | |
| continue | |
| if "pull_request" in item: | |
| pr_creators.add(user) | |
| else: | |
| issue_creators.add(user) | |
| # 2) Commenters (repo-wide). Includes comments on both issues and PRs. | |
| comments_url = f"{GITHUB_API}/repos/{org}/{repo}/issues/comments" | |
| for c in _paginate(comments_url, params={"per_page": 100}): | |
| user = (c.get("user") or {}).get("login") | |
| if user: | |
| issue_commenters.add(user) | |
| issue_participants = issue_creators | issue_commenters | |
| union_all = pr_creators | issue_participants | |
| return ( | |
| pr_creators, | |
| issue_participants, | |
| len(pr_creators), # unique PR creators | |
| len(issue_participants), # unique issue creators + discussants | |
| len(union_all), # unique PR creators + issue participants | |
| ) | |
| def get_pypi_month_download_count(package_name: str) -> int: | |
| url = f"https://pypistats.org/api/packages/{package_name}/recent" | |
| r = requests.get(url) | |
| r.raise_for_status() | |
| return r.json()["data"]["last_month"] | |
| def get_dh_pull_count(org: str, name: str) -> int: | |
| url = f"https://hub.docker.com/v2/repositories/{org}/{name}/" | |
| r = requests.get(url) | |
| r.raise_for_status() | |
| return r.json()["pull_count"] | |
| def get_ghcr_total_downloads(org: str, name: str) -> int: | |
| # not sure how long this will work, it relies on a 3rd party service | |
| url = f"https://ghcr-badge.elias.eu.org/api/{org}/{name}/{name}" | |
| r = requests.get(url) | |
| r.raise_for_status() | |
| return r.json()["downloadCountRaw"] | |
| def main(): | |
| results = [] | |
| for project in projects: | |
| org = project["github_org"] | |
| name = project["github_repo"] | |
| gh_stats = get_gh_repo_stats(org, name) | |
| # gh_contributors = get_gh_contributor_count(org, name) | |
| _, _, _, _, gh_participants = get_pr_creators_and_issue_participants(org, name) | |
| if project.get("pypi_name"): | |
| pypi_downloads = get_pypi_month_download_count(project["pypi_name"]) | |
| else: | |
| pypi_downloads = None | |
| if project.get("ghcr"): | |
| ghcr_downloads = get_ghcr_total_downloads(org, name) | |
| else: | |
| ghcr_downloads = None | |
| if project.get("dockerhub_org") and project.get("dockerhub_name"): | |
| dh_pull_count = get_dh_pull_count( | |
| project["dockerhub_org"], project["dockerhub_name"] | |
| ) | |
| else: | |
| dh_pull_count = None | |
| row = { | |
| "name": project["name"], | |
| "github_url": gh_stats["html_url"], | |
| "gh_stars": gh_stats["stars"], | |
| "gh_forks": gh_stats["forks"], | |
| # "gh_contributors": gh_contributors, | |
| "gh_participants": gh_participants, | |
| "pypi_name": project.get("pypi_name"), | |
| "last_month_pypi_downloads": pypi_downloads, | |
| "dh_pull_count": dh_pull_count, | |
| "total_ghcr_downloads": ghcr_downloads, | |
| } | |
| results.append(row) | |
| with Path(f"lib_stats_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv").open( | |
| "w" | |
| ) as f: | |
| writer = csv.DictWriter(f, fieldnames=results[0].keys()) | |
| writer.writeheader() | |
| writer.writerows(results) | |
| if __name__ == "__main__": | |
| raise SystemExit(main()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment