Created
November 14, 2025 15:19
-
-
Save samber/a67a4c404d80c3a9f6834f5f62ef3d23 to your computer and use it in GitHub Desktop.
Extract repository stargazers and their full profile information
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import os | |
| import sys | |
| import csv | |
| import time | |
| from datetime import datetime, timezone | |
| import requests | |
# Base URL of the GitHub REST API; endpoint paths are appended to it.
GITHUB_API = "https://api.github.com"
def github_get(url, token=None, timeout=30):
    """Fetch *url* from the GitHub API and return the decoded JSON body.

    The request is sent with the ``application/vnd.github.v3.star+json``
    media type. When GitHub answers with a rate-limit response, the call
    sleeps for the advertised delay and then repeats the request.

    Args:
        url: Absolute URL to request.
        token: Optional access token, sent in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The parsed JSON response body.

    Raises:
        requests.HTTPError: For an error response that is not a rate-limit
            signal.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {"Accept": "application/vnd.github.v3.star+json"}
    if token:
        headers["Authorization"] = f"token {token}"

    # Retry in a loop rather than recursively: a long-lived limit must not be
    # able to grow the call stack.
    while True:
        resp = requests.get(url, headers=headers, timeout=timeout)
        if resp.status_code not in (403, 429):
            break

        retry_after = resp.headers.get("Retry-After", "")
        if retry_after.isdigit():
            # Secondary rate limit: the server says how long to back off.
            wait_for = int(retry_after) + 1
        elif resp.headers.get("X-RateLimit-Remaining") == "0":
            # Primary rate limit: wait until the quota window resets.
            reset = int(resp.headers.get("X-RateLimit-Reset", 0))
            wait_for = max(0, reset - int(time.time())) + 1
        else:
            # A genuine 403/429 unrelated to rate limiting; report it below.
            break

        print(f"Rate limit hit. Waiting {wait_for}s...")
        time.sleep(wait_for)

    resp.raise_for_status()
    return resp.json()
def iter_stargazers(owner, repo, token=None):
    """Yield ``(login, user_id, starred_at)`` for each stargazer of a repo.

    Requests the repository's stargazers endpoint 100 entries per page,
    starting at page 1, and stops at the first empty page.

    Args:
        owner: Repository owner (user or organisation name).
        repo: Repository name.
        token: Optional access token forwarded to ``github_get``.

    Yields:
        Tuples of the user's login, the user's id, and the ``starred_at``
        value of the entry (``None`` when the entry has no such key).
    """
    page = 1
    while True:
        url = f"{GITHUB_API}/repos/{owner}/{repo}/stargazers?per_page=100&page={page}"
        data = github_get(url, token)
        if not data:
            break
        for item in data:
            user = item.get("user") or {}
            login = user.get("login")
            if not login:
                # An entry without a user cannot be looked up later; skip it
                # instead of raising KeyError and aborting the whole listing.
                continue
            yield login, user.get("id"), item.get("starred_at")
        page += 1
def get_user_details(login, token=None):
    """Fetch the profile of *login* and return a fixed subset of its fields.

    Args:
        login: GitHub user login to look up.
        token: Optional access token forwarded to ``github_get``.

    Returns:
        A dict holding the selected profile fields; a field missing from the
        response is mapped to ``None``.
    """
    profile = github_get(f"{GITHUB_API}/users/{login}", token)
    wanted = (
        "name",
        "email",
        "company",
        "location",
        "bio",
        "twitter_username",
        "blog",
        "followers",
        "following",
        "public_repos",
        "public_gists",
    )
    return {field: profile.get(field) for field in wanted}
def main():
    """Export a repository's stargazers and their profile details to CSV.

    Expects exactly two command-line arguments, ``owner`` and ``repo``, and
    reads an optional access token from the ``GITHUB_TOKEN`` environment
    variable. One row per stargazer is written to
    ``{owner}-{repo}-stargazers-details.csv`` in the current directory.
    A stargazer whose profile cannot be fetched is reported and skipped.
    """
    if len(sys.argv) != 3:
        print(
            "Usage: python get_stargazers_with_company_country.py owner repo",
            file=sys.stderr,
        )
        sys.exit(1)

    owner, repo = sys.argv[1], sys.argv[2]
    token = os.environ.get("GITHUB_TOKEN")

    # One UTC timestamp, second precision with a "Z" suffix, shared by all rows.
    now = (
        datetime.now(timezone.utc)
        .replace(microsecond=0)
        .isoformat()
        .replace("+00:00", "Z")
    )

    # NOTE: "scrapped_at" is kept as spelled; it is the emitted column name.
    fieldnames = [
        "repository", "user_id", "login", "name", "email", "company",
        "location", "bio", "twitter", "blog", "followers", "following",
        "public_repos", "public_gists", "starred_at", "scrapped_at",
        "updated_at",
    ]

    outname = f"{owner}-{repo}-stargazers-details.csv"
    with open(outname, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for login, user_id, starred_at in iter_stargazers(owner, repo, token):
            # Only the profile fetch is best-effort; a failure while writing
            # the CSV should surface rather than be reported as a fetch error.
            try:
                details = get_user_details(login, token)
            except Exception as e:
                print(f"Error fetching {login}: {e}", file=sys.stderr)
                continue
            finally:
                time.sleep(0.3)  # Be polite to GitHub API, even after a failure

            writer.writerow({
                "repository": f"{owner}/{repo}",
                "user_id": user_id,
                "login": login,
                "name": details["name"],
                "email": details["email"],
                "company": details["company"],
                "location": details["location"],
                "bio": details["bio"],
                "twitter": details["twitter_username"],
                "blog": details["blog"],
                "followers": details["followers"],
                "following": details["following"],
                "public_repos": details["public_repos"],
                "public_gists": details["public_gists"],
                "starred_at": starred_at,
                "scrapped_at": now,
                "updated_at": now,
            })
            print(f"✔ {login}")

    print(f"\nSaved stargazer details to {outname}")


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment