Skip to content

Instantly share code, notes, and snippets.

@mr-karan
Last active March 8, 2026 10:04
Show Gist options
  • Select an option

  • Save mr-karan/f1426cfb04752ad3ae9f1daa1fa3cb6c to your computer and use it in GitHub Desktop.

Select an option

Save mr-karan/f1426cfb04752ad3ae9f1daa1fa3cb6c to your computer and use it in GitHub Desktop.
Sync GitHub issues, labels, milestones & releases to a Gitea mirror. Idempotent, run on cron.
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = ["requests"]
# ///
"""
Sync GitHub issues, labels, milestones, and releases to a Gitea mirror.
Gitea's mirror mode only syncs git objects (commits, branches, tags).
This script fills the gap by syncing metadata via APIs. Idempotent —
safe to run repeatedly (skips already-synced items).
Usage:
# Sync a single repo (uv auto-installs dependencies)
uv run gitea_mirror_sync.py --gh-repo owner/repo \
--gh-token ghp_xxx --gitea-token xxx
# Or use env vars
export GITHUB_TOKEN=ghp_xxx
export GITEA_TOKEN=xxx
export GITEA_URL=https://your-gitea.example.com
uv run gitea_mirror_sync.py --gh-repo owner/repo
# Dry run (show what would be synced)
uv run gitea_mirror_sync.py --gh-repo owner/repo --dry-run
What it syncs:
- Labels (name, color, description)
- Milestones (title, description, state, due date)
- Issues (title, body, labels, milestone, comments, open/closed state)
- Releases (tag, name, body, draft/prerelease flags)
Issues and comments include attribution headers linking back to the
original GitHub author and URL.
"""
import argparse
import os
import sys
import time
import requests
class GitHubClient:
def __init__(self, token: str):
self.session = requests.Session()
self.session.headers.update({
"Authorization": f"Bearer {token}",
"Accept": "application/vnd.github.v3+json",
})
self.base = "https://api.github.com"
def _paginate(self, url: str, params: dict | None = None) -> list:
results = []
params = params or {}
params.setdefault("per_page", 100)
page = 1
while True:
params["page"] = page
resp = self.session.get(url, params=params)
resp.raise_for_status()
data = resp.json()
if not data:
break
results.extend(data)
if len(data) < params["per_page"]:
break
page += 1
time.sleep(0.2)
return results
def get_labels(self, repo: str) -> list:
return self._paginate(f"{self.base}/repos/{repo}/labels")
def get_milestones(self, repo: str) -> list:
return self._paginate(f"{self.base}/repos/{repo}/milestones", {"state": "all"})
def get_issues(self, repo: str) -> list:
"""Get all issues (not PRs)."""
all_items = self._paginate(f"{self.base}/repos/{repo}/issues", {
"state": "all", "sort": "created", "direction": "asc",
})
# GitHub API returns PRs mixed with issues — filter them out
return [i for i in all_items if "pull_request" not in i]
def get_issue_comments(self, repo: str, issue_number: int) -> list:
return self._paginate(f"{self.base}/repos/{repo}/issues/{issue_number}/comments")
def get_releases(self, repo: str) -> list:
return self._paginate(f"{self.base}/repos/{repo}/releases")
class GiteaClient:
def __init__(self, url: str, token: str):
self.session = requests.Session()
self.session.headers.update({
"Authorization": f"token {token}",
"Content-Type": "application/json",
})
self.base = f"{url.rstrip('/')}/api/v1"
def _paginate(self, url: str, params: dict | None = None) -> list:
results = []
params = params or {}
params.setdefault("limit", 50)
page = 1
while True:
params["page"] = page
resp = self.session.get(url, params=params)
resp.raise_for_status()
data = resp.json()
if not data:
break
results.extend(data)
if len(data) < params["limit"]:
break
page += 1
return results
# ── Labels ──
def get_labels(self, owner: str, repo: str) -> list:
return self._paginate(f"{self.base}/repos/{owner}/{repo}/labels")
def create_label(self, owner: str, repo: str, name: str, color: str, description: str = "") -> dict:
resp = self.session.post(f"{self.base}/repos/{owner}/{repo}/labels", json={
"name": name,
"color": f"#{color.lstrip('#')}",
"description": description,
})
resp.raise_for_status()
return resp.json()
# ── Milestones ──
def get_milestones(self, owner: str, repo: str) -> list:
return self._paginate(f"{self.base}/repos/{owner}/{repo}/milestones", {"state": "all"})
def create_milestone(self, owner: str, repo: str, data: dict) -> dict:
resp = self.session.post(f"{self.base}/repos/{owner}/{repo}/milestones", json=data)
resp.raise_for_status()
return resp.json()
# ── Issues ──
def get_issues(self, owner: str, repo: str) -> list:
return self._paginate(f"{self.base}/repos/{owner}/{repo}/issues", {"state": "all", "type": "issues"})
def create_issue(self, owner: str, repo: str, data: dict) -> dict:
resp = self.session.post(f"{self.base}/repos/{owner}/{repo}/issues", json=data)
resp.raise_for_status()
return resp.json()
def create_comment(self, owner: str, repo: str, issue_index: int, body: str) -> dict:
resp = self.session.post(
f"{self.base}/repos/{owner}/{repo}/issues/{issue_index}/comments",
json={"body": body},
)
resp.raise_for_status()
return resp.json()
def close_issue(self, owner: str, repo: str, issue_index: int) -> dict:
resp = self.session.patch(
f"{self.base}/repos/{owner}/{repo}/issues/{issue_index}",
json={"state": "closed"},
)
resp.raise_for_status()
return resp.json()
# ── Releases ──
def get_releases(self, owner: str, repo: str) -> list:
return self._paginate(f"{self.base}/repos/{owner}/{repo}/releases")
def create_release(self, owner: str, repo: str, data: dict) -> dict:
resp = self.session.post(f"{self.base}/repos/{owner}/{repo}/releases", json=data)
resp.raise_for_status()
return resp.json()
def sync_labels(gh: GitHubClient, gt: GiteaClient, gh_repo: str, gt_owner: str, gt_repo: str, dry_run: bool) -> dict[str, int]:
    """Create on Gitea every GitHub label whose name is not there yet.

    Args:
        gh: Source client; ``get_labels(gh_repo)`` is called on it.
        gt: Target client; ``get_labels`` and ``create_label`` are called on it.
        gh_repo: GitHub repository as ``owner/name``.
        gt_owner: Owner of the Gitea repository.
        gt_repo: Name of the Gitea repository.
        dry_run: When true, only print what would be created.

    Returns:
        Mapping of label name → Gitea label ID. In a dry run it contains only
        the labels that already exist on Gitea.
    """
    gh_labels = gh.get_labels(gh_repo)
    gt_labels = gt.get_labels(gt_owner, gt_repo)
    existing_names = {entry["name"] for entry in gt_labels}
    label_map: dict[str, int] = {entry["name"]: entry["id"] for entry in gt_labels}
    created = 0
    for label in gh_labels:
        name = label["name"]
        if name in existing_names:
            continue
        if dry_run:
            print(f" [DRY] Would create label: {name}")
        else:
            # GitHub sends an explicit null for labels without a description,
            # so a .get() default is not enough to guarantee a string.
            description = label.get("description") or ""
            result = gt.create_label(gt_owner, gt_repo, name, label["color"], description)
            label_map[name] = result["id"]
        created += 1
    print(f" Labels: {len(gh_labels)} on GitHub, {len(gt_labels)} on Gitea, {created} created")
    return label_map
def sync_milestones(gh: GitHubClient, gt: GiteaClient, gh_repo: str, gt_owner: str, gt_repo: str, dry_run: bool) -> dict[str, int]:
    """Copy GitHub milestones that Gitea lacks, matching them by title.

    Title, description, state and (when set) the due date are carried over.
    In a dry run nothing is created; candidates are only printed.

    Returns:
        Mapping of milestone title → Gitea milestone ID, covering milestones
        that already existed plus any created by this call.
    """
    upstream = gh.get_milestones(gh_repo)
    mirrored = gt.get_milestones(gt_owner, gt_repo)

    # Titles present before this run; membership is checked against this
    # snapshot, not against the growing result mapping.
    known_titles = {entry["title"]: entry["id"] for entry in mirrored}
    ms_map: dict[str, int] = dict(known_titles)

    created = 0
    for ms in upstream:
        title = ms["title"]
        if title in known_titles:
            continue

        payload = {
            "title": title,
            "description": ms.get("description") or "",
            "state": ms["state"],
        }
        due = ms.get("due_on")
        if due:
            payload["due_on"] = due

        created += 1
        if dry_run:
            print(f" [DRY] Would create milestone: {title}")
            continue
        ms_map[title] = gt.create_milestone(gt_owner, gt_repo, payload)["id"]

    print(f" Milestones: {len(upstream)} on GitHub, {len(mirrored)} on Gitea, {created} created")
    return ms_map
def _gh_login(user: dict | None) -> str:
    """Return the login of a GitHub user object, or ``ghost`` when it is null."""
    return (user or {}).get("login") or "ghost"


def _synced_issue_number(body: str | None) -> int | None:
    """Extract the GitHub issue number from a mirrored issue's attribution header.

    Returns ``None`` when *body* does not start with the header written by
    ``sync_issues`` or the linked URL does not end in a number.
    """
    lines = (body or "").splitlines()
    if not lines or not lines[0].startswith("*Originally posted by "):
        return None
    _, marker, tail = lines[0].rstrip().rpartition("[GitHub](")
    if not marker or not tail.endswith(")*"):
        return None
    number = tail[:-2].rstrip("/").rpartition("/")[2]
    return int(number) if number.isdecimal() else None


def _build_issue_payload(issue: dict, label_map: dict[str, int], ms_map: dict[str, int]) -> dict:
    """Build the Gitea create-issue payload for one GitHub issue."""
    # Labels and milestones unknown to Gitea are silently left out.
    label_ids = [
        label_map[label["name"]]
        for label in issue.get("labels", [])
        if label["name"] in label_map
    ]
    milestone = issue.get("milestone")
    milestone_id = ms_map.get(milestone["title"]) if milestone else None

    gh_user = _gh_login(issue.get("user"))
    gh_url = issue["html_url"]
    gh_date = issue["created_at"][:10]  # keep only YYYY-MM-DD
    body = f"*Originally posted by [@{gh_user}](https://github.com/{gh_user}) on {gh_date} — [GitHub]({gh_url})*\n\n---\n\n"
    body += issue.get("body") or ""

    data = {
        "title": issue["title"],
        "body": body,
        "labels": label_ids,
    }
    if milestone_id:
        data["milestone"] = milestone_id
    return data


def _build_comment_body(comment: dict) -> str:
    """Build the Gitea comment text (attribution header + original body)."""
    c_user = _gh_login(comment.get("user"))
    c_date = comment["created_at"][:10]  # keep only YYYY-MM-DD
    header = f"*Comment by [@{c_user}](https://github.com/{c_user}) on {c_date}*\n\n---\n\n"
    return header + (comment.get("body") or "")


def sync_issues(gh: GitHubClient, gt: GiteaClient, gh_repo: str, gt_owner: str, gt_repo: str,
                label_map: dict[str, int], ms_map: dict[str, int], dry_run: bool,
                request_delay: float = 0.3):
    """Sync issues and their comments from GitHub to Gitea.

    An issue counts as already synced when a Gitea issue carries an
    attribution header linking to the same GitHub issue number. Matching on
    the number rather than the title means that distinct issues sharing a
    title are all mirrored, and an issue renamed on GitHub is not mirrored a
    second time. Gitea issues without such a header still block a GitHub
    issue of the same title, so hand-created counterparts are not duplicated.

    Args:
        gh: Source client (``get_issues``, ``get_issue_comments``).
        gt: Target client (``get_issues``, ``create_issue``,
            ``create_comment``, ``close_issue``).
        gh_repo: GitHub repository as ``owner/name``.
        gt_owner: Owner of the Gitea repository.
        gt_repo: Name of the Gitea repository.
        label_map: Label name → Gitea label ID.
        ms_map: Milestone title → Gitea milestone ID.
        dry_run: When true, only print what would be created.
        request_delay: Seconds to sleep after each created issue.
    """
    gh_issues = gh.get_issues(gh_repo)
    gt_issues = gt.get_issues(gt_owner, gt_repo)

    synced_numbers: set[int] = set()
    unmarked_titles: set[str] = set()
    for existing in gt_issues:
        number = _synced_issue_number(existing.get("body"))
        if number is None:
            unmarked_titles.add(existing["title"])
        else:
            synced_numbers.add(number)

    created = 0
    skipped = 0
    for issue in gh_issues:
        if issue["number"] in synced_numbers or issue["title"] in unmarked_titles:
            skipped += 1
            continue

        data = _build_issue_payload(issue, label_map, ms_map)
        if dry_run:
            print(f" [DRY] Would create issue: #{issue['number']} {issue['title']}")
            continue

        result = gt.create_issue(gt_owner, gt_repo, data)
        gt_index = result["number"]
        created += 1
        synced_numbers.add(issue["number"])

        for comment in gh.get_issue_comments(gh_repo, issue["number"]):
            gt.create_comment(gt_owner, gt_repo, gt_index, _build_comment_body(comment))

        # Close last so the comments are posted while the issue is still open.
        if issue["state"] == "closed":
            gt.close_issue(gt_owner, gt_repo, gt_index)
        time.sleep(request_delay)  # Rate limit
    print(f" Issues: {len(gh_issues)} on GitHub, {len(gt_issues)} on Gitea, {created} created, {skipped} skipped (already exist)")
def sync_releases(gh: GitHubClient, gt: GiteaClient, gh_repo: str, gt_owner: str, gt_repo: str, dry_run: bool):
    """Mirror release metadata from GitHub to Gitea; binary assets are not copied.

    A release is created on Gitea when no Gitea release has the same tag name.
    Tag, target commitish, name (falling back to the tag), body and the
    draft/prerelease flags are carried over. In a dry run the candidates are
    only printed.
    """
    upstream = gh.get_releases(gh_repo)
    mirrored = gt.get_releases(gt_owner, gt_repo)
    known_tags = {existing["tag_name"] for existing in mirrored}

    created = 0
    for rel in upstream:
        tag = rel["tag_name"]
        if tag in known_tags:
            continue

        payload = {
            "tag_name": tag,
            "target_commitish": rel.get("target_commitish", ""),
            "name": rel.get("name") or tag,
            "body": rel.get("body") or "",
            "draft": rel.get("draft", False),
            "prerelease": rel.get("prerelease", False),
        }
        if not dry_run:
            gt.create_release(gt_owner, gt_repo, payload)
        else:
            print(f" [DRY] Would create release: {tag}")
        created += 1

    print(f" Releases: {len(upstream)} on GitHub, {len(mirrored)} on Gitea, {created} created")
def main():
parser = argparse.ArgumentParser(description="Sync GitHub metadata to Gitea mirror")
parser.add_argument("--gh-repo", required=True, help="GitHub repo (owner/name)")
parser.add_argument("--gitea-url", default=os.environ.get("GITEA_URL", "https://gitea.example.com"))
parser.add_argument("--gitea-owner", default=None, help="Gitea owner (default: same as GitHub)")
parser.add_argument("--gitea-repo", default=None, help="Gitea repo name (default: same as GitHub)")
parser.add_argument("--gh-token", default=os.environ.get("GITHUB_TOKEN", ""))
parser.add_argument("--gitea-token", default=os.environ.get("GITEA_TOKEN", ""))
parser.add_argument("--dry-run", action="store_true", help="Show what would be synced without doing it")
args = parser.parse_args()
gh_owner, gh_name = args.gh_repo.split("/")
gt_owner = args.gitea_owner or gh_owner
gt_repo = args.gitea_repo or gh_name
if not args.gh_token:
print("ERROR: GitHub token required. Set GITHUB_TOKEN env var or use --gh-token")
sys.exit(1)
if not args.gitea_token:
print("ERROR: Gitea token required. Set GITEA_TOKEN env var or use --gitea-token")
sys.exit(1)
gh = GitHubClient(args.gh_token)
gt = GiteaClient(args.gitea_url, args.gitea_token)
print(f"Syncing {args.gh_repo} → {args.gitea_url}/{gt_owner}/{gt_repo}")
if args.dry_run:
print(" (DRY RUN — no changes will be made)\n")
print()
print("1. Labels")
label_map = sync_labels(gh, gt, args.gh_repo, gt_owner, gt_repo, args.dry_run)
print("2. Milestones")
ms_map = sync_milestones(gh, gt, args.gh_repo, gt_owner, gt_repo, args.dry_run)
print("3. Issues")
sync_issues(gh, gt, args.gh_repo, gt_owner, gt_repo, label_map, ms_map, args.dry_run)
print("4. Releases")
sync_releases(gh, gt, args.gh_repo, gt_owner, gt_repo, args.dry_run)
print("\nDone.")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment