Source: public GitHub Gist by @vanjikumaran (created February 17, 2026),
gist id bbfa6ed84f1de683aacc88beaf86331f — "Search for GithubAction Template".
#!/usr/bin/env python3
"""
Fetch all GitHub Actions workflows that reference a given action template.
Supports:
- Public GitHub actions (e.g. "actions/checkout@v4")
- Reusable workflow refs (e.g. "my-org/my-repo/.github/workflows/ci.yml@main")
Usage:
python fetch_actions_by_template.py --template "actions/setup-python" \
[--token <GITHUB_TOKEN>] [--org <org>] [--repo <owner/repo>] [--output results.json]
Setup:
pip install requests
export GITHUB_TOKEN="ghp_your_token_here" # recommended
How to run:
# Search public GitHub for any workflow using actions/setup-python
python fetch_actions_by_template.py --template "actions/setup-python"
# Restrict search to a specific GitHub organisation
python fetch_actions_by_template.py --template "actions/setup-python" --org my-org
# Scan a single repository only
python fetch_actions_by_template.py --template "actions/checkout@v4" --repo owner/repo
# Find reusable workflow consumers
python fetch_actions_by_template.py \
--template "my-org/shared-workflows/.github/workflows/build.yml@main"
# Save results to JSON
python fetch_actions_by_template.py --template "actions/setup-python" --output results.json
"""
import argparse
import base64
import json
import os
import sys
import time
from dataclasses import dataclass, field, asdict
from typing import Optional

import requests
# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------
@dataclass
class WorkflowMatch:
    """One workflow file confirmed (or strongly suspected) to use the template."""

    repo_full_name: str  # "owner/repo"
    workflow_file: str  # path inside the repo, e.g. ".github/workflows/ci.yml"
    workflow_url: str  # HTML URL to the file on GitHub
    raw_url: str  # Raw download URL (empty string for single-repo scans)
    jobs: list[str] = field(default_factory=list)  # job ids that use the template
    steps: list[str] = field(default_factory=list)  # step names that use the template
# ---------------------------------------------------------------------------
# GitHub API helpers
# ---------------------------------------------------------------------------
class GitHubClient:
    """Thin wrapper around the GitHub REST API with rate-limit-aware GETs."""

    BASE = "https://api.github.com"

    def __init__(self, token: Optional[str] = None):
        self.session = requests.Session()
        self.session.headers.update({
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        })
        if token:
            self.session.headers["Authorization"] = f"Bearer {token}"

    # ------------------------------------------------------------------
    # Low-level request with rate-limit handling
    # ------------------------------------------------------------------
    def _get(self, url: str, params: dict | None = None) -> dict | list:
        """GET *url* and return the decoded JSON payload.

        Sleeps and retries on rate limits.  GitHub signals the primary
        rate limit with 403 + "rate limit" body text and the secondary
        (abuse) limit with 429 + a Retry-After header; the original code
        only handled the 403 case and ignored Retry-After entirely.
        A 422 from the search API is treated as an empty result set.
        """
        while True:
            resp = self.session.get(url, params=params)
            rate_limited = resp.status_code == 429 or (
                resp.status_code == 403 and "rate limit" in resp.text.lower()
            )
            if rate_limited:
                retry_after = resp.headers.get("Retry-After")
                if retry_after is not None:
                    # Secondary rate limit: GitHub tells us exactly how long.
                    wait = int(retry_after) + 1
                else:
                    # Primary rate limit: wait until the window resets.
                    reset = int(resp.headers.get("X-RateLimit-Reset", time.time() + 60))
                    wait = max(reset - int(time.time()), 1) + 2
                print(f" [rate-limit] sleeping {wait}s …", flush=True)
                time.sleep(wait)
                continue
            if resp.status_code == 422:
                # Search index not available for this query
                return {"items": [], "total_count": 0}
            resp.raise_for_status()
            return resp.json()

    # ------------------------------------------------------------------
    # Paginate through all pages of a search result
    # ------------------------------------------------------------------
    def _search_code_all(self, query: str) -> list[dict]:
        """Return every code-search item for *query*, following pagination.

        NOTE(review): the code-search API returns at most 1000 results
        per query, so very popular templates will be truncated.
        """
        url = f"{self.BASE}/search/code"
        page, per_page = 1, 100
        results: list[dict] = []
        while True:
            data = self._get(url, params={"q": query, "per_page": per_page, "page": page})
            items = data.get("items", [])
            results.extend(items)
            print(f" fetched {len(results)} / {data.get('total_count', '?')} code matches …", flush=True)
            if len(items) < per_page:
                break
            page += 1
            # GitHub search API: max 10 requests/min for authenticated users
            time.sleep(6)
        return results

    # ------------------------------------------------------------------
    # Fetch raw file content
    # ------------------------------------------------------------------
    def get_raw(self, raw_url: str) -> str:
        """Download *raw_url* and return the body as text."""
        resp = self.session.get(raw_url)
        resp.raise_for_status()
        return resp.text

    # ------------------------------------------------------------------
    # List workflow files in a single repo
    # ------------------------------------------------------------------
    def list_repo_workflows(self, owner: str, repo: str) -> list[dict]:
        """Return workflow file metadata from the GitHub Actions API."""
        url = f"{self.BASE}/repos/{owner}/{repo}/actions/workflows"
        per_page = 100
        page = 1
        workflows: list[dict] = []
        while True:
            data = self._get(url, params={"per_page": per_page, "page": page})
            batch = data.get("workflows", [])
            workflows.extend(batch)
            if len(batch) < per_page:
                break
            page += 1
        return workflows

    # ------------------------------------------------------------------
    # Fetch raw content of a file via the Contents API
    # ------------------------------------------------------------------
    def get_file_content(self, owner: str, repo: str, path: str) -> str:
        """Return the decoded text of *path* in *owner*/*repo*.

        Raises ValueError when the Contents API response is not a
        base64-encoded file (e.g. the path is a directory).
        """
        url = f"{self.BASE}/repos/{owner}/{repo}/contents/{path}"
        data = self._get(url)
        if isinstance(data, dict) and data.get("encoding") == "base64":
            return base64.b64decode(data["content"]).decode("utf-8", errors="replace")
        raise ValueError(f"Unexpected response for {url}")
# ---------------------------------------------------------------------------
# YAML-aware template matching (no heavy dependency – pure text)
# ---------------------------------------------------------------------------
def _normalize_template(template: str) -> str:
"""Strip version tag for fuzzy matching, keep it for exact search too."""
return template.split("@")[0].strip()
def find_usages_in_yaml(content: str, template: str) -> tuple[list[str], list[str]]:
    """
    Parse the workflow YAML text and find:
    - job ids whose `uses:` field references the template (reusable workflows)
    - step names whose `uses:` field references the template (actions)

    Returns (matching_jobs, matching_steps).

    Simple line-by-line parsing is used to avoid a hard PyYAML dependency.
    It assumes the conventional 2-space indentation (job ids at indent 2,
    job keys at 4, step dashes at 6, step keys at 8+) — files with other
    indentation will not be matched.

    BUG FIX vs. the original: steps written in the compact form
    ``- uses: actions/x@v1`` (the `uses:` on the dash line itself, indent 6)
    were never matched, because only `uses:` lines at indent >= 8 were
    examined.  These are now recorded as unnamed steps.
    """
    # Inline of _normalize_template(): drop any "@version" suffix.
    base = template.split("@")[0].strip()
    matched_jobs: list[str] = []
    matched_steps: list[str] = []
    current_job: Optional[str] = None
    current_step: Optional[str] = None
    in_jobs_block = False

    def _value_after(text: str, prefix: str) -> str:
        # Strip the key prefix plus surrounding whitespace and quotes.
        return text[len(prefix):].strip().strip('"').strip("'")

    def _matches(value: str) -> bool:
        # Fuzzy in both directions: template base inside the value, or the
        # (possibly shorter) value inside the full template string.
        return base in value or value in template

    for i, raw_line in enumerate(content.splitlines()):
        stripped = raw_line.strip()
        indent = len(raw_line) - len(raw_line.lstrip())
        # Detect top-level "jobs:" block
        if raw_line.startswith("jobs:"):
            in_jobs_block = True
            continue
        if not in_jobs_block:
            continue
        # Job-level key: indent == 2
        if indent == 2 and stripped.endswith(":") and not stripped.startswith("-"):
            current_job = stripped[:-1]
            current_step = None
            continue
        # Named step
        if indent == 6 and stripped.startswith("- name:"):
            current_step = _value_after(stripped, "- name:")
            continue
        # Unnamed step (a dash line without a name key)
        if indent == 6 and stripped.startswith("-") and "name:" not in stripped:
            current_step = f"(unnamed step ~line {i+1})"
            # The compact "- uses: ..." form carries the reference here.
            if stripped.startswith("- uses:"):
                value = _value_after(stripped, "- uses:")
                if _matches(value) and current_step not in matched_steps:
                    matched_steps.append(current_step)
            continue
        # Check for `uses:` at job level (reusable workflow)
        if indent == 4 and stripped.startswith("uses:"):
            value = _value_after(stripped, "uses:")
            if _matches(value) and current_job and current_job not in matched_jobs:
                matched_jobs.append(current_job)
        # Check for `uses:` at step level (action)
        if indent >= 8 and stripped.startswith("uses:"):
            value = _value_after(stripped, "uses:")
            if _matches(value):
                label = current_step or f"(unnamed step ~line {i+1})"
                if label not in matched_steps:
                    matched_steps.append(label)
    return matched_jobs, matched_steps
# ---------------------------------------------------------------------------
# Main search strategies
# ---------------------------------------------------------------------------
def search_via_code_search(
    client: GitHubClient,
    template: str,
    org: Optional[str] = None,
) -> list[WorkflowMatch]:
    """
    Use GitHub Code Search to find workflow files that reference the template.

    Works across all public repos (or within *org* when given).  Each search
    hit is fetched through the Contents API and verified with
    find_usages_in_yaml before being reported.

    (Cleanup vs. the original: the dead locals `raw_url` and `download_url`,
    which were assigned but never used, have been removed.)
    """
    base = _normalize_template(template)
    # Code search matches whole tokens, so search for the last path segment
    # (e.g. "setup-python"), which appears literally in the YAML.
    search_token = base.split("/")[-1] if "/" in base else base
    query_parts = [f'"{search_token}"', "path:.github/workflows", "language:YAML"]
    if org:
        query_parts.append(f"org:{org}")
    query = " ".join(query_parts)
    print(f"[code-search] query: {query}", flush=True)

    matches: list[WorkflowMatch] = []
    for item in client._search_code_all(query):
        repo_name = item.get("repository", {}).get("full_name", "?")
        file_path = item.get("path", "")
        # Fetch raw content and verify actual usage
        try:
            owner, repo_short = repo_name.split("/", 1)
            content = client.get_file_content(owner, repo_short, file_path)
        except Exception as exc:
            print(f" [warn] could not fetch {repo_name}/{file_path}: {exc}", flush=True)
            continue
        jobs, steps = find_usages_in_yaml(content, template)
        # Only include if we confirmed actual usage (or if search hit was strong)
        if jobs or steps or base in content:
            matches.append(WorkflowMatch(
                repo_full_name=repo_name,
                workflow_file=file_path,
                workflow_url=item.get("html_url", ""),
                # NOTE(review): search items carry no direct raw link, so the
                # API URL is stored here, as in the original.
                raw_url=item.get("url", ""),
                jobs=jobs,
                steps=steps,
            ))
    return matches
def search_within_repo(
    client: GitHubClient,
    template: str,
    owner: str,
    repo: str,
) -> list[WorkflowMatch]:
    """
    Scan all workflow files in a specific repository for the template.

    Workflow files are listed via the Actions API, fetched through the
    Contents API, and checked with find_usages_in_yaml.  Files that fail
    to download are skipped with a warning.
    """
    print(f"[repo-scan] scanning {owner}/{repo} …", flush=True)
    matches: list[WorkflowMatch] = []
    try:
        workflows = client.list_repo_workflows(owner, repo)
    except requests.HTTPError as exc:
        print(f" [warn] could not list workflows for {owner}/{repo}: {exc}", flush=True)
        return matches
    # Hoisted out of the loop: the normalized base is loop-invariant
    # (the original recomputed it for every workflow file).
    base = _normalize_template(template)
    for wf in workflows:
        path = wf.get("path", "")
        html_url = wf.get("html_url", "")
        try:
            content = client.get_file_content(owner, repo, path)
        except Exception as exc:
            print(f" [warn] {path}: {exc}", flush=True)
            continue
        jobs, steps = find_usages_in_yaml(content, template)
        if jobs or steps or base in content:
            matches.append(WorkflowMatch(
                repo_full_name=f"{owner}/{repo}",
                workflow_file=path,
                workflow_url=html_url,
                raw_url="",
                jobs=jobs,
                steps=steps,
            ))
    return matches
# ---------------------------------------------------------------------------
# Output helpers
# ---------------------------------------------------------------------------
def print_results(matches: list[WorkflowMatch], template: str) -> None:
    """Pretty-print the match list to stdout with a banner and a total."""
    banner = "=" * 70
    print()
    print(banner)
    print(f" Results: workflows using '{template}'")
    print(banner)
    if not matches:
        print(" No matches found.")
        return
    for hit in matches:
        print(f"\n 📁 {hit.repo_full_name}")
        print(f" File : {hit.workflow_file}")
        print(f" URL : {hit.workflow_url}")
        if hit.jobs:
            print(f" Jobs : {', '.join(hit.jobs)}")
        if hit.steps:
            print(f" Steps : {', '.join(hit.steps)}")
    print()
    print(f" Total matches: {len(matches)}")
    print(banner)
def save_results(matches: list[WorkflowMatch], path: str) -> None:
    """Serialize the matches to *path* as pretty-printed JSON."""
    payload = [asdict(m) for m in matches]
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(json.dumps(payload, indent=2, ensure_ascii=False))
    print(f"\n[output] Saved {len(matches)} results to '{path}'")
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def build_parser() -> argparse.ArgumentParser:
    """Construct and return the command-line argument parser."""
    epilog_text = """
Examples:
# Find all public repos using actions/setup-python
python fetch_actions_by_template.py --template "actions/setup-python"
# Limit search to a specific GitHub org
python fetch_actions_by_template.py --template "actions/setup-python" --org my-org
# Scan a single repository
python fetch_actions_by_template.py --template "actions/checkout@v4" --repo owner/repo
# Reusable workflow reference
python fetch_actions_by_template.py \\
--template "my-org/shared-workflows/.github/workflows/build.yml@main"
# Save results to JSON
python fetch_actions_by_template.py --template "actions/setup-python" \\
--output results.json
"""
    parser = argparse.ArgumentParser(
        description="Find GitHub Actions workflows that use a given action/template.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog_text,
    )
    parser.add_argument(
        "--template", "-t",
        required=True,
        help="Action or reusable workflow reference to search for "
             '(e.g. "actions/checkout@v4" or "org/repo/.github/workflows/ci.yml@main").',
    )
    parser.add_argument(
        "--token",
        default=os.environ.get("GITHUB_TOKEN"),
        help="GitHub personal access token. Falls back to $GITHUB_TOKEN env var. "
             "Required for private repos and higher rate limits.",
    )
    parser.add_argument(
        "--org",
        default=None,
        help="Restrict code search to this GitHub organisation.",
    )
    parser.add_argument(
        "--repo",
        default=None,
        metavar="OWNER/REPO",
        help="Scan only this specific repository (owner/repo).",
    )
    parser.add_argument(
        "--output", "-o",
        default=None,
        metavar="FILE",
        help="Write results to a JSON file.",
    )
    return parser
def main() -> None:
    """CLI entry point: parse arguments, run the chosen search, report."""
    cli = build_parser()
    args = cli.parse_args()
    if not args.token:
        print(
            "[warn] No GitHub token provided. "
            "Unauthenticated requests are limited to 10 searches/min and 60 API calls/hr.\n"
            " Pass --token or set the GITHUB_TOKEN environment variable.\n",
            file=sys.stderr,
        )
    client = GitHubClient(token=args.token)
    template = args.template.strip()
    if args.repo:
        # Single-repo scan
        if "/" not in args.repo:
            cli.error("--repo must be in 'owner/repo' format.")
        owner, repo = args.repo.split("/", 1)
        matches = search_within_repo(client, template, owner, repo)
    else:
        # Cross-repo code search (public, or within an org)
        matches = search_via_code_search(client, template, org=args.org)
    print_results(matches, template)
    if args.output:
        save_results(matches, args.output)


if __name__ == "__main__":
    main()
Comments for this script are hosted on the original GitHub Gist.