Created
February 17, 2026 22:03
-
-
Save vanjikumaran/bbfa6ed84f1de683aacc88beaf86331f to your computer and use it in GitHub Desktop.
Search for GithubAction Template
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Fetch all GitHub Actions workflows that reference a given action template. | |
| Supports: | |
| - Public GitHub actions (e.g. "actions/checkout@v4") | |
| - Reusable workflow refs (e.g. "my-org/my-repo/.github/workflows/ci.yml@main") | |
| Usage: | |
| python fetch_actions_by_template.py --template "actions/setup-python" \ | |
| [--token <GITHUB_TOKEN>] [--org <org>] [--repo <owner/repo>] [--output results.json] | |
| Setup: | |
| pip install requests | |
| export GITHUB_TOKEN="ghp_your_token_here" # recommended | |
| How to run: | |
| # Search public GitHub for any workflow using actions/setup-python | |
| python fetch_actions_by_template.py --template "actions/setup-python" | |
| # Restrict search to a specific GitHub organisation | |
| python fetch_actions_by_template.py --template "actions/setup-python" --org my-org | |
| # Scan a single repository only | |
| python fetch_actions_by_template.py --template "actions/checkout@v4" --repo owner/repo | |
| # Find reusable workflow consumers | |
| python fetch_actions_by_template.py \ | |
| --template "my-org/shared-workflows/.github/workflows/build.yml@main" | |
| # Save results to JSON | |
| python fetch_actions_by_template.py --template "actions/setup-python" --output results.json | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| import sys | |
| import time | |
| from dataclasses import dataclass, field, asdict | |
| from typing import Optional | |
| import requests | |
| # --------------------------------------------------------------------------- | |
| # Data model | |
| # --------------------------------------------------------------------------- | |
@dataclass
class WorkflowMatch:
    """One workflow file in which the searched-for template was found."""

    # "owner/repo" name of the repository that contains the workflow
    repo_full_name: str
    # Path of the workflow file inside that repository
    workflow_file: str
    # HTML URL to the file on GitHub
    workflow_url: str
    # Download URL for the file; the search strategies in this script pass
    # either the API URL of the search hit or an empty string
    raw_url: str
    # Ids of the jobs that use the template
    jobs: list[str] = field(default_factory=list)
    # Names of the steps that use the template
    steps: list[str] = field(default_factory=list)
| # --------------------------------------------------------------------------- | |
| # GitHub API helpers | |
| # --------------------------------------------------------------------------- | |
| class GitHubClient: | |
| BASE = "https://api.github.com" | |
| def __init__(self, token: Optional[str] = None): | |
| self.session = requests.Session() | |
| self.session.headers.update({ | |
| "Accept": "application/vnd.github+json", | |
| "X-GitHub-Api-Version": "2022-11-28", | |
| }) | |
| if token: | |
| self.session.headers["Authorization"] = f"Bearer {token}" | |
| # ------------------------------------------------------------------ | |
| # Low-level request with rate-limit handling | |
| # ------------------------------------------------------------------ | |
| def _get(self, url: str, params: dict | None = None) -> dict | list: | |
| while True: | |
| resp = self.session.get(url, params=params) | |
| if resp.status_code == 403 and "rate limit" in resp.text.lower(): | |
| reset = int(resp.headers.get("X-RateLimit-Reset", time.time() + 60)) | |
| wait = max(reset - int(time.time()), 1) + 2 | |
| print(f" [rate-limit] sleeping {wait}s …", flush=True) | |
| time.sleep(wait) | |
| continue | |
| if resp.status_code == 422: | |
| # Search index not available for this query | |
| return {"items": [], "total_count": 0} | |
| resp.raise_for_status() | |
| return resp.json() | |
| # ------------------------------------------------------------------ | |
| # Paginate through all pages of a search result | |
| # ------------------------------------------------------------------ | |
| def _search_code_all(self, query: str) -> list[dict]: | |
| url = f"{self.BASE}/search/code" | |
| page, per_page = 1, 100 | |
| results: list[dict] = [] | |
| while True: | |
| data = self._get(url, params={"q": query, "per_page": per_page, "page": page}) | |
| items = data.get("items", []) | |
| results.extend(items) | |
| print(f" fetched {len(results)} / {data.get('total_count', '?')} code matches …", flush=True) | |
| if len(items) < per_page: | |
| break | |
| page += 1 | |
| # GitHub search API: max 10 requests/min for authenticated users | |
| time.sleep(6) | |
| return results | |
| # ------------------------------------------------------------------ | |
| # Fetch raw file content | |
| # ------------------------------------------------------------------ | |
| def get_raw(self, raw_url: str) -> str: | |
| resp = self.session.get(raw_url) | |
| resp.raise_for_status() | |
| return resp.text | |
| # ------------------------------------------------------------------ | |
| # List workflow files in a single repo | |
| # ------------------------------------------------------------------ | |
| def list_repo_workflows(self, owner: str, repo: str) -> list[dict]: | |
| """Return workflow file metadata from the GitHub Actions API.""" | |
| url = f"{self.BASE}/repos/{owner}/{repo}/actions/workflows" | |
| per_page = 100 | |
| page = 1 | |
| workflows: list[dict] = [] | |
| while True: | |
| data = self._get(url, params={"per_page": per_page, "page": page}) | |
| batch = data.get("workflows", []) | |
| workflows.extend(batch) | |
| if len(batch) < per_page: | |
| break | |
| page += 1 | |
| return workflows | |
| # ------------------------------------------------------------------ | |
| # Fetch raw content of a file via the Contents API | |
| # ------------------------------------------------------------------ | |
| def get_file_content(self, owner: str, repo: str, path: str) -> str: | |
| import base64 | |
| url = f"{self.BASE}/repos/{owner}/{repo}/contents/{path}" | |
| data = self._get(url) | |
| if isinstance(data, dict) and data.get("encoding") == "base64": | |
| return base64.b64decode(data["content"]).decode("utf-8", errors="replace") | |
| raise ValueError(f"Unexpected response for {url}") | |
| # --------------------------------------------------------------------------- | |
| # YAML-aware template matching (no heavy dependency – pure text) | |
| # --------------------------------------------------------------------------- | |
| def _normalize_template(template: str) -> str: | |
| """Strip version tag for fuzzy matching, keep it for exact search too.""" | |
| return template.split("@")[0].strip() | |
| def find_usages_in_yaml(content: str, template: str) -> tuple[list[str], list[str]]: | |
| """ | |
| Parse the workflow YAML text and find: | |
| - job ids whose `uses:` field references the template | |
| - step names whose `uses:` field references the template | |
| Returns (matching_jobs, matching_steps). | |
| We do simple line-by-line parsing to avoid a hard PyYAML dependency, | |
| but still handle indentation correctly. | |
| """ | |
| base = _normalize_template(template) | |
| matched_jobs: list[str] = [] | |
| matched_steps: list[str] = [] | |
| lines = content.splitlines() | |
| current_job: Optional[str] = None | |
| current_step: Optional[str] = None | |
| in_jobs_block = False | |
| for i, raw_line in enumerate(lines): | |
| stripped = raw_line.strip() | |
| indent = len(raw_line) - len(raw_line.lstrip()) | |
| # Detect top-level "jobs:" block | |
| if raw_line.startswith("jobs:"): | |
| in_jobs_block = True | |
| continue | |
| if not in_jobs_block: | |
| continue | |
| # Job-level key: indent == 2 | |
| if indent == 2 and stripped.endswith(":") and not stripped.startswith("-"): | |
| current_job = stripped[:-1] | |
| current_step = None | |
| continue | |
| # Step name | |
| if indent == 6 and stripped.startswith("- name:"): | |
| current_step = stripped[len("- name:"):].strip().strip('"').strip("'") | |
| continue | |
| if indent == 6 and stripped.startswith("-") and "name:" not in stripped: | |
| current_step = f"(unnamed step ~line {i+1})" | |
| continue | |
| # Check for `uses:` at job level (reusable workflow) | |
| if indent == 4 and stripped.startswith("uses:"): | |
| uses_value = stripped[len("uses:"):].strip().strip('"').strip("'") | |
| if base in uses_value or uses_value in template: | |
| if current_job and current_job not in matched_jobs: | |
| matched_jobs.append(current_job) | |
| # Check for `uses:` at step level (action) | |
| if indent >= 8 and stripped.startswith("uses:"): | |
| uses_value = stripped[len("uses:"):].strip().strip('"').strip("'") | |
| if base in uses_value or uses_value in template: | |
| label = current_step or f"(unnamed step ~line {i+1})" | |
| if label not in matched_steps: | |
| matched_steps.append(label) | |
| return matched_jobs, matched_steps | |
| # --------------------------------------------------------------------------- | |
| # Main search strategies | |
| # --------------------------------------------------------------------------- | |
def search_via_code_search(
    client: GitHubClient,
    template: str,
    org: Optional[str] = None,
) -> list[WorkflowMatch]:
    """
    Use GitHub Code Search to find workflow files that reference the template.
    Works across all public repos (or within an org).
    Every search hit is downloaded and re-checked with find_usages_in_yaml; a hit
    is kept when a job or step uses the template, or when the version-less
    template text occurs anywhere in the file. A failing search request is
    reported as a warning and yields an empty list.
    """
    base = _normalize_template(template)
    # Search for the last path segment only (the action or workflow file name);
    # the per-file check below weeds out unrelated hits.
    search_token = base.split("/")[-1] if "/" in base else base
    query_parts = [f'"{search_token}"', "path:.github/workflows", "language:YAML"]
    if org:
        query_parts.append(f"org:{org}")
    query = " ".join(query_parts)
    print(f"[code-search] query: {query}", flush=True)

    try:
        items = client._search_code_all(query)
    except requests.HTTPError as exc:
        # Same policy as the single-repo scan: warn and return what we have.
        print(f" [warn] code search failed: {exc}", flush=True)
        return []

    matches: list[WorkflowMatch] = []
    for item in items:
        html_url = item.get("html_url", "")
        repo_name = item.get("repository", {}).get("full_name", "")
        file_path = item.get("path", "")
        owner, _, repo_short = repo_name.partition("/")
        if not (owner and repo_short and file_path):
            print(f" [warn] skipping search hit with incomplete metadata: {html_url}", flush=True)
            continue
        # Fetch the file through the Contents API and verify actual usage
        try:
            content = client.get_file_content(owner, repo_short, file_path)
        except (requests.RequestException, ValueError, KeyError) as exc:
            print(f" [warn] could not fetch {repo_name}/{file_path}: {exc}", flush=True)
            continue
        jobs, steps = find_usages_in_yaml(content, template)
        # Only include if we confirmed actual usage (or if search hit was strong)
        if jobs or steps or base in content:
            matches.append(WorkflowMatch(
                repo_full_name=repo_name,
                workflow_file=file_path,
                workflow_url=html_url,
                raw_url=item.get("url", ""),
                jobs=jobs,
                steps=steps,
            ))
    return matches
def search_within_repo(
    client: GitHubClient,
    template: str,
    owner: str,
    repo: str,
) -> list[WorkflowMatch]:
    """
    Scan all workflow files in a specific repository for the template.
    Workflows are listed through the Actions API and each file is downloaded and
    checked with find_usages_in_yaml. A file is kept when a job or step uses the
    template, or when the version-less template text occurs anywhere in it.
    Listing or download failures are printed as warnings and skipped.
    """
    print(f"[repo-scan] scanning {owner}/{repo} …", flush=True)
    matches: list[WorkflowMatch] = []
    try:
        workflows = client.list_repo_workflows(owner, repo)
    except requests.HTTPError as exc:
        print(f" [warn] could not list workflows for {owner}/{repo}: {exc}", flush=True)
        return matches

    # Loop-invariant: normalise the template once, not once per workflow.
    base = _normalize_template(template)
    for wf in workflows:
        path = wf.get("path", "")
        html_url = wf.get("html_url", "")
        if not path:
            # Nothing to download for an entry without a file path.
            continue
        try:
            content = client.get_file_content(owner, repo, path)
        except (requests.RequestException, ValueError, KeyError) as exc:
            print(f" [warn] {path}: {exc}", flush=True)
            continue
        jobs, steps = find_usages_in_yaml(content, template)
        if jobs or steps or base in content:
            matches.append(WorkflowMatch(
                repo_full_name=f"{owner}/{repo}",
                workflow_file=path,
                workflow_url=html_url,
                raw_url="",
                jobs=jobs,
                steps=steps,
            ))
    return matches
| # --------------------------------------------------------------------------- | |
| # Output helpers | |
| # --------------------------------------------------------------------------- | |
| def print_results(matches: list[WorkflowMatch], template: str) -> None: | |
| print() | |
| print("=" * 70) | |
| print(f" Results: workflows using '{template}'") | |
| print("=" * 70) | |
| if not matches: | |
| print(" No matches found.") | |
| return | |
| for m in matches: | |
| print(f"\n 📁 {m.repo_full_name}") | |
| print(f" File : {m.workflow_file}") | |
| print(f" URL : {m.workflow_url}") | |
| if m.jobs: | |
| print(f" Jobs : {', '.join(m.jobs)}") | |
| if m.steps: | |
| print(f" Steps : {', '.join(m.steps)}") | |
| print() | |
| print(f" Total matches: {len(matches)}") | |
| print("=" * 70) | |
| def save_results(matches: list[WorkflowMatch], path: str) -> None: | |
| with open(path, "w", encoding="utf-8") as f: | |
| json.dump([asdict(m) for m in matches], f, indent=2, ensure_ascii=False) | |
| print(f"\n[output] Saved {len(matches)} results to '{path}'") | |
| # --------------------------------------------------------------------------- | |
| # CLI | |
| # --------------------------------------------------------------------------- | |
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser: --template (required), --token, --org, --repo, --output."""
    examples = """
Examples:
# Find all public repos using actions/setup-python
python fetch_actions_by_template.py --template "actions/setup-python"
# Limit search to a specific GitHub org
python fetch_actions_by_template.py --template "actions/setup-python" --org my-org
# Scan a single repository
python fetch_actions_by_template.py --template "actions/checkout@v4" --repo owner/repo
# Reusable workflow reference
python fetch_actions_by_template.py \\
--template "my-org/shared-workflows/.github/workflows/build.yml@main"
# Save results to JSON
python fetch_actions_by_template.py --template "actions/setup-python" \\
--output results.json
"""
    parser = argparse.ArgumentParser(
        description="Find GitHub Actions workflows that use a given action/template.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )

    # (flags, keyword settings) for every option, in the order shown by --help.
    option_table = (
        (
            ("--template", "-t"),
            {
                "required": True,
                "help": "Action or reusable workflow reference to search for "
                        '(e.g. "actions/checkout@v4" or "org/repo/.github/workflows/ci.yml@main").',
            },
        ),
        (
            ("--token",),
            {
                # The environment is read when the parser is built.
                "default": os.environ.get("GITHUB_TOKEN"),
                "help": "GitHub personal access token. Falls back to $GITHUB_TOKEN env var. "
                        "Required for private repos and higher rate limits.",
            },
        ),
        (
            ("--org",),
            {
                "default": None,
                "help": "Restrict code search to this GitHub organisation.",
            },
        ),
        (
            ("--repo",),
            {
                "default": None,
                "metavar": "OWNER/REPO",
                "help": "Scan only this specific repository (owner/repo).",
            },
        ),
        (
            ("--output", "-o"),
            {
                "default": None,
                "metavar": "FILE",
                "help": "Write results to a JSON file.",
            },
        ),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser
def main() -> None:
    """Command-line entry point.

    Parses the arguments, warns on stderr when no token is available, then
    either scans the single repository given with --repo or runs a code search
    (optionally restricted with --org). Results are printed and, when --output
    is given, also written to that file as JSON.
    """
    parser = build_parser()
    args = parser.parse_args()
    if not args.token:
        print(
            "[warn] No GitHub token provided. "
            "Unauthenticated requests are limited to 10 searches/min and 60 API calls/hr.\n"
            " Pass --token or set the GITHUB_TOKEN environment variable.\n",
            file=sys.stderr,
        )

    template = args.template.strip()
    # A template with nothing before "@" normalises to an empty string, and an
    # empty string is "contained" in every file, so every workflow would match.
    if not template.split("@")[0].strip():
        parser.error("--template must name an action or workflow (e.g. 'actions/checkout@v4').")

    client = GitHubClient(token=args.token)
    matches: list[WorkflowMatch] = []
    if args.repo:
        # Single-repo scan: require exactly one non-empty owner and repo name,
        # so "owner/", "/repo" and "a/b/c" are rejected before any request.
        owner, slash, repo = args.repo.strip().partition("/")
        if not (slash and owner and repo) or "/" in repo:
            parser.error("--repo must be in 'owner/repo' format.")
        matches = search_within_repo(client, template, owner, repo)
    else:
        # Cross-repo code search (public, or within an org)
        matches = search_via_code_search(client, template, org=args.org)

    print_results(matches, template)
    if args.output:
        save_results(matches, args.output)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment