Source: public GitHub Gist by @vanjikumaran (created February 17, 2026),
gist id bbfa6ed84f1de683aacc88beaf86331f — "Search for GithubAction Template".
#!/usr/bin/env python3
"""
Fetch all GitHub Actions workflows that reference a given action template.
Supports:
- Public GitHub actions (e.g. "actions/checkout@v4")
- Reusable workflow refs (e.g. "my-org/my-repo/.github/workflows/ci.yml@main")
Usage:
python fetch_actions_by_template.py --template "actions/setup-python" \
[--token <GITHUB_TOKEN>] [--org <org>] [--repo <owner/repo>] [--output results.json]
Setup:
pip install requests
export GITHUB_TOKEN="ghp_your_token_here" # recommended
How to run:
# Search public GitHub for any workflow using actions/setup-python
python fetch_actions_by_template.py --template "actions/setup-python"
# Restrict search to a specific GitHub organisation
python fetch_actions_by_template.py --template "actions/setup-python" --org my-org
# Scan a single repository only
python fetch_actions_by_template.py --template "actions/checkout@v4" --repo owner/repo
# Find reusable workflow consumers
python fetch_actions_by_template.py \
--template "my-org/shared-workflows/.github/workflows/build.yml@main"
# Save results to JSON
python fetch_actions_by_template.py --template "actions/setup-python" --output results.json
"""
import argparse
import base64
import json
import os
import sys
import time
from dataclasses import dataclass, field, asdict
from typing import Optional

import requests
# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------
@dataclass
class WorkflowMatch:
    """One workflow file confirmed (or strongly suspected) to use the template."""

    repo_full_name: str  # "owner/repo"
    workflow_file: str  # path inside the repo, e.g. ".github/workflows/ci.yml"
    workflow_url: str  # HTML URL to the file on GitHub
    raw_url: str  # Raw download URL (empty string for single-repo scans)
    jobs: list[str] = field(default_factory=list)  # job ids that use the template
    steps: list[str] = field(default_factory=list)  # step names that use the template
# ---------------------------------------------------------------------------
# GitHub API helpers
# ---------------------------------------------------------------------------
class GitHubClient:
    """Thin wrapper around the GitHub REST API with rate-limit-aware GETs."""

    BASE = "https://api.github.com"

    def __init__(self, token: Optional[str] = None):
        self.session = requests.Session()
        self.session.headers.update({
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        })
        if token:
            self.session.headers["Authorization"] = f"Bearer {token}"

    # ------------------------------------------------------------------
    # Low-level request with rate-limit handling
    # ------------------------------------------------------------------
    def _get(self, url: str, params: dict | None = None) -> dict | list:
        """GET *url* and return the decoded JSON payload.

        Sleeps and retries on rate limits.  GitHub signals the primary
        rate limit with 403 + "rate limit" body text and the secondary
        (abuse) limit with 429 + a Retry-After header; the original code
        only handled the 403 case and ignored Retry-After entirely.
        A 422 from the search API is treated as an empty result set.
        """
        while True:
            resp = self.session.get(url, params=params)
            rate_limited = resp.status_code == 429 or (
                resp.status_code == 403 and "rate limit" in resp.text.lower()
            )
            if rate_limited:
                retry_after = resp.headers.get("Retry-After")
                if retry_after is not None:
                    # Secondary rate limit: GitHub tells us exactly how long.
                    wait = int(retry_after) + 1
                else:
                    # Primary rate limit: wait until the window resets.
                    reset = int(resp.headers.get("X-RateLimit-Reset", time.time() + 60))
                    wait = max(reset - int(time.time()), 1) + 2
                print(f" [rate-limit] sleeping {wait}s …", flush=True)
                time.sleep(wait)
                continue
            if resp.status_code == 422:
                # Search index not available for this query
                return {"items": [], "total_count": 0}
            resp.raise_for_status()
            return resp.json()

    # ------------------------------------------------------------------
    # Paginate through all pages of a search result
    # ------------------------------------------------------------------
    def _search_code_all(self, query: str) -> list[dict]:
        """Return every code-search item for *query*, following pagination.

        NOTE(review): the code-search API returns at most 1000 results
        per query, so very popular templates will be truncated.
        """
        url = f"{self.BASE}/search/code"
        page, per_page = 1, 100
        results: list[dict] = []
        while True:
            data = self._get(url, params={"q": query, "per_page": per_page, "page": page})
            items = data.get("items", [])
            results.extend(items)
            print(f" fetched {len(results)} / {data.get('total_count', '?')} code matches …", flush=True)
            if len(items) < per_page:
                break
            page += 1
            # GitHub search API: max 10 requests/min for authenticated users
            time.sleep(6)
        return results

    # ------------------------------------------------------------------
    # Fetch raw file content
    # ------------------------------------------------------------------
    def get_raw(self, raw_url: str) -> str:
        """Download *raw_url* and return the body as text."""
        resp = self.session.get(raw_url)
        resp.raise_for_status()
        return resp.text

    # ------------------------------------------------------------------
    # List workflow files in a single repo
    # ------------------------------------------------------------------
    def list_repo_workflows(self, owner: str, repo: str) -> list[dict]:
        """Return workflow file metadata from the GitHub Actions API."""
        url = f"{self.BASE}/repos/{owner}/{repo}/actions/workflows"
        per_page = 100
        page = 1
        workflows: list[dict] = []
        while True:
            data = self._get(url, params={"per_page": per_page, "page": page})
            batch = data.get("workflows", [])
            workflows.extend(batch)
            if len(batch) < per_page:
                break
            page += 1
        return workflows

    # ------------------------------------------------------------------
    # Fetch raw content of a file via the Contents API
    # ------------------------------------------------------------------
    def get_file_content(self, owner: str, repo: str, path: str) -> str:
        """Return the decoded text of *path* in *owner*/*repo*.

        Raises ValueError when the Contents API response is not a
        base64-encoded file (e.g. the path is a directory).
        """
        url = f"{self.BASE}/repos/{owner}/{repo}/contents/{path}"
        data = self._get(url)
        if isinstance(data, dict) and data.get("encoding") == "base64":
            return base64.b64decode(data["content"]).decode("utf-8", errors="replace")
        raise ValueError(f"Unexpected response for {url}")
# ---------------------------------------------------------------------------
# YAML-aware template matching (no heavy dependency – pure text)
# ---------------------------------------------------------------------------
def _normalize_template(template: str) -> str:
"""Strip version tag for fuzzy matching, keep it for exact search too."""
return template.split("@")[0].strip()
def find_usages_in_yaml(content: str, template: str) -> tuple[list[str], list[str]]:
    """
    Parse the workflow YAML text and find:
    - job ids whose `uses:` field references the template (reusable workflows)
    - step names whose `uses:` field references the template (actions)

    Returns (matching_jobs, matching_steps).

    Simple line-by-line parsing is used to avoid a hard PyYAML dependency.
    It assumes the conventional 2-space indentation (job ids at indent 2,
    job keys at 4, step dashes at 6, step keys at 8+) — files with other
    indentation will not be matched.

    BUG FIX vs. the original: steps written in the compact form
    ``- uses: actions/x@v1`` (the `uses:` on the dash line itself, indent 6)
    were never matched, because only `uses:` lines at indent >= 8 were
    examined.  These are now recorded as unnamed steps.
    """
    # Inline of _normalize_template(): drop any "@version" suffix.
    base = template.split("@")[0].strip()
    matched_jobs: list[str] = []
    matched_steps: list[str] = []
    current_job: Optional[str] = None
    current_step: Optional[str] = None
    in_jobs_block = False

    def _value_after(text: str, prefix: str) -> str:
        # Strip the key prefix plus surrounding whitespace and quotes.
        return text[len(prefix):].strip().strip('"').strip("'")

    def _matches(value: str) -> bool:
        # Fuzzy in both directions: template base inside the value, or the
        # (possibly shorter) value inside the full template string.
        return base in value or value in template

    for i, raw_line in enumerate(content.splitlines()):
        stripped = raw_line.strip()
        indent = len(raw_line) - len(raw_line.lstrip())
        # Detect top-level "jobs:" block
        if raw_line.startswith("jobs:"):
            in_jobs_block = True
            continue
        if not in_jobs_block:
            continue
        # Job-level key: indent == 2
        if indent == 2 and stripped.endswith(":") and not stripped.startswith("-"):
            current_job = stripped[:-1]
            current_step = None
            continue
        # Named step
        if indent == 6 and stripped.startswith("- name:"):
            current_step = _value_after(stripped, "- name:")
            continue
        # Unnamed step (a dash line without a name key)
        if indent == 6 and stripped.startswith("-") and "name:" not in stripped:
            current_step = f"(unnamed step ~line {i+1})"
            # The compact "- uses: ..." form carries the reference here.
            if stripped.startswith("- uses:"):
                value = _value_after(stripped, "- uses:")
                if _matches(value) and current_step not in matched_steps:
                    matched_steps.append(current_step)
            continue
        # Check for `uses:` at job level (reusable workflow)
        if indent == 4 and stripped.startswith("uses:"):
            value = _value_after(stripped, "uses:")
            if _matches(value) and current_job and current_job not in matched_jobs:
                matched_jobs.append(current_job)
        # Check for `uses:` at step level (action)
        if indent >= 8 and stripped.startswith("uses:"):
            value = _value_after(stripped, "uses:")
            if _matches(value):
                label = current_step or f"(unnamed step ~line {i+1})"
                if label not in matched_steps:
                    matched_steps.append(label)
    return matched_jobs, matched_steps
# ---------------------------------------------------------------------------
# Main search strategies
# ---------------------------------------------------------------------------
def search_via_code_search(
    client: GitHubClient,
    template: str,
    org: Optional[str] = None,
) -> list[WorkflowMatch]:
    """
    Use GitHub Code Search to find workflow files that reference the template.

    Works across all public repos (or within *org* when given).  Each search
    hit is fetched through the Contents API and verified with
    find_usages_in_yaml before being reported.

    (Cleanup vs. the original: the dead locals `raw_url` and `download_url`,
    which were assigned but never used, have been removed.)
    """
    base = _normalize_template(template)
    # Code search matches whole tokens, so search for the last path segment
    # (e.g. "setup-python"), which appears literally in the YAML.
    search_token = base.split("/")[-1] if "/" in base else base
    query_parts = [f'"{search_token}"', "path:.github/workflows", "language:YAML"]
    if org:
        query_parts.append(f"org:{org}")
    query = " ".join(query_parts)
    print(f"[code-search] query: {query}", flush=True)

    matches: list[WorkflowMatch] = []
    for item in client._search_code_all(query):
        repo_name = item.get("repository", {}).get("full_name", "?")
        file_path = item.get("path", "")
        # Fetch raw content and verify actual usage
        try:
            owner, repo_short = repo_name.split("/", 1)
            content = client.get_file_content(owner, repo_short, file_path)
        except Exception as exc:
            print(f" [warn] could not fetch {repo_name}/{file_path}: {exc}", flush=True)
            continue
        jobs, steps = find_usages_in_yaml(content, template)
        # Only include if we confirmed actual usage (or if search hit was strong)
        if jobs or steps or base in content:
            matches.append(WorkflowMatch(
                repo_full_name=repo_name,
                workflow_file=file_path,
                workflow_url=item.get("html_url", ""),
                # NOTE(review): search items carry no direct raw link, so the
                # API URL is stored here, as in the original.
                raw_url=item.get("url", ""),
                jobs=jobs,
                steps=steps,
            ))
    return matches
def search_within_repo(
    client: GitHubClient,
    template: str,
    owner: str,
    repo: str,
) -> list[WorkflowMatch]:
    """
    Scan all workflow files in a specific repository for the template.

    Workflow files are listed via the Actions API, fetched through the
    Contents API, and checked with find_usages_in_yaml.  Files that fail
    to download are skipped with a warning.
    """
    print(f"[repo-scan] scanning {owner}/{repo} …", flush=True)
    matches: list[WorkflowMatch] = []
    try:
        workflows = client.list_repo_workflows(owner, repo)
    except requests.HTTPError as exc:
        print(f" [warn] could not list workflows for {owner}/{repo}: {exc}", flush=True)
        return matches
    # Hoisted out of the loop: the normalized base is loop-invariant
    # (the original recomputed it for every workflow file).
    base = _normalize_template(template)
    for wf in workflows:
        path = wf.get("path", "")
        html_url = wf.get("html_url", "")
        try:
            content = client.get_file_content(owner, repo, path)
        except Exception as exc:
            print(f" [warn] {path}: {exc}", flush=True)
            continue
        jobs, steps = find_usages_in_yaml(content, template)
        if jobs or steps or base in content:
            matches.append(WorkflowMatch(
                repo_full_name=f"{owner}/{repo}",
                workflow_file=path,
                workflow_url=html_url,
                raw_url="",
                jobs=jobs,
                steps=steps,
            ))
    return matches
# ---------------------------------------------------------------------------
# Output helpers
# ---------------------------------------------------------------------------
def print_results(matches: list[WorkflowMatch], template: str) -> None:
    """Pretty-print the match list to stdout with a banner and a total."""
    banner = "=" * 70
    print()
    print(banner)
    print(f" Results: workflows using '{template}'")
    print(banner)
    if not matches:
        print(" No matches found.")
        return
    for hit in matches:
        print(f"\n 📁 {hit.repo_full_name}")
        print(f" File : {hit.workflow_file}")
        print(f" URL : {hit.workflow_url}")
        if hit.jobs:
            print(f" Jobs : {', '.join(hit.jobs)}")
        if hit.steps:
            print(f" Steps : {', '.join(hit.steps)}")
    print()
    print(f" Total matches: {len(matches)}")
    print(banner)
def save_results(matches: list[WorkflowMatch], path: str) -> None:
    """Serialize the matches to *path* as pretty-printed JSON."""
    payload = [asdict(m) for m in matches]
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(json.dumps(payload, indent=2, ensure_ascii=False))
    print(f"\n[output] Saved {len(matches)} results to '{path}'")
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def build_parser() -> argparse.ArgumentParser:
    """Construct and return the command-line argument parser."""
    epilog_text = """
Examples:
# Find all public repos using actions/setup-python
python fetch_actions_by_template.py --template "actions/setup-python"
# Limit search to a specific GitHub org
python fetch_actions_by_template.py --template "actions/setup-python" --org my-org
# Scan a single repository
python fetch_actions_by_template.py --template "actions/checkout@v4" --repo owner/repo
# Reusable workflow reference
python fetch_actions_by_template.py \\
--template "my-org/shared-workflows/.github/workflows/build.yml@main"
# Save results to JSON
python fetch_actions_by_template.py --template "actions/setup-python" \\
--output results.json
"""
    parser = argparse.ArgumentParser(
        description="Find GitHub Actions workflows that use a given action/template.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog_text,
    )
    parser.add_argument(
        "--template", "-t",
        required=True,
        help="Action or reusable workflow reference to search for "
             '(e.g. "actions/checkout@v4" or "org/repo/.github/workflows/ci.yml@main").',
    )
    parser.add_argument(
        "--token",
        default=os.environ.get("GITHUB_TOKEN"),
        help="GitHub personal access token. Falls back to $GITHUB_TOKEN env var. "
             "Required for private repos and higher rate limits.",
    )
    parser.add_argument(
        "--org",
        default=None,
        help="Restrict code search to this GitHub organisation.",
    )
    parser.add_argument(
        "--repo",
        default=None,
        metavar="OWNER/REPO",
        help="Scan only this specific repository (owner/repo).",
    )
    parser.add_argument(
        "--output", "-o",
        default=None,
        metavar="FILE",
        help="Write results to a JSON file.",
    )
    return parser
def main() -> None:
    """CLI entry point: parse arguments, run the chosen search, report."""
    cli = build_parser()
    args = cli.parse_args()
    if not args.token:
        print(
            "[warn] No GitHub token provided. "
            "Unauthenticated requests are limited to 10 searches/min and 60 API calls/hr.\n"
            " Pass --token or set the GITHUB_TOKEN environment variable.\n",
            file=sys.stderr,
        )
    client = GitHubClient(token=args.token)
    template = args.template.strip()
    if args.repo:
        # Single-repo scan
        if "/" not in args.repo:
            cli.error("--repo must be in 'owner/repo' format.")
        owner, repo = args.repo.split("/", 1)
        matches = search_within_repo(client, template, owner, repo)
    else:
        # Cross-repo code search (public, or within an org)
        matches = search_via_code_search(client, template, org=args.org)
    print_results(matches, template)
    if args.output:
        save_results(matches, args.output)


if __name__ == "__main__":
    main()
Comments for this script are hosted on the original GitHub Gist.