Created
March 6, 2026 08:46
-
-
Save rsgranne/fa63973d69a51692262244f0bd07a6b2 to your computer and use it in GitHub Desktop.
Extract the WHATWG HTML attribute index table and count Boolean attributes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Count Boolean attributes in the WHATWG HTML attribute index
#
# License: CC0 / Public Domain
#   https://creativecommons.org/publicdomain/zero/1.0/
#   You may use, modify, and redistribute this script without restriction.
#
# Source table:
#   https://html.spec.whatwg.org/multipage/indices.html#attributes-1
#
# Requirements:
#   curl
#   Python 3
#   beautifulsoup4
#   lxml
#
# This script:
#   1. Downloads the WHATWG attribute index page to /tmp/indices.html
#   2. Extracts the table with id="attributes-1"
#   3. Writes the table to /tmp/attributes-1.csv for verification
#   4. Counts the table rows whose 4th (Value) column mentions "boolean"
#
# Exits non-zero if the download fails or the table cannot be found.
#
# NOTE(review): the count is per table row, not per distinct attribute name.
# If the index lists one attribute on several rows it is counted once per
# row -- confirm this is the intended figure.

# Stop at the first failing command so a failed download is never parsed.
set -euo pipefail

# -f turns HTTP errors (4xx/5xx) into a non-zero exit instead of saving the
# server's error page as if it were the spec.
curl -fL -o /tmp/indices.html https://html.spec.whatwg.org/multipage/indices.html

python3 <<'PY'
import csv
import sys

from bs4 import BeautifulSoup

HTML_PATH = "/tmp/indices.html"
CSV_PATH = "/tmp/attributes-1.csv"

# Parse the downloaded HTML
with open(HTML_PATH, "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "lxml")

# Locate the attribute index table
table = soup.find("table", id="attributes-1")
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the first use of `table`.
    sys.exit('error: no <table id="attributes-1"> found in ' + HTML_PATH)

boolean_count = 0

# Export table to CSV for verification.
# newline="" lets the csv module control line endings; QUOTE_ALL and
# lineterminator="\n" keep the output format of one fully quoted row per line.
with open(CSV_PATH, "w", encoding="utf-8", newline="") as out:
    writer = csv.writer(out, quoting=csv.QUOTE_ALL, lineterminator="\n")
    for row in table.find_all("tr"):
        cells = [c.get_text(" ", strip=True) for c in row.find_all(["th", "td"])]
        if not cells:
            continue
        writer.writerow(cells)
        # Value column is the 4th column
        if len(cells) >= 4 and "boolean" in cells[3].lower():
            boolean_count += 1

print("Boolean attributes:", boolean_count)
print("CSV exported to: /tmp/attributes-1.csv")
PY
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment