Last active
April 30, 2025 18:18
-
-
Save eoin-obrien/44f0cda1d85b4a5b2261a92a0d0ffab2 to your computer and use it in GitHub Desktop.
Generate WinCompose/XCompose entries for Unicode Mathematical Alphanumeric Symbols — including Latin letters, digits, and Greek characters in styles like bold, italic, script, fraktur, double-struck, sans-serif, and monospace. Includes full Unicode name comments, corrects for missing codepoints using Letterlike Symbols (e.g., ℝ, ℎ, ℯ), and emits…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ | |
| generate_compose_math.py | |
| This script generates WinCompose-compatible entries for all Unicode Mathematical | |
| Alphanumeric Symbols including Latin letters, digits, and Greek letters in various styles | |
| such as bold, italic, script, fraktur, double-struck, sans-serif, and monospace. | |
| It also handles special cases: | |
| - Italic 'h' → ℎ (U+210E PLANCK CONSTANT) | |
| - Script lowercase e/g/o from the Letterlike Symbols block | |
| - Uppercase script/fraktur/double-struck backfills | |
| Author: Eoin O'Brien | |
| """ | |
| import unicodedata | |
| from typing import Dict, Optional, Tuple | |
# Style definitions, keyed by the compose prefix typed after <Multi_key>.
# Each value is:
#   (Latin base, section label, digit base, Greek capital base, Greek small base)
# - Latin base is the codepoint of the styled capital A; generate_range places
#   capitals at base + 0..25 and small letters at base + 26..51.
# - Digit base is the codepoint of the styled digit zero.
# - Greek bases are the codepoints of the styled capital / small alpha.
# A base of None makes generate_range skip that section for the style.
style_defs: Dict[str, Tuple[int, str, Optional[int], Optional[int], Optional[int]]] = {
    "b": (0x1D400, "Mathematical Bold", 0x1D7CE, 0x1D6A8, 0x1D6C2),
    "i": (0x1D434, "Mathematical Italic", None, 0x1D6E2, 0x1D6FC),
    "bi": (0x1D468, "Bold Italic", None, 0x1D71C, 0x1D736),
    "s": (0x1D49C, "Script", None, None, None),
    "bs": (0x1D4D0, "Bold Script", None, None, None),
    "f": (0x1D504, "Fraktur", None, None, None),
    "d": (0x1D538, "Double-struck", 0x1D7D8, None, None),
    "ss": (0x1D5A0, "Sans-serif", 0x1D7E2, None, None),
    "bss": (0x1D5D4, "Bold Sans-serif", 0x1D7EC, 0x1D756, 0x1D770),
    "iss": (0x1D608, "Italic Sans-serif", None, None, None),
    # Sans-serif bold italic is the fifth styled Greek run in the block
    # (after bold, italic, bold italic and sans-serif bold).
    "biss": (0x1D63C, "Bold Italic Sans-serif", None, 0x1D790, 0x1D7AA),
    "m": (0x1D670, "Monospace", 0x1D7F6, None, None),
}
# Replacement codepoints for (compose prefix, Latin letter) pairs, looked up by
# generate_range in preference to the computed base + offset codepoint.
# The tables below are grouped per style and flattened into one lookup dict.
backfill_map: Dict[Tuple[str, str], int] = {
    (style, letter): codepoint
    for style, replacements in (
        # Script: uppercase first, then lowercase
        (
            "s",
            {
                "B": 0x212C,
                "E": 0x2130,
                "F": 0x2131,
                "H": 0x210B,
                "I": 0x2110,
                "L": 0x2112,
                "M": 0x2133,
                "R": 0x211B,
                "e": 0x212F,
                "g": 0x210A,
                "o": 0x2134,
            },
        ),
        # Italic: lowercase h is the Planck constant
        ("i", {"h": 0x210E}),
        # Fraktur uppercase
        ("f", {"C": 0x212D, "H": 0x210C, "I": 0x2111, "R": 0x211C, "Z": 0x2128}),
        # Double-struck uppercase
        (
            "d",
            {
                "C": 0x2102,
                "H": 0x210D,
                "N": 0x2115,
                "P": 0x2119,
                "Q": 0x211A,
                "R": 0x211D,
                "Z": 0x2124,
            },
        ),
    )
    for letter, codepoint in replacements.items()
}
def print_header(label: str) -> None:
    """Write the comment line that opens a style's section of the output."""
    banner = "# --- {} ---".format(label)
    print(banner)
def emit_line(compose_prefix: str, key: str, codepoint: int) -> None:
    """Write one Compose rule to stdout, annotated with the Unicode name.

    The rule has the form
    ``<Multi_key> <prefix> <key> : "char" # UNICODE NAME``.

    Raises:
        ValueError: if ``codepoint`` has no Unicode character name
            (``unicodedata.name`` is called without a default).
    """
    # NOTE(review): the prefix is emitted as a single <...> token, so a
    # multi-character prefix such as "bi" becomes <bi> -- confirm the target
    # Compose parser accepts that form.
    sequence = f"<Multi_key> <{compose_prefix}> <{key}>"
    symbol = chr(codepoint)
    description = unicodedata.name(symbol)
    print(f'{sequence} : "{symbol}" # {description}')
def generate_range(
    unicode_base: int,
    compose_prefix: str,
    label: str,
    digit_base: Optional[int],
    greek_upper_base: Optional[int],
    greek_lower_base: Optional[int],
) -> None:
    """
    Print every Compose entry for one mathematical style.

    Output order: a section header, the 26 Latin letters (capital then small
    for each letter), the digits 0-9, the Greek capitals, the Greek small
    letters, and finally a blank line.

    Args:
        unicode_base: Codepoint of the styled Latin capital A. Capitals are
            taken from ``unicode_base + 0..25`` and small letters from
            ``unicode_base + 26..51`` unless ``backfill_map`` overrides them.
        compose_prefix: Style key used both in the emitted sequence and as the
            first element of the ``backfill_map`` lookup key.
        label: Text for the section header.
        digit_base: Codepoint of the styled digit zero, or None to skip digits.
        greek_upper_base: Codepoint of the styled capital alpha, or None to
            skip Greek capitals.
        greek_lower_base: Codepoint of the styled small alpha, or None to skip
            Greek small letters.
    """
    print_header(label)

    for i in range(26):
        upper = chr(0x41 + i)
        lower = chr(0x61 + i)
        upper_cp = unicode_base + i
        lower_cp = unicode_base + 26 + i
        # Use backfill if available
        upper_final = backfill_map.get((compose_prefix, upper), upper_cp)
        lower_final = backfill_map.get((compose_prefix, lower), lower_cp)
        emit_line(compose_prefix, upper, upper_final)
        emit_line(compose_prefix, lower, lower_final)

    if digit_base is not None:
        for i in range(10):
            emit_line(compose_prefix, str(i), digit_base + i)

    greek_upper = "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ"
    greek_lower = "αβγδεζηθικλμνξοπρστυφχψω"
    greek_capital_alpha = 0x0391
    greek_small_alpha = 0x03B1

    # The styled Greek runs mirror the layout of the basic Greek block, which
    # has one extra slot between rho and sigma (U+03A2 is unassigned, U+03C2 is
    # final sigma). In the styled runs that slot holds the THETA SYMBOL and
    # FINAL SIGMA, so the offset must be derived from each letter's own
    # codepoint: indexing into the 24-letter strings above would map sigma
    # through omega one codepoint too low.
    if greek_upper_base is not None:
        for g in greek_upper:
            emit_line(compose_prefix, g, greek_upper_base + ord(g) - greek_capital_alpha)

    if greek_lower_base is not None:
        for g in greek_lower:
            emit_line(compose_prefix, g, greek_lower_base + ord(g) - greek_small_alpha)

    print("")
def main() -> None:
    """Emit the Compose entries for each style in ``style_defs``, in definition order."""
    for prefix, spec in style_defs.items():
        latin_base, label, digit_base, greek_u_base, greek_l_base = spec
        generate_range(
            latin_base,
            prefix,
            label,
            digit_base,
            greek_u_base,
            greek_l_base,
        )


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment