Skip to content

Instantly share code, notes, and snippets.

@eoin-obrien
Last active April 30, 2025 18:18
Show Gist options
  • Select an option

  • Save eoin-obrien/44f0cda1d85b4a5b2261a92a0d0ffab2 to your computer and use it in GitHub Desktop.

Select an option

Save eoin-obrien/44f0cda1d85b4a5b2261a92a0d0ffab2 to your computer and use it in GitHub Desktop.
Generate WinCompose/XCompose entries for Unicode Mathematical Alphanumeric Symbols — including Latin letters, digits, and Greek characters in styles like bold, italic, script, fraktur, double-struck, sans-serif, and monospace. Includes full Unicode name comments, corrects for missing codepoints using Letterlike Symbols (e.g., ℝ, ℎ, ℯ), and emits…
# -*- coding: utf-8 -*-
"""
generate_compose_math.py
This script generates WinCompose-compatible entries for all Unicode Mathematical
Alphanumeric Symbols including Latin letters, digits, and Greek letters in various styles
such as bold, italic, script, fraktur, double-struck, sans-serif, and monospace.
It also handles special cases:
- Italic 'h' → ℎ (U+210E, Planck constant)
- Script lowercase e/g/o from the Letterlike Symbols block
- Uppercase script/fraktur/double-struck backfills
Author: Eoin O'Brien
"""
import unicodedata
from typing import Dict, Optional, Tuple
# Style definitions, keyed by the compose prefix of each style:
#   (Latin base, style label, digit base, Greek capital base, Greek small base)
# "Latin base" is the code point of the style's capital A; the small letters
# follow 26 code points later.  A base of None means the style has no such
# run (no digits / no Greek letters) and that part is skipped.
# Entries are listed in code point order, which is also the output order.
style_defs: Dict[str, Tuple[int, str, Optional[int], Optional[int], Optional[int]]] = {
    "b": (0x1D400, "Mathematical Bold", 0x1D7CE, 0x1D6A8, 0x1D6C2),
    "i": (0x1D434, "Mathematical Italic", None, 0x1D6E2, 0x1D6FC),
    "bi": (0x1D468, "Bold Italic", None, 0x1D71C, 0x1D736),
    "s": (0x1D49C, "Script", None, None, None),
    "bs": (0x1D4D0, "Bold Script", None, None, None),
    "f": (0x1D504, "Fraktur", None, None, None),
    "d": (0x1D538, "Double-struck", 0x1D7D8, None, None),
    # Bold Fraktur was missing from the table; it is a complete run with no
    # gaps, so it needs no backfill entries.
    "bf": (0x1D56C, "Bold Fraktur", None, None, None),
    "ss": (0x1D5A0, "Sans-serif", 0x1D7E2, None, None),
    "bss": (0x1D5D4, "Bold Sans-serif", 0x1D7EC, 0x1D756, 0x1D770),
    "iss": (0x1D608, "Italic Sans-serif", None, None, None),
    # Sans-serif bold italic has its own Greek runs, just like sans-serif bold.
    "biss": (0x1D63C, "Bold Italic Sans-serif", None, 0x1D790, 0x1D7AA),
    "m": (0x1D670, "Monospace", 0x1D7F6, None, None),
}
# Manual backfills for missing characters.
# Grouped by style key first; each inner mapping goes from the plain Latin
# letter to the replacement code point from the Letterlike Symbols block.
_backfill_by_style: Dict[str, Dict[str, int]] = {
    "s": {
        # Script uppercase
        "B": 0x212C,
        "E": 0x2130,
        "F": 0x2131,
        "H": 0x210B,
        "I": 0x2110,
        "L": 0x2112,
        "M": 0x2133,
        "R": 0x211B,
        # Script lowercase
        "e": 0x212F,
        "g": 0x210A,
        "o": 0x2134,
    },
    "i": {
        "h": 0x210E,  # Planck constant (italic)
    },
    "f": {
        # Fraktur uppercase
        "C": 0x212D,
        "H": 0x210C,
        "I": 0x2111,
        "R": 0x211C,
        "Z": 0x2128,
    },
    "d": {
        # Double-struck uppercase
        "C": 0x2102,
        "H": 0x210D,
        "N": 0x2115,
        "P": 0x2119,
        "Q": 0x211A,
        "R": 0x211D,
        "Z": 0x2124,
    },
}
# Flattened lookup keyed by (style key, letter), as used when emitting lines.
backfill_map: Dict[Tuple[str, str], int] = {
    (style_key, letter): codepoint
    for style_key, letters in _backfill_by_style.items()
    for letter, codepoint in letters.items()
}
def print_header(label: str) -> None:
    """Write the comment line that opens a style's section of the output."""
    banner = "# --- {} ---".format(label)
    print(banner)
def emit_line(compose_prefix: str, key: str, codepoint: int) -> None:
    """
    Write one compose rule to stdout.

    The rule has the form
    ``<Multi_key> <prefix> <key> : "<character>" # <UNICODE NAME>``.

    Raises ValueError (from ``unicodedata.name``) when *codepoint* has no
    Unicode name, e.g. for a reserved code point.
    """
    symbol = chr(codepoint)
    # NOTE(review): compose_prefix is written as a single <...> token even
    # when it is several characters long (e.g. "bi") -- confirm the target
    # compose parser accepts that form.
    sequence = " ".join(
        f"<{token}>" for token in ("Multi_key", compose_prefix, key)
    )
    print(f'{sequence} : "{symbol}" # {unicodedata.name(symbol)}')
# Position of rho in the 24-letter Greek alphabets used below.
_GREEK_RHO_INDEX = 16


def _emit_greek_run(compose_prefix: str, letters: str, base: int) -> None:
    """Emit a 24-letter Greek alphabet against the mathematical run at *base*."""
    for index, letter in enumerate(letters):
        # Every mathematical Greek run carries one extra code point directly
        # after rho: CAPITAL THETA SYMBOL in the capital runs and FINAL SIGMA
        # in the small runs.  Neither is part of the 24-letter lists, so each
        # letter from sigma onwards sits one slot further along than its
        # list index.  Using the raw index mapped sigma onto that extra
        # character and shifted every following letter by one.
        offset = index if index <= _GREEK_RHO_INDEX else index + 1
        emit_line(compose_prefix, letter, base + offset)


def generate_range(
    unicode_base: int,
    compose_prefix: str,
    label: str,
    digit_base: Optional[int],
    greek_upper_base: Optional[int],
    greek_lower_base: Optional[int],
) -> None:
    """
    Print every compose entry for one mathematical style.

    Output order: a section header, the 26 Latin letters (capital then small
    for each letter), the digits 0-9, the Greek capitals, the Greek small
    letters, and finally an empty line.

    Args:
        unicode_base: Code point of the style's capital A; the small letters
            are taken from 26 code points later.
        compose_prefix: Style key written into each compose sequence and used
            to look up backfills in ``backfill_map``.
        label: Human-readable style name for the section header.
        digit_base: Code point of the style's digit zero, or None to skip
            digits.
        greek_upper_base: Code point of the style's capital alpha, or None to
            skip Greek capitals.
        greek_lower_base: Code point of the style's small alpha, or None to
            skip Greek small letters.
    """
    print_header(label)
    for i in range(26):
        upper = chr(0x41 + i)
        lower = chr(0x61 + i)
        upper_cp = unicode_base + i
        lower_cp = unicode_base + 26 + i
        # Use backfill if available: some slots in the mathematical block are
        # gaps and the character has to come from backfill_map instead.
        upper_final = backfill_map.get((compose_prefix, upper), upper_cp)
        lower_final = backfill_map.get((compose_prefix, lower), lower_cp)
        emit_line(compose_prefix, upper, upper_final)
        emit_line(compose_prefix, lower, lower_final)
    if digit_base is not None:
        for i in range(10):
            emit_line(compose_prefix, str(i), digit_base + i)
    greek_upper = "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ"
    greek_lower = "αβγδεζηθικλμνξοπρστυφχψω"
    if greek_upper_base is not None:
        _emit_greek_run(compose_prefix, greek_upper, greek_upper_base)
    if greek_lower_base is not None:
        _emit_greek_run(compose_prefix, greek_lower, greek_lower_base)
    print("")
def main() -> None:
    """Emit the compose entries of every style in ``style_defs``, in table order."""
    for prefix, spec in style_defs.items():
        base, title, digits, greek_capitals, greek_smalls = spec
        generate_range(
            unicode_base=base,
            compose_prefix=prefix,
            label=title,
            digit_base=digits,
            greek_upper_base=greek_capitals,
            greek_lower_base=greek_smalls,
        )


# Only generate output when run as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment