Created
October 9, 2022 05:59
-
-
Save tadeokondrak/fcf64d376b0e99d6893e86c7f1fbc658 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import json | |
| import sys | |
| import re | |
# Initial-consonant table: IPA prefix -> steno keys for the start of a stroke.
# Each dict is one stage; the stages are ordered by their leading key
# (S, T, K, P, W, H, R).  The conversion loop walks the stages in list order
# and, per stage, consumes at most one entry: the longest key that prefixes
# the remaining IPA text.  Every value ends in "-" so that the hyphens can be
# normalised after the whole stroke has been concatenated.
INITIALS = [
    # S...
    {"s": "S-", "ʃ": "SH-", "v": "SR-", "ʒ": "SH-",
     "ʤ": "SKWR-", "ɛksp": "SP-", "z": "STKPW-"},
    # T...
    {"t": "T-", "ð": "TH-", "d": "TK-", "f": "TP-",
     "θ": "TH-", "n": "TPH-", "g": "TKPW-"},
    # K...
    {"k": "K-", "ʧ": "KH-", "j": "KWR-"},
    # P...
    {"p": "P-", "b": "PW-", "m": "PH-"},
    # W
    {"w": "W-"},
    # H...
    {"h": "H-", "l": "HR-"},
    # R
    {"r": "R-"},
]
# Vowel table: IPA vowel (optionally r-coloured) -> steno vowel keys.
# A single stage, so exactly one vowel cluster is consumed per stroke; the
# longest matching key wins, which is why "oʊər" beats "oʊ" and "ɑr" beats "ɑ".
# Values carry no hyphen because vowel keys sit in the middle of a stroke.
VOWELS = [
    {
        # r-coloured vowels
        "ɑr": "AR", "ər": "UR", "ɛr": "ER",
        "iər": "EUR", "ɔr": "OR", "oʊər": "OER",
        # plain vowels and diphthongs
        "æ": "A", "aɪ": "AOEU", "aʊ": "OU", "ɑ": "O", "eɪ": "AEU",
        "ə": "U", "ʌ": "U", "ɛ": "E", "i": "AOE", "ɪ": "EU",
        "ju": "AO*U", "oʊ": "OE", "ɔɪ": "OEU", "u": "AOU", "ʊ": "AO",
    },
]
# Vowel table used when vowels are being omitted from a stroke: the vowel is
# still consumed from the IPA text, but it contributes nothing to the stroke
# except for r-coloured vowels, which keep their "-R".
# NOTE(review): this table has "ir" where VOWELS has "iər"; every other
# r-coloured key is identical between the two tables -- confirm which
# spelling the input data actually uses.
NULL_VOWELS = [
    {
        **dict.fromkeys(("ɑr", "ər", "ɛr", "ir", "ɔr", "oʊər"), "-R"),
        **dict.fromkeys(
            ("æ", "aɪ", "aʊ", "ɑ", "eɪ", "ə", "ʌ", "ɛ", "i", "ɪ",
             "ju", "oʊ", "ɔɪ", "u", "ʊ"),
            "",
        ),
    },
]
# Final-consonant table: IPA prefix -> steno keys for the end of a stroke.
# Each dict is one stage, ordered by leading key (-F, -R, -P, -B, -L, -G, -T,
# -S, -D, -Z); per stage the longest key that prefixes the remaining IPA text
# is consumed.  Every value starts with "-" so that the hyphens can be
# normalised after the whole stroke has been concatenated.
FINALS = [
    # -F...
    {"f": "-F", "v": "-F", "ʧ": "-FP", "nʧ": "-FRPB"},
    # -R...
    {"r": "-R", "ʃ": "-RB", "ʒ": "-RB"},
    # -P...
    # NOTE: the original literal listed "nʤ" twice ("-PBG", then "-PBLG").
    # A dict literal keeps only the last value for a repeated key, so the
    # "-PBG" entry never had any effect and has been dropped.  If it was meant
    # for a different IPA sequence, add that sequence here explicitly.
    {"p": "-P", "n": "-PB", "ŋ": "-PBG", "m": "-PL", "nʤ": "-PBLG"},
    # -B...
    {"b": "-B", "k": "-BG", "kʃən": "-BGS"},
    # -L...
    {"l": "-L", "lk": "-LG"},
    # -G...
    {"g": "-G", "ʃən": "-GS"},
    # -T
    {"t": "-T"},
    # -S ("z" is folded onto -S here; a "z" still left over after this stage
    # is picked up by the -Z stage below)
    {"s": "-S", "z": "-S"},
    # -D
    {"d": "-D"},
    # -Z
    {"z": "-Z"},
]
def clean_extra_hyphens(stroke, is_asterisk=False):
    """Normalise the hyphens of a stroke built by concatenating table values.

    Table values carry their own hyphen ("S-", "-T"), so a concatenated
    stroke such as "S-AOEU-T" or "S--T" contains redundant hyphens.  This
    reduces them to the single separator steno notation needs (or none),
    and places an asterisk when one is requested.

    Args:
        stroke: Concatenated steno keys, e.g. "K-A-T", "-PB-T" or "S--T".
        is_asterisk: When true, the returned stroke contains a "*" in the
            position where the separator/vowel split would be.

    Returns:
        The cleaned stroke, e.g. "KAT", "-PBT", "S-T", or with an asterisk
        "SAO*EUT", "*PBT", "S*T".
    """
    # Only final keys: one leading separator, which the asterisk replaces.
    if stroke.startswith("-"):
        marker = "*" if is_asterisk else "-"
        return f"{marker}{stroke.replace('-', '')}"

    # Only initial keys: no separator needed unless an asterisk is requested.
    if stroke.endswith("-"):
        marker = "*" if is_asterisk else ""
        return f"{stroke.replace('-', '')}{marker}"

    # Vowel keys already separate the two halves, so every hyphen goes.
    if any(c in stroke for c in "AOEU"):
        stroke = stroke.replace("-", "")
        # A vowel value such as "AO*U" already carries the asterisk; adding a
        # second one (and, previously, truncating what followed it) is wrong.
        if not is_asterisk or "*" in stroke:
            return stroke
        # The asterisk sits between the A/O and E/U vowel keys.  The tail is
        # captured verbatim so no part of the stroke can be dropped.
        match = re.match(r"([^AOEU]*[AO]*)(.*)", stroke)
        return f"{match[1]}*{match[2]}"

    # No vowels, keys on both sides: turns "-" into "" and "--" into "-".
    stroke = re.sub(r"-([^-])", r"\1", stroke)
    if is_asterisk:
        # The remaining separator is exactly where the asterisk belongs.
        stroke = stroke.replace("-", "*")
    return stroke
# ---------------------------------------------------------------------------
# Script body.
#
# Reads a JSON object mapping IPA transcriptions to spellings (argv[1]) and
# writes a JSON object mapping steno outlines to spellings (argv[2]).
#
# Markers understood inside an IPA key:
#   trailing "*"  - asterisk on the first stroke of the outline
#   leading "*"   - (at a stroke boundary) asterisk on the stroke that follows
#   " "           - vowels are written again for the stroke that follows
#   "."           - force a stroke boundary
# ---------------------------------------------------------------------------
if len(sys.argv) < 3:
    sys.exit(f"usage: {sys.argv[0]} SOURCE.json DEST.json")

print(f"Generating {sys.argv[2]} from {sys.argv[1]}...\n")

# The keys are IPA text, so do not depend on the locale's default encoding.
with open(sys.argv[1], "r", encoding="utf-8") as f:
    src = json.load(f)

# The stage lists never change; flatten them once instead of once per stroke.
_stages_with_vowels = INITIALS + VOWELS + FINALS
_stages_without_vowels = INITIALS + NULL_VOWELS + FINALS

dst = {}
for ipa, spelling in src.items():
    is_asterisk = ipa.endswith("*")
    if is_asterisk:
        ipa = ipa[:-1]
    original_ipa = ipa

    steno = []
    omit_vowels = False  # only the first stroke (or one after " ") keeps vowels
    is_first_stroke = True
    while ipa:
        ipa_before = ipa
        is_stroke_asterisk = is_asterisk and is_first_stroke
        if ipa.startswith("*"):
            ipa = ipa[1:]
            is_stroke_asterisk = True
        if ipa.startswith(" "):
            ipa = ipa[1:]
            omit_vowels = False
            # NOTE(review): a space also forces an asterisk on the following
            # stroke; kept exactly as in the original -- confirm it is intended.
            is_stroke_asterisk = True

        cur_stroke = ""
        mapping = _stages_without_vowels if omit_vowels else _stages_with_vowels
        for stage in mapping:
            if ipa.startswith("."):
                ipa = ipa[1:]
                break
            # Longest key of this stage that prefixes the remaining text.
            candidates = [key for key in stage if ipa.startswith(key)]
            if candidates:
                best = max(candidates, key=len)
                ipa = ipa[len(best):]
                cur_stroke += stage[best]

        if not cur_stroke:
            if ipa == ipa_before:
                # Nothing was consumed: the next symbol is in no table.
                # Looping again would never terminate, so stop and report it.
                break
            # Something was consumed (a marker or a silent vowel) but no key
            # was produced; carry on with the rest of the text.
            continue

        steno.append(clean_extra_hyphens(cur_stroke, is_asterisk=is_stroke_asterisk))
        omit_vowels = True
        is_first_stroke = False

    # Explicit raises instead of asserts, so the checks survive `python -O`.
    if ipa:
        raise ValueError(
            f"cannot convert {original_ipa} {spelling}: unrecognised IPA at {ipa!r}"
        )
    if not steno:
        raise ValueError(f"no strokes generated for {original_ipa!r} ({spelling})")

    outline = "/".join(steno)
    print(f"{spelling:<20}{original_ipa:<20}\t{outline}")
    dst[outline] = spelling

print()
with open(sys.argv[2], "w", encoding="utf-8") as f:
    json.dump(dst, f, indent=0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment