Skip to content

Instantly share code, notes, and snippets.

@tadeokondrak
Created October 9, 2022 05:59
Show Gist options
  • Select an option

  • Save tadeokondrak/fcf64d376b0e99d6893e86c7f1fbc658 to your computer and use it in GitHub Desktop.

Select an option

Save tadeokondrak/fcf64d376b0e99d6893e86c7f1fbc658 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import json
import sys
import re
# Stroke-initial consonant tables.
#
# Each dict is one matching "stage".  The translation loop visits the stages
# in list order and, per stage, consumes at most one IPA prefix (the longest
# matching key).  Every value ends in "-"; clean_extra_hyphens() later
# collapses those per-chord hyphens into a single separator.
INITIALS = [
    # Chords whose first key is S.
    {
        "s": "S-",
        "ʃ": "SH-",
        "v": "SR-",
        "ʒ": "SH-",
        "ʤ": "SKWR-",
        "ɛksp": "SP-",
        "z": "STKPW-",
    },
    # Chords whose first key is T.
    {
        "t": "T-",
        "ð": "TH-",
        "d": "TK-",
        "f": "TP-",
        "θ": "TH-",
        "n": "TPH-",
        "g": "TKPW-",
    },
    # Chords whose first key is K.
    {
        "k": "K-",
        "ʧ": "KH-",
        "j": "KWR-",
    },
    # Chords whose first key is P.
    {
        "p": "P-",
        "b": "PW-",
        "m": "PH-",
    },
    # W on its own.
    {
        "w": "W-",
    },
    # Chords whose first key is H.
    {
        "h": "H-",
        "l": "HR-",
    },
    # R on its own.
    {
        "r": "R-",
    },
]
# Vowel table used for the first stroke of a word (and again after a space).
#
# A single stage: the longest IPA prefix found here is consumed and its
# vowel keys are appended to the stroke.  Values carry no hyphen.  The
# R-coloured entries already include the final R key.
VOWELS = [
    {
        # Vowel + r sequences.
        "ɑr": "AR",
        "ər": "UR",
        "ɛr": "ER",
        "iər": "EUR",
        "ɔr": "OR",
        "oʊər": "OER",
        # Plain vowels and diphthongs.
        "æ": "A",
        "aɪ": "AOEU",
        "aʊ": "OU",
        "ɑ": "O",
        "eɪ": "AEU",
        "ə": "U",
        "ʌ": "U",
        "ɛ": "E",
        "i": "AOE",
        "ɪ": "EU",
        # The only entry that already contains an asterisk.
        "ju": "AO*U",
        "oʊ": "OE",
        "ɔɪ": "OEU",
        "u": "AOU",
        "ʊ": "AO",
    },
]
# Vowel table used once vowels are being omitted (every stroke after the
# first, until a space resets it).  The vowel is still consumed from the IPA
# string, but it contributes either nothing or just the final R key.
#
# NOTE(review): VOWELS spells the "ear" sound "iər" while this table has
# "ir" -- possibly a typo in one of the two; confirm against the input data.
NULL_VOWELS = [
    {
        # Vowel + r sequences keep only the R.
        **dict.fromkeys(("ɑr", "ər", "ɛr", "ir", "ɔr", "oʊər"), "-R"),
        # Every other vowel is dropped entirely.
        **dict.fromkeys(
            (
                "æ",
                "aɪ",
                "aʊ",
                "ɑ",
                "eɪ",
                "ə",
                "ʌ",
                "ɛ",
                "i",
                "ɪ",
                "ju",
                "oʊ",
                "ɔɪ",
                "u",
                "ʊ",
            ),
            "",
        ),
    },
]
# Stroke-final consonant tables.
#
# Same stage convention as the initial tables: stages are tried in list
# order and each consumes at most one IPA prefix (longest key wins).  Every
# value starts with "-"; clean_extra_hyphens() collapses the repeats.
FINALS = [
    # Chords whose first key is -F.
    {
        "f": "-F",
        "v": "-F",
        "ʧ": "-FP",
        "nʧ": "-FRPB",
    },
    # Chords whose first key is -R.
    {
        "r": "-R",
        "ʃ": "-RB",
        "ʒ": "-RB",
    },
    # Chords whose first key is -P.
    {
        "p": "-P",
        "n": "-PB",
        "ŋ": "-PBG",
        "m": "-PL",
        # "nʤ" used to be listed twice in this dict ("-PBG", then "-PBLG").
        # A dict literal keeps only the last value for a repeated key, so
        # "-PBLG" is the mapping that was always in effect; the dead
        # duplicate is gone.
        "nʤ": "-PBLG",
    },
    # Chords whose first key is -B.
    {
        "b": "-B",
        "k": "-BG",
        "kʃən": "-BGS",
    },
    # Chords whose first key is -L.
    {
        "l": "-L",
        "lk": "-LG",
    },
    # Chords whose first key is -G.
    {
        "g": "-G",
        "ʃən": "-GS",
    },
    # -T on its own.
    {
        "t": "-T",
    },
    # -S; "z" is tried here first and again in the -Z stage further down.
    {
        "s": "-S",
        "z": "-S",
    },
    # -D on its own.
    {
        "d": "-D",
    },
    # -Z: only reachable for a "z" left over after the -S stage.
    {
        "z": "-Z",
    },
]
def clean_extra_hyphens(stroke, is_asterisk=False):
    """Normalise a stroke assembled by concatenating chord fragments.

    Initial fragments end in "-" and final fragments start with "-", so a
    raw stroke such as "S-AR-T" or "T-P--P-B" carries redundant hyphens.
    This strips them down to at most one separator and, when requested,
    puts an asterisk where that separator belongs.

    Args:
        stroke: Raw concatenation of initial, vowel and final fragments.
        is_asterisk: Whether the stroke should carry an asterisk.

    Returns:
        The cleaned stroke:
        * finals only   ("-F-S")  -> "-FS"   / "*FS"
        * initials only ("S-T-")  -> "ST"    / "ST*"
        * with a vowel  ("S-AR-T")-> "SART"  / "SA*RT"
        * no vowel      ("S--T")  -> "S-T"   / "S*T"
    """
    if stroke.startswith("-"):
        # Finals only: one leading marker, which the asterisk replaces.
        marker = "*" if is_asterisk else "-"
        return f"{marker}{stroke.replace('-', '')}"
    if stroke.endswith("-"):
        # Initials only: no hyphen needed; an asterisk goes at the end.
        marker = "*" if is_asterisk else ""
        return f"{stroke.replace('-', '')}{marker}"
    if any(c in stroke for c in "AOEU"):
        # The vowel keys separate the two halves, so no hyphen is needed.
        stroke = stroke.replace("-", "")
        # A vowel fragment such as "AO*U" already supplies the asterisk;
        # adding a second one would corrupt the stroke.
        if not is_asterisk or "*" in stroke:
            return stroke
        # The asterisk sits between the A/O keys and the E/U keys.  Split
        # by position so nothing after the insertion point can be lost.
        head_end = re.match(r"[^AOEU]*[AO]*", stroke).end()
        return f"{stroke[:head_end]}*{stroke[head_end:]}"
    # No vowel, but both initials and finals: drop every hyphen that is
    # directly followed by a key, which turns "-" into "" and "--" into "-".
    stroke = re.sub(r"-([^-])", r"\1", stroke)
    if is_asterisk:
        # The surviving separator is exactly where the asterisk belongs.
        stroke = stroke.replace("-", "*", 1)
    return stroke
# Usage: <script> SOURCE.json DEST.json
# Fail with a readable message instead of an IndexError when arguments are
# missing; extra arguments are still ignored.
if len(sys.argv) < 3:
    sys.exit(f"usage: {sys.argv[0]} SOURCE.json DEST.json")
print(f"Generating {sys.argv[2]} from {sys.argv[1]}...\n")
# The keys are IPA strings, so decode explicitly as UTF-8 rather than with
# the platform's locale encoding, which may not be able to decode them.
with open(sys.argv[1], "r", encoding="utf-8") as f:
    # src maps an IPA transcription to the spelling it should produce.
    src = json.load(f)
# dst collects the generated dictionary: steno outline -> spelling.
dst = {}
# The two stage sequences never change, so build them once instead of
# re-concatenating the tables for every stroke.
_STAGES_WITH_VOWELS = [*INITIALS, *VOWELS, *FINALS]
_STAGES_WITHOUT_VOWELS = [*INITIALS, *NULL_VOWELS, *FINALS]

# Translate every IPA transcription into a "/"-joined steno outline.
for ipa, spelling in src.items():
    # A trailing "*" on the key marks the first stroke as asterisked.
    is_asterisk = ipa.endswith("*")
    if is_asterisk:
        ipa = ipa[:-1]
    original_ipa = ipa
    steno = []
    omit_vowels = False
    is_first_stroke = True
    while ipa:
        ipa_before = ipa
        is_stroke_asterisk = is_asterisk and is_first_stroke
        # A leading "*" asterisks just the stroke that follows it.
        if ipa.startswith("*"):
            ipa = ipa[1:]
            is_stroke_asterisk = True
        # A space re-enables vowels for the next stroke.
        # NOTE(review): it also forces an asterisk on that stroke --
        # confirm this is intended.
        if ipa.startswith(" "):
            ipa = ipa[1:]
            omit_vowels = False
            is_stroke_asterisk = True
        cur_stroke = ""
        stages = _STAGES_WITHOUT_VOWELS if omit_vowels else _STAGES_WITH_VOWELS
        for stage in stages:
            # "." ends the current stroke early.
            if ipa.startswith("."):
                ipa = ipa[1:]
                break
            # Consume the longest key of this stage that prefixes the IPA.
            candidates = [key for key in stage if ipa.startswith(key)]
            if candidates:
                best = max(candidates, key=len)
                ipa = ipa[len(best):]
                cur_stroke += stage[best]
        if ipa == ipa_before:
            # Nothing was consumed, so the next pass would be identical:
            # the remaining text starts with a sound no table knows.  Stop
            # with a diagnostic instead of looping forever.
            raise ValueError(
                f"cannot translate {ipa!r}: {original_ipa} {spelling}"
            )
        if not cur_stroke:
            # Only separators were consumed; no stroke to emit yet.
            continue
        steno.append(clean_extra_hyphens(cur_stroke, is_asterisk=is_stroke_asterisk))
        # Strokes after the first drop their vowels until a space resets it.
        omit_vowels = True
        is_first_stroke = False
    if not steno:
        # Empty (or separator-only) transcription: there is no outline.
        raise ValueError(f"no strokes produced: {original_ipa!r} {spelling}")
    outline = "/".join(steno)
    print(f"{spelling:<20}{original_ipa:<20}\t{outline}")
    # A later entry with the same outline replaces the earlier one.
    dst[outline] = spelling
print()
# Write the outline -> spelling dictionary.  indent=0 puts every entry on
# its own line without leading spaces.  json.dump returns None, so its
# result is deliberately not bound to anything.
with open(sys.argv[2], "w", encoding="utf-8") as f:
    json.dump(dst, f, indent=0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment