tadeokondrak/gen.py

## gen.py
#!/usr/bin/env python3

import json
import sys
import re

# Initial-consonant stages. The generator loop walks the stages in list
# order and, within one stroke, consumes at most one entry per stage: the
# longest key that prefixes the remaining IPA text. Every chord here ends
# in "-" so that clean_extra_hyphens() can tell initials from finals.
INITIALS = [
    {"s": "S-", "ʃ": "SH-", "v": "SR-", "ʒ": "SH-", "ʤ": "SKWR-", "ɛksp": "SP-", "z": "STKPW-"},
    {"t": "T-", "ð": "TH-", "d": "TK-", "f": "TP-", "θ": "TH-", "n": "TPH-", "g": "TKPW-"},
    {"k": "K-", "ʧ": "KH-", "j": "KWR-"},
    {"p": "P-", "b": "PW-", "m": "PH-"},
    {"w": "W-"},
    {"h": "H-", "l": "HR-"},
    {"r": "R-"},
]

# Vowel stage used while vowels are being written out (the first stroke of
# an outline, and the stroke following a space in the IPA key). It is a
# one-stage list so it can be spliced between INITIALS and FINALS.
VOWELS = [
    {
        # Vowel + "r" sequences; being longer keys, they win over the bare vowel.
        "ɑr": "AR", "ər": "UR", "ɛr": "ER", "iər": "EUR", "ɔr": "OR", "oʊər": "OER",
        # Bare vowels and diphthongs.
        "æ": "A", "aɪ": "AOEU", "aʊ": "OU", "ɑ": "O", "eɪ": "AEU",
        "ə": "U", "ʌ": "U", "ɛ": "E", "i": "AOE", "ɪ": "EU",
        "ju": "AO*U", "oʊ": "OE", "ɔɪ": "OEU", "u": "AOU", "ʊ": "AO",
    },
]

# Vowel stage used while vowels are being omitted (strokes after the first,
# until a space in the IPA key re-enables them). Bare vowels are consumed
# but write nothing; vowel + "r" sequences keep only the final -R.
NULL_VOWELS = [
    {
        # NOTE(review): VOWELS spells this sequence "iər" but the key here is
        # "ir" -- possibly a typo for "iər"; confirm before changing.
        "ɑr": "-R", "ər": "-R", "ɛr": "-R", "ir": "-R", "ɔr": "-R", "oʊər": "-R",
        "æ": "", "aɪ": "", "aʊ": "", "ɑ": "", "eɪ": "",
        "ə": "", "ʌ": "", "ɛ": "", "i": "", "ɪ": "",
        "ju": "", "oʊ": "", "ɔɪ": "", "u": "", "ʊ": "",
    },
]

# Final-consonant stages, walked in list order after the vowel stage; at
# most one entry per stage is consumed per stroke (longest matching key).
# Every chord starts with "-" so clean_extra_hyphens() can tell finals from
# initials.
FINALS = [
    {
        "f": "-F",
        "v": "-F",
        "ʧ": "-FP",
        "nʧ": "-FRPB",
    },
    {
        "r": "-R",
        "ʃ": "-RB",
        "ʒ": "-RB",
    },
    {
        "p": "-P",
        "n": "-PB",
        "ŋ": "-PBG",
        "m": "-PL",
        # The literal used to list "nʤ" twice ("-PBG", then "-PBLG"). In a
        # dict display the last duplicate wins, so "-PBG" was dead; only the
        # effective entry is kept.
        "nʤ": "-PBLG",
    },
    {
        "b": "-B",
        "k": "-BG",
        "kʃən": "-BGS",
    },
    {
        "l": "-L",
        "lk": "-LG",
    },
    {
        "g": "-G",
        "ʃən": "-GS",
    },
    {
        "t": "-T",
    },
    {
        # "z" is written -S here; a second "z" in the same stroke can still
        # reach the -Z stage below.
        "s": "-S",
        "z": "-S",
    },
    {
        "d": "-D",
    },
    {
        "z": "-Z",
    },
]

def clean_extra_hyphens(stroke, is_asterisk=False):
    """Collapse the hyphens of a raw stroke and optionally insert an asterisk.

    ``stroke`` is a concatenation of chord strings in which initial chords
    end in "-" (e.g. "S-"), final chords start with "-" (e.g. "-T") and
    vowel chords carry no hyphen.  The result keeps at most one separator:

    * finals only        "-F-R"     -> "-FR"    (asterisk: "*FR")
    * initials only      "S-T-"     -> "ST"     (asterisk: "ST*")
    * with vowels        "S-AEU-T"  -> "SAEUT"  (asterisk: "SA*EUT")
    * initials + finals  "S--T"     -> "S-T"    (asterisk: "S*T")

    Args:
        stroke: Raw stroke text as described above.
        is_asterisk: When true, the stroke must contain an asterisk; it takes
            the place of the hyphen, or sits between the A/O and E/U vowel
            keys when vowels are present.

    Returns:
        The cleaned stroke string.

    Raises:
        ValueError: If an asterisk has to be inserted into a vowel stroke
            whose keys are not in the order consonants, A/O, E/U, consonants.
    """
    if stroke.startswith("-"):
        # Final consonants only: a single leading marker replaces every hyphen.
        marker = "*" if is_asterisk else "-"
        return f"{marker}{stroke.replace('-', '')}"
    if stroke.endswith("-"):
        # Initial consonants only: no hyphen is needed, just the asterisk if any.
        marker = "*" if is_asterisk else ""
        return f"{stroke.replace('-', '')}{marker}"

    if any(c in stroke for c in "AOEU"):
        # Vowel keys already separate the two halves, so all hyphens go.
        stroke = stroke.replace("-", "")
        # A chord such as "AO*U" brings its own asterisk. Adding another one
        # used to yield e.g. "KAO**" and drop the trailing "U".
        if not is_asterisk or "*" in stroke:
            return stroke
        # fullmatch (not match) so that no trailing keys are silently lost.
        match = re.fullmatch(r"([^AOEU]*[AO]*)([EU]*[^AOEU]*)", stroke)
        if match is None:
            raise ValueError(f"stroke keys are out of order: {stroke!r}")
        return f"{match[1]}*{match[2]}"

    # Initials and finals without a vowel between them.
    # Turns "-" into "" and "--" into "-"
    stroke = re.sub(r"-([^-])", r"\1", stroke)
    # The surviving hyphen marks the middle of the stroke, which is where the
    # asterisk belongs; previously the asterisk request was ignored here.
    return stroke.replace("-", "*") if is_asterisk else stroke

# Usage: gen.py SOURCE.json DEST.json
# SOURCE is a JSON object mapping an IPA-style key to a spelling; DEST
# receives the generated {outline: spelling} object.
if len(sys.argv) < 3:
    sys.exit(f"usage: {sys.argv[0]} SOURCE.json DEST.json")

print(f"Generating {sys.argv[2]} from {sys.argv[1]}...\n")
# The keys are IPA text: decode explicitly as UTF-8 instead of relying on
# the platform's locale encoding.
with open(sys.argv[1], "r", encoding="utf-8") as f:
    src = json.load(f)

dst = {}

# The stage sequences never change, so build them once, not once per stroke.
stages_with_vowels = [*INITIALS, *VOWELS, *FINALS]
stages_without_vowels = [*INITIALS, *NULL_VOWELS, *FINALS]

for ipa, spelling in src.items():
    # A trailing "*" on the key asks for an asterisk in the first stroke.
    is_asterisk = ipa.endswith("*")
    if is_asterisk:
        ipa = ipa[:-1]
    original_ipa = ipa
    steno = []
    omit_vowels = False
    is_first_stroke = True
    while ipa:
        ipa_before = ipa
        is_stroke_asterisk = is_asterisk and is_first_stroke
        # A leading "*" forces an asterisk into the stroke being built.
        if ipa.startswith("*"):
            ipa = ipa[1:]
            is_stroke_asterisk = True
        # A leading space turns vowels back on and also forces an asterisk.
        if ipa.startswith(" "):
            ipa = ipa[1:]
            omit_vowels = False
            is_stroke_asterisk = True
        cur_stroke = ""
        stages = stages_without_vowels if omit_vowels else stages_with_vowels
        for stage in stages:
            # "." ends the current stroke early.
            if ipa.startswith("."):
                ipa = ipa[1:]
                break
            candidates = [item for item in stage.items() if ipa.startswith(item[0])]
            if candidates:
                # The longest matching key wins within a stage.
                mapping_ipa, mapping_steno = max(candidates, key=lambda item: len(item[0]))
                ipa = ipa[len(mapping_ipa):]
                cur_stroke += mapping_steno
        if ipa == ipa_before:
            # Nothing was consumed, so retrying would loop forever (the old
            # code did exactly that). Report the untranslatable remainder.
            raise ValueError(
                f"cannot translate {ipa!r} in {original_ipa!r} ({spelling})"
            )
        if not cur_stroke:
            # Only silent symbols were consumed; keep going without emitting.
            continue
        steno.append(clean_extra_hyphens(cur_stroke, is_asterisk=is_stroke_asterisk))
        # Strokes after the first omit vowels until a space re-enables them.
        omit_vowels = True
        is_first_stroke = False
    if not steno:
        # Explicit raise instead of `assert False`, which `python -O` strips.
        raise ValueError(f"no strokes generated for {original_ipa!r} ({spelling})")
    outline = "/".join(steno)
    print(f"{spelling:<20}{original_ipa:<20}\t{outline}")
    dst[outline] = spelling

print()

with open(sys.argv[2], "w", encoding="utf-8") as f:
    json.dump(dst, f, indent=0)
	#!/usr/bin/env python3

	import json
	import sys
	import re

	# Initial-consonant stages. The generator loop walks the stages in list
	# order and, within one stroke, consumes at most one entry per stage: the
	# longest key that prefixes the remaining IPA text. Every chord here ends
	# in "-" so that clean_extra_hyphens() can tell initials from finals.
	INITIALS = [
	    {"s": "S-", "ʃ": "SH-", "v": "SR-", "ʒ": "SH-", "ʤ": "SKWR-", "ɛksp": "SP-", "z": "STKPW-"},
	    {"t": "T-", "ð": "TH-", "d": "TK-", "f": "TP-", "θ": "TH-", "n": "TPH-", "g": "TKPW-"},
	    {"k": "K-", "ʧ": "KH-", "j": "KWR-"},
	    {"p": "P-", "b": "PW-", "m": "PH-"},
	    {"w": "W-"},
	    {"h": "H-", "l": "HR-"},
	    {"r": "R-"},
	]

	# Vowel stage used while vowels are being written out (the first stroke of
	# an outline, and the stroke following a space in the IPA key). It is a
	# one-stage list so it can be spliced between INITIALS and FINALS.
	VOWELS = [
	    {
	        # Vowel + "r" sequences; being longer keys, they win over the bare vowel.
	        "ɑr": "AR", "ər": "UR", "ɛr": "ER", "iər": "EUR", "ɔr": "OR", "oʊər": "OER",
	        # Bare vowels and diphthongs.
	        "æ": "A", "aɪ": "AOEU", "aʊ": "OU", "ɑ": "O", "eɪ": "AEU",
	        "ə": "U", "ʌ": "U", "ɛ": "E", "i": "AOE", "ɪ": "EU",
	        "ju": "AO*U", "oʊ": "OE", "ɔɪ": "OEU", "u": "AOU", "ʊ": "AO",
	    },
	]

	# Vowel stage used while vowels are being omitted (strokes after the first,
	# until a space in the IPA key re-enables them). Bare vowels are consumed
	# but write nothing; vowel + "r" sequences keep only the final -R.
	NULL_VOWELS = [
	    {
	        # NOTE(review): VOWELS spells this sequence "iər" but the key here is
	        # "ir" -- possibly a typo for "iər"; confirm before changing.
	        "ɑr": "-R", "ər": "-R", "ɛr": "-R", "ir": "-R", "ɔr": "-R", "oʊər": "-R",
	        "æ": "", "aɪ": "", "aʊ": "", "ɑ": "", "eɪ": "",
	        "ə": "", "ʌ": "", "ɛ": "", "i": "", "ɪ": "",
	        "ju": "", "oʊ": "", "ɔɪ": "", "u": "", "ʊ": "",
	    },
	]

	# Final-consonant stages, walked in list order after the vowel stage; at
	# most one entry per stage is consumed per stroke (longest matching key).
	# Every chord starts with "-" so clean_extra_hyphens() can tell finals from
	# initials.
	FINALS = [
	    {
	        "f": "-F",
	        "v": "-F",
	        "ʧ": "-FP",
	        "nʧ": "-FRPB",
	    },
	    {
	        "r": "-R",
	        "ʃ": "-RB",
	        "ʒ": "-RB",
	    },
	    {
	        "p": "-P",
	        "n": "-PB",
	        "ŋ": "-PBG",
	        "m": "-PL",
	        # The literal used to list "nʤ" twice ("-PBG", then "-PBLG"). In a
	        # dict display the last duplicate wins, so "-PBG" was dead; only the
	        # effective entry is kept.
	        "nʤ": "-PBLG",
	    },
	    {
	        "b": "-B",
	        "k": "-BG",
	        "kʃən": "-BGS",
	    },
	    {
	        "l": "-L",
	        "lk": "-LG",
	    },
	    {
	        "g": "-G",
	        "ʃən": "-GS",
	    },
	    {
	        "t": "-T",
	    },
	    {
	        # "z" is written -S here; a second "z" in the same stroke can still
	        # reach the -Z stage below.
	        "s": "-S",
	        "z": "-S",
	    },
	    {
	        "d": "-D",
	    },
	    {
	        "z": "-Z",
	    },
	]

	def clean_extra_hyphens(stroke, is_asterisk=False):
	    """Collapse the hyphens of a raw stroke and optionally insert an asterisk.

	    ``stroke`` is a concatenation of chord strings in which initial chords
	    end in "-" (e.g. "S-"), final chords start with "-" (e.g. "-T") and
	    vowel chords carry no hyphen.  The result keeps at most one separator:

	    * finals only        "-F-R"     -> "-FR"    (asterisk: "*FR")
	    * initials only      "S-T-"     -> "ST"     (asterisk: "ST*")
	    * with vowels        "S-AEU-T"  -> "SAEUT"  (asterisk: "SA*EUT")
	    * initials + finals  "S--T"     -> "S-T"    (asterisk: "S*T")

	    Args:
	        stroke: Raw stroke text as described above.
	        is_asterisk: When true, the stroke must contain an asterisk; it takes
	            the place of the hyphen, or sits between the A/O and E/U vowel
	            keys when vowels are present.

	    Returns:
	        The cleaned stroke string.

	    Raises:
	        ValueError: If an asterisk has to be inserted into a vowel stroke
	            whose keys are not in the order consonants, A/O, E/U, consonants.
	    """
	    if stroke.startswith("-"):
	        # Final consonants only: a single leading marker replaces every hyphen.
	        marker = "*" if is_asterisk else "-"
	        return f"{marker}{stroke.replace('-', '')}"
	    if stroke.endswith("-"):
	        # Initial consonants only: no hyphen is needed, just the asterisk if any.
	        marker = "*" if is_asterisk else ""
	        return f"{stroke.replace('-', '')}{marker}"

	    if any(c in stroke for c in "AOEU"):
	        # Vowel keys already separate the two halves, so all hyphens go.
	        stroke = stroke.replace("-", "")
	        # A chord such as "AO*U" brings its own asterisk; adding a second one
	        # would duplicate it and lose the keys after it.
	        if not is_asterisk or "*" in stroke:
	            return stroke
	        # Each group needs its "*" quantifiers (consonants, A/O | E/U,
	        # consonants); fullmatch ensures no trailing keys are silently lost.
	        match = re.fullmatch(r"([^AOEU]*[AO]*)([EU]*[^AOEU]*)", stroke)
	        if match is None:
	            raise ValueError(f"stroke keys are out of order: {stroke!r}")
	        return f"{match[1]}*{match[2]}"

	    # Initials and finals without a vowel between them.
	    # Turns "-" into "" and "--" into "-"
	    stroke = re.sub(r"-([^-])", r"\1", stroke)
	    # The surviving hyphen marks the middle of the stroke, which is where the
	    # asterisk belongs when one is requested.
	    return stroke.replace("-", "*") if is_asterisk else stroke

	# Usage: gen.py SOURCE.json DEST.json
	# SOURCE is a JSON object mapping an IPA-style key to a spelling; DEST
	# receives the generated {outline: spelling} object.
	if len(sys.argv) < 3:
	    sys.exit(f"usage: {sys.argv[0]} SOURCE.json DEST.json")

	print(f"Generating {sys.argv[2]} from {sys.argv[1]}...\n")
	# The keys are IPA text: decode explicitly as UTF-8 instead of relying on
	# the platform's locale encoding.
	with open(sys.argv[1], "r", encoding="utf-8") as f:
	    src = json.load(f)

	dst = {}

	# The stage sequences never change, so build them once, not once per stroke.
	stages_with_vowels = [*INITIALS, *VOWELS, *FINALS]
	stages_without_vowels = [*INITIALS, *NULL_VOWELS, *FINALS]

	for ipa, spelling in src.items():
	    # A trailing "*" on the key asks for an asterisk in the first stroke.
	    is_asterisk = ipa.endswith("*")
	    if is_asterisk:
	        ipa = ipa[:-1]
	    original_ipa = ipa
	    steno = []
	    omit_vowels = False
	    is_first_stroke = True
	    while ipa:
	        ipa_before = ipa
	        is_stroke_asterisk = is_asterisk and is_first_stroke
	        # A leading "*" forces an asterisk into the stroke being built.
	        if ipa.startswith("*"):
	            ipa = ipa[1:]
	            is_stroke_asterisk = True
	        # A leading space turns vowels back on and also forces an asterisk.
	        if ipa.startswith(" "):
	            ipa = ipa[1:]
	            omit_vowels = False
	            is_stroke_asterisk = True
	        cur_stroke = ""
	        stages = stages_without_vowels if omit_vowels else stages_with_vowels
	        for stage in stages:
	            # "." ends the current stroke early.
	            if ipa.startswith("."):
	                ipa = ipa[1:]
	                break
	            candidates = [item for item in stage.items() if ipa.startswith(item[0])]
	            if candidates:
	                # The longest matching key wins within a stage.
	                mapping_ipa, mapping_steno = max(candidates, key=lambda item: len(item[0]))
	                ipa = ipa[len(mapping_ipa):]
	                cur_stroke += mapping_steno
	        if ipa == ipa_before:
	            # Nothing was consumed, so retrying would loop forever. Report
	            # the untranslatable remainder instead.
	            raise ValueError(
	                f"cannot translate {ipa!r} in {original_ipa!r} ({spelling})"
	            )
	        if not cur_stroke:
	            # Only silent symbols were consumed; keep going without emitting.
	            continue
	        steno.append(clean_extra_hyphens(cur_stroke, is_asterisk=is_stroke_asterisk))
	        # Strokes after the first omit vowels until a space re-enables them.
	        omit_vowels = True
	        is_first_stroke = False
	    if not steno:
	        # Explicit raise instead of `assert False`, which `python -O` strips.
	        raise ValueError(f"no strokes generated for {original_ipa!r} ({spelling})")
	    outline = "/".join(steno)
	    print(f"{spelling:<20}{original_ipa:<20}\t{outline}")
	    dst[outline] = spelling

	print()

	with open(sys.argv[2], "w", encoding="utf-8") as f:
	    json.dump(dst, f, indent=0)
No results found