Created
September 15, 2025 04:16
-
-
Save noaione/633d002db22891c6c818389c275fc961 to your computer and use it in GitHub Desktop.
quick and simple ttml to lrc parser in python, with transliteration replacement support
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Needs Python 3.10+ | |
| Usage: | |
| ```py | |
| from ttml_parser import ttml_to_lrc | |
| ttml_data = "<tt>...</tt>" | |
| lrc_data = ttml_to_lrc(ttml_data, {"name": "Song Title", "artistName": "Artist Name"}) | |
| # If you have transliterations and want to use them: | |
| # lrc_data = ttml_to_lrc(ttml_data, {"name": "Song Title", "artistName": "Artist Name"}, use_transliteration=True) | |
| print(lrc_data) | |
| ``` | |
| """ | |
| import re | |
| import xml.etree.ElementTree as ET | |
| from typing import Any | |
# Prefix -> namespace URI map passed to ElementTree find()/findall() so that
# queries can be written with short prefixes such as "tt:body".
NS = {
    "tt": "http://www.w3.org/ns/ttml",
    "itunes": "http://music.apple.com/lyric-ttml-internal",
    "ttm": "http://www.w3.org/ns/ttml#metadata",
}
# Matches "ss", "ss.fff", "mm:ss" and "mm:ss.fff".
# Capture groups: 1 = minutes (optional), 2 = seconds, 3 = fraction digits (optional).
TIME_RE = re.compile(r"^(?:(\d+):)?(\d+)(?:\.(\d+))?$")
MIN_GAP_SEC = 0.01  # Minimal gap between consecutive lyric timestamps (seconds)
MIN_DURATION_SEC = 0.01  # Minimal duration after shifting (for internal adjustment)
# Clock-time pattern private to parse_time.
# Capture groups: 1 = hours (optional, only together with minutes),
# 2 = minutes (optional), 3 = seconds, 4 = fraction digits (optional).
_CLOCK_TIME_RE = re.compile(r"^(?:(?:(\d+):)?(\d+):)?(\d+)(?:\.(\d+))?$")


def parse_time(t: str) -> float:
    """Parse a TTML clock-time string into seconds.

    Accepted shapes (surrounding whitespace is ignored):
    ``ss``, ``ss.f``, ``mm:ss``, ``mm:ss.f``, ``hh:mm:ss`` and ``hh:mm:ss.f``,
    where ``f`` is any number of fractional digits.

    Args:
        t: The time expression, e.g. ``"1:02.345"`` or ``"1:00:05.250"``.

    Returns:
        The time in seconds as a float.

    Raises:
        ValueError: If ``t`` does not match any of the accepted shapes.
    """
    m = _CLOCK_TIME_RE.match(t.strip())
    if not m:
        raise ValueError(f"Unrecognized time format: {t}")
    hours_s, minutes_s, seconds_s, frac_s = m.groups()
    hours = int(hours_s) if hours_s else 0
    minutes = int(minutes_s) if minutes_s else 0
    seconds = int(seconds_s)
    # Scale the fractional digits by their own length so "5" -> 0.5,
    # "05" -> 0.05, "250" -> 0.25, without truncating extra digits.
    frac_val = int(frac_s) / 10 ** len(frac_s) if frac_s else 0.0
    return hours * 3600 + minutes * 60 + seconds + frac_val
def normalize_text(s: str) -> str:
    """Trim ``s`` and collapse every run of whitespace into a single space."""
    # str.split() with no separator discards leading/trailing whitespace and
    # treats any whitespace run as one delimiter.
    words = s.split()
    return " ".join(words)
def extract_transliterations(tree: ET.ElementTree) -> dict[str, dict[str, str]]:
    """Collect the transliteration tables found in a TTML document.

    Every ``itunes:transliteration`` element that carries an ``xml:lang``
    attribute is scanned for ``itunes:text`` descendants; each of those
    contributes one entry keyed by its ``for`` attribute, with its
    whitespace-normalized text as the value.

    Returns:
        ``{lang: {for_key: text}}``. Languages without any usable entry are
        omitted; an empty dict is returned when the tree has no root.
    """
    root = tree.getroot()
    if root is None:
        return {}

    by_lang: dict[str, dict[str, str]] = {}
    for translit_el in root.findall(".//itunes:transliteration", NS):
        # xml:lang lives in the reserved XML namespace, so it needs the
        # fully qualified attribute name.
        lang = translit_el.get("{http://www.w3.org/XML/1998/namespace}lang")
        if not lang:
            continue

        entries: dict[str, str] = {}
        for text_el in translit_el.findall(".//itunes:text", NS):
            target = text_el.get("for")
            content = normalize_text("".join(text_el.itertext()))
            if not (target and content):
                continue
            entries[target] = content

        if entries:
            by_lang[lang] = entries
    return by_lang
def enforce_monotonic_centiseconds(lines: list[tuple[float, float, str]]) -> list[tuple[int, str]]:
    """Turn (begin, end, text) lines into strictly increasing centisecond stamps.

    Each begin time is rounded to whole centiseconds. A stamp that would not
    be greater than the previous one is moved to one centisecond after it
    (LRC players can behave badly with duplicate timestamps). The end time
    is not used.

    Returns:
        A list of ``(centiseconds, text)`` in the input order.
    """
    stamped: list[tuple[int, str]] = []
    lowest_allowed = 0  # one more than the previously emitted stamp
    for begin, _end, text in lines:
        stamp = max(round(begin * 100), lowest_allowed)
        stamped.append((stamp, text))
        lowest_allowed = stamp + 1
    return stamped
def format_lrc_time_cs(cs: int) -> str:
    """Render a centisecond count as an LRC ``[mm:ss.xx]`` time tag.

    Minutes, seconds and hundredths are each zero-padded to at least two
    digits; minutes simply grow wider past 99.
    """
    minutes, remainder = divmod(cs, 6000)  # 6000 centiseconds per minute
    seconds, hundredths = divmod(remainder, 100)
    return f"[{minutes:02d}:{seconds:02d}.{hundredths:02d}]"
def build_tags(tree: ET.ElementTree, attributes: dict) -> list[str]:
    """Build the LRC header tags for a song.

    Args:
        tree: Parsed TTML document; searched for ``itunes:songwriter`` names.
        attributes: Must provide the keys ``"name"`` and ``"artistName"``.

    Returns:
        ``[ti:...]`` and ``[ar:...]`` tags, an ``[au:...]`` tag joining the
        non-empty songwriter names with `` / `` when any exist, and a
        trailing ``[by:...]`` tag.

    Raises:
        KeyError: If ``attributes`` lacks ``"name"`` or ``"artistName"``.
        ValueError: If the tree has no root element.
    """
    header = [f"[ti:{attributes['name']}]", f"[ar:{attributes['artistName']}]"]

    root = tree.getroot()
    if root is None:
        raise ValueError("Empty TTML document")

    writers_el = root.find(".//itunes:songwriters", NS)
    if writers_el is not None:
        cleaned = (
            normalize_text("".join(writer.itertext()))
            for writer in writers_el.findall(".//itunes:songwriter", NS)
        )
        credited = [name for name in cleaned if name]
        if credited:
            header.append(f"[au:{' / '.join(credited)}]")

    header.append("[by:noaione-ttml-to-lrc]")
    return header
def extract_grouped_lines(tree: ET.ElementTree, use_transliteration: bool = False) -> tuple[list[list[dict[str, Any]]], list[dict[str, Any]]]:
    """Extract lyric lines from the TTML body, grouped by ``<div>``.

    A ``<p>`` is skipped when it has no ``begin`` attribute, when its
    ``begin``/``end`` cannot be parsed, or when its text is empty. A missing
    ``end`` defaults to the begin time. When ``use_transliteration`` is true
    and the ``<p>`` has an ``itunes:key`` present in the first transliteration
    table of the document, that transliterated text replaces the line text.

    Returns:
        ``(groups, flat)``: ``groups`` holds one list of line dicts per
        non-empty ``<div>``; ``flat`` holds the same dict objects in document
        order. Each dict has the keys ``b``, ``e`` and ``text``. Both are
        empty when the tree has no root or no ``tt:body``.
    """
    root = tree.getroot()
    if root is None:
        return [], []

    all_translits = extract_transliterations(tree)
    body = root.find("tt:body", NS)
    if body is None:
        return [], []

    # Only the first transliteration language found is ever consulted.
    translit_map = next(iter(all_translits.values()), {})
    key_attr = "{http://music.apple.com/lyric-ttml-internal}key"

    groups: list[list[dict[str, Any]]] = []
    flat: list[dict[str, Any]] = []
    for div in body.findall("tt:div", NS):
        current: list[dict[str, Any]] = []
        for para in div.findall("tt:p", NS):
            begin_attr = para.get("begin")
            end_attr = para.get("end")
            if not begin_attr:
                continue
            try:
                start = parse_time(begin_attr)
                stop = parse_time(end_attr) if end_attr else start
            except ValueError:
                continue

            text = normalize_text("".join(para.itertext()))
            if not text:
                continue

            # itunes:key links this line to its transliteration entry.
            line_key = para.get(key_attr)
            if use_transliteration and line_key and line_key in translit_map:
                text = translit_map[line_key]

            current.append({"b": start, "e": stop, "text": text})

        if current:
            groups.append(current)
            flat.extend(current)
    return groups, flat
def resolve_overlaps_dict(flat: list[dict[str, Any]]) -> None:
    """Shift lyric lines in place so that no line overlaps the ones before it.

    Walks ``flat`` in order. A line whose begin ``b`` falls earlier than
    ``MIN_GAP_SEC`` after the latest begin/end seen so far is moved forward to
    that point. Afterwards, any line whose end ``e`` lies before its begin is
    given an end of ``b + MIN_DURATION_SEC``, so every line leaves this
    function with ``e >= b``.

    Args:
        flat: Line dicts with numeric ``"b"`` and ``"e"`` keys; mutated in place.
    """
    prev_begin = -1.0
    prev_end = -1.0
    for ln in flat:
        b, e = ln["b"], ln["e"]
        earliest_start = max(prev_begin, prev_end) + MIN_GAP_SEC
        if b < earliest_start:
            b = earliest_start
        # Applied to every line, not only shifted ones, so that an input line
        # whose end precedes its begin cannot leave a group ending before its
        # own last line starts.
        if b > e:
            e = b + MIN_DURATION_SEC
        ln["b"], ln["e"] = b, e
        prev_begin = b
        prev_end = max(prev_end, e)
def enforce_monotonic_cs_inplace(flat: list[dict[str, Any]]):
    """Snap each line's ``b`` to centiseconds and make them strictly increasing.

    Every ``"b"`` is rounded to whole centiseconds; a value that would not be
    greater than the previous line's is moved to one centisecond after it.
    The result is written back to ``"b"`` in seconds. Only ``"b"`` is
    modified; the dicts are mutated in place and nothing is returned.
    """
    lowest_allowed = 0  # one more than the previous line's centisecond stamp
    for entry in flat:
        stamp = max(round(entry["b"] * 100), lowest_allowed)
        entry["b"] = stamp / 100.0
        lowest_allowed = stamp + 1
def ttml_to_lrc(ttml: str, attributes: dict, break_threshold: float = 3.0, use_transliteration: bool = False) -> str:
    """Convert Apple Music TTML to an LRC string.

    - Resolves overlapping timestamps.
    - Ensures strictly increasing centisecond timestamps, including the
      blank separator lines.
    - Inserts a timestamped blank line between <div> groups if the gap is
      >= break_threshold seconds. (Set break_threshold <= 0 to disable.)

    Args:
        ttml: The TTML document as a string.
        attributes: Song metadata; must provide ``"name"`` and ``"artistName"``
            (used for the ``[ti:]`` and ``[ar:]`` tags).
        break_threshold: Minimum gap in seconds between two groups that
            triggers a blank separator line.
        use_transliteration: Replace line text with its transliteration when
            one is available.

    Returns:
        The LRC text terminated by a newline, or ``""`` when the document
        contains no lyric lines.

    Raises:
        ValueError: If ``ttml`` is not well-formed XML.
    """
    try:
        root = ET.fromstring(ttml)
    except ET.ParseError as e:
        raise ValueError(f"Failed to parse TTML: {e}") from e
    tree = ET.ElementTree(root)

    groups, flat = extract_grouped_lines(tree, use_transliteration)
    if not flat:
        return ""  # No lyrics

    # Both helpers mutate the dicts shared between `flat` and `groups`.
    resolve_overlaps_dict(flat)
    enforce_monotonic_cs_inplace(flat)

    lines_out: list[str] = list(build_tags(tree, attributes))
    last_cs = -1  # centisecond stamp of the most recently emitted lyric line
    prev_group_end: float | None = None
    for grp in groups:
        group_start = grp[0]["b"]
        if (
            prev_group_end is not None
            and break_threshold > 0
            and (group_start - prev_group_end) >= break_threshold
        ):
            # The blank line goes at the END time of the previous group, but
            # that end can coincide with (or precede) the last lyric's begin,
            # e.g. when a <p> has no `end`. Keep it strictly after the last
            # lyric and strictly before the next one, or drop it.
            blank_cs = max(round(prev_group_end * 100), last_cs + 1)
            if blank_cs < round(group_start * 100):
                lines_out.append(format_lrc_time_cs(blank_cs))
        for ln in grp:
            last_cs = round(ln["b"] * 100)
            lines_out.append(f"{format_lrc_time_cs(last_cs)}{ln['text']}")
        prev_group_end = grp[-1]["e"]
    return "\n".join(lines_out) + "\n"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment