Skip to content

Instantly share code, notes, and snippets.

@noaione
Created September 15, 2025 04:16
Show Gist options
  • Select an option

  • Save noaione/633d002db22891c6c818389c275fc961 to your computer and use it in GitHub Desktop.

Select an option

Save noaione/633d002db22891c6c818389c275fc961 to your computer and use it in GitHub Desktop.
quick and simple ttml to lrc parser in python, with transliteration replacement support
"""
Needs Python 3.10+
Usage:
```py
from ttml_parser import ttml_to_lrc
ttml_data = "<tt>...</tt>"
lrc_data = ttml_to_lrc(ttml_data, {"name": "Song Title", "artistName": "Artist Name"})
# If you have transliterations and want to use them:
# lrc_data = ttml_to_lrc(ttml_data, {"name": "Song Title", "artistName": "Artist Name"}, use_transliteration=True)
print(lrc_data)
```
"""
import re
import xml.etree.ElementTree as ET
from typing import Any
# Prefix -> namespace-URI map handed to ElementTree's find()/findall() so the
# path expressions in this module can use short prefixes such as "tt:body".
NS = {
    # Core TTML namespace: <body>, <div> and <p> are looked up under it.
    "tt": "http://www.w3.org/ns/ttml",
    # Namespace of the itunes:* elements/attributes read here
    # (transliteration, text, songwriters, songwriter, and the "key" attribute).
    "itunes": "http://music.apple.com/lyric-ttml-internal",
    # TTML metadata namespace; not referenced by any lookup visible in this file.
    "ttm": "http://www.w3.org/ns/ttml#metadata",
}
# Clock-style timestamps: "SS", "SS.fff", "MM:SS[.fff]" and "HH:MM:SS[.fff]".
# The hours field can only appear together with a minutes field, so "1:23" is
# always read as one minute and twenty-three seconds.
TIME_RE = re.compile(
    r"^(?:(?:(?P<hours>\d+):)?(?P<minutes>\d+):)?"
    r"(?P<seconds>\d+)(?:\.(?P<frac>\d+))?$"
)
MIN_GAP_SEC = 0.01  # Minimal gap between consecutive lyric timestamps (seconds)
MIN_DURATION_SEC = 0.01  # Minimal duration after shifting (for internal adjustment)


def parse_time(t: str) -> float:
    """Convert a TTML clock value into seconds.

    Accepted shapes (surrounding whitespace is ignored):
    ``SS``, ``SS.fff``, ``MM:SS[.fff]`` and ``HH:MM:SS[.fff]``.

    The fractional part is interpreted as a decimal fraction and truncated
    (not rounded) to millisecond precision when more than three digits are
    given.

    Raises:
        ValueError: if ``t`` does not match any of the accepted shapes.
    """
    m = TIME_RE.match(t.strip())
    if not m:
        raise ValueError(f"Unrecognized time format: {t}")
    hours = int(m.group("hours") or 0)
    minutes = int(m.group("minutes") or 0)
    seconds = int(m.group("seconds"))
    # Keep at most three fractional digits; "5" -> 5/10, "25" -> 25/100,
    # "1239" -> 123/1000.
    digits = (m.group("frac") or "0")[:3]
    frac_val = int(digits) / 10 ** len(digits)
    return hours * 3600 + minutes * 60 + seconds + frac_val
def normalize_text(s: str) -> str:
    """Trim ``s`` and collapse every run of whitespace into a single space."""
    trimmed = s.strip()
    collapsed = re.sub(r"\s+", " ", trimmed)
    return collapsed
def extract_transliterations(tree: ET.ElementTree) -> dict[str, dict[str, str]]:
    """Collect transliterated lyric text, grouped by language.

    Every ``itunes:transliteration`` element carrying an ``xml:lang``
    attribute contributes one entry. Its value maps the ``for`` attribute of
    each nested ``itunes:text`` element to that element's normalized text.
    Text elements with no ``for`` attribute or no text are skipped, and
    languages that end up with no usable text are omitted.

    Returns an empty dict when the tree has no root element.
    """
    by_lang: dict[str, dict[str, str]] = {}
    document = tree.getroot()
    if document is None:
        return by_lang
    # xml:lang lives in the reserved XML namespace, so spell it out in
    # ElementTree's "{uri}local" form.
    xml_lang_attr = "{http://www.w3.org/XML/1998/namespace}lang"
    for block in document.findall(".//itunes:transliteration", NS):
        language = block.get(xml_lang_attr)
        if not language:
            continue
        pairs = (
            (node.get("for"), normalize_text("".join(node.itertext())))
            for node in block.findall(".//itunes:text", NS)
        )
        entries: dict[str, str] = {
            key: value for key, value in pairs if key and value
        }
        if entries:
            by_lang[language] = entries
    return by_lang
def enforce_monotonic_centiseconds(lines: list[tuple[float, float, str]]) -> list[tuple[int, str]]:
    """
    Turn ``(begin, end, text)`` records into ``(centiseconds, text)`` pairs
    whose timestamps are strictly increasing.

    Each begin time is rounded to centiseconds; whenever that value would not
    exceed the previous entry's value it is bumped to one centisecond past it
    (LRC players can behave badly with duplicate timestamps). The end time is
    ignored.
    """
    stamped: list[tuple[int, str]] = []
    floor_cs = 0  # smallest centisecond value the next entry may take
    for begin, _end, lyric in lines:
        stamp = max(round(begin * 100), floor_cs)
        stamped.append((stamp, lyric))
        floor_cs = stamp + 1
    return stamped
def format_lrc_time_cs(cs: int) -> str:
    """Render a centisecond count as an LRC ``[mm:ss.xx]`` timestamp.

    Minutes are zero-padded to at least two digits and are not capped at 59.
    """
    minutes, remainder = divmod(cs, 6000)
    seconds, hundredths = divmod(remainder, 100)
    return f"[{minutes:02d}:{seconds:02d}.{hundredths:02d}]"
def build_tags(tree: ET.ElementTree, attributes: dict) -> list[str]:
    """Build the LRC header tags for a song.

    Produces, in order: ``[ti:...]`` from ``attributes['name']``, ``[ar:...]``
    from ``attributes['artistName']``, an optional ``[au:...]`` listing the
    non-empty ``itunes:songwriter`` names found under the first
    ``itunes:songwriters`` element (joined with " / "), and a fixed ``[by:...]``
    tag.

    Raises:
        KeyError: if ``attributes`` lacks ``name`` or ``artistName``.
        ValueError: if the tree has no root element.
    """
    title_tag = f"[ti:{attributes['name']}]"
    artist_tag = f"[ar:{attributes['artistName']}]"
    document = tree.getroot()
    if document is None:
        raise ValueError("Empty TTML document")
    header = [title_tag, artist_tag]
    credits_node = document.find(".//itunes:songwriters", NS)
    if credits_node is not None:
        writers = [
            name
            for sw in credits_node.findall(".//itunes:songwriter", NS)
            if (name := normalize_text("".join(sw.itertext())))
        ]
        if writers:
            header.append(f"[au:{' / '.join(writers)}]")
    header.append("[by:noaione-ttml-to-lrc]")
    return header
def extract_grouped_lines(tree: ET.ElementTree, use_transliteration: bool = False) -> tuple[list[list[dict[str, Any]]], list[dict[str, Any]]]:
    """Read the timed lyric lines of a TTML tree, keeping the <div> grouping.

    Only <div> elements directly under <body>, and <p> elements directly under
    each <div>, are considered. A <p> is skipped when it has no ``begin``,
    when ``begin``/``end`` cannot be parsed, or when it has no text. A missing
    ``end`` defaults to the begin time.

    When ``use_transliteration`` is true, the text of a line is replaced by the
    entry for its ``itunes:key`` taken from the first transliteration language
    found in the document, if such an entry exists.

    Returns ``(groups, flat)``: ``groups`` holds one non-empty list of line
    dicts (keys ``b``, ``e``, ``text``) per <div>; ``flat`` contains the very
    same dict objects in document order, so in-place edits show up in both.
    """
    document = tree.getroot()
    if document is None:
        return [], []
    translit_by_lang = extract_transliterations(tree)
    body = document.find("tt:body", NS)
    if body is None:
        return [], []
    # Only the first language's table is consulted.
    translit_lookup = next(iter(translit_by_lang.values()), {})
    # itunes:key carries the id used by the transliteration table.
    key_attr = "{http://music.apple.com/lyric-ttml-internal}key"

    groups: list[list[dict[str, Any]]] = []
    for div in body.findall("tt:div", NS):
        section: list[dict[str, Any]] = []
        for para in div.findall("tt:p", NS):
            begin_raw = para.get("begin")
            end_raw = para.get("end")
            if not begin_raw:
                continue
            try:
                start = parse_time(begin_raw)
                stop = parse_time(end_raw) if end_raw else start
            except ValueError:
                # Unparseable timing: drop the line rather than fail the file.
                continue
            lyric = normalize_text("".join(para.itertext()))
            if not lyric:
                continue
            line_key = para.get(key_attr)
            if use_transliteration and line_key:
                lyric = translit_lookup.get(line_key, lyric)
            section.append({"b": start, "e": stop, "text": lyric})
        if section:
            groups.append(section)

    flat: list[dict[str, Any]] = []
    for section in groups:
        flat.extend(section)
    return groups, flat
def resolve_overlaps_dict(flat: list[dict[str, Any]]) -> None:
    """Shift line start times in place so that no line starts too early.

    Walks ``flat`` in order and rewrites each dict's ``b`` (begin) and ``e``
    (end) so that every line begins at least ``MIN_GAP_SEC`` after both the
    previous line's begin and the latest end seen so far. Nothing is returned;
    the dicts are mutated.
    """
    # Start below zero so the first line is only shifted if it begins before
    # -1.0 + MIN_GAP_SEC.
    prev_begin = -1.0
    prev_end = -1.0
    for ln in flat:
        b = ln["b"]
        e = ln["e"]
        # Earliest start allowed for this line.
        needed_start = max(prev_begin + MIN_GAP_SEC, prev_end + MIN_GAP_SEC)
        if b < needed_start:
            b = needed_start
            # NOTE(review): this end-time fix-up is assumed to apply only when
            # the start was shifted (nested under the check above) -- confirm
            # against the original indentation.
            if b > e:
                # The shift moved the start past the end; give the line a
                # minimal positive duration.
                e = b + MIN_DURATION_SEC
        ln["b"], ln["e"] = b, e
        prev_begin = b
        # Running maximum of end times, not just the previous line's end.
        prev_end = max(prev_end, e)
def enforce_monotonic_cs_inplace(flat: list[dict[str, Any]]):
    """Make the ``b`` values of ``flat`` strictly increasing at centisecond
    resolution, editing the dicts in place.

    Each begin time is rounded to centiseconds; a value that does not exceed
    the previous line's value is bumped to one centisecond past it. The result
    is written back to ``b`` in seconds.
    """
    floor_cs = 0  # smallest centisecond value the next line may take
    for line in flat:
        stamp = max(round(line["b"] * 100), floor_cs)
        line["b"] = stamp / 100.0
        floor_cs = stamp + 1
def ttml_to_lrc(ttml: str, attributes: dict, break_threshold: float = 3.0, use_transliteration: bool = False) -> str:
    """Convert Apple Music TTML to LRC string.

    - Resolves overlapping timestamps.
    - Ensures strictly increasing centisecond timestamps, including the
      timestamp of any inserted separator line.
    - Inserts a timestamped empty line between <div> groups if the gap between
      them is >= break_threshold seconds. (Set break_threshold <= 0 to disable.)

    Args:
        ttml: The TTML document as a string.
        attributes: Must provide ``name`` and ``artistName`` for the header tags.
        break_threshold: Minimum silence, in seconds, that triggers a separator.
        use_transliteration: Replace line text with its transliteration when
            the document provides one.

    Returns:
        The LRC text ending with a newline, or ``""`` when the document
        contains no lyric lines.

    Raises:
        ValueError: if ``ttml`` is not well-formed XML.
    """
    try:
        tree = ET.ElementTree(ET.fromstring(ttml))
    except ET.ParseError as e:
        raise ValueError(f"Failed to parse TTML: {e}") from e

    groups, flat = extract_grouped_lines(tree, use_transliteration)
    if not flat:
        return ""  # No lyrics

    # Both helpers mutate the line dicts, which are shared between `flat` and
    # `groups`, so the adjusted times are visible through `groups` below.
    resolve_overlaps_dict(flat)
    enforce_monotonic_cs_inplace(flat)

    lines_out: list[str] = list(build_tags(tree, attributes))
    prev_grp: list[dict[str, Any]] | None = None
    for grp in groups:
        if prev_grp is not None and break_threshold > 0:
            prev_end = prev_grp[-1]["e"]
            curr_start = grp[0]["b"]
            if (curr_start - prev_end) >= break_threshold:
                # Place the empty line at the end of the previous group, but
                # never on (or before) that group's last lyric timestamp: a
                # line whose end equals its begin would otherwise be blanked
                # out at the very moment it is shown.
                last_lyric_cs = round(prev_grp[-1]["b"] * 100)
                break_cs = max(round(prev_end * 100), last_lyric_cs + 1)
                # Skip the separator if it would collide with the next lyric
                # (only possible with a sub-centisecond break_threshold).
                if break_cs < round(curr_start * 100):
                    lines_out.append(format_lrc_time_cs(break_cs))
        lines_out.extend(
            f"{format_lrc_time_cs(round(ln['b'] * 100))}{ln['text']}" for ln in grp
        )
        prev_grp = grp
    return "\n".join(lines_out) + "\n"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment