noaione/ttml_parser.py

## ttml_parser.py
"""
Needs Python 3.10+

Usage:

```py
from ttml_parser import ttml_to_lrc

ttml_data = "<tt>...</tt>"

lrc_data = ttml_to_lrc(ttml_data, {"name": "Song Title", "artistName": "Artist Name"})
# If you have transliterations and want to use them:
# lrc_data = ttml_to_lrc(ttml_data, {"name": "Song Title", "artistName": "Artist Name"}, use_transliteration=True)

print(lrc_data)
```
"""


import re
import xml.etree.ElementTree as ET
from typing import Any

# Timing tolerances, in seconds, used while untangling overlapping lines.
MIN_GAP_SEC = 0.01        # smallest distance enforced between consecutive line starts
MIN_DURATION_SEC = 0.01   # length given to a line whose start was pushed past its end

# Prefix -> namespace URI map handed to ElementTree find()/findall() lookups.
NS = dict(
    tt="http://www.w3.org/ns/ttml",
    itunes="http://music.apple.com/lyric-ttml-internal",
    ttm="http://www.w3.org/ns/ttml#metadata",
)

# Timestamp shape: optional "<digits>:" prefix, whole seconds, optional ".<digits>".
TIME_RE = re.compile(r"^(?:(\d+):)?(\d+)(?:\.(\d+))?$")


# Hour-aware variant of the timestamp pattern: [[hh:]mm:]ss[.fraction].
# Kept private to parse_time so the public TIME_RE constant stays untouched.
_CLOCK_TIME_RE = re.compile(r"^(?:(?:(\d+):)?(\d+):)?(\d+)(?:\.(\d+))?$")


def parse_time(t: str) -> float:
    """Convert a clock-style timestamp string into seconds.

    Accepted shapes are ``ss``, ``ss.fff``, ``mm:ss[.fff]`` and
    ``hh:mm:ss[.fff]``; surrounding whitespace is ignored. The hour form is
    accepted so that timestamps past the one-hour mark are not rejected.

    Args:
        t: Timestamp text, e.g. ``"12.5"``, ``"1:02.345"`` or ``"1:00:05.5"``.

    Returns:
        The time in seconds. Only the first three fractional digits are used;
        anything beyond millisecond precision is truncated, not rounded.

    Raises:
        ValueError: If *t* does not match any accepted shape.
    """
    m = _CLOCK_TIME_RE.match(t.strip())
    if not m:
        raise ValueError(f"Unrecognized time format: {t}")
    hours_txt, minutes_txt, seconds_txt, frac_txt = m.groups()
    hours = int(hours_txt) if hours_txt else 0
    minutes = int(minutes_txt) if minutes_txt else 0
    seconds = int(seconds_txt)
    # Scale by the number of digits actually kept: "5" -> 0.5, "05" -> 0.05.
    digits = (frac_txt or "0")[:3]
    frac_val = int(digits) / 10 ** len(digits)
    return hours * 3600 + minutes * 60 + seconds + frac_val


def normalize_text(s: str) -> str:
    """Trim *s* and squeeze every internal whitespace run down to one space."""
    trimmed = s.strip()
    collapsed = re.sub(r"\s+", " ", trimmed)
    return collapsed


def extract_transliterations(tree: ET.ElementTree) -> dict[str, dict[str, str]]:
    """Collect per-language transliteration tables from the document.

    Every ``itunes:transliteration`` element that carries an ``xml:lang``
    attribute contributes one table. Each table maps the ``for`` attribute of
    a nested ``itunes:text`` element to that element's whitespace-normalized
    text. Entries lacking a ``for`` value or any text are dropped, and so are
    languages that end up with no entries.

    Args:
        tree: Parsed TTML document.

    Returns:
        ``{lang: {for_value: text}}``; empty when the tree has no root.
    """
    root = tree.getroot()
    if root is None:
        return {}

    # ElementTree exposes xml:lang under its fully qualified (Clark) name.
    xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"
    by_lang: dict[str, dict[str, str]] = {}
    for block in root.findall(".//itunes:transliteration", NS):
        lang = block.get(xml_lang)
        if not lang:
            continue
        entries: dict[str, str] = {}
        for node in block.findall(".//itunes:text", NS):
            key = node.get("for")
            if not key:
                continue
            value = normalize_text("".join(node.itertext()))
            if value:
                entries[key] = value
        if entries:
            by_lang[lang] = entries
    return by_lang


def enforce_monotonic_centiseconds(lines: list[tuple[float, float, str]]) -> list[tuple[int, str]]:
    """Turn ``(begin, end, text)`` triples into strictly increasing LRC stamps.

    Each begin time (seconds) is rounded to whole centiseconds. A stamp that
    would not exceed the one before it is bumped to exactly one centisecond
    after it, so no two output entries share a timestamp. The first stamp is
    never below zero. End times are ignored.

    Args:
        lines: Triples of begin seconds, end seconds and lyric text.

    Returns:
        ``(centiseconds, text)`` pairs in the input order.
    """
    stamped: list[tuple[int, str]] = []
    floor_cs = 0  # lowest value the next timestamp is allowed to take
    for begin, _end, lyric in lines:
        cs = max(round(begin * 100), floor_cs)
        stamped.append((cs, lyric))
        floor_cs = cs + 1
    return stamped


def format_lrc_time_cs(cs: int) -> str:
    """Render a centisecond count as an LRC time tag ``[mm:ss.xx]``.

    Minutes are zero-padded to at least two digits and simply grow wider
    past 99 minutes; there is no hour field.
    """
    minutes, remainder = divmod(cs, 6000)
    seconds, hundredths = divmod(remainder, 100)
    return f"[{minutes:02d}:{seconds:02d}.{hundredths:02d}]"


def build_tags(tree: ET.ElementTree, attributes: dict) -> list[str]:
    """Build the LRC header tag lines for a song.

    Args:
        tree: Parsed TTML document; searched for ``itunes:songwriter`` names.
        attributes: Mapping that must provide ``"name"`` (title) and
            ``"artistName"``.

    Returns:
        ``[ti:...]`` and ``[ar:...]`` tags, then an ``[au:...]`` tag listing
        the songwriters joined by ``" / "`` when any are present, and finally
        the fixed ``[by:...]`` tag.

    Raises:
        KeyError: If *attributes* lacks ``"name"`` or ``"artistName"``.
        ValueError: If *tree* has no root element.
    """
    # The title/artist lookups come first so a missing key is reported before
    # the empty-document check, matching the established evaluation order.
    title_tag = f"[ti:{attributes['name']}]"
    artist_tag = f"[ar:{attributes['artistName']}]"
    root = tree.getroot()
    if root is None:
        raise ValueError("Empty TTML document")

    header = [title_tag, artist_tag]
    writers_el = root.find(".//itunes:songwriters", NS)
    if writers_el is not None:
        credited = [
            name
            for name in (
                normalize_text("".join(el.itertext()))
                for el in writers_el.findall(".//itunes:songwriter", NS)
            )
            if name
        ]
        if credited:
            header.append(f"[au:{' / '.join(credited)}]")
    header.append("[by:noaione-ttml-to-lrc]")
    return header


def extract_grouped_lines(tree: ET.ElementTree, use_transliteration: bool = False) -> tuple[list[list[dict[str, Any]]], list[dict[str, Any]]]:
    """Pull timed lyric lines out of the TTML body, one group per ``<div>``.

    Only ``tt:div`` elements directly under ``tt:body`` and ``tt:p`` elements
    directly under those divs are read. A ``<p>`` is skipped when it has no
    ``begin`` attribute, when ``begin`` or ``end`` cannot be parsed, or when
    it has no text. A missing ``end`` defaults to the begin time.

    Args:
        tree: Parsed TTML document.
        use_transliteration: When true, a line whose ``itunes:key`` attribute
            appears in the first transliteration table returned by
            ``extract_transliterations`` has its text replaced by that entry.

    Returns:
        ``(groups, flat)``. ``groups`` holds one non-empty list of line dicts
        per div, and ``flat`` is the same dicts in a single list. Because the
        dict objects are shared, mutating a line through one is visible
        through the other. Each dict has keys ``b`` (begin seconds), ``e``
        (end seconds) and ``text``.
    """
    root = tree.getroot()
    if root is None:
        return [], []
    transliterations = extract_transliterations(tree)
    body = root.find("tt:body", NS)
    if body is None:
        return [], []

    # Only the first available table is consulted; other languages are ignored.
    translit_table = next(iter(transliterations.values()), {})
    # itunes:key on a <p> is the lookup key into the transliteration table.
    key_attr = "{http://music.apple.com/lyric-ttml-internal}key"

    groups: list[list[dict[str, Any]]] = []
    for div in body.findall("tt:div", NS):
        section: list[dict[str, Any]] = []
        for p in div.findall("tt:p", NS):
            begin_raw = p.get("begin")
            if not begin_raw:
                continue
            end_raw = p.get("end")
            try:
                start = parse_time(begin_raw)
                stop = parse_time(end_raw) if end_raw else start
            except ValueError:
                # Unparseable timing: drop the line rather than guess.
                continue
            lyric = normalize_text("".join(p.itertext()))
            if not lyric:
                continue
            key = p.get(key_attr)
            if use_transliteration and key and key in translit_table:
                lyric = translit_table[key]
            section.append({"b": start, "e": stop, "text": lyric})
        if section:
            groups.append(section)

    flat = [line for section in groups for line in section]
    return groups, flat


def resolve_overlaps_dict(flat: list[dict[str, Any]]):
    """Shift line timings in place so no line starts before earlier ones end.

    Lines are processed in list order. A line must start at least
    ``MIN_GAP_SEC`` after both the previous line's (possibly shifted) start
    and the latest end seen so far; a line that starts too early has its
    ``b`` moved to that point. If the move pushes the start past the line's
    end, the end becomes the new start plus ``MIN_DURATION_SEC``.

    Args:
        flat: Line dicts with numeric ``b`` and ``e`` keys; mutated in place.
    """
    last_begin = -1.0
    latest_end = -1.0
    for line in flat:
        begin, end = line["b"], line["e"]
        earliest = max(last_begin + MIN_GAP_SEC, latest_end + MIN_GAP_SEC)
        if begin < earliest:
            begin = earliest
            if begin > end:
                end = begin + MIN_DURATION_SEC
        line["b"] = begin
        line["e"] = end
        last_begin = begin
        # Track the furthest end so far, not just the previous line's end.
        if end > latest_end:
            latest_end = end


def enforce_monotonic_cs_inplace(flat: list[dict[str, Any]]):
    """Make rounded centisecond start times strictly increasing, in place.

    Each line's ``b`` (seconds) is rounded to centiseconds. When the result
    does not exceed the previous line's stamp, ``b`` is rewritten to one
    centisecond after it. Lines that already satisfy the ordering keep their
    original, unrounded ``b``. Other keys are not touched.

    Args:
        flat: Line dicts with a numeric ``b`` key; mutated in place.
    """
    floor_cs = 0  # lowest centisecond value the next line may use
    for line in flat:
        cs = round(line["b"] * 100)
        if cs < floor_cs:
            cs = floor_cs
            line["b"] = cs / 100.0
        floor_cs = cs + 1


def ttml_to_lrc(ttml: str, attributes: dict, break_threshold: float = 3.0, use_transliteration: bool = False) -> str:
    """Convert Apple Music TTML to an LRC string.

    - Resolves overlapping timestamps.
    - Ensures strictly increasing centisecond timestamps, including the
      timestamp-only separator lines described below.
    - Between two ``<div>`` groups whose gap is at least ``break_threshold``
      seconds, inserts a timestamp-only (blank) line at the previous group's
      end time. (Set ``break_threshold <= 0`` to disable.)

    Args:
        ttml: TTML document text.
        attributes: Song metadata forwarded to ``build_tags`` (it reads the
            ``"name"`` and ``"artistName"`` keys).
        break_threshold: Minimum gap, in seconds, between groups that
            triggers a separator line.
        use_transliteration: Forwarded to ``extract_grouped_lines``.

    Returns:
        The LRC text ending in a newline, or ``""`` when the document yields
        no lyric lines.

    Raises:
        ValueError: If *ttml* is not well-formed XML.
    """
    # NOTE(review): if ttml can come from an untrusted source, confirm that the
    # standard-library XML parser's handling of entity expansion is acceptable.
    try:
        tree = ET.ElementTree(ET.fromstring(ttml))
    except ET.ParseError as e:
        raise ValueError(f"Failed to parse TTML: {e}") from e

    groups, flat = extract_grouped_lines(tree, use_transliteration)
    if not flat:
        return ""  # No lyrics

    # Both helpers mutate the line dicts shared by `flat` and `groups`.
    resolve_overlaps_dict(flat)
    enforce_monotonic_cs_inplace(flat)

    lines_out: list[str] = list(build_tags(tree, attributes))

    last_cs = -1          # timestamp of the most recently emitted line
    prev_group_end = None  # adjusted end time of the previous group's last line
    for grp in groups:
        first_cs = round(grp[0]["b"] * 100)
        if prev_group_end is not None and break_threshold > 0:
            if (grp[0]["b"] - prev_group_end) >= break_threshold:
                # The previous group's end can coincide with (or precede) its
                # last line's start, e.g. when <p> had no `end`. Clamp the
                # separator after the last emitted stamp so timestamps stay
                # strictly increasing.
                blank_cs = max(round(prev_group_end * 100), last_cs + 1)
                # Skip the separator if it would collide with the next line.
                if blank_cs < first_cs:
                    lines_out.append(format_lrc_time_cs(blank_cs))
                    last_cs = blank_cs
        for ln in grp:
            last_cs = round(ln["b"] * 100)
            lines_out.append(f"{format_lrc_time_cs(last_cs)}{ln['text']}")
        prev_group_end = grp[-1]["e"]

    return "\n".join(lines_out) + "\n"
	"""
	Needs Python 3.10+

	Usage:

	```py
	from ttml_parser import ttml_to_lrc

	ttml_data = "<tt>...</tt>"

	lrc_data = ttml_to_lrc(ttml_data, {"name": "Song Title", "artistName": "Artist Name"})
	# If you have transliterations and want to use them:
	# lrc_data = ttml_to_lrc(ttml_data, {"name": "Song Title", "artistName": "Artist Name"}, use_transliteration=True)

	print(lrc_data)
	```
	"""


	import re
	import xml.etree.ElementTree as ET
	from typing import Any

	# Timing tolerances, in seconds, used while untangling overlapping lines.
	MIN_GAP_SEC = 0.01 # smallest distance enforced between consecutive line starts
	MIN_DURATION_SEC = 0.01 # length given to a line whose start was pushed past its end

	# Prefix -> namespace URI map handed to ElementTree find()/findall() lookups.
	NS = dict(
		tt="http://www.w3.org/ns/ttml",
		itunes="http://music.apple.com/lyric-ttml-internal",
		ttm="http://www.w3.org/ns/ttml#metadata",
	)

	# Timestamp shape: optional "<digits>:" prefix, whole seconds, optional ".<digits>".
	TIME_RE = re.compile(r"^(?:(\d+):)?(\d+)(?:\.(\d+))?$")


	# Hour-aware variant of the timestamp pattern: [[hh:]mm:]ss[.fraction].
	# Kept private to parse_time so the public TIME_RE constant stays untouched.
	_CLOCK_TIME_RE = re.compile(r"^(?:(?:(\d+):)?(\d+):)?(\d+)(?:\.(\d+))?$")


	def parse_time(t: str) -> float:
		"""Convert a clock-style timestamp string into seconds.

		Accepted shapes are ``ss``, ``ss.fff``, ``mm:ss[.fff]`` and
		``hh:mm:ss[.fff]``; surrounding whitespace is ignored. The hour form is
		accepted so that timestamps past the one-hour mark are not rejected.

		Args:
			t: Timestamp text, e.g. ``"12.5"``, ``"1:02.345"`` or ``"1:00:05.5"``.

		Returns:
			The time in seconds. Only the first three fractional digits are
			used; anything beyond millisecond precision is truncated.

		Raises:
			ValueError: If *t* does not match any accepted shape.
		"""
		m = _CLOCK_TIME_RE.match(t.strip())
		if not m:
			raise ValueError(f"Unrecognized time format: {t}")
		hours_txt, minutes_txt, seconds_txt, frac_txt = m.groups()
		hours = int(hours_txt) if hours_txt else 0
		minutes = int(minutes_txt) if minutes_txt else 0
		seconds = int(seconds_txt)
		# Scale by the number of digits actually kept: "5" -> 0.5, "05" -> 0.05.
		digits = (frac_txt or "0")[:3]
		frac_val = int(digits) / 10 ** len(digits)
		return hours * 3600 + minutes * 60 + seconds + frac_val


	def normalize_text(s: str) -> str:
		"""Trim *s* and squeeze every internal whitespace run down to one space."""
		trimmed = s.strip()
		return re.sub(r"\s+", " ", trimmed)


	def extract_transliterations(tree: ET.ElementTree) -> dict[str, dict[str, str]]:
		"""Collect per-language transliteration tables from the document.

		Every ``itunes:transliteration`` element that carries an ``xml:lang``
		attribute contributes one table. Each table maps the ``for`` attribute
		of a nested ``itunes:text`` element to that element's
		whitespace-normalized text. Entries lacking a ``for`` value or any
		text are dropped, and so are languages that end up with no entries.

		Args:
			tree: Parsed TTML document.

		Returns:
			``{lang: {for_value: text}}``; empty when the tree has no root.
		"""
		root = tree.getroot()
		if root is None:
			return {}
		translits: dict[str, dict[str, str]] = {}
		for tl in root.findall(".//itunes:transliteration", NS):
			# ElementTree exposes xml:lang under its fully qualified name.
			lang = tl.get("{http://www.w3.org/XML/1998/namespace}lang")
			if not lang:
				continue
			translit_basic: dict[str, str] = {}
			for t in tl.findall(".//itunes:text", NS):
				for_tag = t.get("for")
				txt = normalize_text("".join(t.itertext()))
				if for_tag and txt:
					translit_basic[for_tag] = txt
			if translit_basic:
				translits[lang] = translit_basic
		return translits


	def enforce_monotonic_centiseconds(lines: list[tuple[float, float, str]]) -> list[tuple[int, str]]:
		"""Turn ``(begin, end, text)`` triples into strictly increasing LRC stamps.

		Each begin time (seconds) is rounded to whole centiseconds. A stamp
		that would not exceed the one before it is bumped to exactly one
		centisecond after it (LRC players can behave badly with duplicate
		timestamps). End times are ignored.

		Returns:
			``(centiseconds, text)`` pairs in the input order.
		"""
		result: list[tuple[int, str]] = []
		last_cs = -1
		for b, _e, text in lines:
			cs = round(b * 100)
			if cs <= last_cs:
				cs = last_cs + 1
			result.append((cs, text))
			last_cs = cs
		return result


	def format_lrc_time_cs(cs: int) -> str:
		"""Render a centisecond count as an LRC time tag ``[mm:ss.xx]``.

		Minutes are zero-padded to at least two digits and simply grow wider
		past 99 minutes; there is no hour field.
		"""
		mm = cs // 6000
		ss = (cs % 6000) // 100
		hs = cs % 100
		return f"[{mm:02d}:{ss:02d}.{hs:02d}]"


	def build_tags(tree: ET.ElementTree, attributes: dict) -> list[str]:
		"""Build the LRC header tag lines for a song.

		Args:
			tree: Parsed TTML document; searched for ``itunes:songwriter`` names.
			attributes: Mapping that must provide ``"name"`` (title) and
				``"artistName"``.

		Returns:
			``[ti:...]`` and ``[ar:...]`` tags, then an ``[au:...]`` tag
			listing the songwriters joined by ``" / "`` when any are present,
			and finally the fixed ``[by:...]`` tag.

		Raises:
			KeyError: If *attributes* lacks ``"name"`` or ``"artistName"``.
			ValueError: If *tree* has no root element.
		"""
		tags = [f"[ti:{attributes['name']}]", f"[ar:{attributes['artistName']}]"]
		root = tree.getroot()
		if root is None:
			raise ValueError("Empty TTML document")
		songwriters = root.find(".//itunes:songwriters", NS)
		if songwriters is not None:
			names = []
			for sw in songwriters.findall(".//itunes:songwriter", NS):
				txt = normalize_text("".join(sw.itertext()))
				if txt:
					names.append(txt)
			if names:
				tags.append(f"[au:{' / '.join(names)}]")
		tags += ["[by:noaione-ttml-to-lrc]"]
		return tags


	def extract_grouped_lines(tree: ET.ElementTree, use_transliteration: bool = False) -> tuple[list[list[dict[str, Any]]], list[dict[str, Any]]]:
		"""Extract lyric lines grouped by <div>.

		Only ``tt:div`` elements directly under ``tt:body`` and ``tt:p``
		elements directly under those divs are read. A ``<p>`` is skipped when
		it has no ``begin`` attribute, when ``begin`` or ``end`` cannot be
		parsed, or when it has no text. A missing ``end`` defaults to the
		begin time.

		Returns (groups, flat) where each line dict has keys: b, e, text.
		``flat`` holds the same dict objects as ``groups``, so mutating a line
		through one is visible through the other.
		"""
		root = tree.getroot()
		if root is None:
			return [], []
		transliterations = extract_transliterations(tree)
		body = root.find("tt:body", NS)
		if body is None:
			return [], []
		# Only the first available transliteration table is consulted.
		first_tranlit = next(iter(transliterations.values()), {})
		groups: list[list[dict[str, Any]]] = []
		for div in body.findall("tt:div", NS):
			g: list[dict[str, Any]] = []
			for p in div.findall("tt:p", NS):
				begin = p.get("begin")
				end = p.get("end")
				if not begin:
					continue
				try:
					b = parse_time(begin)
					e = parse_time(end) if end else b
				except ValueError:
					# Unparseable timing: drop the line rather than guess.
					continue
				text = normalize_text("".join(p.itertext()))
				if not text:
					continue
				# itunes:key on a <p> is the lookup key into the transliteration table.
				itunes_key = p.get("{http://music.apple.com/lyric-ttml-internal}key")
				if use_transliteration and itunes_key and itunes_key in first_tranlit:
					text = first_tranlit[itunes_key]
				g.append({"b": b, "e": e, "text": text})
			if g:
				groups.append(g)
		flat = [ln for grp in groups for ln in grp]
		return groups, flat


	def resolve_overlaps_dict(flat: list[dict[str, Any]]):
		"""Shift line timings in place so no line starts before earlier ones end.

		Lines are processed in list order. A line must start at least
		``MIN_GAP_SEC`` after both the previous line's (possibly shifted)
		start and the latest end seen so far; a line that starts too early has
		its ``b`` moved to that point. If the move pushes the start past the
		line's end, the end becomes the new start plus ``MIN_DURATION_SEC``.
		"""
		prev_begin = -1.0
		prev_end = -1.0
		for ln in flat:
			b = ln["b"]
			e = ln["e"]
			needed_start = max(prev_begin + MIN_GAP_SEC, prev_end + MIN_GAP_SEC)
			if b < needed_start:
				b = needed_start
				if b > e:
					e = b + MIN_DURATION_SEC
			ln["b"], ln["e"] = b, e
			prev_begin = b
			# Track the furthest end so far, not just the previous line's end.
			prev_end = max(prev_end, e)


	def enforce_monotonic_cs_inplace(flat: list[dict[str, Any]]):
		"""Make rounded centisecond start times strictly increasing, in place.

		Each line's ``b`` (seconds) is rounded to centiseconds. When the
		result does not exceed the previous line's stamp, ``b`` is rewritten
		to one centisecond after it. Lines that already satisfy the ordering
		keep their original, unrounded ``b``.
		"""
		last_cs = -1
		for ln in flat:
			cs = round(ln["b"] * 100)
			if cs <= last_cs:
				cs = last_cs + 1
				ln["b"] = cs / 100.0
			last_cs = cs


	def ttml_to_lrc(ttml: str, attributes: dict, break_threshold: float = 3.0, use_transliteration: bool = False) -> str:
		"""Convert Apple Music TTML to an LRC string.

		- Resolves overlapping timestamps.
		- Ensures strictly increasing centisecond timestamps, including the
		  timestamp-only separator lines described below.
		- Between two ``<div>`` groups whose gap is at least
		  ``break_threshold`` seconds, inserts a timestamp-only (blank) line
		  at the previous group's end time.
		  (Set ``break_threshold <= 0`` to disable.)

		Returns:
			The LRC text ending in a newline, or ``""`` when the document
			yields no lyric lines.

		Raises:
			ValueError: If *ttml* is not well-formed XML.
		"""
		# NOTE(review): if ttml can come from an untrusted source, confirm that the
		# standard-library XML parser's handling of entity expansion is acceptable.
		try:
			tree = ET.ElementTree(ET.fromstring(ttml))
		except ET.ParseError as e:
			raise ValueError(f"Failed to parse TTML: {e}") from e

		groups, flat = extract_grouped_lines(tree, use_transliteration)
		if not flat:
			return "" # No lyrics

		# Both helpers mutate the line dicts shared by `flat` and `groups`.
		resolve_overlaps_dict(flat)
		enforce_monotonic_cs_inplace(flat)

		lines_out: list[str] = list(build_tags(tree, attributes))

		last_cs = -1 # timestamp of the most recently emitted line
		prev_group_end = None # adjusted end time of the previous group's last line
		for grp in groups:
			first_cs = round(grp[0]["b"] * 100)
			if prev_group_end is not None and break_threshold > 0:
				if (grp[0]["b"] - prev_group_end) >= break_threshold:
					# The previous group's end can coincide with (or precede)
					# its last line's start, e.g. when <p> had no `end`. Clamp
					# the separator after the last emitted stamp so timestamps
					# stay strictly increasing.
					blank_cs = max(round(prev_group_end * 100), last_cs + 1)
					# Skip the separator if it would collide with the next line.
					if blank_cs < first_cs:
						lines_out.append(format_lrc_time_cs(blank_cs))
						last_cs = blank_cs
			for ln in grp:
				last_cs = round(ln["b"] * 100)
				lines_out.append(f"{format_lrc_time_cs(last_cs)}{ln['text']}")
			prev_group_end = grp[-1]["e"]

		return "\n".join(lines_out) + "\n"
No results found