Last active
July 20, 2025 15:19
-
-
Save noaione/2bc57ff5e94cd671b84e00497406adfc to your computer and use it in GitHub Desktop.
Retag a collection of m4a files, organized in folders, with the JP storefront version of their metadata
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Quickly retag multiple m4a files from Apple Music that are organized in folders. | |
| This is mainly useful because my storefront is not JP and I want the original JP metadata | |
| rather than the translated version of it. | |
| Requirements (must be installed): | |
| - orjson | |
| - requests | |
| - mutagen | |
| Usage: | |
| - python3 ./amp-retag-jp.py | |
| Folder structure example: | |
| | amp-retag-jp.py | |
| ∟ [YYYY-MM-DD] Album Name 1 | |
| ∟ 01. Track.m4a | |
| ∟ 02. Track.m4a | |
| ∟ [YYYY-MM-DD] Album Name 2 | |
| ∟ 01. Track.m4a | |
| ∟ 02. Track.m4a | |
| ∟ [YYYY-MM-DD] Album Name 3 | |
| ∟ 01. Track.m4a | |
| ∟ 02. Track.m4a | |
| ∟ .cache-amp | |
| ∟ 123456789.json | |
| """ | |
| import re | |
| from pathlib import Path | |
| import orjson | |
| import requests | |
| from mutagen.mp4 import MP4, MP4FreeForm | |
# Bearer token sent in the Authorization header of every API request.
TOKEN = "[REDACTED]"
# Storefront code placed in the catalog URL.
STOREFRONT = "jp"

# MP4 atom names that are read from each file to identify album and track.
ALBUM_HEAD = "----:com.apple.iTunes:ITUNESALBUMID"
ALBUM_HEAD_ALT = "plID"
TRACK_HEAD = "cnID"

# Album folders are looked up next to this script; API responses are cached
# in a hidden sub-directory of the same location.
CURRENT_DIR = Path(__file__).resolve().parent
CACHE_DATA = CURRENT_DIR.joinpath(".cache-amp")
CACHE_DATA.mkdir(exist_ok=True)

# One shared HTTP session carrying the headers used for all API calls.
session = requests.Session()
session.headers["Origin"] = "https://music.apple.com"
session.headers["Referer"] = "https://music.apple.com/"
session.headers["Authorization"] = f"Bearer {TOKEN}"
session.headers["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:141.0) Gecko/20100101 Firefox/141.0"
)
def secure_filename(filename: str) -> str:
    """Return *filename* with Windows-invalid characters replaced by ``_``.

    Leading and trailing whitespace is removed after the replacement.
    """
    # Map every forbidden character to an underscore in a single pass.
    forbidden_to_underscore = str.maketrans(dict.fromkeys('<>:"/\\|?*', "_"))
    return filename.translate(forbidden_to_underscore).strip()
def strip_cv(artist_name: str) -> str:
    """Return *artist_name* without any ``(CV: ...)`` / ``(CV. ...)`` credit.

    Whitespace directly surrounding each removed credit is dropped as well,
    and the final result is stripped.
    """
    cv_credit = re.compile(r"\s*\(CV[:\.].*?\)\s*")
    return cv_credit.sub("", artist_name).strip()
def split_artist_comma(artist: str) -> list[str]:
    """Split a credit string into individual names.

    The string is split on commas first, then each comma-separated part is
    split on ``" & "``. Names are stripped and empty pieces are discarded.
    The original left-to-right order of the names is preserved.

    Args:
        artist: Raw credit string, e.g. ``"A & B, C"``.

    Returns:
        The individual names in order of appearance, e.g. ``["A", "B", "C"]``.
    """
    artists: list[str] = []
    for part in artist.split(","):
        # Build a fresh list instead of appending to / removing from a list
        # that is being iterated: that skipped entries (so a second
        # "X & Y" part was never split) and shuffled the order.
        for name in part.strip().split(" & "):
            name = name.strip()
            if name:
                artists.append(name)
    return artists
def strip_album_name(album_name: str) -> str:
    """Return *album_name* without a trailing `` - EP`` or `` - Single`` marker.

    The markers are checked in that order, and the name is stripped of
    surrounding whitespace after each removal and once more at the end.
    """
    cleaned = album_name
    for suffix in (" - EP", " - Single"):
        # Only strip when a suffix was actually removed, so trailing
        # whitespace cannot turn a non-match into a match for the next marker.
        if cleaned.endswith(suffix):
            cleaned = cleaned.removesuffix(suffix).strip()
    return cleaned.strip()
def fetch_album_info(album_id: str, timeout: float = 30.0) -> dict:
    """Return the catalog album resource for *album_id*, using the disk cache.

    The raw API response is stored as ``<album_id>.json`` inside
    ``CACHE_DATA``. When that file already exists it is read instead of
    contacting the API.

    Args:
        album_id: Album identifier used in the catalog URL and cache filename.
        timeout: Seconds to wait for the HTTP request. Without a timeout a
            stalled connection would block the script indefinitely.

    Returns:
        The first entry of the response's ``data`` array.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the request does not complete within *timeout*.
    """
    cached_data = CACHE_DATA / f"{album_id}.json"
    if cached_data.exists():
        return orjson.loads(cached_data.read_bytes())["data"][0]

    url = f"https://amp-api.music.apple.com/v1/catalog/{STOREFRONT}/albums/{album_id}"
    params = {
        "include": "artists",
    }
    response = session.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    result = response.json()

    # Cache the complete response (not only data[0]) so the cached file has
    # the same shape the read path above expects.
    cached_data.write_bytes(orjson.dumps(result, option=orjson.OPT_INDENT_2))
    return result["data"][0]
def find_track_from_album(track_id: str, track_lists: list[dict]) -> dict:
    """Return the entry of *track_lists* whose ``"id"`` equals *track_id*.

    Both sides are compared as strings; *track_id* is stripped first.

    Raises:
        ValueError: If no entry has a matching ID.
    """
    wanted = str(track_id).strip()
    found = next(
        (entry for entry in track_lists if str(entry["id"]) == wanted),
        None,
    )
    if found is None:
        raise ValueError(f"Track ID {wanted} not found in album track list.")
    return found
def split_artist_names(artist_name: str) -> list[str]:
    """Split *artist_name* into individual names with CV credits removed.

    The string is split with ``split_artist_comma``; each non-empty piece is
    passed through ``strip_cv`` and kept only if something remains.
    """
    without_cv = (
        strip_cv(piece.strip())
        for piece in split_artist_comma(artist_name)
        if piece.strip()
    )
    return [name for name in without_cv if name]
# Walk every album folder next to this script and rewrite the tags of each
# .m4a file with the metadata returned for the configured storefront.
for folder_path in CURRENT_DIR.iterdir():
    # Skip loose files and the API response cache directory.
    if not folder_path.is_dir() or folder_path.name == ".cache-amp":
        continue

    print(f"Processing album: {folder_path.name}")
    for song_file in folder_path.glob("*.m4a"):
        # read data
        audio = MP4(song_file)

        # Album ID: prefer the freeform ITUNESALBUMID atom, fall back to plID.
        album_id: str | None = None
        if album_id_first := audio.get(ALBUM_HEAD):
            album_id = album_id_first[0].decode("utf-8")
        if not album_id and (album_id_alt := audio.get(ALBUM_HEAD_ALT)):
            # The plID value may not be a string; normalise it so album_id
            # matches its annotation (a falsy value still counts as missing).
            album_id = str(album_id_alt[0]) if album_id_alt[0] else None
        if not album_id:
            raise ValueError(f"Album ID not found in {song_file.name}")

        # Fail with the same kind of descriptive error when the track ID atom
        # is absent, instead of a bare KeyError after the network request.
        track_ids = audio.get(TRACK_HEAD)
        if not track_ids:
            raise ValueError(f"Track ID not found in {song_file.name}")

        album_info = fetch_album_info(album_id)
        album_attrs = album_info["attributes"]
        track_lists = album_info["relationships"]["tracks"]["data"]
        track_attrs = find_track_from_album(track_ids[0], track_lists)["attributes"]

        artist_names = split_artist_names(track_attrs["artistName"])
        album_artist_names = split_artist_names(album_attrs["artistName"])
        # composerName is not guaranteed to be present on every track;
        # treat a missing or empty value as "no composer information".
        composer_names = split_artist_comma(track_attrs.get("composerName") or "")
        album_name = strip_album_name(album_attrs["name"])
        release_date = album_attrs["releaseDate"]
        track_name = track_attrs["name"]

        # Artist
        audio["\xa9ART"] = artist_names
        audio["soar"] = artist_names  # Sort-order variant
        # Album Artist
        audio["aART"] = album_artist_names
        audio["soaa"] = album_artist_names  # Sort-order variant
        # Performer
        audio["\xa9prf"] = artist_names
        audio["----:com.apple.iTunes:PERFORMER"] = [
            MP4FreeForm(artist.encode("utf-8")) for artist in artist_names
        ]
        # Composer: leave the existing tags untouched when the API has none.
        if composer_names:
            audio["\xa9wrt"] = composer_names
            audio["soco"] = composer_names  # Sort-order variant
        # Album
        audio["\xa9alb"] = album_name
        audio["soal"] = album_name  # Sort-order variant
        # Release date
        audio["\xa9day"] = release_date
        # Track name
        audio["\xa9nam"] = track_name
        audio["sonm"] = track_name  # Sort-order variant

        # save
        audio.save()
        print(f" Updated metadata for {song_file.name}")
    print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment