Skip to content

Instantly share code, notes, and snippets.

@noaione
Last active July 20, 2025 15:19
Show Gist options
  • Select an option

  • Save noaione/2bc57ff5e94cd671b84e00497406adfc to your computer and use it in GitHub Desktop.

Select an option

Save noaione/2bc57ff5e94cd671b84e00497406adfc to your computer and use it in GitHub Desktop.
Retag a collection of m4a files in folders to the JP storefront version
"""
Quickly retag multiple m4a files in folders from Apple Music
This is mainly used since my storefront is not JP and I want the original JP metadata
and not the translated version of it.
The following packages need to be installed:
- orjson
- requests
- mutagen
Usage:
- python3 ./amp-retag-jp.py
Folder structure example:
| amp-retag-jp.py
∟ [YYYY-MM-DD] Album Name 1
∟ 01. Track.m4a
∟ 02. Track.m4a
∟ [YYYY-MM-DD] Album Name 2
∟ 01. Track.m4a
∟ 02. Track.m4a
∟ [YYYY-MM-DD] Album Name 3
∟ 01. Track.m4a
∟ 02. Track.m4a
∟ .cache-amp
∟ 123456789.json
"""
import re
from pathlib import Path
import orjson
import requests
from mutagen.mp4 import MP4, MP4FreeForm
# Bearer token sent in the Authorization header of every API request.
TOKEN = "[REDACTED]"
# Storefront segment used in the catalog URL when fetching album metadata.
STOREFRONT = "jp"

# MP4 tag keys read from each file: the freeform album ID, the fallback
# album ID atom, and the track ID atom.
ALBUM_HEAD = "----:com.apple.iTunes:ITUNESALBUMID"
ALBUM_HEAD_ALT = "plID"
TRACK_HEAD = "cnID"

# Album folders are looked up next to this script; API responses are cached
# as JSON files in a ".cache-amp" folder created alongside them.
CURRENT_DIR = Path(__file__).resolve().parent
CACHE_DATA = CURRENT_DIR / ".cache-amp"
CACHE_DATA.mkdir(exist_ok=True)

# Headers attached to every request made through the shared session.
_DEFAULT_HEADERS = {
    "Origin": "https://music.apple.com",
    "Referer": "https://music.apple.com/",
    "Authorization": f"Bearer {TOKEN}",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:141.0) Gecko/20100101 Firefox/141.0",
}

# One shared session so all API calls reuse the same headers.
session = requests.Session()
session.headers.update(_DEFAULT_HEADERS)
def secure_filename(filename: str) -> str:
    """Return *filename* with Windows-invalid characters replaced by ``_``.

    Each of ``< > : " / \\ | ? *`` becomes an underscore, then leading and
    trailing whitespace is removed from the result.
    """
    # A single translation table handles every invalid character in one pass.
    underscore_table = str.maketrans({bad: "_" for bad in '<>:"/\\|?*'})
    return filename.translate(underscore_table).strip()
def strip_cv(artist_name: str) -> str:
    """Remove every ``(CV: ...)`` / ``(CV. ...)`` credit from *artist_name*.

    The parenthesised credit is dropped together with the whitespace on
    either side of it; the remaining text is returned stripped.
    """
    cv_credit = re.compile(r"\s*\(CV[:\.].*?\)\s*")
    return cv_credit.sub("", artist_name).strip()
def split_artist_comma(artist: str) -> list[str]:
    """Split a combined artist credit into individual names.

    The credit is split on commas first; each comma-separated part is then
    split on ``" & "``. Names are stripped, empty names are dropped, and the
    original left-to-right order of the credit is preserved.

    Args:
        artist: Combined credit such as ``"A & B, C"``.

    Returns:
        The individual names, e.g. ``["A", "B", "C"]``.
    """
    names: list[str] = []
    for part in artist.split(","):
        # Strip before splitting so only an interior " & " acts as a separator.
        # Building a fresh list (instead of appending to / removing from the
        # list being iterated) guarantees no part is skipped.
        for name in part.strip().split(" & "):
            name = name.strip()
            if name:
                names.append(name)
    return names
def strip_album_name(album_name: str) -> str:
    """Drop a trailing `` - EP`` or `` - Single`` marker from *album_name*.

    The suffixes are checked in that order, each at most once, and the
    result is returned with surrounding whitespace removed.
    """
    for suffix in (" - EP", " - Single"):
        if not album_name.endswith(suffix):
            continue
        album_name = album_name[: -len(suffix)].strip()
    return album_name.strip()
def fetch_album_info(album_id: str) -> dict:
    """Return the album resource for *album_id* from the configured storefront.

    A response previously saved as ``<album_id>.json`` in the cache folder is
    reused when present; otherwise the album is requested from the API and
    the full response is written to the cache.

    Args:
        album_id: Catalog album identifier used in the URL and cache filename.

    Returns:
        The first entry of the response's ``data`` list.

    Raises:
        requests.RequestException: On HTTP error status, connection failure,
            or timeout.
        KeyError, IndexError: If the response has no ``data`` entry.
    """
    cached_data = CACHE_DATA / f"{album_id}.json"
    if cached_data.exists():
        return orjson.loads(cached_data.read_bytes())["data"][0]

    url = f"https://amp-api.music.apple.com/v1/catalog/{STOREFRONT}/albums/{album_id}"
    params = {
        "include": "artists",
    }
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the whole run.
    response = session.get(url, params=params, timeout=30)
    response.raise_for_status()
    result = response.json()

    # Extract the album before caching so a response without a usable
    # "data" entry is never persisted and replayed on every later run.
    album = result["data"][0]
    cached_data.write_bytes(orjson.dumps(result, option=orjson.OPT_INDENT_2))
    return album
def find_track_from_album(track_id: str, track_lists: list[dict]) -> dict:
    """Return the first entry of *track_lists* whose ``id`` equals *track_id*.

    Both sides are compared as strings; *track_id* is additionally stripped
    of surrounding whitespace.

    Raises:
        ValueError: If no entry has a matching ``id``.
    """
    wanted = str(track_id).strip()
    match = next(
        (entry for entry in track_lists if str(entry["id"]) == wanted),
        None,
    )
    if match is None:
        raise ValueError(f"Track ID {wanted} not found in album track list.")
    return match
def split_artist_names(artist_name: str) -> list[str]:
    """Split a combined artist credit into names with CV credits removed.

    The credit is split with ``split_artist_comma``; each piece is stripped
    and passed through ``strip_cv``. Pieces that are empty before or after
    that cleanup are left out.
    """
    return [
        cleaned
        for piece in split_artist_comma(artist_name)
        if (trimmed := piece.strip())
        if (cleaned := strip_cv(trimmed))
    ]
# Walk every album folder next to this script and rewrite the tags of each
# .m4a file with the metadata fetched for the configured storefront.
for folder_path in CURRENT_DIR.iterdir():
    # Skip loose files and the API response cache folder.
    if not folder_path.is_dir() or folder_path.name == ".cache-amp":
        continue
    print(f"Processing album: {folder_path.name}")
    for song_file in folder_path.glob("*.m4a"):
        # read data
        audio = MP4(song_file)

        # Album ID: prefer the freeform tag (stored as bytes), then fall back
        # to the plID atom. The fallback value is converted to str so the
        # variable matches its annotation.
        album_id: str | None = None
        if album_id_first := audio.get(ALBUM_HEAD):
            album_id = album_id_first[0].decode("utf-8")
        if not album_id and (album_id_alt := audio.get(ALBUM_HEAD_ALT)):
            album_id = str(album_id_alt[0])
        if not album_id:
            raise ValueError(f"Album ID not found in {song_file.name}")

        # Fail with the same kind of message as the album ID check instead of
        # a bare KeyError when the track ID atom is absent.
        track_ids = audio.get(TRACK_HEAD)
        if not track_ids:
            raise ValueError(f"Track ID not found in {song_file.name}")

        album_info = fetch_album_info(album_id)
        track_lists = album_info["relationships"]["tracks"]["data"]
        track_info = find_track_from_album(track_ids[0], track_lists)
        album_attrs = album_info["attributes"]
        track_attrs = track_info["attributes"]

        artist_names = split_artist_names(track_attrs["artistName"])
        album_artist_names = split_artist_names(album_attrs["artistName"])
        album_name = strip_album_name(album_attrs["name"])
        release_date = album_attrs["releaseDate"]
        track_name = track_attrs["name"]

        # Artist
        audio["\xa9ART"] = artist_names
        audio["soar"] = artist_names  # Sorted version
        # Album Artist
        audio["aART"] = album_artist_names
        audio["soaa"] = album_artist_names  # Sorted version
        # Performer
        audio["\xa9prf"] = artist_names
        audio["----:com.apple.iTunes:PERFORMER"] = [
            MP4FreeForm(artist.encode("utf-8")) for artist in artist_names
        ]
        # Composer: not every track carries a composer credit, so only
        # overwrite the tags when one is present; otherwise the file's
        # existing composer tags are left untouched.
        if composer_name := track_attrs.get("composerName"):
            composer_names = split_artist_comma(composer_name)
            audio["\xa9wrt"] = composer_names  # Composer
            audio["soco"] = composer_names  # Sorted version
        # Album
        audio["\xa9alb"] = album_name
        audio["soal"] = album_name  # Sorted version
        # Release date
        audio["\xa9day"] = release_date  # Release date
        # Track name
        audio["\xa9nam"] = track_name
        audio["sonm"] = track_name  # Sorted version
        # save
        audio.save()
        print(f" Updated metadata for {song_file.name}")
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment