Skip to content

Instantly share code, notes, and snippets.

@fbonzon
Last active November 25, 2017 19:20
Show Gist options
  • Select an option

  • Save fbonzon/11333236 to your computer and use it in GitHub Desktop.

Select an option

Save fbonzon/11333236 to your computer and use it in GitHub Desktop.
Scrapes and returns a list of all genre IDs, aka 'geID' tag in iTunes Store media files.
#!/opt/local/bin/php71
<?php
// Usage:
// ./itunes_genre_ids_scraper.php > itunes_genre_ids.pl
// Ref: http://www.apple.com/itunes/affiliates/resources/documentation/genre-mapping.html
// Address that the script later hands to cURL to download the genre metadata.
const GENRES_METADATA_URL = 'http://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/genres';
// Work in U.S. English with UTF-8 everywhere: process locale first, then PHP's
// default character set.
setlocale(LC_ALL, 'en_US.UTF-8');
ini_set('default_charset', 'UTF-8');
/**
 * Recursively flattens one genre node and all of its subgenres into $genres.
 *
 * For every visited node an entry is stored under the node's ID with the keys
 * 'id', 'name' (ancestor names and own name joined by '|'), 'last_name' (own
 * name only) and 'last_name_html' (own name run through htmlentities(), with
 * '&amp;' turned back into '&').
 *
 * @param object $json      Genre node; ->id and ->name are read, ->subgenres
 *                          is optional and iterated when set.
 * @param array  $hierarchy Names of the parent categories/genres of this node.
 *                          Passed by reference; the node's own name is pushed
 *                          for the duration of the call and popped before
 *                          returning.
 * @param array  $genres    Result accumulator keyed by genre ID, passed by
 *                          reference.
 */
function parse_genres ($json, &$hierarchy, &$genres) {
    $own_name = $json->name;
    array_push($hierarchy, $own_name);

    $full_path = implode('|', $hierarchy);
    $own_name_html = str_replace('&amp;', '&', htmlentities($own_name));

    $genres[$json->id] = [
        'id' => $json->id,
        'name' => $full_path,
        'last_name' => $own_name,
        'last_name_html' => $own_name_html,
    ];

    // Descend into the children, if any, while this node is still on the path.
    foreach ($json->subgenres ?? [] as $child) {
        parse_genres($child, $hierarchy, $genres);
    }

    array_pop($hierarchy);
}
// Fetch iTunes genres metadata with cURL.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, GENRES_METADATA_URL);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
// Enable all supported encoding types.
curl_setopt($curl, CURLOPT_ENCODING, '');
$response = curl_exec($curl);
if (FALSE === $response) {
    // Standard output is normally redirected into the generated Perl file, so
    // report the failure on STDERR and stop instead of emitting an empty list.
    $error = curl_error($curl);
    curl_close($curl);
    fwrite(STDERR, sprintf("Failed to fetch %s: %s\n", GENRES_METADATA_URL, $error));
    exit(1);
}
curl_close($curl);
// Decode JSON string. Anything other than an object or array cannot be
// iterated as a set of categories below.
$json = json_decode($response);
if (!is_object($json) && !is_array($json)) {
    fwrite(STDERR, sprintf("Unexpected genres payload from %s (JSON: %s)\n",
        GENRES_METADATA_URL, json_last_error_msg()));
    exit(1);
}
// Loop through all genres, and store them in an array.
$hierarchy = array();
$genres = array();
foreach ($json as $category) {
    parse_genres($category, $hierarchy, $genres);
}
// Sort by numerical genre ID.
ksort($genres);
// Output all genres in your preferred format. Here they are emitted as Perl
// source code for ExifTool. If a genre name has accented letters, the accents
// are removed and the original name is appended as a comment, written with
// HTML entities.
/**
 * Returns $str as a Perl string literal, including the surrounding quotes.
 *
 * The plainest safe form is chosen:
 *  - bare single quotes when the text has neither a single quote nor a
 *    backslash;
 *  - double quotes when the text has a single quote but none of the characters
 *    that are special inside a Perl double-quoted string ('"', '$', '@', '\');
 *  - otherwise single quotes with every backslash and single quote
 *    backslash-escaped.
 *
 * @param string $str Text to quote.
 * @return string Perl source for a string literal whose value is $str.
 */
function perl_escape_string ($str) {
    $str = (string) $str;
    $has_single_quote = FALSE !== strpos($str, "'");
    $has_backslash = FALSE !== strpos($str, "\\");

    if (!$has_single_quote && !$has_backslash) {
        # Nothing that a single-quoted Perl literal would treat specially.
        return "'" . $str . "'";
    }
    if (!$has_backslash && strcspn($str, '"$@') === strlen($str)) {
        # Double quotes are only safe without '"' and without the '$' / '@'
        # sigils, which Perl would otherwise interpolate.
        return '"' . $str . '"';
    }
    # Single-quoted literal: only backslash and single quote need escaping.
    # strtr() replaces in one pass, so added backslashes are not re-escaped.
    return "'" . strtr($str, array("\\" => "\\\\", "'" => "\\'")) . "'";
}
// Print one "<id> => <quoted name>," line per genre, indented by 12 spaces.
$indent = str_repeat(' ', 12);
foreach ($genres as $genre) {
    $label = $genre['name'];
    $comment = '';
    // The HTML form differs from the plain name only when htmlentities() had
    // something to encode; in that case print a transliterated ASCII name and
    // keep the HTML form as a trailing Perl comment.
    if ($genre['last_name_html'] != $genre['last_name']) {
        $label = transliterator_transliterate('Any-Latin; Latin-ASCII', $label);
        if (!empty($genre['last_name_html'])) {
            $comment = ' # (' . $genre['last_name_html'] . ')';
        }
    }
    printf("%s%d => %s,%s\n",
        $indent,
        $genre['id'],
        perl_escape_string($label),
        $comment);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment