Last active
November 25, 2017 19:20
-
-
Save fbonzon/11333236 to your computer and use it in GitHub Desktop.
Scrapes and returns a list of all genre IDs, aka 'geID' tag in iTunes Store media files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/opt/local/bin/php71 | |
| <?php | |
| // Usage: | |
| // ./itunes_genre_ids_scraper.php > itunes_genre_ids.pl | |
| // Ref: http://www.apple.com/itunes/affiliates/resources/documentation/genre-mapping.html | |
// Web service endpoint that this script downloads the genre metadata from.
const GENRES_METADATA_URL = 'http://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/genres';

// Run with a U.S. English locale and UTF-8 as the default character set.
setlocale(LC_ALL, 'en_US.UTF-8');
ini_set('default_charset', 'UTF-8');
/**
 * Recursively flattens one genre node, and all of its subgenres, into $genres.
 *
 * @param object $json      Decoded genre node. Reads ->id and ->name, and
 *                          ->subgenres when it is set.
 * @param array  $hierarchy Higher level parent category or genres. The node's
 *                          name is pushed for the duration of the call and
 *                          popped again before returning.
 * @param array  $genres    Output map keyed by genre ID. Each entry holds:
 *                          'id', 'name' (ancestor names and own name joined
 *                          with '|'), 'last_name' (own name only) and
 *                          'last_name_html' (own name with HTML entities).
 */
function parse_genres ($json, &$hierarchy, &$genres) {
    array_push($hierarchy, $json->name);

    $genres[$json->id] = array(
        'id' => $json->id,
        'name' => implode('|', $hierarchy),
        'last_name' => $json->name,
        // htmlentities() also turns a bare ampersand into "&amp;". Undo that
        // so a name like "R&B/Soul" still compares equal to its HTML form and
        // is not mistaken for a name containing accented letters.
        'last_name_html' => str_replace('&amp;', '&', htmlentities($json->name)),
    );

    if (isset($json->subgenres)) {
        foreach ($json->subgenres as $subgenre) {
            parse_genres($subgenre, $hierarchy, $genres);
        }
    }

    // Restore the caller's hierarchy before returning to the parent level.
    array_pop($hierarchy);
}
// Fetch iTunes genres metadata with cURL.
$curl = curl_init();
if (FALSE === $curl) {
    fwrite(STDERR, "Unable to initialise cURL.\n");
    exit(1);
}
curl_setopt($curl, CURLOPT_URL, GENRES_METADATA_URL);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
# Enable all supported encoding types.
curl_setopt($curl, CURLOPT_ENCODING, '');
# Treat HTTP status codes >= 400 as a failed transfer instead of parsing the
# error page as if it were genre data.
curl_setopt($curl, CURLOPT_FAILONERROR, TRUE);
$response = curl_exec($curl);
if (FALSE === $response) {
    // Read the error message before the handle is closed.
    $error = curl_error($curl);
    curl_close($curl);
    fwrite(STDERR, sprintf("Failed to fetch %s: %s\n", GENRES_METADATA_URL, $error));
    exit(1);
}
curl_close($curl);

// Decode JSON string. json_decode() returns NULL on malformed input, which
// would otherwise surface later as a foreach over a non-iterable value.
$json = json_decode($response);
if (!is_object($json) && !is_array($json)) {
    fwrite(STDERR, sprintf("Unexpected response from %s: %s\n",
        GENRES_METADATA_URL, json_last_error_msg()));
    exit(1);
}

// Loop through all genres, and store them in an array.
$hierarchy = array();
$genres = array();
foreach ($json as $category) {
    parse_genres($category, $hierarchy, $genres);
}

// Sort by numerical genre ID.
ksort($genres);
// Output all genres in your preferred format; here, as Perl source code for
// ExifTool. If a genre name has accented letters, the accents are removed and
// the original name is appended as a comment, written with HTML entities.
/**
 * Formats a string as a Perl string literal.
 *
 * The quoting style is chosen so that the literal always evaluates to exactly
 * the input:
 *  - no single quote and no backslash: wrapped as-is in single quotes;
 *  - a single quote, but nothing Perl treats specially inside double quotes
 *    (double quote, backslash, '$', '@'): wrapped as-is in double quotes;
 *  - anything else: wrapped in single quotes with backslashes and single
 *    quotes backslash-escaped.
 *
 * @param string $str Raw text to quote.
 * @return string Perl source for a string literal, including its quotes.
 */
function perl_escape_string ($str) {
    $str = (string) $str;
    $has_single_quote = (FALSE !== strpos($str, "'"));

    if (!$has_single_quote && FALSE === strpos($str, "\\")) {
        # Nothing that needs escaping inside a single-quoted literal.
        return "'" . $str . "'";
    }

    if ($has_single_quote && FALSE === strpbrk($str, '"\\$@')) {
        # Double quotes are only safe when Perl would neither interpolate a
        # variable nor interpret a backslash escape inside them.
        return '"' . $str . '"';
    }

    # Inside single quotes Perl only recognises \\ and \' as escapes, so
    # escaping those two characters is sufficient. strtr() replaces in one
    # pass, so backslashes added here are never escaped a second time.
    return "'" . strtr($str, array("\\" => "\\\\", "'" => "\\'")) . "'";
}
// Print one "<id> => <quoted name>," line per genre, indented by 12 spaces.
foreach ($genres as $genre) {
    $name = $genre['name'];
    $html_name = $genre['last_name_html'];
    $comment = '';

    // The HTML form differs from the plain name only when the name contains
    // characters that htmlentities() encodes. Compare strictly so that
    // numeric-looking names are not compared as numbers.
    if ((string) $genre['last_name'] !== $html_name) {
        $ascii_name = transliterator_transliterate('Any-Latin; Latin-ASCII', $name);
        if (FALSE !== $ascii_name) {
            // On failure keep the original name rather than printing ''.
            $name = $ascii_name;
        }
        if ('' !== $html_name) {
            $comment = ' # (' . $html_name . ')';
        }
    }

    echo str_repeat(' ', 12) . sprintf("%d => %s,%s\n",
        $genre['id'],
        perl_escape_string($name),
        $comment);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment