Skip to content

Instantly share code, notes, and snippets.

@royce002
Created September 18, 2025 13:54
Show Gist options
  • Select an option

  • Save royce002/11935ca64c50d3b7c5f6a32137b3222c to your computer and use it in GitHub Desktop.

Select an option

Save royce002/11935ca64c50d3b7c5f6a32137b3222c to your computer and use it in GitHub Desktop.
# WordPress Taxonomy Exporter
# This Python script parses a standard WordPress XML export file (`.xml`) to extract a complete, unique, and alphabetized list of all categories and post tags.
import xml.etree.ElementTree as ET
import os
def extract_categories_tags_from_wordpress_export(xml_file):
    """
    Parses a WordPress XML export file to extract all unique categories and tags.

    Every un-namespaced ``<category>`` element below the document root is
    inspected; its ``domain`` attribute decides whether its text is recorded
    as a category (``domain="category"``) or a tag (``domain="post_tag"``).
    Names are stripped of surrounding whitespace before de-duplication, and
    elements whose text is empty or whitespace-only are ignored.

    Args:
        xml_file (str): The path to the WordPress XML export file.

    Returns:
        tuple: A tuple containing two sorted lists: (categories, tags).
            Both lists are empty if the file is missing or is not
            well-formed XML; in that case an error message is printed.
    """
    # Only parsing can raise the errors handled here, so keep the try minimal.
    try:
        root = ET.parse(xml_file).getroot()
    except ET.ParseError as e:
        print(f"Error parsing XML file: {e}")
        return [], []
    except FileNotFoundError:
        print(f"Error: The file '{xml_file}' was not found.")
        return [], []

    categories = set()
    tags = set()
    # Map each taxonomy domain to the set that collects its names.
    names_by_domain = {'category': categories, 'post_tag': tags}

    # A single pass over the tree handles both taxonomies.
    for element in root.iterfind('.//category'):
        bucket = names_by_domain.get(element.get('domain'))
        if bucket is None:
            # Some other taxonomy (or no domain attribute): not of interest.
            continue
        # Pretty-printed exports can pad the text with newlines/indentation;
        # strip it so "News" and "\n  News\n" collapse to a single entry.
        name = (element.text or '').strip()
        if name:
            bucket.add(name)

    return sorted(categories), sorted(tags)
if __name__ == "__main__":
    # Paths are resolved relative to this script; the export lives one level up.
    here = os.path.dirname(os.path.abspath(__file__))
    export_path = os.path.join(os.path.dirname(here), 'WordPress.2025-09-17.xml')

    print(f"Looking for XML file at: {export_path}")
    print(f"File exists: {os.path.exists(export_path)}")

    category_names, tag_names = extract_categories_tags_from_wordpress_export(export_path)
    print(f"Found {len(category_names)} categories and {len(tag_names)} tags")

    # Assemble the report in memory first; a section is emitted only when it
    # has at least one entry.
    report_lines = []
    if category_names:
        report_lines.append("### Categories ###\n")
        report_lines.extend(f"- {name}\n" for name in category_names)
    if tag_names:
        report_lines.append("\n### Tags ###\n")
        report_lines.extend(f"- {name}\n" for name in tag_names)

    # The report is written next to the script itself.
    report_path = os.path.join(here, 'blog_categories_tags_output.txt')
    with open(report_path, 'w', encoding='utf-8') as report:
        report.writelines(report_lines)

    print(f"Output written to: {report_path}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment