Skip to content

Instantly share code, notes, and snippets.

@ILPlais
Last active September 16, 2025 12:27
Show Gist options
  • Select an option

  • Save ILPlais/18d143807cfc0247b4639b612cdf6618 to your computer and use it in GitHub Desktop.

Select an option

Save ILPlais/18d143807cfc0247b4639b612cdf6618 to your computer and use it in GitHub Desktop.
Decodes HTML entities (both hexadecimal and decimal) in filenames
import os
import re
import html
def decode_html_entities(text):
    """
    Decode HTML character references in ``text``.

    Named (``&amp;``), decimal (``&#123;``) and hexadecimal (``&#x7B;``)
    references are resolved by ``html.unescape``. Two follow-up passes then
    resolve numeric references that only become visible after that first
    decode, i.e. references whose ampersand was itself escaped
    (``&amp;#x41;`` -> ``&#x41;`` -> ``A``).

    Args:
        text: String that may contain HTML character references.

    Returns:
        The decoded string. In the follow-up passes, a numeric reference
        that does not name a usable character (NUL, a UTF-16 surrogate, or
        a value above U+10FFFF) is left in the text unchanged.
    """
    def replace_numeric_entity(match, base):
        # Turn one matched reference into its character, or keep the
        # original text when the number is not a usable code point.
        try:
            char_code = int(match.group(1), base)
        except ValueError:
            return match.group(0)
        # chr() would happily return NUL or a lone surrogate; neither can be
        # part of a file name or be encoded as UTF-8, so refuse them here.
        if char_code == 0 or 0xD800 <= char_code <= 0xDFFF or char_code > 0x10FFFF:
            return match.group(0)
        return chr(char_code)

    # Standard decode: handles named, decimal and hexadecimal references.
    text = html.unescape(text)

    # Second-level hexadecimal references (&#xXX; or &#xXXXX;).
    text = re.sub(
        r'&#x([0-9a-fA-F]+);',
        lambda match: replace_numeric_entity(match, 16),
        text,
    )

    # Second-level decimal references (&#123;).
    text = re.sub(
        r'&#([0-9]+);',
        lambda match: replace_numeric_entity(match, 10),
        text,
    )

    return text
def rename_files_with_html_entities(directory_path, dry_run=True):
    """
    Rename files whose names contain escaped HTML entities.

    Only entries directly inside ``directory_path`` are examined;
    sub-directories are skipped and not descended into. A file is left alone
    when its decoded name is unchanged, is not a usable file name (empty,
    ``.``/``..``, or containing a path separator or NUL), or collides with an
    existing entry. Progress is reported with ``print``.

    Args:
        directory_path: Path to the directory to process
        dry_run: If True, only displays what would be done without renaming
    """
    # isdir (not exists): a path to a regular file would make listdir raise.
    if not os.path.isdir(directory_path):
        print(f"Error: Directory '{directory_path}' does not exist.")
        return

    # A decoded name containing any of these would no longer be a plain file
    # name: a separator (e.g. from "&#47;") would move the file elsewhere.
    forbidden_chars = {"\x00", os.sep}
    if os.altsep:
        forbidden_chars.add(os.altsep)

    renamed_count = 0
    # Names already promised during a dry run, so that two files decoding to
    # the same name are reported the same way a real run would treat them.
    planned_names = set()

    for filename in os.listdir(directory_path):
        old_path = os.path.join(directory_path, filename)

        # Skip directories
        if os.path.isdir(old_path):
            continue

        # Decode HTML entities in the name
        new_filename = decode_html_entities(filename)

        # If the name hasn't changed, move to next
        if new_filename == filename:
            continue

        # Refuse decoded names that are not a single, valid path component
        if (new_filename in ("", os.curdir, os.pardir)
                or any(char in new_filename for char in forbidden_chars)):
            print(f"⚠️ Invalid name: '{new_filename}' is not a usable file name. File skipped: '{filename}'")
            continue

        new_path = os.path.join(directory_path, new_filename)

        # Check if the new name already exists (or is already planned)
        if os.path.exists(new_path) or new_filename in planned_names:
            print(f"⚠️ Conflict: '{new_filename}' already exists. File skipped: '{filename}'")
            continue

        if dry_run:
            planned_names.add(new_filename)
            print("📝 Would be renamed:")
            print(f" Old: {filename}")
            print(f" New: {new_filename}")
        else:
            try:
                os.rename(old_path, new_path)
            except (OSError, ValueError) as e:
                # ValueError covers names the OS layer cannot encode.
                print(f"❌ Error renaming '{filename}': {e}")
            else:
                print("✅ Renamed:")
                print(f" {filename} -> {new_filename}")
                renamed_count += 1

        print()  # Empty line for readability

    if dry_run:
        print("🔍 Simulation mode completed. Use dry_run = False to make the changes.")
    else:
        print(f"✅ {renamed_count} file(s) renamed successfully.")
def main():
    """
    Interactive entry point.

    Asks for a directory (blank input means the current directory), shows a
    simulated run, and performs the real renaming only if the user answers
    "y" or "yes".
    """
    target = input("Enter the directory path to process (or '.' for current directory): ").strip() or "."

    print(f"\nAnalyzing directory: {os.path.abspath(target)}")
    print("=" * 50)

    # Preview pass: nothing on disk is touched.
    print("SIMULATION MODE - No files will be modified")
    print("-" * 50)
    rename_files_with_html_entities(target, dry_run = True)

    # Anything other than an explicit yes cancels the real pass.
    answer = input("\nDo you want to proceed with actual renaming? (y/n): ").strip().lower()
    if answer not in ('y', 'yes'):
        print("Operation cancelled.")
        return

    print("\nREAL MODE - Renaming files")
    print("-" * 26)
    rename_files_with_html_entities(target, dry_run = False)
# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment