Last active
September 16, 2025 12:27
-
-
Save ILPlais/18d143807cfc0247b4639b612cdf6618 to your computer and use it in GitHub Desktop.
Decodes HTML entities (both hexadecimal and decimal) in filenames
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import re | |
| import html | |
def decode_html_entities(text):
    """
    Decode HTML character references in ``text``.

    Named (``&amp;``), decimal (``&#233;``) and hexadecimal (``&#xE9;``)
    references are all resolved by a single ``html.unescape`` pass.

    The decoding is deliberately performed exactly once. Running additional
    numeric-reference passes over the already-unescaped text would
    double-decode input such as ``&amp;#x41;`` (which stands for the literal
    text ``&#x41;``) and could produce NUL or lone-surrogate characters,
    none of which are usable in filenames.

    Args:
        text: String that may contain HTML character references.

    Returns:
        The string with each character reference replaced by the character
        it denotes. Text without references is returned unchanged.
    """
    # html.unescape handles named, decimal and hexadecimal references alike,
    # so no separate regex-based numeric decoding is needed afterwards.
    return html.unescape(text)
def rename_files_with_html_entities(directory_path, dry_run = True):
    """
    Renames files containing escaped HTML entities

    Every entry directly inside ``directory_path`` that is not itself a
    directory has its name passed through ``decode_html_entities``. When the
    decoded name differs, the file is renamed (or, in dry-run mode, the
    planned rename is only displayed). Sub-directories are not descended into.
    Progress, conflicts and errors are reported with ``print``.

    A file is left untouched when the decoded name:
      * already exists in the directory, or was already chosen as the target
        of another file during the same dry run;
      * is empty, ``.`` or ``..``, or contains a path separator or a NUL
        character (such a name could move the file out of the directory or
        cannot be used as a filename at all).

    Args:
        directory_path: Path to the directory to process
        dry_run: If True, only displays what would be done without renaming

    Returns:
        None.
    """
    # NOTE(review): the leading glyphs in several messages below look like
    # mis-decoded emoji; they are kept as found - confirm against the
    # original file before changing them.
    if not os.path.exists(directory_path):
        print(f"Error: Directory '{directory_path}' does not exist.")
        return
    # os.listdir() raises NotADirectoryError for a regular file, so reject
    # anything that exists but is not a directory before listing it.
    if not os.path.isdir(directory_path):
        print(f"Error: '{directory_path}' is not a directory.")
        return

    # Characters that must never appear inside a single path component.
    forbidden_chars = {os.sep, "\x00"}
    if os.altsep:
        forbidden_chars.add(os.altsep)

    renamed_count = 0
    # Targets already announced during a dry run. Nothing is renamed in that
    # mode, so os.path.exists() alone cannot detect two files that decode to
    # the same name.
    planned_names = set()

    for filename in os.listdir(directory_path):
        old_path = os.path.join(directory_path, filename)

        # Skip directories
        if os.path.isdir(old_path):
            continue

        # Decode HTML entities in the name
        new_filename = decode_html_entities(filename)

        # If the name hasn't changed, move to next
        if new_filename == filename:
            continue

        # Refuse decoded names that are not a plain filename: entities such
        # as &#47; decode to a path separator and would otherwise let
        # os.rename() move the file into another directory.
        if new_filename in ("", ".", "..") or any(
            char in new_filename for char in forbidden_chars
        ):
            print(f"Skipped: decoded name {new_filename!r} is not a valid filename. File left untouched: '{filename}'")
            continue

        new_path = os.path.join(directory_path, new_filename)

        # Check if the new name already exists (or is already planned)
        if os.path.exists(new_path) or new_filename in planned_names:
            print(f"β οΈ Conflict: '{new_filename}' already exists. File skipped: '{filename}'")
            continue

        if dry_run:
            planned_names.add(new_filename)
            print("π Would be renamed:")
            print(f" Old: {filename}")
            print(f" New: {new_filename}")
        else:
            try:
                os.rename(old_path, new_path)
            except (OSError, ValueError) as e:
                # ValueError (including UnicodeEncodeError) is raised for
                # names the OS interface cannot encode; report it like any
                # other failed rename instead of aborting the whole run.
                print(f"β Error renaming '{filename}': {e}")
            else:
                print("β Renamed:")
                print(f" {filename} -> {new_filename}")
                renamed_count += 1
        print()  # Empty line for readability

    if dry_run:
        print("π Simulation mode completed. Use dry_run = False to make the changes.")
    else:
        print(f"β {renamed_count} file(s) renamed successfully.")
def main():
    """
    Interactive entry point.

    Asks for a directory (an empty answer means the current directory),
    runs ``rename_files_with_html_entities`` in simulation mode, and then
    performs the real renaming only after the user confirms with ``y`` or
    ``yes``.
    """
    # Usage example
    directory = input("Enter the directory path to process (or '.' for current directory): ").strip()
    if not directory:
        directory = "."

    # Stop before the simulation when the path is unusable; otherwise the
    # user would still be asked to confirm a renaming that cannot happen.
    if not os.path.isdir(directory):
        print(f"Error: '{directory}' is not an existing directory.")
        return

    print(f"\nAnalyzing directory: {os.path.abspath(directory)}")
    print("=" * 50)

    # First run in simulation mode
    print("SIMULATION MODE - No files will be modified")
    print("-" * 50)
    rename_files_with_html_entities(directory, dry_run = True)

    # Ask for confirmation for actual execution
    response = input("\nDo you want to proceed with actual renaming? (y/n): ").strip().lower()
    if response not in ('y', 'yes'):
        print("Operation cancelled.")
        return

    print("\nREAL MODE - Renaming files")
    print("-" * 26)
    rename_files_with_html_entities(directory, dry_run = False)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment