Created
May 25, 2025 05:19
-
-
Save wassupluke/1ea8d0b52dcbe768ea634fe82cbf8d07 to your computer and use it in GitHub Desktop.
XML String Translation and File Comparison, powered by the googletrans api
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Compares existing strings.xml file with another xml file.* | |
| The script expects exactly two arguments, existing_file_path, and | |
| new_file_path, respectively. | |
| e.g., `python3 compare-translations.py strings-fr.xml more-strings-fr.xml` | |
| Any strings already found to be translated in the existing file will be | |
| removed from the new file, and the new file will be appended to the | |
| existing file. | |
| *This automated process will leave a chunk of, "</resources>\n<resources>", | |
| in the middle of the file at the point where the new content was appended. | |
| This is intentional and forces the user to personally review the output file | |
| for appropriateness and the user must manually remove the aforementioned | |
| text chunk from the file else it may break whatever depends on that file. | |
| """ | |
import sys
import re

# Matches <string name="...">...</string> elements whose body does not begin
# with a CDATA section. DOTALL lets a value span several lines.
_STRING_PATTERN = re.compile(
    r'<string name="([^"]+)">(?!<!\[CDATA\[)(.*?)</string>', re.DOTALL
)


def _extract_strings(xml_text):
    """Return a {name: content} dict of the <string> elements in *xml_text*.

    When a name occurs more than once, the last occurrence wins.
    """
    return dict(_STRING_PATTERN.findall(xml_text))


def _build_append_chunk(existing, incoming):
    """Return the lines of a <resources> chunk holding only the new strings.

    A string is "new" when its name is a key of *incoming* but not of
    *existing*. The chunk is emitted even when nothing is new.
    """
    lines = ['<resources>\n']
    lines.extend(
        f' <string name="{name}">{content}</string>\n'
        for name, content in incoming.items()
        if name not in existing
    )
    lines.append('</resources>\n')
    return lines


def main(argv=None):
    """Append strings missing from the existing file, after backing it up.

    Args:
        argv: Argument vector shaped like ``sys.argv`` (program name,
            existing file path, new file path). Defaults to ``sys.argv``.

    Raises:
        SystemExit: With status 1 when the argument count is not exactly two
            (plus the program name).
    """
    args = sys.argv if argv is None else argv
    if len(args) != 3:
        print(
            "Usage: python3 compare-translations.py "
            "existing_file_path new_file_path",
            file=sys.stderr,
        )
        sys.exit(1)
    _, original, new = args

    # Explicit UTF-8 so non-ASCII translations do not depend on the
    # platform's default encoding.
    with open(original, "r", encoding="utf-8") as f:
        original_text = f.read()
    # Keep a backup of the existing file before it is modified.
    with open(f"{original}.bak", "w", encoding="utf-8") as f:
        f.write(original_text)
    with open(new, "r", encoding="utf-8") as f:
        new_text = f.read()

    chunk = _build_append_chunk(
        _extract_strings(original_text), _extract_strings(new_text)
    )

    # Appending deliberately leaves "</resources>\n<resources>" in the middle
    # of the file so the user has to review and merge the result by hand.
    with open(original, "a", encoding="utf-8") as f:
        f.writelines(chunk)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Translates xml content to a desired language. | |
| Script expects exactly three arguments: | |
| input file path, output file path, and output language, respectively. | |
| The output language shall be the two-letter code for that language. | |
| e.g., `python3 translator3.py strings.xml strings-fr.xml fr` | |
| https://pypi.org/project/googletrans/ | |
| pip install "googletrans>=4.0.2" # async API; the older 3.1.0a0 release has a synchronous translate() | |
| """ | |
| import asyncio | |
| import re | |
| import sys | |
| import os | |
| from googletrans import Translator | |
def get_args():
    """Return (input path, output path, language code) for this run.

    With exactly three command-line arguments they are returned as given,
    the language code lower-cased. With no arguments the three values are
    requested interactively, stripped of surrounding whitespace, and the
    language code is lower-cased. Any other argument count prints a usage
    message and exits.

    Returns:
        A 3-tuple of strings: input file path, output file path, language.

    Raises:
        SystemExit: With status 1 when one, two, or more than three
            arguments were supplied.
    """
    arg_count = len(sys.argv) - 1

    if arg_count == 3:
        print(f"Arguments received: {sys.argv[1:]}")
        return sys.argv[1], sys.argv[2], sys.argv[3].lower()

    if arg_count > 0:
        # Show the name the script was actually invoked under instead of a
        # hard-coded file name that can drift from the real one.
        script = os.path.basename(sys.argv[0]) or "translator.py"
        print(f"Error: Expected exactly three arguments, got {arg_count}.\n"
              f"Usage: python {script} input_file_path output_file_path target_language\n"
              f"Example: python {script} input.xml output.xml en")
        sys.exit(1)

    input_file_path = input("Enter the full path of the file to be translated: ").strip()
    output_file_path = input("Enter the full path for the desired output file: ").strip()
    dest = input("Enter the desired output language as the two-letter language code "
                 "(e.g., 'en' for English, 'lt' for Lithuanian): ").strip().lower()
    return input_file_path, output_file_path, dest
def parse_input_file(input_file_path: str) -> dict:
    """Read a UTF-8 XML file and return its <string> entries as a dict.

    Args:
        input_file_path: Path of the XML file to read.

    Returns:
        A mapping of each ``name`` attribute to the element's raw inner
        text. Elements whose body starts with a CDATA section are not
        matched. The mapping is empty (after a printed warning) when
        nothing matches.

    Raises:
        SystemExit: With status 1 when the file is missing or is not valid
            UTF-8.
    """
    # <string name="...">...</string>, skipping bodies that open with CDATA;
    # DOTALL allows values that span several lines.
    string_tag = re.compile(
        r'<string name="([^"]+)">(?!<!\[CDATA\[)(.*?)</string>', re.DOTALL
    )

    try:
        with open(input_file_path, "r", encoding="utf-8") as handle:
            xml_text = handle.read()
    except UnicodeDecodeError:
        print(f"Error: Input file '{input_file_path}' contains invalid characters. Ensure it is UTF-8 encoded.")
        sys.exit(1)
    except FileNotFoundError:
        print(f"Error: Input file '{input_file_path}' not found.")
        sys.exit(1)

    pairs = string_tag.findall(xml_text)
    if not pairs:
        print("Warning: No valid <string> tags found in the input file.")

    # Each match is a (name, content) tuple; later duplicates overwrite.
    return dict(pairs)
async def translate_bulk(my_dict: dict, dest: str, translator=None) -> dict:
    """Translate every value of *my_dict* and return the results by key.

    Args:
        my_dict: Mapping of string name to source text.
        dest: Target language code passed to the translator as ``dest``.
        translator: Optional object with a ``translate(text, dest=...)``
            method whose result has a ``.text`` attribute. Defaults to a
            new ``Translator()``.

    Returns:
        A new dict mapping each name to its translated text. Entries whose
        translation raised are reported with a warning and left out.
    """
    # Local import keeps this function self-contained.
    import inspect

    if translator is None:
        translator = Translator()

    translated_dict = {}
    for name, content in my_dict.items():
        try:
            translation = translator.translate(content, dest=dest)
            # translate() may be a coroutine function or a plain method
            # depending on the installed release; await only when the call
            # actually handed back an awaitable.
            if inspect.isawaitable(translation):
                translation = await translation
            translated_dict[name] = translation.text
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(f"Warning: Failed to translate '{content}' to '{dest}': {e}")
    return translated_dict
def main():
    """Translate the <string> entries of one XML file into another file.

    Obtains the paths and target language from ``get_args()``, parses the
    input with ``parse_input_file()``, translates with ``translate_bulk()``,
    and writes the result as a UTF-8 ``<resources>`` document. Every failure
    path prints an error and exits with status 1.
    """
    source_path, target_path, language = get_args()

    # Stop early when the input path is not an existing regular file.
    if not os.path.isfile(source_path):
        print(f"Error: Input file '{source_path}' does not exist.")
        sys.exit(1)

    strings = parse_input_file(source_path)

    # Drive the async translation to completion from synchronous code.
    try:
        translated = asyncio.run(translate_bulk(strings, language))
    except Exception as err:
        print(f"Error: Translation failed: {err}")
        sys.exit(1)

    # XML declaration and opening tag, one line per string, closing tag.
    entries = [
        f' <string name="{key}">{text}</string>\n'
        for key, text in translated.items()
    ]
    document = [
        '<?xml version="1.0" encoding="utf-8"?>\n<resources>\n',
        *entries,
        '</resources>\n',
    ]

    try:
        with open(target_path, "w", encoding="utf-8") as out:
            out.writelines(document)
        print(f"Translation completed. Output written to '{target_path}'.")
    except Exception as err:
        print(f"Error: Failed to write to output file '{target_path}': {err}")
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment