Skip to content

Instantly share code, notes, and snippets.

@wassupluke
Created May 25, 2025 05:19
Show Gist options
  • Select an option

  • Save wassupluke/1ea8d0b52dcbe768ea634fe82cbf8d07 to your computer and use it in GitHub Desktop.

Select an option

Save wassupluke/1ea8d0b52dcbe768ea634fe82cbf8d07 to your computer and use it in GitHub Desktop.
XML String Translation and File Comparison, powered by the googletrans api
"""Compares existing strings.xml file with another xml file.*
The script expects exactly two arguments: existing_file_path and
new_file_path, respectively.
e.g., `python3 compare-translations.py strings-fr.xml more-strings-fr.xml`
Any strings already found to be translated in the existing file will be
removed from the new file, and the new file will be appended to the
existing file.
*This automated process will leave the chunk "</resources>\n<resources>"
in the middle of the file at the point where the new content was appended.
This is intentional: it forces the user to personally review the output file
for appropriateness. The user must manually remove the aforementioned
text chunk from the file, or it may break whatever depends on that file.
"""
import sys
import re
# Require exactly two positional arguments: the existing strings file and the
# file holding candidate new strings. sys.exit() with a string prints it to
# stderr and exits with status 1, so the exit code is the same as before.
if len(sys.argv) != 3:
    sys.exit(
        "Usage: python3 compare-translations.py "
        "existing_file_path new_file_path"
    )
_, original, new = sys.argv

# Android string resources are UTF-8; be explicit so the result does not
# depend on the platform's default encoding.
with open(original, "r", encoding="utf-8") as f:
    original_content = f.read()

# Keep a backup of the existing file before it is appended to.
with open(f"{original}.bak", "w", encoding="utf-8") as f:
    f.write(original_content)

with open(new, "r", encoding="utf-8") as f:
    new_content = f.read()

# Capture (name, body) for every <string name="..."> element whose body does
# not start with a CDATA section. DOTALL lets a body span several lines.
pattern = r'<string name="([^"]+)">(?!<!\[CDATA\[)(.*?)</string>'
matches_original = re.findall(pattern, original_content, re.DOTALL)
matches_new = re.findall(pattern, new_content, re.DOTALL)

# From here on the two names hold name -> body mappings instead of raw text.
original_content = dict(matches_original)
new_content = dict(matches_new)

# Drop every string that the existing file already defines. A key that is
# absent from the new file is simply skipped.
for key in original_content:
    new_content.pop(key, None)

# The appended chunk is a second <resources> element on purpose: the seam
# "</resources>\n<resources>" must be reviewed and removed by hand.
output = ['<resources>\n']
output.extend(
    f' <string name="{name}">{content}</string>\n'
    for name, content in new_content.items()
)
output.append('</resources>\n')

with open(original, "a", encoding="utf-8") as f:
    f.writelines(output)
"""
Translates xml content to a desired language.
Script expects exactly three arguments:
input file path, output file path, and output language, respectively.
The output language shall be the two-letter code for that language.
e.g., `python3 translator3.py strings.xml strings-fr.xml fr`
https://pypi.org/project/googletrans/
pip install "googletrans>=4.0.2" # the awaited (async) Translator API used below is not available in 3.1.0a0
"""
import asyncio
import re
import sys
import os
from googletrans import Translator
def get_args():
    """Return the input path, output path and target language code.

    With exactly three command-line arguments they are echoed and used as
    given. With no arguments the three values are read interactively and
    stripped of surrounding whitespace. Any other argument count prints a
    usage message and exits with status 1. The language code is lower-cased
    in both modes.
    """
    argc = len(sys.argv)

    # Everything was supplied on the command line.
    if argc == 4:
        print(f"Arguments received: {sys.argv[1:]}")
        source_path, target_path, language = sys.argv[1:]
        return source_path, target_path, language.lower()

    # Some arguments were supplied, but not the right number of them.
    if argc > 1:
        usage = (
            f"Error: Expected exactly three arguments, got {argc - 1}.\n"
            "Usage: python translator2.py input_file_path output_file_path target_language\n"
            "Example: python translator2.py input.xml output.xml en"
        )
        print(usage)
        sys.exit(1)

    # No arguments at all: ask for each value in turn.
    prompts = (
        "Enter the full path of the file to be translated: ",
        "Enter the full path for the desired output file: ",
        "Enter the desired output language as the two-letter language code "
        "(e.g., 'en' for English, 'lt' for Lithuanian): ",
    )
    source_path, target_path, language = (input(p).strip() for p in prompts)
    return source_path, target_path, language.lower()
def parse_input_file(input_file_path: str) -> dict:
    """Read an XML file and map each string resource name to its body.

    Only elements written exactly as <string name="...">...</string> are
    picked up, and elements whose body starts with a CDATA section are
    skipped. Prints an error and exits with status 1 if the file is missing
    or is not valid UTF-8; prints a warning (and returns an empty dict) if
    no element matches.
    """
    try:
        with open(input_file_path, "r", encoding="utf-8") as handle:
            xml_text = handle.read()
    except FileNotFoundError:
        print(f"Error: Input file '{input_file_path}' not found.")
        sys.exit(1)
    except UnicodeDecodeError:
        print(f"Error: Input file '{input_file_path}' contains invalid characters. Ensure it is UTF-8 encoded.")
        sys.exit(1)

    # Group 1 is the name attribute, group 2 the element body. The negative
    # lookahead rejects CDATA bodies; DOTALL allows multi-line bodies.
    string_tag = re.compile(
        r'<string name="([^"]+)">(?!<!\[CDATA\[)(.*?)</string>',
        re.DOTALL,
    )
    pairs = string_tag.findall(xml_text)
    if not pairs:
        print("Warning: No valid <string> tags found in the input file.")

    # If a name occurs more than once, the last body wins.
    return dict(pairs)
async def translate_bulk(my_dict: dict, dest: str) -> dict:
    """Translate every value of ``my_dict`` and return a new name -> text dict.

    Strings are translated one at a time. A string whose translation raises
    is reported with a warning and left out of the result, so the returned
    dict may have fewer entries than ``my_dict``.

    Args:
        my_dict: Mapping of string resource name to source text.
        dest: Target language code passed to the translator.

    Returns:
        Mapping of resource name to translated text for every string that
        was translated successfully.
    """
    translated_dict = {}
    # Use the translator as an async context manager so the resources it
    # holds are released when the loop finishes, instead of being left open
    # until the interpreter exits.
    async with Translator() as translator:
        for name, content in my_dict.items():
            try:
                translation = await translator.translate(content, dest=dest)
                translated_dict[name] = translation.text
            except Exception as e:
                # Best effort: one failing string must not abort the batch.
                print(f"Warning: Failed to translate '{content}' to '{dest}': {e}")
    return translated_dict
def main():
    """Translate the strings of an XML file and write them to a new file.

    Collects the paths and language via get_args(), extracts the strings with
    parse_input_file(), translates them with translate_bulk(), and writes the
    result as a <resources> document. Exits with status 1 if the input file
    does not exist, if translation raises, or if the output cannot be written.
    """
    source_path, target_path, language = get_args()

    # Stop early when the input path is not an existing regular file.
    if not os.path.isfile(source_path):
        print(f"Error: Input file '{source_path}' does not exist.")
        sys.exit(1)

    strings = parse_input_file(source_path)

    try:
        translations = asyncio.run(translate_bulk(strings, language))
    except Exception as e:
        print(f"Error: Translation failed: {e}")
        sys.exit(1)

    # Assemble the document: XML declaration and root, one line per string,
    # then the closing tag.
    entries = [
        f' <string name="{name}">{content}</string>\n'
        for name, content in translations.items()
    ]
    document = [
        '<?xml version="1.0" encoding="utf-8"?>\n<resources>\n',
        *entries,
        '</resources>\n',
    ]

    try:
        with open(target_path, "w", encoding="utf-8") as out_file:
            out_file.writelines(document)
        print(f"Translation completed. Output written to '{target_path}'.")
    except Exception as e:
        print(f"Error: Failed to write to output file '{target_path}': {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment