Last active
October 25, 2025 06:47
-
-
Save threethan/01c3f9d17eadcf8f1a78699f1ba4578a to your computer and use it in GitHub Desktop.
Translate Android strings.xml with DeepL and python. Supports incremental translation & mid-string tags.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ''' | |
| Translate Android strings.xml files using DeepL API. | |
| Requires an API key from https://www.deepl.com/pro-api | |
| Usage: | |
| - Set the DEEPL_API_KEY environment variable to your key or pass it via --auth-key | |
| - Run the script with --res-dir pointing to your Android res folder (or without args to see help) | |
| - Optionally specify source and target languages, and files to translate. | |
| (Default: Extend existing translations in values folders from EN strings.xml) | |
| ''' | |
| import requests | |
| import os | |
| import xml.etree.ElementTree as ET | |
| import tqdm | |
| from typing import List | |
| import argparse | |
| import atexit | |
# Built-in defaults; each one may be overridden from the command line below.
USE_FREE_API = True  # api-free.deepl.com endpoint
RES_DIR = None
FROM_LANG = 'EN'
TO_LANGS = []
FROM_FILES = ["strings.xml"]
KEEP_NON_TRANSLATABLE = False

# CLI arguments
# Display them nicely by running with no arguments
parser = argparse.ArgumentParser(
    description="Translate Android strings.xml files using DeepL",
)
parser.add_argument(
    "--res-dir", "-dir", "-d",
    default=RES_DIR,
    help="Path to Android res folder for in AND out (Required, must end with 'res')",
)
parser.add_argument(
    "--from-lang", "-f",
    default=FROM_LANG,
    help="Source language code (e.g. EN)",
)
parser.add_argument(
    "--to-langs", "-t",
    nargs="+",
    default=TO_LANGS,
    help="Space-separated target language codes (default: use existing values-XX dirs.)",
)
parser.add_argument(
    "--from-files",
    nargs="+",
    default=FROM_FILES,
    help="Filenames to translate (default: strings.xml)",
)
parser.add_argument(
    "--auth-key",
    help="DeepL API key (you can also set DEEPL_API_KEY env. var)",
)

# Boolean switches; each lives in its own (single-member) exclusive group.
group = parser.add_mutually_exclusive_group()
group.add_argument(
    "--keep-non-translatable",
    dest="keep_non_translatable",
    action="store_true",
    help="Keep non-translatable strings in output",
)
group = parser.add_mutually_exclusive_group()
group.add_argument(
    "--paid-api",
    dest="use_free_api",
    action="store_false",
    help="Use paid api.deepl.com endpoint",
)
parser.set_defaults(
    keep_non_translatable=KEEP_NON_TRANSLATABLE,
    use_free_api=USE_FREE_API,
)

# Parsing happens as soon as this module is executed. parse_known_args() is
# used so that unrecognised arguments are collected instead of aborting.
_args, _ = parser.parse_known_args()

# Replace the defaults with whatever the command line resolved to.
USE_FREE_API, RES_DIR = _args.use_free_api, _args.res_dir
FROM_LANG, TO_LANGS = _args.from_lang, _args.to_langs
FROM_FILES, KEEP_NON_TRANSLATABLE = _args.from_files, _args.keep_non_translatable

# Key precedence: --auth-key, then $DEEPL_API_KEY, then $DEEPL_KEY.
DEEPL_API_KEY = _args.auth_key or os.getenv("DEEPL_API_KEY") or os.getenv("DEEPL_KEY")
def _run_if_requested():
    """Command-line entry point, executed through ``atexit``.

    The function is registered with ``atexit`` right below, so it runs at
    interpreter exit, i.e. after the rest of this module (including
    ``translate_android_string_values``, which is defined further down) has
    been executed.

    Behaviour:
    - Imported as a module: does nothing.
    - Run as a script without ``--res-dir``: prints the CLI help and exits.
    - Run as a script with ``--res-dir``: translates ``FROM_FILES`` from
      ``FROM_LANG`` into ``TO_LANGS``. When no target languages were given,
      they are derived from the ``values-XX`` directories inside ``RES_DIR``.
    """
    global RES_DIR, FROM_LANG, TO_LANGS, FROM_FILES

    # Merely importing this file must not print help or translate anything.
    if __name__ != "__main__":
        return

    if not RES_DIR:
        # Show help if nothing was specified
        parser.print_help()
        parser.exit(0)

    if not TO_LANGS:
        # detect target langs from existing values folders
        detected = []
        for entry in sorted(os.listdir(RES_DIR)):
            if not entry.startswith("values-"):
                continue
            if not os.path.isdir(os.path.join(RES_DIR, entry)):
                continue
            qualifier = entry[len("values-"):]
            # Skip folders whose first qualifier cannot be a language code
            # (values-night, values-v21, values-sw600dp, values-land, ...);
            # passing those on as a target language can only fail.
            primary = qualifier.split("-", 1)[0]
            if not (primary.isalpha() and 2 <= len(primary) <= 3):
                continue
            detected.append(qualifier.upper())
        TO_LANGS = detected

    print(f"Translating from {FROM_LANG} to {TO_LANGS} in '{RES_DIR}'...")
    translate_android_string_values(RES_DIR, FROM_LANG, TO_LANGS, from_files=FROM_FILES)


atexit.register(_run_if_requested)
def translate(text, target_lang='EN', source_lang=None, auth_key=DEEPL_API_KEY, use_free_api=USE_FREE_API, formality=None, timeout=20):
    """
    Translate a string or list of strings via DeepL HTTP API.

    Args:
        text (str or list[str]): text to translate. A list/tuple is sent as
            one request with several 'text' fields.
        target_lang (str): DeepL target language code (e.g. 'EN', 'DE', 'FR').
        source_lang (str|None): optional source language code.
        auth_key (str|None): DeepL API key. The default is the value the
            module-level DEEPL_API_KEY had when this function was defined
            (resolved from --auth-key or the DEEPL_API_KEY / DEEPL_KEY
            environment variables).
        use_free_api (bool): use api-free.deepl.com endpoint if True.
        formality (str|None): 'more' / 'less' / 'default' where supported.
        timeout (int|float): request timeout seconds.

    Returns:
        str or list[str]: translated text (same shape as input). An empty
        list/tuple yields an empty list without contacting the API.

    Raises:
        RuntimeError: if no auth key is available.
        requests.HTTPError: if the API answers with an error status.
    """
    if not auth_key:
        raise RuntimeError("DeepL auth key not found. Set DEEPL_API_KEY env var or pass auth_key.")

    is_batch = isinstance(text, (list, tuple))
    texts = list(text) if is_batch else [text]
    if not texts:
        # Nothing to translate; a request without any 'text' field is invalid.
        return []

    endpoint = 'https://api-free.deepl.com/v2/translate' if use_free_api else 'https://api.deepl.com/v2/translate'

    # Build form data: multiple 'text' fields allowed
    data = [('text', t) for t in texts]
    data.append(('target_lang', target_lang))
    if source_lang:
        data.append(('source_lang', source_lang))
    if formality:
        data.append(('formality', formality))

    # Authenticate through the Authorization header (DeepL's documented
    # scheme) rather than the legacy 'auth_key' form field; this also keeps
    # the key out of the request body.
    headers = {'Authorization': f'DeepL-Auth-Key {auth_key}'}

    resp = requests.post(endpoint, data=data, headers=headers, timeout=timeout)
    resp.raise_for_status()
    translations = [t.get('text', '') for t in resp.json().get('translations', [])]

    if is_batch:
        return translations
    return translations[0] if translations else ''
def combineString(elem: ET.Element) -> str:
    """Flatten *elem*'s content into one string with inline tags written out.

    The element's own text comes first; every child is then rendered as
    ``<tag>`` + flattened child content + ``</tag>`` + the child's tail text.
    Only ``child.tag`` is emitted, so attributes on inline tags are not part
    of the result.
    """
    pieces = [elem.text or ""]
    for node in elem:
        # Recursing into a childless node simply yields its text (or "").
        pieces.extend((
            f"<{node.tag}>",
            combineString(node),
            f"</{node.tag}>",
            node.tail or "",
        ))
    return "".join(pieces)
def uncombineString(s: str) -> ET.Element:
    """Rebuild a ``<string>`` element from flattened text with inline tags."""
    string_element = uncombineTag(s, "string")
    return string_element
def uncombineTag(s: str, tag: str) -> ET.Element:
    """Parse flattened text with inline ``<x>...</x>`` tags into an element.

    Args:
        s: text that may contain simple inline tags, in the form produced by
            ``combineString`` (no attributes, explicit closing tags).
        tag: tag name for the element that is returned.

    Returns:
        A new element named *tag*. Plain text goes into ``.text`` (before the
        first child) or into the ``.tail`` of the preceding child; each
        ``<x>...</x>`` span becomes a child built recursively.

    A ``<`` that does not start a well-formed pair (no ``>``, an empty or
    closing-tag name, or no matching ``</x>``) is kept as literal text, so
    malformed input cannot stall the scan. An opening tag is paired with the
    first following ``</x>``; nested tags of the same name are therefore not
    supported.
    """
    root = ET.Element(tag)

    def append(text: str):
        # Text before any child belongs to root.text, later text to the
        # tail of the most recently added child.
        if len(root) == 0:
            root.text = (root.text or "") + text
        else:
            last = root[-1]
            last.tail = (last.tail or "") + text

    if "<" not in s:
        append(s)
        return root

    i = 0
    while i < len(s):
        lt = s.find("<", i)
        if lt == -1:
            append(s[i:])
            break
        if lt > i:
            append(s[i:lt])

        gt = s.find(">", lt)
        child_tag = s[lt + 1:gt] if gt != -1 else ""
        closing = f"</{child_tag}>"
        if child_tag and not child_tag.startswith("/"):
            end = s.find(closing, gt)
        else:
            end = -1

        if end == -1:
            # Not a usable tag: emit the '<' literally and move past it.
            # Advancing by exactly one character guarantees progress.
            append("<")
            i = lt + 1
            continue

        root.append(uncombineTag(s[gt + 1:end], child_tag))
        i = end + len(closing)
    return root
def _throw(msg):
    """Raise a generic ``Exception`` with *msg* (usable inside a lambda)."""
    raise Exception(msg)


def translateElement(elem: ET.Element, target_lang='EN', source_lang=None,
                     ignore_if=lambda _: False,
                     replace_ignored=lambda _: _throw("replace_ignored must be defined when ignore_if is True")) -> ET.Element:
    """Translate a ``<resources>`` tree or a single ``<string>`` element.

    Args:
        elem: element to process.
        target_lang: language code handed to ``translate``.
        source_lang: optional source language code handed to ``translate``.
        ignore_if: predicate evaluated on every child of ``<resources>``;
            children for which it returns True are not translated.
        replace_ignored: called with each ignored child; its return value is
            used in the output instead. The default raises, so it must be
            supplied whenever ``ignore_if`` can return True.

    Returns:
        - ``<resources>``: a new element holding the processed children.
          Children marked ``translatable="false"`` are left out unless
          ``KEEP_NON_TRANSLATABLE`` is set.
        - ``<string>``: a new translated element with the original
          attributes and tail. Strings marked ``translatable="false"`` and
          strings without any text are returned unchanged.
        - any other tag: *elem* itself, untouched.
    """
    if elem.tag == "resources":
        children = [replace_ignored(child)
                    if ignore_if(child)
                    else translateElement(child, target_lang=target_lang, source_lang=source_lang,
                                          ignore_if=ignore_if, replace_ignored=replace_ignored)
                    for child in tqdm.tqdm(list(elem), desc="Translating children", unit="item")]
        new_elem = ET.Element(elem.tag, attrib=elem.attrib)
        # Carry over the whitespace before the first child so the output
        # keeps the source file's layout.
        new_elem.text = elem.text
        for child in children:
            if KEEP_NON_TRANSLATABLE or child.get("translatable") != "false":
                new_elem.append(child)
        return new_elem

    if elem.tag != "string":
        return elem

    # Never translate strings marked translatable="false". Whether they are
    # kept in the output is decided by the <resources> branch above.
    if elem.get("translatable") == "false":
        return elem

    # split elements into text and subelements, translate, and reassemble
    combined = combineString(elem)
    n_lt = combined.count('<')
    n_gt = combined.count('>')
    # Real line breaks in the XML are only layout; the escaped "\n" sequences
    # are the actual newlines and are sent to the translator as such.
    combined = combined.replace("\n", "").replace("\\n", "\n")
    if not combined.strip():
        # Nothing to translate; avoid a pointless API request.
        return elem
    n_nl = combined.count('\n')

    translated = translate(combined, target_lang=target_lang, source_lang=source_lang)
    if n_lt != translated.count('<') or n_gt != translated.count('>'):
        print(f"Warning: mismatched tag counts in translation of '{elem.get('name')}'")
        print(f"Original: {combined}")
        print(f"Translated: {translated}")
        print(f"Proceeding with tags removed...")
        # Retry with the plain text only. itertext() drops the inline tags
        # entirely (names included), not just the angle brackets.
        plain = "".join(elem.itertext()).replace("\n", "").replace("\\n", "\n")
        translated = translate(plain, target_lang=target_lang, source_lang=source_lang)
        # Newlines in the answer are real content here and must be kept;
        # only stray angle brackets are removed so no tag parsing happens.
        translated = translated.replace('<', '').replace('>', '')

    if n_nl != translated.count('\n'):
        print(f"Warning: mismatched newline counts in translation of '{elem.get('name')}'")
        print(f"Original: {combined}")
        print(f"Translated: {translated}")

    # Re-escape quotes (unescape first so already-escaped quotes are not
    # escaped twice)
    for c in ['\'', '\"']:
        translated = translated.replace(f"\\{c}", c).replace(c, f"\\{c}")
    # Re-escape newlines: each becomes a line break in the XML followed by
    # the escaped "\n" sequence
    translated = translated.replace('\n', '\n\\n')

    new_elem = uncombineString(translated)
    new_elem.tag = elem.tag
    new_elem.attrib = elem.attrib
    new_elem.tail = elem.tail
    return new_elem
def translate_android_strings_xml_file(input_file: str, output_file: str, source_lang: str, target_lang: str):
    """Translate one Android string resource file and write the result.

    Args:
        input_file: path of the source XML file; its root must be
            ``<resources>``.
        output_file: path the translated XML is written to. If it already
            exists, ``<string>`` entries found in it are reused as-is and
            only the remaining entries are translated. Strings that are not
            present in *input_file* do not appear in the new output.
        source_lang: source language code.
        target_lang: target language code.

    Raises:
        AssertionError: if the root of *input_file* is not ``<resources>``.
    """
    input_root = ET.parse(input_file).getroot()
    assert input_root.tag == "resources", f"Non-resources root tag '{input_root.tag}' in input file '{input_file}', aborting!"

    if os.path.exists(output_file):
        # Attempt to preserve existing translations if they exist
        print(f" [!] | Overwriting existing file '{output_file}'.")
        print(f" | Existing translations will be skipped and left untouched.")
        print(f" | Any string not present in source file will be removed from output.")
        existing_root = ET.parse(output_file).getroot()
        # name -> first existing <string> with that name, for O(1) lookups
        existing_by_name = {}
        for existing in existing_root:
            if existing.tag == "string":
                existing_by_name.setdefault(existing.get("name"), existing)
        # Translate, ignoring existing strings and replacing them with existing translations
        output_root = translateElement(
            input_root, source_lang=source_lang, target_lang=target_lang,
            ignore_if=lambda el: el.tag == "string" and el.get("name") in existing_by_name,
            replace_ignored=lambda el: existing_by_name.get(el.get("name"), el))
    else:
        # Fresh translation: make sure the values-xx directory exists first.
        out_dir = os.path.dirname(output_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        output_root = translateElement(input_root, source_lang=source_lang, target_lang=target_lang)

    # Both paths end here so a fresh translation is written out as well.
    ET.ElementTree(output_root).write(output_file, encoding='utf-8', xml_declaration=True)
def translate_android_string_values(res_folder: str, source_lang: str, target_langs: List[str], from_files=["strings.xml"]):
    """Translate resource files of an Android ``res`` folder into several languages.

    For every target language and every name in *from_files*, the file
    ``<res_folder>/values/<name>`` is translated into
    ``<res_folder>/values-<lang in lower case>/<name>``.

    Args:
        res_folder: path of the Android ``res`` directory; a trailing path
            separator is accepted.
        source_lang: source language code.
        target_langs: target language codes.
        from_files: file names inside ``values`` to translate. The default
            list is only read, never modified.

    Raises:
        AssertionError: if *res_folder* is not an existing directory or its
            path does not end with ``res``.
    """
    assert os.path.isdir(res_folder), f"Resources folder '{res_folder}' does is not an existing directory"
    # Normalise first so that "app/src/main/res/" passes the suffix check.
    res_folder = os.path.normpath(res_folder)
    assert res_folder.endswith("res"), f"Path '{res_folder}' does not appear to be a valid Android 'res' folder path"

    total = len(target_langs)
    for i, target_lang in enumerate(target_langs, 1):
        for from_file in from_files:
            print(f"Translating '{from_file}' from {source_lang} to {target_lang} ({i} of {total})...")
            translate_android_strings_xml_file(
                input_file=os.path.join(res_folder, "values", from_file),
                output_file=os.path.join(res_folder, f"values-{target_lang.lower()}", from_file),
                source_lang=source_lang,
                target_lang=target_lang
            )
    print("Translation complete.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Couldn't find a decent auto-translator that actually supported inline tags like
<b> and <i>, so I created this. If target languages already exist:
Translation isn't context-aware, so actual translation quality may be poor.