Created
October 8, 2024 16:00
-
-
Save vpdn/d6a6ecdc779cff44508f1145b50ad2bc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import datetime
import json
import os
import time

# pip install anthropic
import anthropic
from anthropic import Anthropic
# Global variables

# True when the input file is an InfoPlist string catalog. The script entry
# point overwrites this from the input filename, and main() reads it to pick
# the InfoPlist-specific handling.
is_info_plist = False

# Target locales, in the order they are translated and written to the file.
LANGUAGE_IDENTIFIERS = [
    "ca",     # Catalan
    "hr",     # Croatian
    "cs",     # Czech
    "da",     # Danish
    "nl",     # Dutch
    "fi",     # Finnish
    "fr",     # French
    "de",     # German
    "tr",     # Turkish
    "sv",     # Swedish
    "es",     # Spanish
    "ro",     # Romanian
    "pl",     # Polish
    "it",     # Italian
    "hu",     # Hungarian
    "el",     # Greek
    "pt-PT",  # Portuguese (Portugal)
    "pt-BR",  # Portuguese (Brazil)
    "sk",     # Slovak
]
# Initialize Anthropic client.
# The API key is taken from the ANTHROPIC_API_KEY environment variable;
# os.environ.get() yields None when the variable is not set.
anthropic_client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
# Use Claude for translation
def translate_string(source_string, target_language, extra_context=None,
                     comment=None, custom_prompt="", max_retries=5):
    """Translate one English string into ``target_language`` using Claude.

    Builds a system prompt from the fixed translation rules plus the optional
    ``custom_prompt``, ``comment`` and ``extra_context``, sends
    ``source_string`` as the user message, and returns the stripped text of
    the first content item of the response.

    Args:
        source_string: The English text to translate.
        target_language: Locale identifier of the target language (e.g. "de").
        extra_context: Optional free text appended to the system prompt under
            "Extra context:".
        comment: Optional translator comment for this string; it is quoted in
            the system prompt as an additional instruction and echoed to
            stdout.
        custom_prompt: Optional extra instructions appended right after the
            base translation rules.
        max_retries: How many times a failed request is retried (with a
            one-second pause) before giving up. Negative values are treated
            as 0.

    Returns:
        The translated text with surrounding whitespace removed.

    Raises:
        Exception: The error from the final attempt is re-raised once all
            retries are exhausted.
    """
    system_prompt = (
        "You are a professional translator for mobile apps.\n"
        "Translate the given string fragments from English to the language with the\n"
        f"locale {target_language}.\n"
        "If the string is all caps, translate it in all caps.\n"
        "If the string ends with a period, the translated string should end with a period.\n"
        "If the string ends with a comma, the translated string should end with a comma.\n"
        "If the string has brackets, keep the brackets in the translated string.\n"
        "Keep formatters like %@, %d, %f, %ld, %lld, %p, %s, %z, etc. in the translated string.\n"
        "Keep emojis as is and do not translate them.\n"
        "Keep it casual. Be culturally sensitive and adapt idioms or expressions appropriately\n"
        "for the target language. Keep it focused to be the translation of the original text,\n"
        "do not try to be creative and invent new phrases or expressions. Shorter translations\n"
        "are better. In German use 'Du' instead of the formal 'Sie'."
    )
    if custom_prompt:
        system_prompt += f" {custom_prompt}"
    system_prompt += (
        "\n"
        "Return only the translated text without quotation marks or additional\n"
        "formatting. If you can't translate, output the original text.\n"
    )
    if comment:
        system_prompt += (
            f'Additional instruction from a human translator: "{comment}". '
            "If they ask to keep the original text, just return it as is. They might also ask to "
            "not translate a specific word or phrase."
        )
    if extra_context:
        system_prompt += f"\n\nExtra context: {extra_context}"

    # Retry in a bounded loop rather than by recursion: a permanent failure
    # (for example a missing or invalid API key) must surface its real error
    # instead of retrying until the interpreter's recursion limit is hit.
    total_attempts = max(0, max_retries) + 1
    for attempt in range(1, total_attempts + 1):
        try:
            response = anthropic_client.messages.create(
                model="claude-3-5-sonnet-20240620",
                max_tokens=300,
                temperature=0.1,
                system=system_prompt,
                messages=[
                    {"role": "user", "content": source_string}
                ]
            )
            translated_text = response.content[0].text.strip()
            break
        except Exception as e:
            # Deliberately broad: any failure of the request or of reading
            # the response is treated as retryable, as before.
            print(f"Translation error: {e}")
            if attempt == total_attempts:
                raise
            print("Retrying after 1 second...")
            time.sleep(1)

    print(f"Original: {source_string}")
    print(f"{target_language}: {translated_text}")
    if comment:
        print(f"Comment: {comment}")
    return translated_text
def main(xcstrings_path, extra_context=None, custom_prompt="", override=False, update_key=None):
    """Translate the strings of an .xcstrings file into every target locale.

    Loads the JSON file at ``xcstrings_path``, walks its ``"strings"`` entries
    and, for each locale in ``LANGUAGE_IDENTIFIERS``, stores a translated
    ``stringUnit`` obtained from ``translate_string``. The file is rewritten
    after every processed key so that progress survives an interrupted run.

    Args:
        xcstrings_path: Path of the .xcstrings (JSON) file; it is modified in
            place.
        extra_context: Optional free text forwarded to ``translate_string``.
        custom_prompt: Optional extra instructions forwarded to
            ``translate_string``.
        override: When true, locales that already have an entry are
            translated again.
        update_key: When given, only this key is processed, and all of its
            locales are translated again.

    Raises:
        KeyError: If ``update_key`` is given but is not a key of the file's
            ``"strings"`` table.
    """
    with open(xcstrings_path, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # Fail before doing any work (and before touching the file) when the
    # requested key does not exist, with a message that names the problem.
    if update_key and update_key not in json_data["strings"]:
        raise KeyError(f"Key {update_key!r} not found in {xcstrings_path}")

    # Get all the keys of strings
    strings_keys = [update_key] if update_key else list(json_data["strings"].keys())
    print(f"\nFound {len(strings_keys)} keys\n")

    # Print custom prompt if provided
    if custom_prompt:
        print(f"Custom prompt: {custom_prompt}\n")

    # Both values are the same for every key and locale, so resolve them once.
    source_language = json_data["sourceLanguage"]
    force_translation = bool(override or update_key)

    # Traverse all keys
    for key_index, key in enumerate(strings_keys):
        if not key:
            continue

        now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"[{now_str}]\n", f"🔥{key_index + 1}/{len(strings_keys)}: {key}")

        strings = json_data["strings"][key]
        # The strings field is empty.
        if not strings:
            strings = {"extractionState": "manual", "localizations": {}}
        # The localizations field is empty
        if "localizations" not in strings:
            strings["localizations"] = {}
        localizations = strings["localizations"]

        # Get the English source string; the key itself is the fallback.
        source_string = localizations.get("en", {}).get("stringUnit", {}).get("value", key)
        comment = strings.get("comment")

        for language in LANGUAGE_IDENTIFIERS:
            # Keep an existing localization unless a re-translation was requested.
            if language in localizations and not force_translation:
                print(f"{language} has been translated")
                continue

            if is_info_plist:
                # InfoPlist entries are only translated when the source
                # language itself has a localization.
                if source_language not in localizations:
                    print("String is empty in source language")
                    continue
                translated_string = translate_string(
                    source_string, language, extra_context=extra_context,
                    comment=comment, custom_prompt=custom_prompt)
            elif language == source_language:
                # The source language needs no translation.
                translated_string = source_string
            else:
                translated_string = translate_string(
                    source_string, language, extra_context=extra_context,
                    comment=comment, custom_prompt=custom_prompt)

            localizations[language] = {
                "stringUnit": {
                    "state": "translated",
                    "value": translated_string,
                }
            }

        strings["localizations"] = localizations
        json_data["strings"][key] = strings

        # Save the modified JSON file after every key so that finished
        # translations are not lost if the run is interrupted.
        with open(xcstrings_path, "w", encoding='utf-8') as f:
            json.dump(json_data, ensure_ascii=False, fp=f, indent=4)
def is_infoplist(xcstrings_path):
    """Return True when the file name, without its last extension, is exactly 'InfoPlist'.

    Only the final path component is considered, and the comparison is
    case-sensitive.
    """
    filename, _ = os.path.splitext(os.path.basename(xcstrings_path))
    return filename == 'InfoPlist'
# Command-line entry point: parse the options, record whether the input is an
# InfoPlist catalog, then run the translation.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process .xcstrings file")
    parser.add_argument("--xcstrings_path", required=True, help="Path to the .xcstrings file")
    parser.add_argument("--prompt", default="",
                        help="Custom instructions appended to the translation system prompt")
    parser.add_argument("--extra_context",
                        help="Extra context about the app or its strings, passed to the translator")
    parser.add_argument("--override", action="store_true", help="Override existing translations")
    parser.add_argument("--update_key", help="Specific key to update across all languages")
    args = parser.parse_args()

    # main() reads this module-level flag, so it must be set before the call.
    is_info_plist = is_infoplist(args.xcstrings_path)
    main(args.xcstrings_path, extra_context=args.extra_context, custom_prompt=args.prompt,
         override=args.override, update_key=args.update_key)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment