# Source: GitHub Gist vpdn/d6a6ecdc779cff44508f1145b50ad2bc (by @vpdn, created October 8, 2024).
import os
import json
import datetime
import time
import anthropic
import argparse
# pip install anthropic
from anthropic import Anthropic
# Module-level configuration.

# Flag consulted by main(); the command-line entry point reassigns it from
# is_infoplist() before main() runs.
is_info_plist = False

# Locale identifiers each string is translated into, in processing order.
LANGUAGE_IDENTIFIERS = [
    "ca", "hr", "cs", "da", "nl", "fi", "fr", "de", "tr", "sv",
    "es", "ro", "pl", "it", "hu", "el", "pt-PT", "pt-BR", "sk",
]
# Shared Anthropic API client used by translate_string(); the key is read
# from the ANTHROPIC_API_KEY environment variable (None when it is unset).
anthropic_client = Anthropic(
    api_key=os.getenv("ANTHROPIC_API_KEY"),
)
# Use Claude for translation
def translate_string(source_string, target_language, extra_context=None,
                     comment=None, custom_prompt="", max_attempts=5):
    """Translate one English string into ``target_language`` using Claude.

    Builds a system prompt from the fixed translation rules plus the optional
    ``custom_prompt``, ``comment`` and ``extra_context``, sends
    ``source_string`` as the user message and returns the stripped reply.

    Args:
        source_string: English text to translate.
        target_language: Locale identifier of the target language (e.g. "de").
        extra_context: Optional free-form context appended to the system
            prompt.
        comment: Optional translator comment for this string; forwarded to
            the model as an additional instruction.
        custom_prompt: Optional extra instructions appended to the base rules.
        max_attempts: Total number of API attempts before giving up. Values
            below 1 are treated as 1.

    Returns:
        The translated text, or ``source_string`` unchanged when it is empty
        or whitespace-only.

    Raises:
        Exception: The last error raised by the API call (or by reading the
            response) once ``max_attempts`` attempts have failed.
    """
    # Nothing to translate, so skip the API round-trip entirely.
    if not source_string or not source_string.strip():
        return source_string

    system_prompt = f"""You are a professional translator for mobile apps.
Translate the given string fragments from English to the language with the
locale {target_language}.
If the string is all caps, translate it in all caps.
If the string ends with a period, the translated string should end with a period.
If the string ends with a comma, the translated string should end with a comma.
If the string has brackets, keep the brackets in the translated string.
Keep formatters like %@, %d, %f, %ld, %lld, %p, %s, %z, etc. in the translated string.
Keep emojis as is and do not translate them.
Keep it casual. Be culturally sensitive and adapt idioms or expressions appropriately
for the target language. Keep it focused to be the translation of the original text,
do not try to be creative and invent new phrases or expressions. Shorter translations
are better. In German use 'Du' instead of the formal 'Sie'."""
    if custom_prompt:
        system_prompt += f" {custom_prompt}"
    system_prompt += """
Return only the translated text without quotation marks or additional
formatting. If you can't translate, output the original text.
"""
    if comment:
        system_prompt += f"""Additional instruction from a human translator: "{comment}". \
If they ask to keep the original text, just return it as is. They might also ask to \
not translate a specific word or phrase."""
    if extra_context:
        system_prompt += f"\n\nExtra context: {extra_context}"

    # Bounded retry loop: the previous unbounded recursive retry never gave up
    # on permanent failures and eventually hit the recursion limit.
    attempts = max(1, max_attempts)
    for attempt in range(1, attempts + 1):
        try:
            response = anthropic_client.messages.create(
                model="claude-3-5-sonnet-20240620",
                max_tokens=300,
                temperature=0.1,
                system=system_prompt,
                messages=[
                    {"role": "user", "content": source_string}
                ],
            )
            translated_text = response.content[0].text.strip()
            break
        except Exception as e:
            print(f"Translation error: {e}")
            if attempt == attempts:
                # Out of attempts: surface the failure to the caller.
                raise
            # Exponential backoff starting at 1 second, capped at 30 seconds.
            delay = min(2 ** (attempt - 1), 30)
            print(f"Retrying after {delay} second(s)...")
            time.sleep(delay)

    print(f"Original: {source_string}")
    print(f"{target_language}: {translated_text}")
    if comment:
        print(f"Comment: {comment}")
    return translated_text
def main(xcstrings_path, extra_context=None, custom_prompt="", override=False, update_key=None):
    """Fill in missing translations in an .xcstrings file, saving after each key.

    Loads the JSON catalog at ``xcstrings_path`` and, for every string key and
    every locale in ``LANGUAGE_IDENTIFIERS``, stores a translation produced by
    ``translate_string`` as a ``stringUnit`` in state ``"translated"``.

    Args:
        xcstrings_path: Path to the .xcstrings JSON file; it is rewritten
            after each processed key.
        extra_context: Optional context forwarded to ``translate_string``.
        custom_prompt: Optional extra prompt text forwarded to
            ``translate_string``.
        override: When true, existing localizations are re-translated and
            replaced.
        update_key: When given, only this key is processed, and all of its
            localizations are re-translated.

    Raises:
        KeyError: If ``update_key`` is given but not present in the file.
    """
    with open(xcstrings_path, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    if update_key:
        # Fail early with a readable message instead of a bare KeyError raised
        # halfway through the progress output.
        if update_key not in json_data["strings"]:
            raise KeyError(f"Key {update_key!r} not found in {xcstrings_path}")
        strings_keys = [update_key]
    else:
        strings_keys = list(json_data["strings"])
    print(f"\nFound {len(strings_keys)} keys\n")

    # Print custom prompt if provided
    if custom_prompt:
        print(f"Custom prompt: {custom_prompt}\n")

    # Constant for the whole run, so look it up once. Falls back to "en",
    # the language the source string below is read from.
    source_language = json_data.get("sourceLanguage", "en")

    for key_index, key in enumerate(strings_keys):
        if not key:
            continue
        now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"[{now_str}]\n", f"🔥{key_index + 1}/{len(strings_keys)}: {key}")

        # An empty entry gets a fresh skeleton so it can hold localizations.
        strings = json_data["strings"][key] or {"extractionState": "manual", "localizations": {}}
        localizations = strings.setdefault("localizations", {})

        # The English value is the text to translate; the key itself is the
        # fallback when no English localization exists.
        # NOTE(review): this reads "en" even when sourceLanguage differs.
        source_string = localizations.get("en", {}).get("stringUnit", {}).get("value", key)
        comment = strings.get("comment")

        for language in LANGUAGE_IDENTIFIERS:
            if language in localizations and not override and not update_key:
                print(f"{language} has been translated")
                continue

            # InfoPlist catalogs: only translate entries that already have a
            # localization for the source language.
            if is_info_plist and source_language not in localizations:
                print("String is empty in source language")
                continue

            if not is_info_plist and language == source_language:
                # The source language needs no translation.
                translated_string = source_string
            else:
                translated_string = translate_string(
                    source_string,
                    language,
                    extra_context=extra_context,
                    comment=comment,
                    custom_prompt=custom_prompt,
                )
            localizations[language] = {
                "stringUnit": {
                    "state": "translated",
                    "value": translated_string,
                }
            }

        json_data["strings"][key] = strings
        # Save after every key so an interrupted run keeps finished work.
        _save_xcstrings(xcstrings_path, json_data)


def _save_xcstrings(xcstrings_path, json_data):
    """Write ``json_data`` to ``xcstrings_path`` via a temp file and a rename."""
    # Writing to a sibling file and renaming it over the target means an
    # interruption mid-write cannot leave a truncated catalog behind.
    tmp_path = f"{xcstrings_path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(json_data, ensure_ascii=False, fp=f, indent=4)
    os.replace(tmp_path, xcstrings_path)
def is_infoplist(xcstrings_path):
    """Return True when the file name, minus its last extension, is 'InfoPlist'."""
    base_name = os.path.split(xcstrings_path)[1]
    stem = os.path.splitext(base_name)[0]
    return stem == 'InfoPlist'
if __name__ == "__main__":
    # Command-line entry point: parse the options, record whether the target
    # is an InfoPlist catalog, then run the translation pass.
    parser = argparse.ArgumentParser(description="Process .xcstrings file")
    parser.add_argument("--xcstrings_path", required=True, help="Path to the .xcstrings file")
    # The custom prompt is appended to the system prompt, not prepended.
    parser.add_argument("--prompt", help="Custom instructions appended to the translation system prompt")
    parser.add_argument("--extra_context", help="Extra context added to the translation system prompt")
    parser.add_argument("--override", action="store_true", help="Override existing translations")
    parser.add_argument("--update_key", help="Specific key to update across all languages")
    args = parser.parse_args()
    # Module-level flag that main() reads to select InfoPlist handling.
    is_info_plist = is_infoplist(args.xcstrings_path)
    main(
        args.xcstrings_path,
        extra_context=args.extra_context,
        custom_prompt=args.prompt,
        override=args.override,
        update_key=args.update_key,
    )
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment