Skip to content

Instantly share code, notes, and snippets.

@toriato
Last active November 16, 2024 18:08
Show Gist options
  • Save toriato/0fcfd4bf6523098d1a02804f9b270750 to your computer and use it in GitHub Desktop.
Save toriato/0fcfd4bf6523098d1a02804f9b270750 to your computer and use it in GitHub Desktop.
Extract or translate VDF key-value files (for closed captions, etc.)
# (required) pip install vdf
# (optional) pip install openai
#
# Usage:
# python closecaption_tool.py "closecaption_english.txt" "closecaption_korean.txt"
from typing import Dict
from dataclasses import dataclass
from argparse import ArgumentParser
from pathlib import Path
from copy import deepcopy
import json
import vdf
@dataclass
class ArgumentInterface:
source_path: Path
target_path: Path
export_missing_tokens_to: Path
export_translated_to: Path
openai_api_key: str
openai_model: str
openai_instruction: str
# System prompt used when --openai-instruction is not supplied; the blank
# lines framing the literal are removed by strip().
_DEFAULT_OPENAI_INSTRUCTION = '''
You are a tool that translates English into Korean.
- You must never modify symbols such as quotation marks or parentheses.
- Do not translate proper nouns used in names, games, or works; leave the original text as is.
- Use a soft tone that explains things in detail, rather than a rigid tone.
- Return only the translated text.
'''.strip()

# Arguments in registration order: the two positional paths, then the options.
_ARGUMENT_SPECS = (
    ('source_path', {'type': Path}),
    ('target_path', {'type': Path}),
    ('--export-missing-tokens-to', {'type': Path}),
    ('--export-translated-to', {'type': Path, 'default': 'translated.txt'}),
    ('--openai-api-key', {'type': str}),
    ('--openai-model', {'type': str, 'default': 'gpt-4o'}),
    ('--openai-instruction', {'type': str, 'default': _DEFAULT_OPENAI_INSTRUCTION}),
)

parser = ArgumentParser()
for _name, _options in _ARGUMENT_SPECS:
    parser.add_argument(_name, **_options)

# Parse the command line once and expose the result through the typed container.
_namespace = parser.parse_args()
args = ArgumentInterface(**vars(_namespace))
def export(path: Path, content: Dict) -> None:
    """Serialize *content* and write it to *path* as UTF-8 text.

    The output format is selected from the file extension, compared
    case-insensitively: ``.json`` is written with ``json.dumps`` (4-space
    indent, non-ASCII characters kept as-is) and ``.txt`` with ``vdf.dumps``.

    Args:
        path: Destination file; its suffix decides the format.
        content: Mapping to serialize.

    Raises:
        ValueError: If the suffix is neither ``.json`` nor ``.txt``.
    """
    # Serialize before opening the file: opening in 'w' mode truncates, so an
    # unsupported extension or a serialization failure must be detected first
    # to avoid leaving an empty file behind.
    suffix = path.suffix.lower()
    if suffix == '.json':
        raw = json.dumps(content, indent=4, ensure_ascii=False)
    elif suffix == '.txt':
        raw = vdf.dumps(content)
    else:
        raise ValueError('unsupported export extension, only .json and .txt allowed')

    with path.open('w', encoding='utf-8') as fp:
        fp.write(raw)
# Load both VDF files; they are decoded as UTF-16 LE.
with args.source_path.open('r', encoding='utf-16-le') as fp:
    base = vdf.load(fp)
with args.target_path.open('r', encoding='utf-16-le') as fp:
    target = vdf.load(fp)

# Tokens that exist in the source file but have no entry in the target file.
target_tokens = target['lang']['Tokens']
missing_tokens = {
    key: value
    for key, value in base['lang']['Tokens'].items()
    if key not in target_tokens
}

if args.export_missing_tokens_to:
    export(args.export_missing_tokens_to, missing_tokens)

if args.openai_api_key:
    # Imported here so the optional dependency is only required when translating.
    from openai import OpenAI

    client = OpenAI(api_key=args.openai_api_key)
    length = len(missing_tokens)
    # Work on a copy so the loaded target data stays untouched.
    translated = deepcopy(target)
    # Hoisted so the progress banner does not need nested same-type quotes
    # inside the f-string (which only parses on Python 3.12+).
    separator = '=' * 20

    try:
        for index, (key, value) in enumerate(missing_tokens.items(), 1):
            response = client.chat.completions.create(
                model=args.openai_model,
                messages=[
                    {
                        'role': 'system',
                        'content': args.openai_instruction
                    },
                    {
                        'role': 'user',
                        'content': value
                    }
                ]
            )

            response_value = response.choices[0].message.content
            print(f'{separator}[ {index} out of {length} ]{separator}\n{value}\n{response_value}\n')

            # A completion without text content must not be stored as a
            # translation; leave the token missing so a later run retries it.
            if response_value is None:
                continue

            translated['lang']['Tokens'][key] = response_value
    finally:
        # Write whatever has been translated so far, even when a request
        # fails or the run is interrupted, so finished work is not lost.
        export(args.export_translated_to, translated)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment