Last active
November 16, 2024 18:08
-
-
Save toriato/0fcfd4bf6523098d1a02804f9b270750 to your computer and use it in GitHub Desktop.
Extract or translate a VDF key-value file (for closed captions, etc.)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# (required) pip install vdf
# (optional) pip install openai
#
# Usage:
# python closecaption_tool.py "closecaption_english.txt" "closecaption_korean.txt"
from typing import Dict | |
from dataclasses import dataclass | |
from argparse import ArgumentParser | |
from pathlib import Path | |
from copy import deepcopy | |
import json | |
import vdf | |
@dataclass | |
class ArgumentInterface: | |
source_path: Path | |
target_path: Path | |
export_missing_tokens_to: Path | |
export_translated_to: Path | |
openai_api_key: str | |
openai_model: str | |
openai_instruction: str | |
# System prompt used when --openai-instruction is not given; surrounding
# whitespace from the triple-quoted literal is stripped.
DEFAULT_OPENAI_INSTRUCTION = '''
You are a tool that translates English into Korean.
- You must never modify symbols such as quotation marks or parentheses.
- Do not translate proper nouns used in names, games, or works; leave the original text as is.
- Use a soft tone that explains things in detail, rather than a rigid tone.
- Return only the translated text.
'''.strip()

parser = ArgumentParser()

# Two required positional paths, in this order.
for positional_name in ('source_path', 'target_path'):
    parser.add_argument(positional_name, type=Path)

# Optional export destinations.
parser.add_argument('--export-missing-tokens-to', type=Path)
parser.add_argument('--export-translated-to', type=Path, default='translated.txt')

# Optional OpenAI settings.
parser.add_argument('--openai-api-key', type=str)
parser.add_argument('--openai-model', type=str, default='gpt-4o')
parser.add_argument('--openai-instruction', type=str, default=DEFAULT_OPENAI_INSTRUCTION)

# Unpack the parsed namespace into the typed dataclass.
parsed_namespace = parser.parse_args()
args = ArgumentInterface(**vars(parsed_namespace))
def export (path: Path, content: Dict): | |
with path.open('w', encoding='utf-8') as fp: | |
match path.suffix.lower(): | |
case '.json': | |
raw = json.dumps(content, indent=4, ensure_ascii=False) | |
case '.txt': | |
raw = vdf.dumps(content) | |
case _: | |
raise ValueError(f'unsupported export extension, only .json and .txt allowed') | |
fp.write(raw) | |
# Parse the reference (source) file and the file to compare it against.
# Both are decoded as UTF-16 little-endian.
with args.source_path.open('r', encoding='utf-16-le') as source_file:
    base = vdf.load(source_file)

with args.target_path.open('r', encoding='utf-16-le') as target_file:
    target = vdf.load(target_file)

# Collect every token defined in the source but absent from the target.
missing_tokens = {}
for key, value in base['lang']['Tokens'].items():
    if key in target['lang']['Tokens']:
        continue
    missing_tokens[key] = value

# Dump the missing tokens only when a destination was requested.
if args.export_missing_tokens_to:
    export(args.export_missing_tokens_to, missing_tokens)
# Translate the missing tokens through the OpenAI chat API and export the
# target merged with the translations. Runs only when an API key was given.
if args.openai_api_key:
    # Imported lazily so the optional `openai` package is only required
    # when translation is actually requested.
    from openai import OpenAI

    client = OpenAI(api_key=args.openai_api_key)
    length = len(missing_tokens)

    # Work on a copy so `target` keeps the contents loaded from disk.
    translated = deepcopy(target)

    # Built outside the f-string: reusing the enclosing quote character
    # inside an f-string expression is a syntax error before Python 3.12.
    rule = '=' * 20

    for index, (key, value) in enumerate(missing_tokens.items(), start=1):
        # One request per token: the instruction is the system message and
        # the untranslated text is the user message.
        response = client.chat.completions.create(
            model=args.openai_model,
            messages=[
                {
                    'role': 'system',
                    'content': args.openai_instruction,
                },
                {
                    'role': 'user',
                    'content': value,
                },
            ],
        )
        response_value = response.choices[0].message.content
        print(f'{rule}[ {index} out of {length} ]{rule}\n{value}\n{response_value}\n')

        # The API may return a message without text content; skip it rather
        # than storing a non-string value in the exported token table.
        if response_value is None:
            continue

        translated['lang']['Tokens'][key] = response_value

    export(args.export_translated_to, translated)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment