Last active
April 21, 2025 08:38
-
-
Save aperture147/cd9cf1a713f158bfea1c8e6877c5dade to your computer and use it in GitHub Desktop.
A simple Python script that converts an integer to its Vietnamese pronunciation. Use it to pre-process text before feeding it into a TTS model. It works best for numbers below 1 trillion (since I have no clue how numbers beyond 1 trillion are pronounced in Vietnamese).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
# Units-digit words used after a tens word of twenty or more ("... mươi"):
# 1 -> 'mốt', 4 -> 'tư', 5 -> 'lăm'. Index 0 is empty because a trailing zero
# is silent ("hai mươi", not "hai mươi không").
rule_1_pronunciation_list = ['', 'mốt', 'hai', 'ba', 'tư', 'lăm', 'sáu', 'bảy', 'tám', 'chín']
# Plain digit words, used for hundreds, tens, stand-alone units and after 'lẻ'.
rule_2_pronunciation_list = ['không', 'một', 'hai', 'ba', 'bốn', 'năm', 'sáu', 'bảy', 'tám', 'chín']

# TODO: decimal numbers are not supported yet; the original author was unsure
# how to pronounce them correctly in Vietnamese.


def _read_group(group, pad_hundreds=False):
    """Return the words for one three-digit group (1..999).

    When *pad_hundreds* is true a zero hundreds digit is voiced as
    'không trăm'; this is needed for groups that follow a more significant
    group (1001 -> 'một nghìn không trăm lẻ một').
    """
    hundreds, rest = divmod(group, 100)
    tens, units = divmod(rest, 10)
    words = []
    if hundreds or pad_hundreds:
        words.append(f'{rule_2_pronunciation_list[hundreds]} trăm')
    if tens == 0:
        if units:
            # 'lẻ' bridges a silent tens digit, so it only appears when a
            # hundreds word precedes it; the unit keeps its plain form.
            if words:
                words.append('lẻ')
            words.append(rule_2_pronunciation_list[units])
    elif tens == 1:
        # The teens use 'mười' regardless of the hundreds digit, and only
        # 5 changes form ('mười lăm'); 1 and 4 stay 'một' / 'bốn'.
        words.append('mười')
        if units:
            words.append('lăm' if units == 5 else rule_2_pronunciation_list[units])
    else:
        words.append(f'{rule_2_pronunciation_list[tens]} mươi')
        if units:
            words.append(rule_1_pronunciation_list[units])
    return ' '.join(words)


def _read_below_billion(n, pad_leading=False):
    """Return the words for 1 <= n < 10**9, skipping all-zero groups.

    *pad_leading* forces 'không trăm' padding on the first voiced group; it
    is set when a 'tỷ' part has already been read in front of this value.
    """
    groups = (
        (n // 1_000_000, 'triệu'),
        (n // 1000 % 1000, 'nghìn'),
        (n % 1000, ''),
    )
    words = []
    for value, suffix in groups:
        if not value:
            continue  # an all-zero group is silent, including its scale word
        words.append(_read_group(value, pad_hundreds=pad_leading or bool(words)))
        if suffix:
            words.append(suffix)
    return ' '.join(words)


def number_to_vietnamese(number):
    """Return the Vietnamese reading of the integer *number*.

    Zero reads as 'không' and negative values are prefixed with 'âm'.
    Values of 10**9 and above are read as '<billions> tỷ <remainder>', which
    yields 'một nghìn tỷ' for 10**12. Only integer arithmetic is used, so
    arbitrarily large integers are split into groups exactly.

    Raises:
        TypeError: if *number* is not an int.
    """
    if not isinstance(number, int):
        raise TypeError(f'expected int, got {type(number).__name__}')
    if number < 0:
        return f'âm {number_to_vietnamese(-number)}'
    if number == 0:
        return rule_2_pronunciation_list[0]
    billions, remainder = divmod(number, 1_000_000_000)
    words = []
    if billions:
        words.append(number_to_vietnamese(billions))
        words.append('tỷ')
    if remainder:
        words.append(_read_below_billion(remainder, pad_leading=bool(billions)))
    return ' '.join(words)


# Demo kept from the original script: read a sample number and print it.
a = 1234567890
result = number_to_vietnamese(a)
print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment