Created
July 13, 2024 08:37
-
-
Save VovaStelmashchuk/935d7cf37f88feba0e9ffaa88f61228b to your computer and use it in GitHub Desktop.
A correct Ukrainian slug generator in Kotlin, based on <add link here>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Locale | |
/**
 * Builds a URL-friendly slug from [str].
 *
 * The text is transliterated with [translit], lowercased using [Locale.ENGLISH],
 * every character outside ASCII letters and digits is turned into a dash, runs of
 * dashes are collapsed into one, and dashes at either end are removed.
 *
 * Example: "Смола Пуеру (Ча Гао)" becomes "smola-pueru-cha-hao".
 *
 * @param str text to convert; may contain Ukrainian and Latin characters.
 * @return the slug, made only of lowercase ASCII letters, digits and single dashes.
 */
fun slug(str: String): String {
    val latin = translit(str).lowercase(Locale.ENGLISH)
    // Anything that is not an ASCII letter or digit acts as a separator.
    val dashed = latin.replace(Regex("[^a-zA-Z0-9]"), "-")
    // Adjacent separators must not produce "--" in the slug.
    val collapsed = dashed.replace(Regex("-+"), "-")
    return collapsed.trim('-')
}
/**
 * Transliterates [str] character by character.
 *
 * Each character is converted by `tranlit`, which also receives the character
 * immediately before it (or `null` for the first one) so that position-dependent
 * letters can be rendered correctly. The converted pieces are concatenated in order.
 *
 * @param str text to transliterate.
 * @return the concatenation of the per-character conversions.
 */
fun translit(str: String): String =
    str.indices.joinToString(separator = "") { position ->
        tranlit(str[position], str.getOrNull(position - 1))
    }
/** Apostrophe characters seen in Ukrainian text: ASCII ', U+2019 and U+02BC. */
private const val APOSTROPHES = "'\u2019\u02BC"

/**
 * Reports whether a letter preceded by [previous] stands at the start of a word.
 *
 * A word starts at the beginning of the input (`null`) or after any character that
 * is neither a letter nor an apostrophe: whitespace, brackets, quotes, hyphens and
 * so on. Apostrophes are excluded because they occur inside words ("Знам'янка").
 */
private fun startsWord(previous: Char?): Boolean =
    previous == null || !(previous.isLetter() || previous in APOSTROPHES)

/**
 * Converts a single Ukrainian character to its Latin rendering.
 *
 * Letters whose rendering depends on position ('є', 'ї', 'й', 'ю', 'я') use the
 * "y..." form at the start of a word and the "i..." form elsewhere. The soft sign
 * and apostrophes produce an empty string. Characters without a mapping are
 * returned unchanged.
 *
 * @param current the character to convert.
 * @param previous the character right before [current], or `null` when [current]
 *   is the first character of the input.
 * @return the Latin rendering of [current]; may be empty or several characters long.
 */
@Suppress("ComplexMethod", "LongMethod")
private fun tranlit(current: Char, previous: Char?): String {
    val isStartOfWord = startsWord(previous)
    // "з" + "г" rendered plainly would read "zh", which is already the rendering of
    // "ж"; after "з" the letter "г" therefore becomes "gh".
    val followsZe = previous == 'з' || previous == 'З'
    return when (current) {
        ' ' -> " "
        'ь', 'Ь' -> ""
        in APOSTROPHES -> ""
        'а' -> "a"
        'А' -> "A"
        'б' -> "b"
        'Б' -> "B"
        'в' -> "v"
        'В' -> "V"
        'г' -> if (followsZe) "gh" else "h"
        'Г' -> if (followsZe) "Gh" else "H"
        'ґ' -> "g"
        'Ґ' -> "G"
        'д' -> "d"
        'Д' -> "D"
        'е' -> "e"
        'Е' -> "E"
        'є' -> if (isStartOfWord) "ye" else "ie"
        'Є' -> if (isStartOfWord) "Ye" else "Ie"
        'ж' -> "zh"
        'Ж' -> "Zh"
        'з' -> "z"
        'З' -> "Z"
        'и' -> "y"
        'И' -> "Y"
        'і' -> "i"
        'І' -> "I"
        // "ї" follows the same positional rule as the other iotated letters.
        'ї' -> if (isStartOfWord) "yi" else "i"
        'Ї' -> if (isStartOfWord) "Yi" else "I"
        'й' -> if (isStartOfWord) "y" else "i"
        'Й' -> if (isStartOfWord) "Y" else "I"
        'к' -> "k"
        'К' -> "K"
        'л' -> "l"
        'Л' -> "L"
        'м' -> "m"
        'М' -> "M"
        'н' -> "n"
        'Н' -> "N"
        'о' -> "o"
        'О' -> "O"
        'п' -> "p"
        'П' -> "P"
        'р' -> "r"
        'Р' -> "R"
        'с' -> "s"
        'С' -> "S"
        'т' -> "t"
        'Т' -> "T"
        'у' -> "u"
        'У' -> "U"
        'ф' -> "f"
        'Ф' -> "F"
        'х' -> "kh"
        'Х' -> "Kh"
        'ц' -> "ts"
        'Ц' -> "Ts"
        'ч' -> "ch"
        'Ч' -> "Ch"
        'ш' -> "sh"
        'Ш' -> "Sh"
        'щ' -> "shch"
        'Щ' -> "Shch"
        'ю' -> if (isStartOfWord) "yu" else "iu"
        'Ю' -> if (isStartOfWord) "Yu" else "Iu"
        'я' -> if (isStartOfWord) "ya" else "ia"
        'Я' -> if (isStartOfWord) "Ya" else "Ia"
        // Anything else (Latin letters, digits, punctuation) passes through untouched.
        else -> current.toString()
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io.kotest.core.spec.style.FunSpec | |
import io.kotest.matchers.shouldBe | |
/**
 * Table-driven spec for [translit] and [slug].
 *
 * One test is registered per table row, named after the input and the expected
 * output, so a failing row is identifiable from the test name alone.
 */
class TransliterationKtTest : FunSpec({
    // Ukrainian input paired with the expected Latin transliteration.
    val transliterationCases = listOf(
        "Борщагівка" to "Borshchahivka",
        "Борисенко" to "Borysenko",
        "Вінниця" to "Vinnytsia",
        "Володимир" to "Volodymyr",
        "Гадяч" to "Hadiach",
        "Богдан" to "Bohdan",
        "Згурський" to "Zghurskyi",
        "Ґалаґан" to "Galagan",
        "Ґорґани" to "Gorgany",
        "Донецьк" to "Donetsk",
        "Дмитро" to "Dmytro",
        "Рівне" to "Rivne",
        "Олег" to "Oleh",
        "Есмань" to "Esman",
        "Єнакієве" to "Yenakiieve",
        "Гаєвич" to "Haievych",
        "Короп'є" to "Koropie",
        "Житомир" to "Zhytomyr",
        "Жанна" to "Zhanna",
        "Жежелів" to "Zhezheliv",
        "Закарпаття" to "Zakarpattia",
        "Казимирчук" to "Kazymyrchuk",
        "Медвин" to "Medvyn",
        "Михайленко" to "Mykhailenko",
        "Іванків" to "Ivankiv",
        "Іващенко" to "Ivashchenko",
        "Їжакевич" to "Yizhakevych",
        "Кадиївка" to "Kadyivka",
        "Мар'їне" to "Marine",
        "Йосипівка" to "Yosypivka",
        "Стрий" to "Stryi",
        "Олексій" to "Oleksii",
        "Київ" to "Kyiv",
        "Коваленко" to "Kovalenko",
        "Лебедин" to "Lebedyn",
        "Леонід" to "Leonid",
        "Миколаїв" to "Mykolaiv",
        "Маринич" to "Marynych",
        "Ніжин" to "Nizhyn",
        "Наталія" to "Nataliia",
        "Одеса" to "Odesa",
        "Онищенко" to "Onyshchenko",
        "Полтава" to "Poltava",
        "Петро" to "Petro",
        "Решетилівка" to "Reshetylivka",
        "Рибчинський" to "Rybchynskyi",
        "Суми" to "Sumy",
        "Соломія" to "Solomiia",
        "Тернопіль" to "Ternopil",
        "Троць" to "Trots",
        "Ужгород" to "Uzhhorod",
        "Уляна" to "Uliana",
        "Фастів" to "Fastiv",
        "Філіпчук" to "Filipchuk",
        "Харків" to "Kharkiv",
        "Христина" to "Khrystyna",
        "Біла Церква" to "Bila Tserkva",
        "Стеценко" to "Stetsenko",
        "Чернівці" to "Chernivtsi",
        "Шевченко" to "Shevchenko",
        "Шостка" to "Shostka",
        "Кишеньки" to "Kyshenky",
        "Щербухи" to "Shcherbukhy",
        "Гоща" to "Hoshcha",
        "Гаращенко" to "Harashchenko",
        "Юрій" to "Yurii",
        "Корюківка" to "Koriukivka",
        "Яготин" to "Yahotyn",
        "Ярошенко" to "Yaroshenko",
        "Костянтин" to "Kostiantyn",
        "Знам'янка" to "Znamianka",
        "Феодосія" to "Feodosiia",
        "Гусятин" to "Husiatyn",
        "пеніцелін" to "penitselin",
        "Згорани" to "Zghorany",
        "Розгон" to "Rozghon",
        "Скритна леді" to "Skrytna ledi",
        "Яготин Ярошенко" to "Yahotyn Yaroshenko",
    )
    for ((ukraine, latin) in transliterationCases) {
        test("Transliteration of $ukraine is $latin") {
            translit(ukraine) shouldBe latin
        }
    }

    // Ukrainian input paired with the expected slug.
    val slugCases = listOf(
        "Шен Пуер" to "shen-puer",
        "Вид пуеру" to "vyd-pueru",
        "Смола Пуеру (Ча Гао)" to "smola-pueru-cha-hao",
        "Мініточа (таблетки)" to "minitocha-tabletky",
        "Пробники" to "probnyky",
        "Підбірки" to "pidbirky",
    )
    for ((ukraine, expectedSlug) in slugCases) {
        test("Slug of $ukraine is $expectedSlug") {
            slug(ukraine) shouldBe expectedSlug
        }
    }
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment