Skip to content

Instantly share code, notes, and snippets.

@VovaStelmashchuk
Created July 13, 2024 08:37
Show Gist options
  • Save VovaStelmashchuk/935d7cf37f88feba0e9ffaa88f61228b to your computer and use it in GitHub Desktop.
Save VovaStelmashchuk/935d7cf37f88feba0e9ffaa88f61228b to your computer and use it in GitHub Desktop.
Correct Ukrainian slug generator in Kotlin, based on <add link here>
import java.util.Locale
/**
 * Builds a URL-friendly slug from [str].
 *
 * The text is transliterated with [translit], lower-cased using the English locale,
 * every character outside `a-z`, `A-Z`, `0-9` is turned into a hyphen, runs of hyphens
 * are collapsed into one, and hyphens at either end are stripped.
 *
 * @param str text to convert.
 * @return the slug; empty when [str] contains no ASCII letters or digits after transliteration.
 */
fun slug(str: String): String {
    val latin = translit(str).lowercase(Locale.ENGLISH)
    // Anything that is not an ASCII letter or digit becomes a separator.
    val hyphenated = latin.replace(Regex("[^a-zA-Z0-9]"), "-")
    // Adjacent separators are merged so the slug never contains "--".
    val collapsed = hyphenated.replace(Regex("-+"), "-")
    return collapsed.trim('-')
}
/**
 * Transliterates Ukrainian Cyrillic text into Latin letters.
 *
 * Each character is converted on its own by [translitChar], which is given the character
 * immediately preceding it so that position-dependent letters (є, ї, й, ю, я and г after з)
 * can be rendered correctly.
 *
 * @param str text to transliterate; may be empty.
 * @return the transliterated text. Characters without a mapping are copied unchanged;
 * the soft sign and apostrophes are removed.
 */
fun translit(str: String): String =
    str.mapIndexed { index, char ->
        // getOrNull(-1) yields null for the first character, marking the start of the text.
        translitChar(char, str.getOrNull(index - 1))
    }.joinToString("")

/** Returns true for the ASCII apostrophe and the typographic variants U+2019 and U+02BC. */
private fun isApostrophe(char: Char): Boolean =
    char == '\'' || char == '\u2019' || char == '\u02BC'

/**
 * Transliterates a single character.
 *
 * The mapping appears to follow the Ukrainian national Latin transliteration table:
 * є, ї, й, ю, я have a "y"-form at the start of a word and an "i"-form elsewhere,
 * and г is written "gh" after з (to keep "zgh" distinct from "zh" = ж).
 *
 * @param current the character to convert.
 * @param previous the character right before [current], or `null` at the start of the text.
 * @return the Latin replacement (possibly empty, possibly several letters), or [current]
 * itself when there is no mapping for it.
 */
@Suppress("ComplexMethod", "LongMethod")
private fun translitChar(current: Char, previous: Char?): String {
    // A word starts at the beginning of the text or after any non-letter (space, newline,
    // hyphen, bracket, ...). An apostrophe sits inside a word ("Знам'янка"), so it does not
    // start a new one.
    val isStartOfWord = previous == null || !(previous.isLetter() || isApostrophe(previous))
    val followsZe = previous == 'з' || previous == 'З'
    return when (current) {
        ' ' -> " "
        // The soft sign and every apostrophe variant are dropped.
        'ь', 'Ь', '\'', '\u2019', '\u02BC' -> ""
        'а' -> "a"
        'А' -> "A"
        'б' -> "b"
        'Б' -> "B"
        'в' -> "v"
        'В' -> "V"
        'г' -> if (followsZe) "gh" else "h"
        'Г' -> if (followsZe) "Gh" else "H"
        'ґ' -> "g"
        'Ґ' -> "G"
        'д' -> "d"
        'Д' -> "D"
        'е' -> "e"
        'Е' -> "E"
        'є' -> if (isStartOfWord) "ye" else "ie"
        'Є' -> if (isStartOfWord) "Ye" else "Ie"
        'ж' -> "zh"
        'Ж' -> "Zh"
        'з' -> "z"
        'З' -> "Z"
        'и' -> "y"
        'И' -> "Y"
        'і' -> "i"
        'І' -> "I"
        // ї follows the same positional rule as the other iotated letters, in both cases.
        'ї' -> if (isStartOfWord) "yi" else "i"
        'Ї' -> if (isStartOfWord) "Yi" else "I"
        'й' -> if (isStartOfWord) "y" else "i"
        'Й' -> if (isStartOfWord) "Y" else "I"
        'к' -> "k"
        'К' -> "K"
        'л' -> "l"
        'Л' -> "L"
        'м' -> "m"
        'М' -> "M"
        'н' -> "n"
        'Н' -> "N"
        'о' -> "o"
        'О' -> "O"
        'п' -> "p"
        'П' -> "P"
        'р' -> "r"
        'Р' -> "R"
        'с' -> "s"
        'С' -> "S"
        'т' -> "t"
        'Т' -> "T"
        'у' -> "u"
        'У' -> "U"
        'ф' -> "f"
        'Ф' -> "F"
        'х' -> "kh"
        'Х' -> "Kh"
        'ц' -> "ts"
        'Ц' -> "Ts"
        'ч' -> "ch"
        'Ч' -> "Ch"
        'ш' -> "sh"
        'Ш' -> "Sh"
        'щ' -> "shch"
        'Щ' -> "Shch"
        'ю' -> if (isStartOfWord) "yu" else "iu"
        'Ю' -> if (isStartOfWord) "Yu" else "Iu"
        'я' -> if (isStartOfWord) "ya" else "ia"
        'Я' -> if (isStartOfWord) "Ya" else "Ia"
        // Anything else (Latin letters, digits, punctuation) passes through untouched.
        else -> current.toString()
    }
}
import io.kotest.core.spec.style.FunSpec
import io.kotest.matchers.shouldBe
/**
 * Table-driven spec for `translit` and `slug`.
 *
 * One test is registered per (input, expected) pair, so a failure report names the
 * exact word that was transliterated incorrectly.
 */
class TransliterationKtTest : FunSpec({
    // Ukrainian input paired with the Latin text `translit` must produce.
    val transliterationCases = listOf(
        "Борщагівка" to "Borshchahivka",
        "Борисенко" to "Borysenko",
        "Вінниця" to "Vinnytsia",
        "Володимир" to "Volodymyr",
        "Гадяч" to "Hadiach",
        "Богдан" to "Bohdan",
        "Згурський" to "Zghurskyi",
        "Ґалаґан" to "Galagan",
        "Ґорґани" to "Gorgany",
        "Донецьк" to "Donetsk",
        "Дмитро" to "Dmytro",
        "Рівне" to "Rivne",
        "Олег" to "Oleh",
        "Есмань" to "Esman",
        "Єнакієве" to "Yenakiieve",
        "Гаєвич" to "Haievych",
        "Короп'є" to "Koropie",
        "Житомир" to "Zhytomyr",
        "Жанна" to "Zhanna",
        "Жежелів" to "Zhezheliv",
        "Закарпаття" to "Zakarpattia",
        "Казимирчук" to "Kazymyrchuk",
        "Медвин" to "Medvyn",
        "Михайленко" to "Mykhailenko",
        "Іванків" to "Ivankiv",
        "Іващенко" to "Ivashchenko",
        "Їжакевич" to "Yizhakevych",
        "Кадиївка" to "Kadyivka",
        "Мар'їне" to "Marine",
        "Йосипівка" to "Yosypivka",
        "Стрий" to "Stryi",
        "Олексій" to "Oleksii",
        "Київ" to "Kyiv",
        "Коваленко" to "Kovalenko",
        "Лебедин" to "Lebedyn",
        "Леонід" to "Leonid",
        "Миколаїв" to "Mykolaiv",
        "Маринич" to "Marynych",
        "Ніжин" to "Nizhyn",
        "Наталія" to "Nataliia",
        "Одеса" to "Odesa",
        "Онищенко" to "Onyshchenko",
        "Полтава" to "Poltava",
        "Петро" to "Petro",
        "Решетилівка" to "Reshetylivka",
        "Рибчинський" to "Rybchynskyi",
        "Суми" to "Sumy",
        "Соломія" to "Solomiia",
        "Тернопіль" to "Ternopil",
        "Троць" to "Trots",
        "Ужгород" to "Uzhhorod",
        "Уляна" to "Uliana",
        "Фастів" to "Fastiv",
        "Філіпчук" to "Filipchuk",
        "Харків" to "Kharkiv",
        "Христина" to "Khrystyna",
        "Біла Церква" to "Bila Tserkva",
        "Стеценко" to "Stetsenko",
        "Чернівці" to "Chernivtsi",
        "Шевченко" to "Shevchenko",
        "Шостка" to "Shostka",
        "Кишеньки" to "Kyshenky",
        "Щербухи" to "Shcherbukhy",
        "Гоща" to "Hoshcha",
        "Гаращенко" to "Harashchenko",
        "Юрій" to "Yurii",
        "Корюківка" to "Koriukivka",
        "Яготин" to "Yahotyn",
        "Ярошенко" to "Yaroshenko",
        "Костянтин" to "Kostiantyn",
        "Знам'янка" to "Znamianka",
        "Феодосія" to "Feodosiia",
        "Гусятин" to "Husiatyn",
        "пеніцелін" to "penitselin",
        "Згорани" to "Zghorany",
        "Розгон" to "Rozghon",
        "Скритна леді" to "Skrytna ledi",
        "Яготин Ярошенко" to "Yahotyn Yaroshenko",
    )

    for ((source, expected) in transliterationCases) {
        test("Transliteration of $source is $expected") {
            translit(source) shouldBe expected
        }
    }

    // Ukrainian input paired with the slug `slug` must produce.
    val slugCases = listOf(
        "Шен Пуер" to "shen-puer",
        "Вид пуеру" to "vyd-pueru",
        "Смола Пуеру (Ча Гао)" to "smola-pueru-cha-hao",
        "Мініточа (таблетки)" to "minitocha-tabletky",
        "Пробники" to "probnyky",
        "Підбірки" to "pidbirky",
    )

    for ((source, expected) in slugCases) {
        test("Slug of $source is $expected") {
            slug(source) shouldBe expected
        }
    }
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment