Skip to content

Instantly share code, notes, and snippets.

@Gh61
Last active August 18, 2025 14:39
Show Gist options
  • Save Gh61/04c02930ad5974f89786a32d8e7f2a34 to your computer and use it in GitHub Desktop.
Save Gh61/04c02930ad5974f89786a32d8e7f2a34 to your computer and use it in GitHub Desktop.
C# class for transcribing text into GSM 7-bit supported characters. (If used as an SMS text, provides the possibility to use all 160 characters).
/// <summary>
/// Provides methods for working with text in the GSM 7-bit format, including character validation and text transcription.
/// </summary>
public static class Gsm7Text
{
#region Definitions
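/// <summary>
/// Definitions of the GSM 7-bit characters (basic table plus the escaped characters), each listed together with the
/// alternative characters that are transcribed to it.
/// </summary>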
private static readonly List<Gsm7Char> chars = new List<Gsm7Char>()
{
// Source: https://www.codesegment.com/GSM-alphabet.htm
new Gsm7Char("00", '@', "COMMERCIAL AT", '@', '@'),
new Gsm7Char("01", '£', "POUND SIGN", '£', '£'),
new Gsm7Char("02", '$', "DOLLAR SIGN", '$', '$'),
new Gsm7Char("03", '¥', "YEN SIGN", '¥', '¥'),
new Gsm7Char("04", 'è', "LATIN SMALL LETTER E WITH GRAVE", 'è'),
new Gsm7Char("05", 'é', "LATIN SMALL LETTER E WITH ACUTE", 'é'),
new Gsm7Char("06", 'ù', "LATIN SMALL LETTER U WITH GRAVE", 'ù'),
new Gsm7Char("07", 'ì', "LATIN SMALL LETTER I WITH GRAVE", 'ì'),
new Gsm7Char("08", 'ò', "LATIN SMALL LETTER O WITH GRAVE", 'ò'),
new Gsm7Char("09", 'ç', "LATIN SMALL LETTER C WITH CEDILLA", 'ç', 'Ç'),
new Gsm7Char("0A", '\n', "LINE FEED", '\n'),
new Gsm7Char("0B", 'Ø', "LATIN CAPITAL LETTER O WITH STROKE", 'Ø', 'Ǿ'),
new Gsm7Char("0C", 'ø', "LATIN SMALL LETTER O WITH STROKE", 'ø', 'ǿ'),
new Gsm7Char("0D", '\r', "CARRIAGE RETURN", '\r'),
new Gsm7Char("0E", 'Å', "LATIN CAPITAL LETTER A WITH RING ABOVE", 'Å', 'Ǻ'),
new Gsm7Char("0F", 'å', "LATIN SMALL LETTER A WITH RING ABOVE", 'å', 'ǻ'),
new Gsm7Char("10", 'Δ', "GREEK CAPITAL LETTER DELTA", 'Δ', 'δ'),
new Gsm7Char("11", '_', "LOW LINE", '_', '_', '¯'),
new Gsm7Char("12", 'Φ', "GREEK CAPITAL LETTER PHI", 'Φ', 'ɸ', 'φ', 'ϕ', 'ᵩ'),
new Gsm7Char("13", 'Γ', "GREEK CAPITAL LETTER GAMMA", 'Γ', 'Ɣ', 'ɣ', 'γ', 'ᴦ', 'ᵧ'),
new Gsm7Char("14", 'Λ', "GREEK CAPITAL LETTER LAMBDA", 'Λ', 'ƛ', 'λ', 'ᴧ'),
new Gsm7Char("15", 'Ω', "GREEK CAPITAL LETTER OMEGA", 'Ω', 'ɷ', 'Ώ', 'ω', 'ώ', 'ὠ', 'ὡ', 'ὢ', 'ὣ', 'ὤ', 'ὥ', 'ὦ', 'ὧ', 'Ὠ', 'Ὡ', 'Ὢ', 'Ὣ', 'Ὤ', 'Ὥ', 'Ὦ', 'Ὧ', 'ὼ', 'ώ', 'ᾠ', 'ᾡ', 'ᾢ', 'ᾣ', 'ᾤ', 'ᾥ', 'ᾦ', 'ᾧ', 'ᾨ', 'ᾩ', 'ᾪ', 'ᾫ', 'ᾬ', 'ᾭ', 'ᾮ', 'ᾯ', 'ῲ', 'ῳ', 'ῴ', 'ῶ', 'ῷ', 'Ὼ', 'Ώ', 'ῼ'),
new Gsm7Char("16", 'Π', "GREEK CAPITAL LETTER PI", 'Π', 'π', 'ϖ', 'ᴨ'),
new Gsm7Char("17", 'Ψ', "GREEK CAPITAL LETTER PSI", 'Ψ', 'ψ', 'ᴪ'),
new Gsm7Char("18", 'Σ', "GREEK CAPITAL LETTER SIGMA", 'Σ', 'ς', 'σ'),
new Gsm7Char("19", 'Θ', "GREEK CAPITAL LETTER THETA", 'Θ', 'θ', 'ϑ', 'ϴ'),
new Gsm7Char("1A", 'Ξ', "GREEK CAPITAL LETTER XI", 'Ξ', 'ξ'),
//new Gsm7Char("1B", 'ESC', "ESCAPE TO EXTENSION TABLE", 'ESC'),
new Gsm7Char("1C", 'Æ', "LATIN CAPITAL LETTER AE", 'Æ', 'Ǣ', 'Ǽ', 'ᴁ'),
new Gsm7Char("1D", 'æ', "LATIN SMALL LETTER AE", 'æ', 'ǣ', 'ǽ'),
new Gsm7Char("1E", 'ß', "LATIN SMALL LETTER SHARP S (German)", 'ß', 'ẞ'),
new Gsm7Char("1F", 'É', "LATIN CAPITAL LETTER E WITH ACUTE", 'É'),
new Gsm7Char("20", ' ', "SPACE", ' ', '\t'),
new Gsm7Char("21", '!', "EXCLAMATION MARK", '!', '!'),
new Gsm7Char("22", '"', "QUOTATION MARK", '"', '"', '«', '»', '„', '“'),
new Gsm7Char("23", '#', "NUMBER SIGN", '#', '#'),
new Gsm7Char("24", '¤', "CURRENCY SIGN", '¤'),
new Gsm7Char("25", '%', "PERCENT SIGN", '%', '%'),
new Gsm7Char("26", '&', "AMPERSAND", '&', '&'),
// Look-alikes are written as \u escapes so that width normalization of this file cannot collapse them into the
// ASCII apostrophe / grave accent (which would produce an invalid or duplicated char literal).
// NOTE(review): presumably U+FF07 FULLWIDTH APOSTROPHE and U+FF40 FULLWIDTH GRAVE ACCENT were intended here — confirm.
new Gsm7Char("27", '\'', "APOSTROPHE", '\'', '\uFF07', '`', '´', '\uFF40'),
new Gsm7Char("28", '(', "LEFT PARENTHESIS", '(', '(', '⦅'),
new Gsm7Char("29", ')', "RIGHT PARENTHESIS", ')', ')', '⦆'),
new Gsm7Char("2A", '*', "ASTERISK", '*', '*'),
new Gsm7Char("2B", '+', "PLUS SIGN", '+', '+'),
new Gsm7Char("2C", ',', "COMMA", ',', ',', '¸', '、'),
new Gsm7Char("2D", '-', "HYPHEN-MINUS", '-', '-', '—'),
new Gsm7Char("2E", '.', "FULL STOP", '.', '.', '·', '。'),
new Gsm7Char("2F", '/', "SOLIDUS", '/', '/'),
new Gsm7Char("30", '0', "DIGIT ZERO", '0', '0'),
new Gsm7Char("31", '1', "DIGIT ONE", '1', '1', '¹'),
new Gsm7Char("32", '2', "DIGIT TWO", '2', '2', '²'),
new Gsm7Char("33", '3', "DIGIT THREE", '3', '3', '³'),
new Gsm7Char("34", '4', "DIGIT FOUR", '4', '4'),
new Gsm7Char("35", '5', "DIGIT FIVE", '5', '5'),
new Gsm7Char("36", '6', "DIGIT SIX", '6', '6'),
new Gsm7Char("37", '7', "DIGIT SEVEN", '7', '7'),
new Gsm7Char("38", '8', "DIGIT EIGHT", '8', '8'),
new Gsm7Char("39", '9', "DIGIT NINE", '9', '9'),
new Gsm7Char("3A", ':', "COLON", ':', ':'),
new Gsm7Char("3B", ';', "SEMICOLON", ';', ';'),
new Gsm7Char("3C", '<', "LESS-THAN SIGN", '<', '<'),
new Gsm7Char("3D", '=', "EQUALS SIGN", '=', '='),
new Gsm7Char("3E", '>', "GREATER-THAN SIGN", '>', '>'),
new Gsm7Char("3F", '?', "QUESTION MARK", '?', '?'),
new Gsm7Char("40", '¡', "INVERTED EXCLAMATION MARK", '¡'),
new Gsm7Char("41", 'A', "LATIN CAPITAL LETTER A (GREEK CAPITAL LETTER ALPHA)", 'A', 'Α', 'A', 'À', 'Á', 'Â', 'Ã', 'Ā', 'Ă', 'Ą', 'Ǎ', 'Ǟ', 'Ǡ', 'Ȁ', 'Ȃ', 'Ȧ', 'Ⱥ', 'ɑ', 'Ά', 'ά', 'α', 'ᴀ', 'Ḁ', 'Ạ', 'Ả', 'Ấ', 'Ầ', 'Ẩ', 'Ẫ', 'Ậ', 'Ắ', 'Ằ', 'Ẳ', 'Ẵ', 'Ặ', 'ἀ', 'ἁ', 'ἂ', 'ἃ', 'ἄ', 'ἅ', 'ἆ', 'ἇ', 'Ἀ', 'Ἁ', 'Ἂ', 'Ἃ', 'Ἄ', 'Ἅ', 'Ἆ', 'Ἇ', 'ὰ', 'ά', 'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'ᾰ', 'ᾱ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'Ᾰ', 'Ᾱ', 'Ὰ', 'Ά', 'ᾼ'),
new Gsm7Char("42", 'B', "LATIN CAPITAL LETTER B (GREEK CAPITAL LETTER BETA)", 'B', 'Β', 'B', 'Ɓ', 'Ƃ', 'Ƀ', 'ʙ', 'β', 'ϐ', 'ᴃ', 'ᵦ', 'Ḃ', 'Ḅ', 'Ḇ'),
new Gsm7Char("43", 'C', "LATIN CAPITAL LETTER C", 'C', 'C', '©', 'Ć', 'Ĉ', 'Ċ', 'Č', 'Ƈ', 'Ȼ', 'ʗ', 'ᴄ', 'Ḉ'),
new Gsm7Char("44", 'D', "LATIN CAPITAL LETTER D", 'D', 'D', 'Ð', 'Ď', 'Đ', 'Ɖ', 'Ɗ', 'Ƌ', 'ᴅ', 'ᴆ', 'Ḋ', 'Ḍ', 'Ḏ', 'Ḑ', 'Ḓ'),
new Gsm7Char("45", 'E', "LATIN CAPITAL LETTER E (GREEK CAPITAL LETTER EPSILON)", 'E', 'Ε', 'E', 'È', 'Ê', 'Ë', 'Ē', 'Ĕ', 'Ė', 'Ę', 'Ě', 'Ȅ', 'Ȇ', 'Ȩ', 'Ɇ', 'Έ', 'έ', 'ε', 'ᴇ', 'Ḕ', 'Ḗ', 'Ḙ', 'Ḛ', 'Ḝ', 'Ẹ', 'Ẻ', 'Ẽ', 'Ế', 'Ề', 'Ể', 'Ễ', 'Ệ', 'ἐ', 'ἑ', 'ἒ', 'ἓ', 'ἔ', 'ἕ', 'Ἐ', 'Ἑ', 'Ἒ', 'Ἓ', 'Ἔ', 'Ἕ', 'ὲ', 'έ', 'Ὲ', 'Έ'),
new Gsm7Char("46", 'F', "LATIN CAPITAL LETTER F", 'F', 'F', 'Ƒ', 'Ḟ'),
new Gsm7Char("47", 'G', "LATIN CAPITAL LETTER G", 'G', 'G', 'Ĝ', 'Ğ', 'Ġ', 'Ģ', 'Ɠ', 'Ǥ', 'Ǧ', 'Ǵ', 'ɢ', 'ʛ', 'Ḡ'),
new Gsm7Char("48", 'H', "LATIN CAPITAL LETTER H (GREEK CAPITAL LETTER ETA)", 'H', 'Η', 'H', 'Ĥ', 'Ħ', 'Ȟ', 'ʜ', 'Ή', 'ή', 'η', 'Ḣ', 'Ḥ', 'Ḧ', 'Ḩ', 'Ḫ', 'ἠ', 'ἡ', 'ἢ', 'ἣ', 'ἤ', 'ἥ', 'ἦ', 'ἧ', 'Ἠ', 'Ἡ', 'Ἢ', 'Ἣ', 'Ἤ', 'Ἥ', 'Ἦ', 'Ἧ', 'ὴ', 'ή', 'ᾐ', 'ᾑ', 'ᾒ', 'ᾓ', 'ᾔ', 'ᾕ', 'ᾖ', 'ᾗ', 'ᾘ', 'ᾙ', 'ᾚ', 'ᾛ', 'ᾜ', 'ᾝ', 'ᾞ', 'ᾟ', 'ῂ', 'ῃ', 'ῄ', 'ῆ', 'ῇ', 'Ὴ', 'Ή', 'ῌ'),
new Gsm7Char("49", 'I', "LATIN CAPITAL LETTER I (GREEK CAPITAL LETTER IOTA)", 'I', 'Ι', 'I', 'Ì', 'Í', 'Î', 'Ï', 'Ĩ', 'Ī', 'Ĭ', 'Į', 'İ', 'Ɩ', 'Ɨ', 'Ǐ', 'Ȉ', 'Ȋ', 'ɩ', 'ɪ', 'Ί', 'ΐ', 'Ϊ', 'ί', 'ι', 'ϊ', 'ᵻ', 'Ḭ', 'Ḯ', 'Ỉ', 'Ị', 'ἰ', 'ἱ', 'ἲ', 'ἳ', 'ἴ', 'ἵ', 'ἶ', 'ἷ', 'Ἰ', 'Ἱ', 'Ἲ', 'Ἳ', 'Ἴ', 'Ἵ', 'Ἶ', 'Ἷ', 'ὶ', 'ί', 'ῐ', 'ῑ', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'Ῐ', 'Ῑ', 'Ὶ', 'Ί'),
new Gsm7Char("4A", 'J', "LATIN CAPITAL LETTER J", 'J', 'J', 'Ĵ', 'Ɉ', 'ᴊ'),
new Gsm7Char("4B", 'K', "LATIN CAPITAL LETTER K (GREEK CAPITAL LETTER KAPPA)", 'K', 'Κ', 'K', 'Ķ', 'Ƙ', 'Ǩ', 'κ', 'ϰ', 'ᴋ', 'Ḱ', 'Ḳ', 'Ḵ'),
new Gsm7Char("4C", 'L', "LATIN CAPITAL LETTER L", 'L', 'L', 'Ĺ', 'Ļ', 'Ľ', 'Ŀ', 'Ł', 'Ƚ', 'ʟ', 'ᴌ', 'Ḷ', 'Ḹ', 'Ḻ', 'Ḽ'),
new Gsm7Char("4D", 'M', "LATIN CAPITAL LETTER M (GREEK CAPITAL LETTER MU)", 'M', 'Μ', 'M', 'μ', 'ᴍ', 'Ḿ', 'Ṁ', 'Ṃ'),
new Gsm7Char("4E", 'N', "LATIN CAPITAL LETTER N (GREEK CAPITAL LETTER NU)", 'N', 'Ν', 'N', 'Ń', 'Ņ', 'Ň', 'Ɲ', 'Ǹ', 'ɴ', 'ν', 'Ṅ', 'Ṇ', 'Ṉ', 'Ṋ'),
new Gsm7Char("4F", 'O', "LATIN CAPITAL LETTER O (GREEK CAPITAL LETTER OMICRON)", 'O', 'Ο', 'O', 'Ò', 'Ó', 'Ô', 'Õ', 'Ō', 'Ŏ', 'Ő', 'Ɵ', 'Ơ', 'Ǒ', 'Ǫ', 'Ǭ', 'Ȍ', 'Ȏ', 'Ȫ', 'Ȭ', 'Ȯ', 'Ȱ', 'Ό', 'ο', 'ό', 'ᴏ', 'Ṍ', 'Ṏ', 'Ṑ', 'Ṓ', 'Ọ', 'Ỏ', 'Ố', 'Ồ', 'Ổ', 'Ỗ', 'Ộ', 'Ớ', 'Ờ', 'Ở', 'Ỡ', 'Ợ', 'ὀ', 'ὁ', 'ὂ', 'ὃ', 'ὄ', 'ὅ', 'Ὀ', 'Ὁ', 'Ὂ', 'Ὃ', 'Ὄ', 'Ὅ', 'ὸ', 'ό', 'Ὸ', 'Ό'),
new Gsm7Char("50", 'P', "LATIN CAPITAL LETTER P (GREEK CAPITAL LETTER RHO)", 'P', 'Ρ', 'P', 'Ƥ', 'ρ', 'ϱ', 'ϼ', 'ᴘ', 'ᴩ', 'ᵨ', 'Ṕ', 'Ṗ', 'ῤ', 'ῥ', 'Ῥ'),
new Gsm7Char("51", 'Q', "LATIN CAPITAL LETTER Q", 'Q', 'Q', 'Ɋ'),
new Gsm7Char("52", 'R', "LATIN CAPITAL LETTER R", 'R', 'R', '®', 'Ŕ', 'Ŗ', 'Ř', 'Ȑ', 'Ȓ', 'Ɍ', 'ʀ', 'Ṙ', 'Ṛ', 'Ṝ', 'Ṟ'),
new Gsm7Char("53", 'S', "LATIN CAPITAL LETTER S", 'S', 'S', 'Ś', 'Ŝ', 'Ş', 'Š', 'Ș', 'Ṡ', 'Ṣ', 'Ṥ', 'Ṧ', 'Ṩ'),
new Gsm7Char("54", 'T', "LATIN CAPITAL LETTER T (GREEK CAPITAL LETTER TAU)", 'T', 'Τ', 'T', 'Ţ', 'Ť', 'Ŧ', 'Ƭ', 'Ʈ', 'Ț', 'Ⱦ', 'τ', 'ᴛ', 'Ṫ', 'Ṭ', 'Ṯ', 'Ṱ'),
new Gsm7Char("55", 'U', "LATIN CAPITAL LETTER U", 'U', 'U', 'Ù', 'Ú', 'Û', 'Ũ', 'Ū', 'Ŭ', 'Ů', 'Ű', 'Ų', 'Ư', 'Ǔ', 'Ǖ', 'Ǘ', 'Ǚ', 'Ǜ', 'Ȕ', 'Ȗ', 'Ʉ', 'ᴜ', 'ᵾ', 'Ṳ', 'Ṵ', 'Ṷ', 'Ṹ', 'Ṻ', 'Ụ', 'Ủ', 'Ứ', 'Ừ', 'Ử', 'Ữ', 'Ự'),
new Gsm7Char("56", 'V', "LATIN CAPITAL LETTER V", 'V', 'V', 'Ʋ', 'ᴠ', 'Ṽ', 'Ṿ'),
new Gsm7Char("57", 'W', "LATIN CAPITAL LETTER W", 'W', 'W', 'Ŵ', 'ᴡ', 'Ẁ', 'Ẃ', 'Ẅ', 'Ẇ', 'Ẉ'),
new Gsm7Char("58", 'X', "LATIN CAPITAL LETTER X (GREEK CAPITAL LETTER CHI)", 'X', 'Χ', 'X', 'χ', 'ᵪ', 'Ẋ', 'Ẍ'),
new Gsm7Char("59", 'Y', "LATIN CAPITAL LETTER Y (GREEK CAPITAL LETTER UPSILON)", 'Y', 'Υ', 'Y', 'Ý', 'Ŷ', 'Ÿ', 'Ʊ', 'Ƴ', 'Ȳ', 'Ɏ', 'ʊ', 'ʏ', 'Ύ', 'Ϋ', 'ΰ', 'υ', 'ϋ', 'ύ', 'ϒ', 'ϓ', 'ϔ', 'Ẏ', 'Ỳ', 'Ỵ', 'Ỷ', 'Ỹ', 'Ỿ', 'ὐ', 'ὑ', 'ὒ', 'ὓ', 'ὔ', 'ὕ', 'ὖ', 'ὗ', 'Ὑ', 'Ὓ', 'Ὕ', 'Ὗ', 'ὺ', 'ύ', 'ῠ', 'ῡ', 'ῢ', 'ΰ', 'ῦ', 'ῧ', 'Ῠ', 'Ῡ', 'Ὺ', 'Ύ'),
new Gsm7Char("5A", 'Z', "LATIN CAPITAL LETTER Z (GREEK CAPITAL LETTER ZETA)", 'Z', 'Ζ', 'Z', 'Ź', 'Ż', 'Ž', 'Ƶ', 'Ȥ', 'ζ', 'ᴢ', 'Ẑ', 'Ẓ', 'Ẕ'),
new Gsm7Char("5B", 'Ä', "LATIN CAPITAL LETTER A WITH DIAERESIS", 'Ä'),
new Gsm7Char("5C", 'Ö', "LATIN CAPITAL LETTER O WITH DIAERESIS", 'Ö'),
new Gsm7Char("5D", 'Ñ', "LATIN CAPITAL LETTER N WITH TILDE", 'Ñ'),
new Gsm7Char("5E", 'Ü', "LATIN CAPITAL LETTER U WITH DIAERESIS", 'Ü'),
new Gsm7Char("5F", '§', "SECTION SIGN", '§'),
new Gsm7Char("60", '¿', "INVERTED QUESTION MARK", '¿'),
new Gsm7Char("61", 'a', "LATIN SMALL LETTER A", 'a', 'a', 'á', 'â', 'ã', 'ā', 'ă', 'ą', 'ǎ', 'ǟ', 'ǡ', 'ȁ', 'ȃ', 'ȧ', 'ᶏ', 'ḁ', 'ẚ', 'ạ', 'ả', 'ấ', 'ầ', 'ẩ', 'ẫ', 'ậ', 'ắ', 'ằ', 'ẳ', 'ẵ', 'ặ'),
new Gsm7Char("62", 'b', "LATIN SMALL LETTER B", 'b', 'b', 'ƀ', 'ƃ', 'ɓ', 'ᵬ', 'ᶀ', 'ḃ', 'ḅ', 'ḇ'),
new Gsm7Char("63", 'c', "LATIN SMALL LETTER C", 'c', 'c', '¢', 'ć', 'ĉ', 'ċ', 'č', 'ƈ', 'ȼ', 'ɕ', 'ḉ', '¢'),
new Gsm7Char("64", 'd', "LATIN SMALL LETTER D", 'd', 'd', 'ð', 'ď', 'đ', 'ƌ', 'ȡ', 'ɖ', 'ɗ', 'ᵭ', 'ᶁ', 'ᶑ', 'ḋ', 'ḍ', 'ḏ', 'ḑ', 'ḓ'),
new Gsm7Char("65", 'e', "LATIN SMALL LETTER E", 'e', 'e', 'ê', 'ë', 'ē', 'ĕ', 'ė', 'ę', 'ě', 'ȅ', 'ȇ', 'ȩ', 'ɇ', 'ᶒ', 'ḕ', 'ḗ', 'ḙ', 'ḛ', 'ḝ', 'ẹ', 'ẻ', 'ẽ', 'ế', 'ề', 'ể', 'ễ', 'ệ'),
new Gsm7Char("66", 'f', "LATIN SMALL LETTER F", 'f', 'f', 'ƒ', 'ᵮ', 'ᶂ', 'ḟ'),
new Gsm7Char("67", 'g', "LATIN SMALL LETTER G", 'g', 'g', 'ĝ', 'ğ', 'ġ', 'ģ', 'ǥ', 'ǧ', 'ǵ', 'ɠ', 'ɡ', 'ᶃ', 'ḡ'),
new Gsm7Char("68", 'h', "LATIN SMALL LETTER H", 'h', 'h', 'ĥ', 'ħ', 'ȟ', 'ɦ', 'ḣ', 'ḥ', 'ḧ', 'ḩ', 'ḫ', 'ẖ'),
new Gsm7Char("69", 'i', "LATIN SMALL LETTER I", 'i', 'i', 'í', 'î', 'ï', 'ĩ', 'ī', 'ĭ', 'į', 'ı', 'ǐ', 'ȉ', 'ȋ', 'ɨ', 'ᵢ', 'ᶖ', 'ḭ', 'ḯ', 'ỉ', 'ị'),
new Gsm7Char("6A", 'j', "LATIN SMALL LETTER J", 'j', 'j', 'ĵ', 'ǰ', 'ȷ', 'ɉ', 'ɟ', 'ʄ', 'ʝ'),
new Gsm7Char("6B", 'k', "LATIN SMALL LETTER K", 'k', 'k', 'ķ', 'ĸ', 'ƙ', 'ǩ', 'ᶄ', 'ḱ', 'ḳ', 'ḵ'),
new Gsm7Char("6C", 'l', "LATIN SMALL LETTER L", 'l', 'l', 'ĺ', 'ļ', 'ľ', 'ŀ', 'ł', 'ƚ', 'ȴ', 'ɫ', 'ɬ', 'ɭ', 'ᶅ', 'ḷ', 'ḹ', 'ḻ', 'ḽ'),
new Gsm7Char("6D", 'm', "LATIN SMALL LETTER M", 'm', 'm', 'ɱ', 'ᵯ', 'ᶆ', 'ḿ', 'ṁ', 'ṃ'),
// '\u0149' is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE; it is escaped because its two-character expansion is not a
// valid char literal.
// NOTE(review): presumably the second alternative was U+FF4E FULLWIDTH LATIN SMALL LETTER N — confirm.
new Gsm7Char("6E", 'n', "LATIN SMALL LETTER N", 'n', '\uFF4E', 'ń', 'ņ', 'ň', '\u0149', 'ƞ', 'ǹ', 'Ƞ', 'ȵ', 'ɲ', 'ɳ', 'ᵰ', 'ᶇ', 'ṅ', 'ṇ', 'ṉ', 'ṋ'),
new Gsm7Char("6F", 'o', "LATIN SMALL LETTER O", 'o', 'o', 'ó', 'ô', 'õ', 'ō', 'ŏ', 'ő', 'ơ', 'ǒ', 'ǫ', 'ǭ', 'ȍ', 'ȏ', 'ȫ', 'ȭ', 'ȯ', 'ȱ', 'ɵ', 'ṍ', 'ṏ', 'ṑ', 'ṓ', 'ọ', 'ỏ', 'ố', 'ồ', 'ổ', 'ỗ', 'ộ', 'ớ', 'ờ', 'ở', 'ỡ', 'ợ'),
new Gsm7Char("70", 'p', "LATIN SMALL LETTER P", 'p', 'p', 'ƥ', 'ᵱ', 'ᵽ', 'ᶈ', 'ṕ', 'ṗ'),
new Gsm7Char("71", 'q', "LATIN SMALL LETTER Q", 'q', 'q', 'ɋ', 'ʠ'),
new Gsm7Char("72", 'r', "LATIN SMALL LETTER R", 'r', 'r', 'ŕ', 'ŗ', 'ř', 'ȑ', 'ȓ', 'ɍ', 'ɼ', 'ɽ', 'ɾ', 'ᵣ', 'ᵲ', 'ᵳ', 'ᶉ', 'ṙ', 'ṛ', 'ṝ', 'ṟ'),
new Gsm7Char("73", 's', "LATIN SMALL LETTER S", 's', 's', 'ś', 'ŝ', 'ş', 'š', 'ș', 'ȿ', 'ʂ', 'ᵴ', 'ᶊ', 'ṡ', 'ṣ', 'ṥ', 'ṧ', 'ṩ'),
new Gsm7Char("74", 't', "LATIN SMALL LETTER T", 't', 't', 'ţ', 'ť', 'ŧ', 'ƫ', 'ƭ', 'ț', 'ȶ', 'ʈ', 'ᵵ', 'ṫ', 'ṭ', 'ṯ', 'ṱ', 'ẗ'),
new Gsm7Char("75", 'u', "LATIN SMALL LETTER U", 'u', 'u', 'ú', 'û', 'ũ', 'ū', 'ŭ', 'ů', 'ű', 'ų', 'ư', 'ǔ', 'ǖ', 'ǘ', 'ǚ', 'ǜ', 'ȕ', 'ȗ', 'ʉ', 'ᵤ', 'ᶙ', 'ṳ', 'ṵ', 'ṷ', 'ṹ', 'ṻ', 'ụ', 'ủ', 'ứ', 'ừ', 'ử', 'ữ', 'ự'),
new Gsm7Char("76", 'v', "LATIN SMALL LETTER V", 'v', 'v', 'ʋ', 'ᵥ', 'ᶌ', 'ṽ', 'ṿ'),
new Gsm7Char("77", 'w', "LATIN SMALL LETTER W", 'w', 'w', 'ŵ', 'ẁ', 'ẃ', 'ẅ', 'ẇ', 'ẉ', 'ẘ'),
new Gsm7Char("78", 'x', "LATIN SMALL LETTER X", 'x', 'x', 'ᶍ', 'ẋ', 'ẍ'),
new Gsm7Char("79", 'y', "LATIN SMALL LETTER Y", 'y', 'y', 'ý', 'ÿ', 'ŷ', 'ƴ', 'ȳ', 'ɏ', 'ẏ', 'ẙ', 'ỳ', 'ỵ', 'ỷ', 'ỹ', 'ỿ'),
new Gsm7Char("7A", 'z', "LATIN SMALL LETTER Z", 'z', 'z', 'ź', 'ż', 'ž', 'ƶ', 'ȥ', 'ɀ', 'ʐ', 'ʑ', 'ᵶ', 'ᶎ', 'ẑ', 'ẓ', 'ẕ'),
new Gsm7Char("7B", 'ä', "LATIN SMALL LETTER A WITH DIAERESIS", 'ä'),
new Gsm7Char("7C", 'ö', "LATIN SMALL LETTER O WITH DIAERESIS", 'ö'),
new Gsm7Char("7D", 'ñ', "LATIN SMALL LETTER N WITH TILDE", 'ñ'),
new Gsm7Char("7E", 'ü', "LATIN SMALL LETTER U WITH DIAERESIS", 'ü'),
new Gsm7Char("7F", 'à', "LATIN SMALL LETTER A WITH GRAVE", 'à'),
// ESCAPED chars:
new Gsm7Char("1B 14", '^', "CIRCUMFLEX ACCENT", '^', '^'),
new Gsm7Char("1B 28", '{', "LEFT CURLY BRACKET", '{', '{'),
new Gsm7Char("1B 29", '}', "RIGHT CURLY BRACKET", '}', '}'),
// The look-alike is written as a \u escape: a bare backslash inside a char literal does not compile.
// NOTE(review): presumably U+FF3C FULLWIDTH REVERSE SOLIDUS was intended here — confirm.
new Gsm7Char("1B 2F", '\\', "REVERSE SOLIDUS", '\\', '\uFF3C'),
new Gsm7Char("1B 3C", '[', "LEFT SQUARE BRACKET", '[', '['),
new Gsm7Char("1B 3D", '~', "TILDE", '~', '~'),
new Gsm7Char("1B 3E", ']', "RIGHT SQUARE BRACKET", ']', ']'),
new Gsm7Char("1B 40", '|', "VERTICAL LINE", '|', '|', '¦', '¦', '│'),
new Gsm7Char("1B 65", '€', "EURO SIGN", '€')
};
/// <summary>
/// Maps every known input character to the definition of the GSM 7-bit character it is transcribed to.
/// </summary>
private static readonly Dictionary<char, Gsm7Char> transcribeLookupTable = BuildTranscribeLookupTable();
/// <summary>
/// Characters that are themselves GSM 7-bit characters (the <see cref="Gsm7Char.Character"/> of every definition).
/// </summary>
private static readonly HashSet<char> charSet = new HashSet<char>(chars.Select(c => c.Character));
/// <summary>
/// Builds <see cref="transcribeLookupTable"/> from <see cref="chars"/>.
/// </summary>
/// <remarks>
/// A character that occurs more than once in the definitions keeps its first mapping instead of throwing,
/// so a single repeated alternative can no longer break the type initializer of the whole class.
/// </remarks>
/// <returns>Lookup from input character to its GSM 7-bit definition.</returns>
private static Dictionary<char, Gsm7Char> BuildTranscribeLookupTable()
{
    var table = new Dictionary<char, Gsm7Char>();

    // Pass 1: every GSM character maps to its own definition, so that no alternatives list
    // of another definition can take it over.
    foreach (var definition in chars)
    {
        if (!table.ContainsKey(definition.Character))
        {
            table.Add(definition.Character, definition);
        }
    }

    // Pass 2: alternatives, first definition wins on duplicates.
    foreach (var definition in chars)
    {
        foreach (var alternative in definition.Alternatives)
        {
            if (!table.ContainsKey(alternative))
            {
                table.Add(alternative, definition);
            }
        }
    }

    return table;
}
/// <summary>
/// One row of the GSM 7-bit definitions: the character, its code, a description and its alternatives.
/// </summary>
private class Gsm7Char
{
    /// <summary>
    /// Code of the character in GSM7 encoding.
    /// </summary>
    public string CharacterCode { get; }

    /// <summary>
    /// The character as used in GSM 7-bit.
    /// </summary>
    public char Character { get; }

    /// <summary>
    /// Human-readable description of <see cref="Character"/>.
    /// </summary>
    public string Description { get; }

    /// <summary>
    /// Characters that are represented by <see cref="Character"/> in GSM 7-bit (including the <see cref="Character"/> itself).
    /// </summary>
    public char[] Alternatives { get; }

    /// <summary>
    /// Creates a definition from its code, character, description and list of alternatives.
    /// </summary>
    /// <param name="code">Value stored in <see cref="CharacterCode"/>.</param>
    /// <param name="character">Value stored in <see cref="Character"/>.</param>
    /// <param name="description">Value stored in <see cref="Description"/>.</param>
    /// <param name="alternatives">Value stored in <see cref="Alternatives"/>.</param>
    public Gsm7Char(string code, char character, string description, params char[] alternatives)
    {
        CharacterCode = code;
        Character = character;
        Description = description;
        Alternatives = alternatives;
    }
}
#endregion
/// <summary>
/// Checks whether the given character is one of the GSM 7-bit characters.
/// </summary>
/// <param name="c">The character to check.</param>
/// <returns><c>true</c> when <paramref name="c"/> is a GSM 7-bit character; otherwise <c>false</c>.</returns>
public static bool IsGsm7Char(char c) => charSet.Contains(c);
/// <summary>
/// Transcribes the specified text into GSM 7-bit characters.
/// </summary>
/// <remarks>
/// Known characters are replaced by their GSM 7-bit character, any other whitespace becomes a space and
/// everything else is replaced by <paramref name="notFoundCharacter"/>. A surrogate pair (one character outside
/// the Basic Multilingual Plane, e.g. an emoji) is replaced by a single <paramref name="notFoundCharacter"/>.
/// </remarks>
/// <param name="text">The text to transcribe.</param>
/// <param name="notFoundCharacter">The text appended in place of a character that cannot be transcribed. Defaults to "?".</param>
/// <returns>The transcribed text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
public static string Transcribe(string text, string notFoundCharacter = "?")
{
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    var result = new StringBuilder(text.Length);
    for (var i = 0; i < text.Length; i++)
    {
        var c = text[i];
        if (transcribeLookupTable.TryGetValue(c, out var gsmChar))
        {
            result.Append(gsmChar.Character);
        }
        else if (char.IsWhiteSpace(c)) // all whitespaces defaults to space
        {
            result.Append(transcribeLookupTable[' '].Character);
        }
        else
        {
            // Both halves of a surrogate pair belong to one character; consume the low half here
            // so the pair yields one replacement instead of two.
            if (char.IsHighSurrogate(c) && i + 1 < text.Length && char.IsLowSurrogate(text[i + 1]))
            {
                i++;
            }

            result.Append(notFoundCharacter);
        }
    }

    return result.ToString();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment