Last active
October 25, 2023 08:19
-
-
Save daformat/950411857f01a9b39873ddd1b44d5813 to your computer and use it in GitHub Desktop.
A list of the different UTF spaces
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Reference table of Unicode space-like characters.
 *
 * Each entry has:
 *  - name: human-readable character name.
 *  - utf: the character itself, written as a JavaScript `\uXXXX` escape.
 *  - html: HTML character references for the character, ordered as
 *    named reference (when one is listed), decimal reference, hex reference.
 *    These are kept as literal `&...;` strings, NOT as the decoded character.
 *  - breaking: whether a line may be broken at this character. The two
 *    control-character entries (tabulations) carry no `breaking` key.
 *  - width: free-text description of the typical advance width.
 *  - unicode_category: Unicode general category, spelled out.
 *  - matched_by_s_character_class: whether the regular expression `\s`
 *    matches the character.
 */
const utfSpaces = [
  {
    name: 'Space',
    utf: '\u0020',
    html: ['&#32;', '&#x20;'],
    breaking: true,
    width: 'Typically 1/4 em',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Thin space',
    utf: '\u2009',
    html: ['&thinsp;', '&#8201;', '&#x2009;'],
    breaking: true,
    width: '1/5 em, can be 1/6 em',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Hair space',
    utf: '\u200A',
    html: ['&#8202;', '&#x200A;'],
    breaking: true,
    width: 'Narrower than a thin space (less than 1/5 em or 1/6em)',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Zero width space',
    utf: '\u200B',
    html: ['&#8203;', '&#x200B;'],
    breaking: true,
    width: 'None (invisible character)',
    unicode_category: 'Other, Format',
    matched_by_s_character_class: false,
  },
  {
    name: 'Medium mathematical space',
    utf: '\u205F',
    html: ['&#8287;', '&#x205F;'],
    breaking: true,
    width: '4/18 em',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Ogham space mark',
    utf: '\u1680',
    html: ['&#5760;', '&#x1680;'],
    breaking: true,
    width: 'Usually represented by a 1em dash',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Mongolian vowel separator',
    utf: '\u180E',
    html: ['&#6158;', '&#x180E;'],
    breaking: false,
    width: 'None (invisible character)',
    unicode_category: 'Other, Format',
    matched_by_s_character_class: false,
  },
  {
    name: 'EN quad',
    utf: '\u2000',
    html: ['&#8192;', '&#x2000;'],
    breaking: true,
    width: '1 en (1/2 em)',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'EM quad',
    utf: '\u2001',
    html: ['&#8193;', '&#x2001;'],
    breaking: true,
    width: '1 em',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'EN space',
    utf: '\u2002',
    html: ['&ensp;', '&#8194;', '&#x2002;'],
    breaking: true,
    width: '1 en (1/2 em)',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'EM space',
    utf: '\u2003',
    html: ['&emsp;', '&#8195;', '&#x2003;'],
    // Breaking, like EN space and the quads: UAX #14 puts U+2000..U+2006
    // in line-break class BA (break opportunity after).
    breaking: true,
    width: '1 em',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Three-per-em space',
    utf: '\u2004',
    html: ['&#8196;', '&#x2004;'],
    breaking: true, // UAX #14 class BA
    width: '1/3 em',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Four-per-em space',
    utf: '\u2005',
    html: ['&#8197;', '&#x2005;'],
    breaking: true, // UAX #14 class BA
    width: '1/4 em',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Six-per-em space',
    utf: '\u2006',
    html: ['&#8198;', '&#x2006;'],
    breaking: true, // UAX #14 class BA
    width: '1/6 em',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Non breaking space',
    utf: '\u00A0',
    html: ['&nbsp;', '&#160;', '&#xA0;'],
    breaking: false,
    width: 'Typically 1/4 em, same as a regular space but usually not adjusted with justification',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Narrow no-break space',
    utf: '\u202F',
    html: ['&#8239;', '&#x202F;'],
    breaking: false,
    width: 'Narrower than a non-breaking or breaking space',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Figure space',
    utf: '\u2007',
    html: ['&#8199;', '&#x2007;'],
    breaking: false,
    width: 'The width of digits (tabular space)',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Punctuation space',
    utf: '\u2008',
    html: ['&#8200;', '&#x2008;'],
    breaking: true,
    width: 'Width of a period (.)',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Word joiner',
    utf: '\u2060',
    // The decimal reference needs the '#': '&8288;' is not a character reference.
    html: ['&#8288;', '&#x2060;'],
    breaking: false,
    width: 'None (invisible character)',
    unicode_category: 'Other, Format',
    matched_by_s_character_class: false,
  },
  {
    name: 'Ideographic space',
    utf: '\u3000',
    html: ['&#12288;', '&#x3000;'],
    breaking: true, // UAX #14 class BA
    width: 'The width of ideographic (CJK) characters',
    unicode_category: 'Separator, Space',
    matched_by_s_character_class: true,
  },
  {
    name: 'Zero width no-break space (BOM often intepreted as)',
    utf: '\uFEFF',
    html: ['&#65279;', '&#xFEFF;'],
    breaking: false,
    width: 'None (invisible character)',
    unicode_category: 'Other, Format',
    matched_by_s_character_class: true,
  },
  {
    name: 'Character tabulation',
    utf: '\u0009',
    html: ['&Tab;', '&#9;', '&#x9;'],
    width: 'Up to the next tab stop',
    unicode_category: 'Other, Control',
    matched_by_s_character_class: true,
  },
  {
    name: 'Line tabulation',
    utf: '\u000B',
    html: ['&#11;', '&#xB;'],
    width: 'doesn’t apply (vertical)',
    unicode_category: 'Other, Control',
    matched_by_s_character_class: true,
  },
];
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's a way to know the type of spaces used in a string: