Last active
February 27, 2020 10:44
-
-
Save yupmin/53f85457e07ef3d27618b3c585819072 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Checks whether a byte string is well-formed UTF-8 as defined by RFC 3629.
 *
 * Besides verifying the lead/continuation byte structure, the check rejects
 * overlong encodings, UTF-16 surrogate code points (U+D800..U+DFFF) and
 * code points above U+10FFFF.
 *
 * @param string $str       Raw bytes to validate.
 * @param bool   $truncated Value to return when the string ends in the middle
 *                          of an otherwise valid multi-byte sequence. Pass true
 *                          when validating a chunk that may have been cut off
 *                          at an arbitrary byte boundary.
 * @return bool True when $str is valid UTF-8 (the empty string is valid);
 *              $truncated when the last sequence is incomplete; false otherwise.
 */
function is_utf8(string $str, bool $truncated = false): bool
{
    $length = strlen($str);

    for ($i = 0; $i < $length; $i++) {
        $lead = ord($str[$i]);

        if ($lead < 0x80) {
            // Single-byte (ASCII) character.
            continue;
        }

        // Determine how many continuation bytes follow the lead byte and the
        // range permitted for the FIRST continuation byte. That range is
        // narrowed for some lead bytes to exclude overlong forms, surrogates
        // and values beyond U+10FFFF.
        if ($lead < 0xC2) {
            // 80..BF: stray continuation byte; C0..C1: overlong 2-byte form.
            return false;
        } elseif ($lead < 0xE0) {
            $trailing = 1;
            $min = 0x80;
            $max = 0xBF;
        } elseif ($lead < 0xF0) {
            $trailing = 2;
            $min = ($lead === 0xE0) ? 0xA0 : 0x80; // E0 80..9F would be overlong
            $max = ($lead === 0xED) ? 0x9F : 0xBF; // ED A0..BF would be a surrogate
        } elseif ($lead < 0xF5) {
            $trailing = 3;
            $min = ($lead === 0xF0) ? 0x90 : 0x80; // F0 80..8F would be overlong
            $max = ($lead === 0xF4) ? 0x8F : 0xBF; // F4 90..BF exceeds U+10FFFF
        } else {
            // F5..FF never appear in UTF-8 (RFC 3629).
            return false;
        }

        for ($n = 0; $n < $trailing; $n++) {
            if (++$i >= $length) {
                // Input ended inside a multi-byte sequence.
                return $truncated;
            }

            $byte = ord($str[$i]);
            if ($byte < $min || $byte > $max) {
                return false;
            }

            // Only the first continuation byte has a restricted range; the
            // remaining ones may be any 80..BF value.
            $min = 0x80;
            $max = 0xBF;
        }
    }

    return true;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment