Created
April 8, 2014 06:00
-
-
Save noondreams/10095718 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Context-aware output encoder for untrusted text.
 *
 * Each public method escapes a UTF-8 string for one output context (HTML
 * body, HTML attribute, JavaScript string, CSS value, URL). Apart from
 * encodeForHTML(), every method leaves ASCII letters and digits untouched
 * and replaces every other character with a numeric escape appropriate to
 * the context.
 *
 * All input is interpreted as UTF-8 regardless of mb_internal_encoding().
 */
class Encoder
{
    const ENCODE_STYLE_HTML = 0;
    const ENCODE_STYLE_JAVASCRIPT = 1;
    const ENCODE_STYLE_CSS = 2;
    const ENCODE_STYLE_URL = 3;
    const ENCODE_STYLE_URL_SPECIAL = 4;

    /**
     * The "unreserved" characters of RFC 3986 section 2.3: letters, digits,
     * hyphen, underscore, period and tilde. These never need percent-encoding.
     */
    private static $URL_UNRESERVED_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~';

    /**
     * Escapes the six characters that are significant in HTML element content.
     *
     * @param string $value Untrusted text.
     * @return string Text with &, <, >, ", ' and / replaced by entities.
     */
    public function encodeForHTML($value)
    {
        // strtr() with a map works in a single pass, so the '&' of an entity
        // produced by one replacement is never re-encoded by another.
        return strtr((string) $value, array(
            '&' => '&amp;',
            '<' => '&lt;',
            '>' => '&gt;',
            '"' => '&quot;',
            '\'' => '&#x27;', // &apos; is not recommended
            '/' => '&#x2F;', // forward slash can help end an HTML entity
        ));
    }

    /**
     * Encodes text for use inside an HTML attribute value.
     *
     * @param string $value Untrusted text.
     * @return string Text with every non-alphanumeric character as &#xHH;.
     */
    public function encodeForHTMLAttribute($value)
    {
        return $this->_encodeString($value);
    }

    /**
     * Encodes text for use inside a JavaScript string literal.
     *
     * @param string $value Untrusted text.
     * @return string Text with every non-alphanumeric character as \xHH or \uHHHH.
     */
    public function encodeForJavascript($value)
    {
        return $this->_encodeString($value, self::ENCODE_STYLE_JAVASCRIPT);
    }

    /**
     * Percent-encodes a URL, leaving '/' and ':' intact so that the scheme
     * and path separators survive.
     *
     * @param string $value Untrusted URL text.
     * @return string Percent-encoded text.
     */
    public function encodeForURL($value)
    {
        return $this->_encodeString($value, self::ENCODE_STYLE_URL_SPECIAL);
    }

    /**
     * Encodes text for use inside a CSS value.
     *
     * @param string $value Untrusted text.
     * @return string Text with every non-alphanumeric character as a
     *                six-digit CSS escape (\HHHHHH).
     */
    public function encodeForCSS($value)
    {
        return $this->_encodeString($value, self::ENCODE_STYLE_CSS);
    }

    /**
     * Encodes any special characters in the path portion of the URL. Does not
     * modify the forward slash used to denote directories. If your directory
     * names contain slashes (rare), use the plain urlencode on each directory
     * component and then join them together with a forward slash.
     *
     * Based on http://en.wikipedia.org/wiki/Percent-encoding and
     * http://tools.ietf.org/html/rfc3986
     *
     * @param string $value URL path.
     * @return string The path with everything except '/' and the RFC 3986
     *                unreserved characters percent-encoded. Empty input is
     *                returned unchanged.
     */
    public function encodeURLPath($value)
    {
        $text = (string) $value;
        if ($text === '')
        {
            return $value;
        }

        $characters = $this->_splitCharacters($text);
        if ($characters === false)
        {
            // Not valid UTF-8: percent-encoding the raw bytes is still safe.
            $characters = str_split($text);
        }

        $output = '';
        foreach ($characters as $char)
        {
            // Strict comparison matters here: strpos() returns 0 for 'A',
            // which a loose comparison would confuse with "not found".
            if ($char === '/' || strpos(self::$URL_UNRESERVED_CHARS, $char) !== false)
            {
                // Slashes and unreserved characters pass through untouched.
                $output .= $char;
            }
            else
            {
                $output .= $this->_encodeCharacter($char, self::ENCODE_STYLE_URL);
            }
        }
        return $output;
    }

    /**
     * Encodes every character of a string with the given style.
     *
     * @param string $value Untrusted text.
     * @param int    $style One of the ENCODE_STYLE_* constants.
     * @return string Encoded text. Empty input is returned unchanged; input
     *                that is not valid UTF-8 yields an empty string, because
     *                it cannot be split into characters reliably.
     */
    private function _encodeString($value, $style = self::ENCODE_STYLE_HTML)
    {
        $text = (string) $value;
        if ($text === '')
        {
            return $value;
        }

        $characters = $this->_splitCharacters($text);
        if ($characters === false)
        {
            return '';
        }

        $output = '';
        foreach ($characters as $c)
        {
            $output .= $this->_encodeCharacter($c, $style);
        }
        return $output;
    }

    /**
     * Splits a UTF-8 string into an array of single characters.
     *
     * explode() and str_split() are not used because they work on bytes.
     * The empty pattern with the /u modifier matches between every pair of
     * code points, including before a trailing newline.
     *
     * @param string $text Non-empty text.
     * @return array|false List of characters, or false if $text is not valid UTF-8.
     */
    private function _splitCharacters($text)
    {
        return preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Encodes a single character for the given style.
     *
     * @param string $c     One UTF-8 character (or one raw byte for URL styles).
     * @param int    $style One of the ENCODE_STYLE_* constants.
     * @return string The character itself if it is an ASCII letter or digit
     *                (or '/' or ':' for ENCODE_STYLE_URL_SPECIAL), otherwise
     *                its escape sequence.
     */
    private function _encodeCharacter($c, $style = self::ENCODE_STYLE_HTML)
    {
        // Explicit ASCII class rather than ctype_alnum(), whose result
        // depends on the process locale.
        if (preg_match('/\A[A-Za-z0-9]\z/', $c) === 1)
        {
            return $c;
        }

        if (($style === self::ENCODE_STYLE_URL_SPECIAL) && ($c === '/' || $c === ':'))
        {
            return $c;
        }

        if ($style === self::ENCODE_STYLE_URL || $style === self::ENCODE_STYLE_URL_SPECIAL)
        {
            // URLs carry percent-encoded UTF-8 bytes, not code points.
            $encoded = '';
            foreach (str_split($c) as $byte)
            {
                $encoded .= sprintf('%%%02X', ord($byte));
            }
            return $encoded;
        }

        $charCode = $this->_unicodeOrdinal($c);

        switch ($style)
        {
            case self::ENCODE_STYLE_JAVASCRIPT:
                if ($charCode < 0x100)
                {
                    return sprintf('\\x%02X', $charCode);
                }
                if ($charCode < 0x10000)
                {
                    return sprintf('\\u%04X', $charCode);
                }
                // \u takes exactly four hex digits, so characters outside the
                // Basic Multilingual Plane are written as a surrogate pair.
                $offset = $charCode - 0x10000;
                return sprintf('\\u%04X\\u%04X', 0xD800 | ($offset >> 10), 0xDC00 | ($offset & 0x3FF));

            case self::ENCODE_STYLE_CSS:
                // A CSS escape consumes up to six hex digits. Always emitting
                // six prevents a following hex digit in the text from being
                // absorbed into the escape.
                return sprintf('\\%06X', $charCode);

            case self::ENCODE_STYLE_HTML:
            default:
                // Zero-padded on the left: two digits below 0x100, otherwise
                // at least four.
                return sprintf($charCode < 0x100 ? '&#x%02X;' : '&#x%04X;', $charCode);
        }
    }

    /**
     * Returns the Unicode code point of the first character of a UTF-8 string.
     *
     * UCS-4 is used as the intermediate encoding so that characters above
     * U+FFFF are represented correctly.
     *
     * @param string $u A UTF-8 character.
     * @return int The code point, or 0 if $u could not be converted.
     */
    private function _unicodeOrdinal($u)
    {
        $ucs4 = mb_convert_encoding($u, 'UCS-4BE', 'UTF-8');
        if (!is_string($ucs4) || strlen($ucs4) < 4)
        {
            return 0;
        }
        // 'N' reads one unsigned 32-bit big-endian integer: the first code point.
        $unpacked = unpack('N', $ucs4);
        return $unpacked[1];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment