Skip to content

Instantly share code, notes, and snippets.

@adrianbj
Forked from Kudratullah/unicode2html.php
Last active August 29, 2015 14:13

Revisions

  1. @Kudratullah Kudratullah renamed this gist Aug 6, 2014. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. @Kudratullah Kudratullah created this gist Aug 6, 2014.
    46 changes: 46 additions & 0 deletions unicode2html
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,46 @@
    <?php

    // Works perfectly
    /**
     * Decode "\uXXXX" escape sequences (JavaScript/JSON style) into UTF-8 characters.
     *
     * Each escape is rewritten as a hexadecimal HTML numeric entity and the result
     * is passed through html_entity_decode(), so HTML entities that are already
     * present in the input are decoded too (within the limits of ENT_COMPAT).
     *
     * @param string $string Text containing literal backslash-u escapes.
     * @return string The text with every escape replaced by the character it denotes.
     */
    function uni2html($string){
        // A UTF-16 surrogate pair (high unit D800-DBFF followed by low unit
        // DC00-DFFF) denotes ONE code point above U+FFFF, so it has to become a
        // single entity; two separate surrogate entities would not decode.
        $string = preg_replace_callback(
            '/\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})/i',
            function ($pair) {
                $codePoint = 0x10000
                    + ((hexdec($pair[1]) - 0xD800) << 10)
                    + (hexdec($pair[2]) - 0xDC00);
                return '&#x' . dechex($codePoint) . ';';
            },
            $string
        );

        // The backslash must be escaped once for the PHP string literal and once
        // for PCRE, hence four backslashes. Exactly four hex digits are consumed so
        // that text following the escape (e.g. "\u0041BC") is left untouched, and
        // no other backslash or "%u" in the input is affected.
        $string = preg_replace('/\\\\u([0-9a-f]{4})/i', '&#x$1;', $string);

        return html_entity_decode($string, ENT_COMPAT, 'UTF-8');
    }
    // This also works perfectly
    /**
     * Decode "\uXXXX" escape sequences (JavaScript/JSON style) into UTF-8 characters.
     *
     * Each escape is rewritten as a hexadecimal HTML numeric entity, then all
     * entities in the string (numeric and HTML 4.01 named ones, including quotes)
     * are decoded to UTF-8.
     *
     * @param string $string Text containing literal backslash-u escapes.
     * @return string The text with every escape replaced by the character it denotes.
     */
    function unicode2html($string){
        // Merge UTF-16 surrogate pairs into a single code point first; a pair
        // written as two separate entities would not be decodable.
        $string = preg_replace_callback(
            '/\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})/i',
            function ($pair) {
                $codePoint = 0x10000
                    + ((hexdec($pair[1]) - 0xD800) << 10)
                    + (hexdec($pair[2]) - 0xDC00);
                return '&#x' . dechex($codePoint) . ';';
            },
            $string
        );

        // Four backslashes = one literal backslash after PHP and PCRE unescaping.
        // Matching exactly four hex digits keeps trailing text out of the entity
        // and leaves every other backslash and "%u" in the input alone.
        $string = preg_replace('/\\\\u([0-9a-f]{4})/i', '&#x$1;', $string);

        // html_entity_decode() is used instead of
        // mb_convert_encoding(..., 'UTF-8', 'HTML-ENTITIES'): the mbstring
        // "HTML-ENTITIES" pseudo-encoding is deprecated as of PHP 8.2, and this
        // call works directly on UTF-8 input.
        return html_entity_decode($string, ENT_QUOTES | ENT_HTML401, 'UTF-8');
    }
    // Works perfectly
    /**
     * Rewrite "\uXXXX" escape sequences as hexadecimal HTML numeric entities.
     *
     * The result is entity markup ("&#xXXXX;"), not decoded characters. Escapes
     * are converted one 16-bit unit at a time; surrogate pairs are not combined.
     *
     * @param string $str Text containing literal backslash-u escapes.
     * @return string The text with each escape replaced by "&#xXXXX;".
     */
    function unicode_escape_sequences($str){
        // Only real hexadecimal digits, in either case, form an escape; a sequence
        // such as "\users" must be left as it is.
        return preg_replace('/\\\\u([0-9a-fA-F]{4})/', '&#x$1;', $str);
    }

    // Works perfectly
    /**
     * preg_replace_callback() handler: turn one captured group of hex digits into
     * the UTF-8 encoding of that UCS-2 code unit.
     *
     * @param array $match Regex match; index 1 holds the hex digits of the escape.
     * @return string The converted character.
     */
    function replace_unicode_escape_sequence($match) {
        $hexDigits = $match[1];
        // pack('H*') turns the hex digits into raw big-endian bytes.
        $rawBytes = pack('H*', $hexDigits);

        return mb_convert_encoding($rawBytes, 'UTF-8', 'UCS-2BE');
    }
    // Run every \uXXXX escape in $string through replace_unicode_escape_sequence(),
    // keep the converted text in $str, then print it.
    $str = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        'replace_unicode_escape_sequence',
        $string
    );
    echo $str;

    // Works perfectly
    // Decode whole runs of consecutive \uXXXX escapes in one go: the hex digits of
    // a run are packed into UTF-16BE bytes and converted together, so a surrogate
    // pair written as two adjacent escapes comes out as a single character.
    // The class is limited to hex digits (A-F, not A-Z) so that pack('H*') never
    // receives an illegal hex digit.
    echo preg_replace_callback('/(?:\\\\u[0-9a-fA-F]{4})+/', function ($v){
        // Drop the "\u" markers, leaving only the hex digits of the code units.
        $hex = str_replace('\\u', '', $v[0]);
        return mb_convert_encoding(pack('H*', $hex), 'UTF-8', 'UTF-16BE');
    }, $string);


    // One of the easiest ways, and it works well when $string is itself valid JSON
    // (for example a double-quoted JSON string literal).
    // json_decode() converts \uXXXX escape sequences into the corresponding characters.
    print_r(json_decode($string));

    // This also works: wrap the raw escape sequence in double quotes so that it
    // forms a JSON string literal, then let json_decode() decode it.
    $unicodeChar = '\u1000';
    echo json_decode('"'.$unicodeChar.'"');
    ?>