package main

// HTMLUnescape replaces, in place, every occurrence in src of the six-byte
// sequences \u003c, \u003e, \u0026, \u2028 and \u2029 with the characters they
// stand for: '<', '>', '&', U+2028 and U+2029 (the last two as their three-byte
// UTF-8 encodings). It is intended to reverse the effect of HTMLEscape from the
// standard library's encoding/json package.
//
// Only the lowercase hexadecimal spellings listed above are recognised. Any
// other backslash escape (for example \\ or \n) is stepped over as a two-byte
// unit, so an escaped backslash followed by the literal text u003c is left
// untouched.
//
// The function works on a byte slice to avoid string allocations. src is
// modified and the returned slice shares its backing array; it is shorter than
// src whenever at least one sequence was replaced. When nothing is replaced,
// src is returned as is and no byte of it is written.
func HTMLUnescape(src []byte) []byte {
	const escLen = 6 // length of a `\uXXXX` escape

	var (
		w       int // next write position; never ahead of the read position
		pending int // start of the bytes already scanned but not yet moved down
	)

	// An escape needs escLen bytes, so a backslash closer to the end than that
	// cannot start one and the remaining tail is copied verbatim below.
	for r := 0; r+escLen <= len(src); {
		if src[r] != '\\' {
			r++
			continue
		}

		if src[r+1] == 'u' {
			// The replacement is a constant string, so it is fully determined
			// before any byte of src is overwritten.
			if rep := htmlEscapeReplacement(src[r+2 : r+escLen]); rep != "" {
				w += copy(src[w:], src[pending:r]) // copy handles the overlap
				w += copy(src[w:], rep)
				r += escLen
				pending = r
				continue
			}
		}

		// Not one of the target escapes: step over the backslash and the byte
		// it escapes only, so that every following byte is still examined.
		r += 2
	}

	// pending only moves once a replacement has been made.
	if pending == 0 {
		return src
	}

	w += copy(src[w:], src[pending:])
	return src[:w]
}

// htmlEscapeReplacement returns the bytes represented by the four hexadecimal
// digits of a \uXXXX escape when it is one of the escapes handled by
// HTMLUnescape, or the empty string otherwise.
func htmlEscapeReplacement(hex []byte) string {
	switch string(hex) {
	case "003c":
		return "<"
	case "003e":
		return ">"
	case "0026":
		return "&"
	case "2028":
		return "\xE2\x80\xA8" // UTF-8 encoding of U+2028
	case "2029":
		return "\xE2\x80\xA9" // UTF-8 encoding of U+2029
	}
	return ""
}