Skip to content

Instantly share code, notes, and snippets.

@ik5
Forked from bradleypeabody/gist:185b1d7ed6c0c2ab6cec
Last active August 5, 2020 11:58
Show Gist options
  • Save ik5/65de721ca495fa1bf451 to your computer and use it in GitHub Desktop.
Save ik5/65de721ca495fa1bf451 to your computer and use it in GitHub Desktop.
golang, convert UTF-16 to UTF-8 string
package main
import "fmt"
import "unicode/utf16"
import "unicode/utf8"
import "bytes"
// main decodes a small hard-coded UTF-16 sample and prints the resulting
// UTF-8 text. It panics if the sample cannot be decoded.
func main() {
	// Each line below is one 16-bit code unit, low byte first, matching the
	// 0xFF 0xFE byte-order mark at the start of the sample.
	sample := []byte{
		0xff, 0xfe, // BOM
		'T', 0x00,
		'E', 0x00,
		'S', 0x00,
		'T', 0x00,
		0x6C, 0x34,
		'\n', 0x00,
	}

	decoded, err := DecodeUTF16(sample)
	if err != nil {
		panic(err)
	}
	fmt.Println(decoded)
}
// EncodeUTF16 converts the UTF-8 string s into big-endian UTF-16 bytes.
//
// Runes that do not fit in a single 16-bit unit are emitted as surrogate
// pairs by utf16.Encode, so the output is UTF-16 rather than plain UCS-2.
// When add_bom is true the result starts with the big-endian byte-order
// mark 0xFE 0xFF. A nil slice is returned when there is nothing to emit
// (empty s and add_bom false).
func EncodeUTF16(s string, add_bom bool) []byte {
	units := utf16.Encode([]rune(s))

	size := 2 * len(units)
	if add_bom {
		size += 2
	}
	if size == 0 {
		return nil
	}

	// The final length is known up front, so allocate exactly once.
	out := make([]byte, 0, size)
	if add_bom {
		out = append(out, 0xFE, 0xFF)
	}
	for _, u := range units {
		// Big endian: most significant byte first.
		out = append(out, byte(u>>8), byte(u))
	}
	return out
}
// DecodeUTF16 converts UTF-16 encoded bytes into a UTF-8 string.
//
// The byte order is taken from a leading byte-order mark (see UTF16Bom);
// input without a BOM is treated as big endian. The BOM itself is an
// encoding marker and is not included in the returned text. Surrogate pairs
// are combined into a single rune; an unpaired surrogate decodes to U+FFFD,
// as done by utf16.Decode.
//
// An error is returned when len(b) is odd or when b is too short to be
// inspected for a BOM (which, given the even-length rule, means b is empty).
func DecodeUTF16(b []byte) (string, error) {
	if len(b)%2 != 0 {
		return "", fmt.Errorf("Must have even length byte slice")
	}

	bom := UTF16Bom(b)
	if bom < 0 {
		return "", fmt.Errorf("Buffer is too small")
	}
	if bom != 0 {
		// Drop the marker so it does not surface as U+FEFF in the output.
		b = b[2:]
	}
	littleEndian := bom == 2

	// Gather every code unit first: decoding unit by unit would split
	// surrogate pairs and corrupt characters outside the BMP.
	units := make([]uint16, len(b)/2)
	for i := range units {
		hi, lo := b[2*i], b[2*i+1]
		if littleEndian {
			hi, lo = lo, hi
		}
		units[i] = uint16(hi)<<8 | uint16(lo)
	}

	var out bytes.Buffer
	var scratch [utf8.UTFMax]byte
	for _, r := range utf16.Decode(units) {
		n := utf8.EncodeRune(scratch[:], r)
		out.Write(scratch[:n])
	}
	return out.String(), nil
}

// UTF16Bom reports which byte-order mark, if any, b starts with.
//
// It returns 1 for a big-endian BOM (0xFE 0xFF), 2 for a little-endian BOM
// (0xFF 0xFE), 0 when the first two bytes are not a BOM, and -1 when b has
// fewer than two bytes and therefore cannot hold one.
func UTF16Bom(b []byte) int8 {
	if len(b) < 2 {
		return -1
	}
	switch {
	case b[0] == 0xFE && b[1] == 0xFF:
		return 1
	case b[0] == 0xFF && b[1] == 0xFE:
		return 2
	}
	return 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment