package main

// http://play.golang.org/p/fVf7duRtdH

import "fmt"
import "unicode/utf16"
import "unicode/utf8"
import "bytes"

func main() {

	b := []byte{
		0xff, // BOM
		0xfe, // BOM
		'T',
		0x00,
		'E',
		0x00,
		'S',
		0x00,
		'T',
		0x00,
		0x6C, // low byte of U+346C
		0x34, // high byte of U+346C
		'\n',
		0x00,
	}

	s, err := DecodeUTF16(b)
	if err != nil {
		panic(err)
	}

	fmt.Println(s)
}

func DecodeUTF16(b []byte) (string, error) {

	if len(b)%2 != 0 {
		return "", fmt.Errorf("Must have even length byte slice")
	}

	u16s := make([]uint16, 1)
	ret := &bytes.Buffer{}
	b8buf := make([]byte, 4)

	lb := len(b)
	for i := 0; i < lb; i += 2 {
		// assemble one little-endian UTF-16 code unit and re-encode it as UTF-8
		u16s[0] = uint16(b[i]) + (uint16(b[i+1]) << 8)
		r := utf16.Decode(u16s)
		n := utf8.EncodeRune(b8buf, r[0])
		ret.Write(b8buf[:n])
	}

	return ret.String(), nil
}
This gives an incorrect result when decoding any surrogate pair; it should take care of the high/low surrogate range. A quick fix is to increase the size of u16s to 2 (u16s := make([]uint16, 2)) and, when the current unit is a lead surrogate, read the trailing unit as well:

if u16s[0] >= 0xD800 && u16s[0] < 0xDC00 { // lead (high) surrogate: 0xD800–0xDBFF
	log.Println("lead")
	i = i + 2
	u16s[1] = uint16(b[i]) + (uint16(b[i+1]) << 8)
}

Note that utf16.Decode should then be called on u16s[:1] or u16s[:2] depending on how many units were actually filled, otherwise a stale second element gets decoded on the iterations that don't see a surrogate.
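A sketch of a drop-in replacement for the gist's DecodeUTF16 with that fix folded in (same package and imports as above; little-endian only, no BOM handling, and only the units actually read are decoded each iteration):

// DecodeUTF16 converts UTF-16LE bytes to a UTF-8 string, pairing surrogates.
func DecodeUTF16(b []byte) (string, error) {
	if len(b)%2 != 0 {
		return "", fmt.Errorf("must have even length byte slice")
	}

	ret := &bytes.Buffer{}
	b8buf := make([]byte, 4)
	u16s := make([]uint16, 0, 2)

	for i := 0; i < len(b); i += 2 {
		// assemble the current little-endian code unit
		u16s = append(u16s[:0], uint16(b[i])+(uint16(b[i+1])<<8))

		// a lead (high) surrogate must be decoded together with the unit that follows it
		if u16s[0] >= 0xD800 && u16s[0] < 0xDC00 && i+3 < len(b) {
			i += 2
			u16s = append(u16s, uint16(b[i])+(uint16(b[i+1])<<8))
		}

		// decode only the units read this iteration and re-encode as UTF-8
		for _, r := range utf16.Decode(u16s) {
			n := utf8.EncodeRune(b8buf, r)
			ret.Write(b8buf[:n])
		}
	}

	return ret.String(), nil
}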
golang already has support for decoding []byte into []uint16 (respecting the endianness):

func DecodeUtf16(b []byte, order binary.ByteOrder) (string, error) {
	ints := make([]uint16, len(b)/2)
	if err := binary.Read(bytes.NewReader(b), order, &ints); err != nil {
		return "", err
	}
	return string(utf16.Decode(ints)), nil
}

@akirabbq @ik5
complete solution (which also works with surrogate pairs): utf16.go
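For illustration, here is how the DecodeUtf16 above can be wired up with the sample bytes from the gist's main (a sketch, not the linked utf16.go; BOM handling here is minimal and only checks for the little-endian FF FE marker):

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"unicode/utf16"
)

func DecodeUtf16(b []byte, order binary.ByteOrder) (string, error) {
	ints := make([]uint16, len(b)/2)
	if err := binary.Read(bytes.NewReader(b), order, &ints); err != nil {
		return "", err
	}
	return string(utf16.Decode(ints)), nil
}

func main() {
	// Same bytes as the gist's main: BOM, "TEST", the rune U+346C, and a newline (UTF-16LE).
	b := []byte{0xff, 0xfe, 'T', 0x00, 'E', 0x00, 'S', 0x00, 'T', 0x00, 0x6C, 0x34, '\n', 0x00}

	// The FF FE byte order mark indicates little-endian; strip it before decoding.
	if len(b) >= 2 && b[0] == 0xff && b[1] == 0xfe {
		b = b[2:]
	}

	s, err := DecodeUtf16(b, binary.LittleEndian)
	if err != nil {
		panic(err)
	}
	fmt.Print(s)
}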
From the blog http://angelonotes.blogspot.com/2015/09/golang-utf16-utf8.html:

bs_UTF16LE, _, _ := transform.Bytes(unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewEncoder(), []byte("測試"))
bs_UTF16BE, _, _ := transform.Bytes(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder(), []byte("測試"))
bs_UTF8LE, _, _ := transform.Bytes(unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder(), bs_UTF16LE)
bs_UTF8BE, _, _ := transform.Bytes(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder(), bs_UTF16BE)

Saved me a lot, thank you
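A self-contained sketch of the little-endian round trip from that snippet, with the imports spelled out and errors checked (uses golang.org/x/text/encoding/unicode and golang.org/x/text/transform; assumes golang.org/x/text is available):

package main

import (
	"fmt"

	"golang.org/x/text/encoding/unicode"
	"golang.org/x/text/transform"
)

func main() {
	utf16le := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)

	// Go source strings are UTF-8; encode to UTF-16LE bytes.
	bsUTF16LE, _, err := transform.Bytes(utf16le.NewEncoder(), []byte("測試"))
	if err != nil {
		panic(err)
	}

	// Decode the UTF-16LE bytes back to UTF-8.
	bsUTF8, _, err := transform.Bytes(utf16le.NewDecoder(), bsUTF16LE)
	if err != nil {
		panic(err)
	}

	fmt.Println(string(bsUTF8)) // 測試
}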
Thanks very much!
You sent just the function I needed to convert Oracle CLOB data to a string.
Life saver, thanks
This helped me to decode UTF-16LE to UTF-8: https://blog.fearcat.in/a?ID=00001-1bd90844-ce0c-4fac-9b8f-fe3d8a30451d
decoder := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder()
utf8bytes, err := decoder.Bytes(data) // data contains UTF16LE as read from a file
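A minimal sketch of that pattern with the surrounding file read and error handling (the filename is just a placeholder; assumes golang.org/x/text is in go.mod):

package main

import (
	"fmt"
	"os"

	"golang.org/x/text/encoding/unicode"
)

func main() {
	// Hypothetical input file containing UTF-16LE text.
	data, err := os.ReadFile("input-utf16le.txt")
	if err != nil {
		panic(err)
	}

	decoder := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder()
	utf8bytes, err := decoder.Bytes(data) // converts the UTF-16LE bytes to UTF-8
	if err != nil {
		panic(err)
	}

	fmt.Println(string(utf8bytes))
}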
Thank you!