Created
July 9, 2013 13:59
-
-
Save wcp1231/5957563 to your computer and use it in GitHub Desktop.
判断文件的编码类型
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public static String getFilecharset(File sourceFile) { | |
String charset = "GBK"; | |
byte[] first3Bytes = new byte[3]; | |
try { | |
BufferedInputStream bis = new BufferedInputStream(new FileInputStream(sourceFile)); | |
bis.mark(0); | |
int read = bis.read(first3Bytes, 0, 3); | |
System.out.println("字节大小:"+read); | |
if (read == -1) { | |
return charset; //文件编码为 ANSI | |
} else if (first3Bytes[0] == (byte) 0xFF | |
&& first3Bytes[1] == (byte) 0xFE) { | |
charset = "UTF-16LE"; //文件编码为 Unicode | |
} else if (first3Bytes[0] == (byte) 0xFE | |
&& first3Bytes[1] == (byte) 0xFF) { | |
charset = "UTF-16BE"; //文件编码为 Unicode big endian | |
} else if (first3Bytes[0] == (byte) 0xEF | |
&& first3Bytes[1] == (byte) 0xBB | |
&& first3Bytes[2] == (byte) 0xBF) { | |
charset = "UTF-8"; //文件编码为 UTF-8 | |
} | |
bis.reset(); | |
bis.close(); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
return charset; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment