--- title: Java获取文件真实格式 date: 2020-01-16 13:47:41 description: 根据数据流的头部信息匹配文件的真实格式 categories: [技术总结] tags: [Java] --- ## 需求 很多时候,文件的类型和后缀名不一致,这样就不能简单的用后缀名判断文件的格式。 例如:POI导入excel只能支持.xls和.xlsx的文件解析,但有些真实格式为csv或html的.xls文件混入其中就会导致导入失败,那么就需要根据头部信息匹配格式 ### 获取文件真实类型 1. 获取文件的头部字节 1. 将头部字节转换成16进制字符串 1. 匹配字符串获取文件类型 ```java FileTypeJudge.java /** * 文件类型判断类 */ public final class FileTypeJudge { /** * Constructor */ private FileTypeJudge() {} /** * 将文件头转换成16进制字符串 */ private static String bytesToHexString(byte[] src){ StringBuilder stringBuilder = new StringBuilder(); if (src == null || src.length <= 0) { return null; } for (byte b : src) { int v = b & 0xFF; String hv = Integer.toHexString(v); if (hv.length() < 2) { stringBuilder.append(0); } stringBuilder.append(hv); } return stringBuilder.toString(); } /** * 得到文件头 */ private static String getFileContent(InputStream inputStream) throws IOException { byte[] b = new byte[28]; try { inputStream.read(b, 0, 28); } finally { if (inputStream != null) { inputStream.close(); } } return bytesToHexString(b); } /** * 判断文件类型 */ public static FileType getType(InputStream inputStream) throws IOException { String fileHead = getFileContent(inputStream); if (fileHead == null || fileHead.length() == 0) { return null; } for (FileType type : FileType.values()) { if (fileHead.startsWith(type.code)) { return type; } } return null; } /** * InputStream转ByteArrayOutputStream,用于流的复制 */ public static ByteArrayOutputStream cloneInputStream(InputStream input) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len; while ((len = input.read(buffer)) > -1) { baos.write(buffer, 0, len); } baos.flush(); return baos; } } ``` ### 关于流的复制 由于判断文件格式需要读取流的头部信息,这个读取是不可逆的,而判断完文件格式后通常还需要读文件,所以需要复制一个InputStream ```java 复制InputStream InputStream fis = request.getFile("file").getInputStream(); ByteArrayOutputStream temp = cloneInputStream(fis); // 打开两个新的输入流,一个用于判断文件类型,还有一个用于读取文件内容 InputStream stream1 = new ByteArrayInputStream(temp.toByteArray()); InputStream stream2 = new ByteArrayInputStream(temp.toByteArray()); ``` ### 文件类型枚举 罗列了大部分文件格式的头部信息,如果有新的格式可以测试后添加进去 ```java FileType.java /** * 文件类型枚取 */ public enum FileType { /** * JPEG(jpg) */ JPG("ffd8ffe000104a464946","jpg"), /** * PNG(png) */ PNG("89504e470d0a1a0a0000","png"), /** * GIF(gif) */ GIF("47494638396126026f01","gif"), /** * TIFF(tif) */ TIF("49492a00227105008037","tif"), /** * 16色位图(bmp) */ BMP_16("424d228c010000000000","bmp"), /** * 24位位图(bmp) */ BMP_24("424d8240090000000000","bmp"), /** * 256色位图(bmp) */ BMP_256("424d8e1b030000000000","bmp"), /** * CAD(dwg) */ DWG("41433130313500000000","dwg"), /** * HTML(html) */ HTML("3c21444f435459504520","html"), /** * HTM(htm) */ HTM("3c21646f637479706520","htm"), /** * 猎聘、智联简历。htm */ HTM_2("3c68746d6c20786d6c6e","htm"), /** * css */ CSS("48544d4c207b0d0a0942","css"), /** * js */ JS("696b2e71623d696b2e71","js"), /** * RichTextFormat(rtf) */ RTF("7b5c727466315c616e73","rtf"), /** * Photoshop(psd) */ PSD("38425053000100000000","psd"), /** * Email[OutlookExpress6](eml) */ EML("46726f6d3a203d3f6762","eml"), /** * MSExcel注意:word、msi和excel的文件头一样 */ DOC("d0cf11e0a1b11ae10000","doc"), /** * docx文件 */ DOCX("504b0304140006000800","docx"), /** * MSAccess(mdb) */ MDB("5374616E64617264204A","mdb"), PS("252150532D41646F6265","ps"), /** * AdobeAcrobat(pdf) */ PDF("255044462d312e350d0a","pdf"), /** * rmvb/rm相同 */ RMVB("2e524d46000000120001","rmvb"), /** * flv与f4v相同 */ FLV("464c5601050000000900","flv"), MP4("00000020667479706d70","mp4"), MP3("49443303000000002176","mp3"), /** * */ MPG("000001ba210001000180","mpg"), /** * wmv与asf相同 */ WMV("3026b2758e66cf11a6d9","wmv"), /** * Wave(wav) */ WAV("52494646e27807005741","wav"), AVI("52494646d07d60074156","avi"), /** * MIDI(mid) */ MID("4d546864000000060001","mid"), ZIP("504b0304140000000800","zip"), RAR("526172211a0700cf9073","rar"), INI("235468697320636f6e66","ini"), JAR("504b03040a0000000000","jar"), /** * 可执行文件 */ EXE("4d5a9000030000000400","exe"), /** * jsp文件 */ JSP("3c25402070616765206c","jsp"), /** * MF文件 */ MF("4d616e69666573742d56","mf"), /** * xml文件 */ XML("3c3f786d6c2076657273","xml"), /** * sql文件 */ SQL("494e5345525420494e54","sql"), /** * java文件 */ JAVA("7061636b616765207765","java"), /** * bat文件 */ BAT("406563686f206f66660d","bat"), /** * gz文件 */ GZ("1f8b0800000000000000","gz"), /** * bat文件 */ PROPERTIES("6c6f67346a2e726f6f74","properties"), /** * bat文件 */ CLASS("cafebabe0000002e0041","class"), /** * bat文件 */ CHM("49545346030000006000","chm"), /** * bat文件 */ MXP("04000000010000001300","mxp"), TORRENT("6431303a637265617465","torrent"), /** * 51job简历。mht */ MHT("46726f6d3a3cd3c920cd","mht"), /** * Quicktime(mov) */ MOV("6D6F6F76","mov"), /** * WordPerfect(wpd) */ WPD("FF575043","wpd"), /** * OutlookExpress(dbx) */ DBX("CFAD12FEC5FD746F","dbx"), /** * Outlook(pst) */ PST("2142444E","pst"), /** * Quicken(qdf) */ QDF("AC9EBD8F","qdf"), /** * WindowsPassword(pwl) */ PWL("E3828596","pwl"), /** * RealAudio(ram) */ RAM("2E7261FD","ram"); public String code; public String name; FileType(String code, String name) { this.code = code; this.name = name; } } ```