|
11 | 11 | *******************************************************************************/ |
12 | 12 | package com.maxprograms.xml; |
13 | 13 |
|
| 14 | +import java.io.BufferedReader; |
14 | 15 | import java.io.File; |
| 16 | +import java.io.FileInputStream; |
| 17 | +import java.io.FileReader; |
15 | 18 | import java.io.IOException; |
| 19 | +import java.lang.System.Logger; |
| 20 | +import java.lang.System.Logger.Level; |
| 21 | +import java.nio.charset.StandardCharsets; |
| 22 | +import java.text.MessageFormat; |
| 23 | +import java.util.StringTokenizer; |
16 | 24 |
|
17 | 25 | public class XMLUtils { |
18 | 26 |
|
@@ -81,4 +89,56 @@ public static String getAbsolutePath(String homeFile, String relative) throws IO |
81 | 89 | public static boolean isXmlSpace(char c) { |
82 | 90 | return c == ' ' || c == '\t' || c == '\n' || c == '\r'; |
83 | 91 | } |
| 92 | + |
| 93 | + public static String getXMLEncoding(String fileName) { |
| 94 | + // return UTF-8 as default |
| 95 | + String result = StandardCharsets.UTF_8.name(); |
| 96 | + try { |
| 97 | + // check if there is a BOM (byte order mark) |
| 98 | + // at the start of the document |
| 99 | + byte[] array = new byte[2]; |
| 100 | + try (FileInputStream inputStream = new FileInputStream(fileName)) { |
| 101 | + int bytes = inputStream.read(array); |
| 102 | + if (bytes == -1) { |
| 103 | + MessageFormat mf = new MessageFormat(Messages.getString("XMLUtils.1")); |
| 104 | + throw new IOException(mf.format(new String[] { fileName })); |
| 105 | + } |
| 106 | + } |
| 107 | + byte[] lt = "<".getBytes(); |
| 108 | + byte[] feff = { -1, -2 }; |
| 109 | + byte[] fffe = { -2, -1 }; |
| 110 | + if (array[0] != lt[0]) { |
| 111 | + // there is a BOM, now check the order |
| 112 | + if (array[0] == fffe[0] && array[1] == fffe[1]) { |
| 113 | + return StandardCharsets.UTF_16BE.name(); |
| 114 | + } |
| 115 | + if (array[0] == feff[0] && array[1] == feff[1]) { |
| 116 | + return StandardCharsets.UTF_16LE.name(); |
| 117 | + } |
| 118 | + } |
| 119 | + // check declared encoding |
| 120 | + String line = ""; |
| 121 | + try (FileReader input = new FileReader(fileName); BufferedReader buffer = new BufferedReader(input)) { |
| 122 | + line = buffer.readLine(); |
| 123 | + } |
| 124 | + if (line.startsWith("<?")) { |
| 125 | + line = line.substring(2, line.indexOf("?>")); |
| 126 | + line = line.replace("\'", "\""); |
| 127 | + StringTokenizer tokenizer = new StringTokenizer(line); |
| 128 | + while (tokenizer.hasMoreTokens()) { |
| 129 | + String token = tokenizer.nextToken(); |
| 130 | + if (token.startsWith("encoding")) { |
| 131 | + result = token.substring(token.indexOf('\"') + 1, token.lastIndexOf('\"')); |
| 132 | + } |
| 133 | + } |
| 134 | + } |
| 135 | + } catch (Exception e) { |
| 136 | + Logger logger = System.getLogger(XMLUtils.class.getName()); |
| 137 | + logger.log(Level.ERROR, e.getMessage(), e); |
| 138 | + } |
| 139 | + if (result.equalsIgnoreCase("utf-8")) { |
| 140 | + result = StandardCharsets.UTF_8.name(); |
| 141 | + } |
| 142 | + return result; |
| 143 | + } |
84 | 144 | } |
0 commit comments