Skip to content

Commit d10ba2e

Browse files
committed
Added support for very large XML files
1 parent eeb88c0 commit d10ba2e

6 files changed

Lines changed: 66 additions & 2 deletions

File tree

lib/xmljava.jar

1.26 KB
Binary file not shown.

src/com/maxprograms/xml/Constants.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414

1515
public class Constants {
1616

17-
public static final String VERSION = "1.7.0";
18-
public static final String BUILD = "20240210_0953";
17+
public static final String VERSION = "1.8.0";
18+
public static final String BUILD = "20240322_0813";
1919

2020
private Constants() {
2121
// private for security

src/com/maxprograms/xml/SAXBuilder.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ public Document build(ByteArrayInputStream stream) throws SAXException, IOExcept
8080
parser.setFeature("http://apache.org/xml/features/validation/schema", true);
8181
parser.setFeature("http://apache.org/xml/features/validation/dynamic", true);
8282
}
83+
parser.setProperty("http://www.oracle.com/xml/jaxp/properties/totalEntitySizeLimit", 0);
8384
boolean clearHandler = false;
8485
if (contentHandler == null) {
8586
contentHandler = new CustomContentHandler();
@@ -145,6 +146,7 @@ public Document build(URL url) throws SAXException, IOException, ParserConfigura
145146
parser.setFeature("http://apache.org/xml/features/validation/schema", true);
146147
parser.setFeature("http://apache.org/xml/features/validation/dynamic", true);
147148
}
149+
parser.setProperty("http://www.oracle.com/xml/jaxp/properties/totalEntitySizeLimit", 0);
148150
boolean clearHandler = false;
149151
if (contentHandler == null) {
150152
contentHandler = new CustomContentHandler();

src/com/maxprograms/xml/XMLUtils.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,16 @@
1111
*******************************************************************************/
1212
package com.maxprograms.xml;
1313

14+
import java.io.BufferedReader;
1415
import java.io.File;
16+
import java.io.FileInputStream;
17+
import java.io.FileReader;
1518
import java.io.IOException;
19+
import java.lang.System.Logger;
20+
import java.lang.System.Logger.Level;
21+
import java.nio.charset.StandardCharsets;
22+
import java.text.MessageFormat;
23+
import java.util.StringTokenizer;
1624

1725
public class XMLUtils {
1826

@@ -81,4 +89,56 @@ public static String getAbsolutePath(String homeFile, String relative) throws IO
8189
/**
 * Tells whether a character is a space, a tab, a line feed or a carriage
 * return.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is {@code ' '}, {@code '\t'},
 *         {@code '\n'} or {@code '\r'}; {@code false} otherwise
 */
public static boolean isXmlSpace(char c) {
8290
return c == ' ' || c == '\t' || c == '\n' || c == '\r';
8391
}
92+
93+
public static String getXMLEncoding(String fileName) {
94+
// return UTF-8 as default
95+
String result = StandardCharsets.UTF_8.name();
96+
try {
97+
// check if there is a BOM (byte order mark)
98+
// at the start of the document
99+
byte[] array = new byte[2];
100+
try (FileInputStream inputStream = new FileInputStream(fileName)) {
101+
int bytes = inputStream.read(array);
102+
if (bytes == -1) {
103+
MessageFormat mf = new MessageFormat(Messages.getString("XMLUtils.1"));
104+
throw new IOException(mf.format(new String[] { fileName }));
105+
}
106+
}
107+
byte[] lt = "<".getBytes();
108+
byte[] feff = { -1, -2 };
109+
byte[] fffe = { -2, -1 };
110+
if (array[0] != lt[0]) {
111+
// there is a BOM, now check the order
112+
if (array[0] == fffe[0] && array[1] == fffe[1]) {
113+
return StandardCharsets.UTF_16BE.name();
114+
}
115+
if (array[0] == feff[0] && array[1] == feff[1]) {
116+
return StandardCharsets.UTF_16LE.name();
117+
}
118+
}
119+
// check declared encoding
120+
String line = "";
121+
try (FileReader input = new FileReader(fileName); BufferedReader buffer = new BufferedReader(input)) {
122+
line = buffer.readLine();
123+
}
124+
if (line.startsWith("<?")) {
125+
line = line.substring(2, line.indexOf("?>"));
126+
line = line.replace("\'", "\"");
127+
StringTokenizer tokenizer = new StringTokenizer(line);
128+
while (tokenizer.hasMoreTokens()) {
129+
String token = tokenizer.nextToken();
130+
if (token.startsWith("encoding")) {
131+
result = token.substring(token.indexOf('\"') + 1, token.lastIndexOf('\"'));
132+
}
133+
}
134+
}
135+
} catch (Exception e) {
136+
Logger logger = System.getLogger(XMLUtils.class.getName());
137+
logger.log(Level.ERROR, e.getMessage(), e);
138+
}
139+
if (result.equalsIgnoreCase("utf-8")) {
140+
result = StandardCharsets.UTF_8.name();
141+
}
142+
return result;
143+
}
84144
}

src/com/maxprograms/xml/xmljava.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ RelaxNGParser.1=Missing ''{0}'' in <externalRef>
2222
SAXBuilder.1=File ''{0}'' does not exist
2323
XMLOutputter.0=Header contains wrong content type
2424
XMLOutputter.1=Unknown node type
25+
XMLUtils.1=Error reading BOM from {0}

src/com/maxprograms/xml/xmljava_es.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ RelaxNGParser.1=Falta ''{0}'' en <externalRef>
2222
SAXBuilder.1=El archivo ''{0}'' no existe
2323
XMLOutputter.0=El encabezado contiene un tipo de contenido incorrecto
2424
XMLOutputter.1=Tipo de nodo desconocido
25+
XMLUtils.1=Error al leer Marca de Orden de Bytes (BOM) de {0}

0 commit comments

Comments
 (0)