From 43aace00c073aeb2f268aaec3262ed2db29f2375 Mon Sep 17 00:00:00 2001 From: Kakhnovich Raman Date: Wed, 17 Dec 2025 17:14:57 +0300 Subject: [PATCH 1/3] Add warnings for Invalid language escape sequences in text strings --- src/main/java/org/verapdf/cos/COSString.java | 68 ++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/src/main/java/org/verapdf/cos/COSString.java b/src/main/java/org/verapdf/cos/COSString.java index ee6bb3d0..f16a32bd 100644 --- a/src/main/java/org/verapdf/cos/COSString.java +++ b/src/main/java/org/verapdf/cos/COSString.java @@ -221,17 +221,85 @@ public String getASCIIString() { public boolean isTextString() { if (value.length > 2) { if ((value[0] & 0xFF) == 0xFE && (value[1] & 0xFF) == 0xFF) { + checkUTF16BEEscapeSequence(value); return true; } } if (value.length > 3) { if ((value[0] & 0xFF) == 0xEF && (value[1] & 0xFF) == 0xBB && (value[2] & 0xFF) == 0xBF) { + checkUTF8EscapeSequence(value); return true; } } return PDFDocEncoding.isPDFDocEncodingString(value); } + private boolean checkUTF16BEEscapeSequence(byte[] value) { + for (int i = 0; i < value.length; i++) { + if (i + 1 < value.length && value[i] == 0x00 && value[i + 1] == 0x1B) { + if (i + 5 < value.length && value[i + 4] == 0x00 && value[i + 5] == 0x1B) { + if (isASCIILetter(value[i + 2]) && isASCIILetter(value[i + 3])) { + i += 5; + continue; + } else { + LOGGER.warning("Text string language escape sequence contains character (not ASCII letter)"); + return false; + } + } + + if (i + 7 < value.length && value[i + 6] == 0x00 && value[i + 7] == 0x1B) { + if (isASCIILetter(value[i + 2]) && isASCIILetter(value[i + 3]) && + isASCIILetter(value[i + 4]) && isASCIILetter(value[i + 5])) { + i += 7; + continue; + } else { + LOGGER.warning("Text string language escape sequence contains character (not ASCII letter)"); + return false; + } + } + LOGGER.warning("Text string language escape sequence have invalid length"); + return false; + } + } + return true; + } + + private boolean checkUTF8EscapeSequence(byte[] value) { + for (int i = 0; i < value.length; i++) { + if (value[i] == 0x1B) { + if (i + 3 < value.length && value[i + 3] == 0x1B) { + if (isASCIILetter(value[i + 1]) && isASCIILetter(value[i + 2])) { + i += 3; + continue; + } else { + LOGGER.warning("Text string language escape sequence contains character (not ASCII letter)"); + return false; + } + } + + if (i + 5 < value.length && value[i + 5] == 0x1B) { + if (isASCIILetter(value[i + 1]) && isASCIILetter(value[i + 2]) && + isASCIILetter(value[i + 3]) && isASCIILetter(value[i + 4])) { + i += 5; + continue; + } else { + LOGGER.warning("Text string language escape sequence contains character (not ASCII letter)"); + return false; + } + } + + LOGGER.warning("Text string language escape sequence have invalid length"); + return false; + } + } + return true; + } + + private boolean isASCIILetter(byte b) { + int c = b & 0xFF; + return (c >= 0x41 && c <= 0x5A) || (c >= 0x61 && c <= 0x7A); + } + protected String toLitString() { StringBuilder result = new StringBuilder(); result.append('('); From 7d90e11cf6055c977657f1ecaf648c9420abdfd3 Mon Sep 17 00:00:00 2001 From: Kakhnovich Raman Date: Thu, 18 Dec 2025 12:43:20 +0300 Subject: [PATCH 2/3] Update COSString.java --- src/main/java/org/verapdf/cos/COSString.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/verapdf/cos/COSString.java b/src/main/java/org/verapdf/cos/COSString.java index f16a32bd..d93613ae 100644 --- a/src/main/java/org/verapdf/cos/COSString.java +++ b/src/main/java/org/verapdf/cos/COSString.java @@ -257,7 +257,7 @@ private boolean checkUTF16BEEscapeSequence(byte[] value) { return false; } } - LOGGER.warning("Text string language escape sequence have invalid length"); + LOGGER.warning("Text string language escape sequence has invalid length"); return false; } } @@ -288,7 +288,7 @@ private boolean checkUTF8EscapeSequence(byte[] value) { } } - LOGGER.warning("Text string language escape sequence have invalid length"); + LOGGER.warning("Text string language escape sequence has invalid length"); return false; } } From 656b2d32764cac4f0631e772361c08e1524a395b Mon Sep 17 00:00:00 2001 From: Kakhnovich Raman Date: Thu, 18 Dec 2025 12:58:08 +0300 Subject: [PATCH 3/3] Add StringWarnings --- .../org/verapdf/as/warnings/StringWarnings.java | 7 +++++++ src/main/java/org/verapdf/cos/COSString.java | 15 ++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) create mode 100644 src/main/java/org/verapdf/as/warnings/StringWarnings.java diff --git a/src/main/java/org/verapdf/as/warnings/StringWarnings.java b/src/main/java/org/verapdf/as/warnings/StringWarnings.java new file mode 100644 index 00000000..d022b9f6 --- /dev/null +++ b/src/main/java/org/verapdf/as/warnings/StringWarnings.java @@ -0,0 +1,7 @@ +package org.verapdf.as.warnings; + +public class StringWarnings { + public static final String NOT_ASCII_LETTER = "Text string language escape sequence contains not ASCII letter"; + public static final String INVALID_LANGUAGE_ESCAPE_SEQUENCE_LENGTH = "Text string language escape sequence has invalid length"; + public static final String NOT_SUPPORTED_UTF16LE_ENCODING = "String object uses encoding UTF16-LE not supported by PDF"; +} diff --git a/src/main/java/org/verapdf/cos/COSString.java b/src/main/java/org/verapdf/cos/COSString.java index d93613ae..88b752ae 100644 --- a/src/main/java/org/verapdf/cos/COSString.java +++ b/src/main/java/org/verapdf/cos/COSString.java @@ -20,6 +20,7 @@ */ package org.verapdf.cos; +import org.verapdf.as.warnings.StringWarnings; import org.verapdf.cos.filters.COSFilterASCIIHexEncode; import org.verapdf.cos.visitor.ICOSVisitor; import org.verapdf.cos.visitor.IVisitor; @@ -111,7 +112,7 @@ public String getString() { return new String(value, 2, value.length - 2, StandardCharsets.UTF_16BE); } if ((value[0] & 0xFF) == 0xFF && (value[1] & 0xFF) == 0xFE) { - LOGGER.log(Level.WARNING, "String object uses encoding UTF16-LE not supported by PDF"); + LOGGER.log(Level.WARNING, StringWarnings.NOT_SUPPORTED_UTF16LE_ENCODING); } } if (value.length >= 3) { @@ -242,7 +243,7 @@ private boolean checkUTF16BEEscapeSequence(byte[] value) { i += 5; continue; } else { - LOGGER.warning("Text string language escape sequence contains character (not ASCII letter)"); + LOGGER.log(Level.WARNING, StringWarnings.NOT_ASCII_LETTER); return false; } } @@ -253,11 +254,11 @@ private boolean checkUTF16BEEscapeSequence(byte[] value) { i += 7; continue; } else { - LOGGER.warning("Text string language escape sequence contains character (not ASCII letter)"); + LOGGER.log(Level.WARNING, StringWarnings.NOT_ASCII_LETTER); return false; } } - LOGGER.warning("Text string language escape sequence has invalid length"); + LOGGER.log(Level.WARNING,StringWarnings.INVALID_LANGUAGE_ESCAPE_SEQUENCE_LENGTH); return false; } } @@ -272,7 +273,7 @@ private boolean checkUTF8EscapeSequence(byte[] value) { i += 3; continue; } else { - LOGGER.warning("Text string language escape sequence contains character (not ASCII letter)"); + LOGGER.log(Level.WARNING, StringWarnings.NOT_ASCII_LETTER); return false; } } @@ -283,12 +284,12 @@ private boolean checkUTF8EscapeSequence(byte[] value) { i += 5; continue; } else { - LOGGER.warning("Text string language escape sequence contains character (not ASCII letter)"); + LOGGER.log(Level.WARNING, StringWarnings.NOT_ASCII_LETTER); return false; } } - LOGGER.warning("Text string language escape sequence has invalid length"); + LOGGER.log(Level.WARNING,StringWarnings.INVALID_LANGUAGE_ESCAPE_SEQUENCE_LENGTH); return false; } }