Skip to content

Commit a4f05c8

Browse files
committed
Improve mp3 and ogg parsing accuracy
Prioritize Xing/VBRI frame counts when estimating MP3 duration and exclude trailing ID3v1 bytes from frame scanning to reduce bitrate drift on tagged files. Improve MP3 error reporting by surfacing unsupported MPEG Layer I/II inputs with a clear exception instead of a generic no-frame failure. Harden OGG parsing by validating page sequence numbers per target logical stream, isolating metrics by stream serial, and inferring Vorbis CBR/VBR mode from observed bitrate variation. Add regression tests for MP3 and OGG edge cases and update release metadata for version 1.1.1.
1 parent 0d82fad commit a4f05c8

File tree

6 files changed

+462
-103
lines changed

6 files changed

+462
-103
lines changed

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [1.1.1] - 2026-03-14
9+
10+
### Fixed
11+
- Improved MP3 duration estimation in [`Mp3Parser.estimateDurationMillis()`](src/main/java/me/tamkungz/codecmedia/internal/audio/mp3/Mp3Parser.java) to prioritize Xing/VBRI frame-count metadata before scanned sample totals.
12+
- Excluded trailing ID3v1 tag bytes from MP3 audio scan range in [`Mp3Parser`](src/main/java/me/tamkungz/codecmedia/internal/audio/mp3/Mp3Parser.java), reducing bitrate drift when footer tags are present.
13+
- Added clearer non-Layer III error handling in [`Mp3Parser.parse()`](src/main/java/me/tamkungz/codecmedia/internal/audio/mp3/Mp3Parser.java) for MPEG Layer I/II inputs.
14+
- Strengthened OGG logical-stream parsing in [`OggParser`](src/main/java/me/tamkungz/codecmedia/internal/audio/ogg/OggParser.java) with per-stream page-sequence validation and serial-scoped metrics for multiplexed files.
15+
- Refined Vorbis bitrate-mode classification in [`OggParser.detectVorbisBitrateMode()`](src/main/java/me/tamkungz/codecmedia/internal/audio/ogg/OggParser.java) to infer from observed bitrate variation instead of coarse nominal/page-count heuristics.
16+
- Replaced broad OGG payload string scanning with structured Vorbis/Opus comment-header parsing in [`OggParser`](src/main/java/me/tamkungz/codecmedia/internal/audio/ogg/OggParser.java), and fixed sequence tracking to use `long` to avoid overflow.
17+
18+
### Added
19+
- Added MP3 parser regression tests for Xing-priority duration, trailing ID3v1 handling, and unsupported Layer I/II diagnostics in [`Mp3ParserTest`](src/test/java/me/tamkungz/codecmedia/internal/audio/mp3/Mp3ParserTest.java).
20+
- Added OGG parser tests for Vorbis CBR/VBR mode inference, broken page-sequence detection, and multiplexed-stream metric isolation in [`OggParserTest`](src/test/java/me/tamkungz/codecmedia/internal/audio/ogg/OggParserTest.java).
21+
22+
### Verified
23+
- Confirmed MP3 parser updates with `mvn -Dtest=Mp3ParserTest test`.
24+
- Confirmed OGG parser updates with `mvn -Dtest=OggParserTest test`.
25+
826
## [1.1.0] - 2026-03-13
927

1028
### Changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
<groupId>me.tamkungz.codecmedia</groupId>
99
<artifactId>codecmedia</artifactId>
10-
<version>1.1.0</version>
10+
<version>1.1.1</version>
1111
<packaging>jar</packaging>
1212

1313
<name>CodecMedia</name>

src/main/java/me/tamkungz/codecmedia/internal/audio/mp3/Mp3Parser.java

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ public static Mp3ProbeInfo parse(byte[] data) throws CodecMediaException {
3131
int audioStart = skipId3v2(reader);
3232
int firstFrameOffset = findFrameOffset(data, audioStart);
3333
if (firstFrameOffset < 0) {
34+
if (containsUnsupportedMpegLayer(data, audioStart)) {
35+
throw new CodecMediaException("Unsupported MPEG audio layer: only Layer III (MP3) is supported");
36+
}
3437
throw new CodecMediaException("No valid MP3 frame found");
3538
}
3639

@@ -42,7 +45,8 @@ public static Mp3ProbeInfo parse(byte[] data) throws CodecMediaException {
4245
int xingFrames = readXingFrameCountIfPresent(data, firstFrameOffset, firstFrame);
4346
int vbriFrames = readVbriFrameCountIfPresent(data, firstFrameOffset, firstFrame);
4447

45-
ParseStats stats = scanFrames(data, firstFrameOffset, firstFrame.sampleRate(), firstFrame.samplesPerFrame());
48+
int scanLimit = effectiveAudioEndOffset(data);
49+
ParseStats stats = scanFrames(data, firstFrameOffset, scanLimit, firstFrame.sampleRate(), firstFrame.samplesPerFrame());
4650
long durationMillis = estimateDurationMillis(stats, xingFrames, vbriFrames);
4751
int avgBitrate = estimateAverageBitrateKbps(stats, durationMillis);
4852
BitrateMode mode = detectBitrateMode(stats, xingFrames, vbriFrames);
@@ -92,6 +96,31 @@ private static int findFrameOffset(byte[] data, int start) {
9296
return -1;
9397
}
9498

99+
private static boolean containsUnsupportedMpegLayer(byte[] data, int start) {
100+
for (int i = Math.max(0, start); i + 4 <= data.length; i++) {
101+
int h = ((data[i] & 0xFF) << 24)
102+
| ((data[i + 1] & 0xFF) << 16)
103+
| ((data[i + 2] & 0xFF) << 8)
104+
| (data[i + 3] & 0xFF);
105+
106+
if ((h & 0xFFE00000) != 0xFFE00000) {
107+
continue;
108+
}
109+
110+
int versionBits = (h >>> 19) & 0b11;
111+
int layerBits = (h >>> 17) & 0b11;
112+
int sampleRateIndex = (h >>> 10) & 0b11;
113+
114+
if (versionBits == 0b01 || sampleRateIndex == 0b11) {
115+
continue;
116+
}
117+
if (layerBits == 0b10 || layerBits == 0b11) {
118+
return true;
119+
}
120+
}
121+
return false;
122+
}
123+
95124
private static Mp3FrameHeader parseFrameHeader(byte[] data, int offset) {
96125
if (offset < 0 || offset + 4 > data.length) {
97126
return null;
@@ -173,16 +202,26 @@ private static int readVbriFrameCountIfPresent(byte[] data, int frameOffset, Mp3
173202
return readIntBE(data, vbriOffset + 14);
174203
}
175204

176-
private static ParseStats scanFrames(byte[] data, int startOffset, int sampleRate, int samplesPerFrame) {
205+
private static int effectiveAudioEndOffset(byte[] data) {
206+
if (data.length >= 128
207+
&& data[data.length - 128] == 'T'
208+
&& data[data.length - 127] == 'A'
209+
&& data[data.length - 126] == 'G') {
210+
return data.length - 128;
211+
}
212+
return data.length;
213+
}
214+
215+
private static ParseStats scanFrames(byte[] data, int startOffset, int scanLimit, int sampleRate, int samplesPerFrame) {
177216
int offset = startOffset;
178217
long totalBits = 0;
179218
long totalSamples = 0;
180219
int frames = 0;
181220
Set<Integer> bitrates = new HashSet<>();
182221

183-
while (offset + 4 <= data.length) {
222+
while (offset + 4 <= scanLimit) {
184223
Mp3FrameHeader h = parseFrameHeader(data, offset);
185-
if (h == null || offset + h.frameLength() > data.length) {
224+
if (h == null || offset + h.frameLength() > scanLimit) {
186225
break;
187226
}
188227

@@ -200,13 +239,13 @@ private static long estimateDurationMillis(ParseStats stats, int xingFrames, int
200239
if (stats.frames() <= 0) {
201240
return 0;
202241
}
203-
if (stats.totalSamples() > 0 && stats.sampleRate() > 0) {
204-
return (stats.totalSamples() * 1000L) / stats.sampleRate();
205-
}
206242
int knownFrames = xingFrames > 0 ? xingFrames : vbriFrames;
207243
if (knownFrames > 0 && stats.samplesPerFrame() > 0 && stats.sampleRate() > 0) {
208244
return ((long) knownFrames * stats.samplesPerFrame() * 1000L) / stats.sampleRate();
209245
}
246+
if (stats.totalSamples() > 0 && stats.sampleRate() > 0) {
247+
return (stats.totalSamples() * 1000L) / stats.sampleRate();
248+
}
210249
return 0;
211250
}
212251

src/main/java/me/tamkungz/codecmedia/internal/audio/ogg/OggParser.java

Lines changed: 190 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package me.tamkungz.codecmedia.internal.audio.ogg;
22

3+
import java.util.HashSet;
4+
import java.util.Set;
5+
36
import me.tamkungz.codecmedia.CodecMediaException;
47
import me.tamkungz.codecmedia.internal.audio.BitrateMode;
58
import me.tamkungz.codecmedia.internal.io.ByteArrayReader;
@@ -28,21 +31,54 @@ public static OggProbeInfo parse(byte[] data) throws CodecMediaException {
2831
}
2932

3033
AudioIdent ident = parseIdentificationPacket(data, identOffset, firstPayloadSize);
34+
long targetSerial = firstPage.serialNumber();
3135

3236
long payloadBits = 0;
33-
int pageCount = 0;
3437
long maxGranule = 0;
38+
long prevGranule = -1;
39+
long prevSequence = -1;
40+
Set<Integer> observedKbps = new HashSet<>();
41+
boolean hasCommentMetadata = false;
3542
int offset = 0;
3643
while (offset + 27 <= data.length) {
3744
OggPageHeader page = parsePageHeader(data, offset);
3845
if (page == null) {
3946
break;
4047
}
41-
payloadBits += (long) page.payloadSize() * 8;
42-
pageCount++;
43-
if (page.granulePosition() > maxGranule) {
44-
maxGranule = page.granulePosition();
48+
49+
if (page.serialNumber() == targetSerial) {
50+
if (prevSequence >= 0 && page.sequenceNumber() != (long) prevSequence + 1L) {
51+
throw new CodecMediaException("Invalid OGG stream: broken page sequence for target stream");
52+
}
53+
prevSequence = page.sequenceNumber();
54+
55+
payloadBits += (long) page.payloadSize() * 8;
56+
if (page.granulePosition() > maxGranule) {
57+
maxGranule = page.granulePosition();
58+
}
59+
60+
int payloadOffset = offset + page.headerSize();
61+
if (!hasCommentMetadata && payloadOffset + page.payloadSize() <= data.length
62+
&& containsCodecCommentSignal(data, payloadOffset, page.payloadSize(), ident.codec())) {
63+
hasCommentMetadata = true;
64+
}
65+
66+
if (prevGranule >= 0 && page.granulePosition() > prevGranule) {
67+
long granuleDelta = page.granulePosition() - prevGranule;
68+
int granuleRate = ident.granuleRate() > 0 ? ident.granuleRate() : ident.sampleRate();
69+
if (granuleRate > 0) {
70+
long millis = (granuleDelta * 1000L) / granuleRate;
71+
if (millis > 0) {
72+
int kbps = (int) (((long) page.payloadSize() * 8L * 1000L) / millis / 1000L);
73+
if (kbps > 0) {
74+
observedKbps.add(kbps);
75+
}
76+
}
77+
}
78+
}
79+
prevGranule = page.granulePosition();
4580
}
81+
4682
offset += page.totalPageSize();
4783
}
4884

@@ -59,14 +95,31 @@ public static OggProbeInfo parse(byte[] data) throws CodecMediaException {
5995
int bitrateKbps = avgBitrate > 0 ? avgBitrate : nominalKbps;
6096

6197
BitrateMode mode = switch (ident.codec()) {
62-
case "vorbis" -> (ident.nominalBitrate() > 0 || pageCount > 2) ? BitrateMode.VBR : BitrateMode.UNKNOWN;
98+
case "vorbis" -> detectVorbisBitrateMode(observedKbps, ident.nominalBitrate(), hasCommentMetadata);
6399
case "opus" -> BitrateMode.VBR;
64100
default -> BitrateMode.UNKNOWN;
65101
};
66102

67103
return new OggProbeInfo(ident.codec(), ident.sampleRate(), ident.channels(), bitrateKbps, mode, durationMillis);
68104
}
69105

106+
private static BitrateMode detectVorbisBitrateMode(
107+
Set<Integer> observedKbps,
108+
long nominalBitrate,
109+
boolean hasCommentMetadata
110+
) {
111+
if (observedKbps.size() > 1) {
112+
return BitrateMode.VBR;
113+
}
114+
if (observedKbps.size() == 1) {
115+
return BitrateMode.CBR;
116+
}
117+
if (nominalBitrate > 0 || hasCommentMetadata) {
118+
return BitrateMode.UNKNOWN;
119+
}
120+
return BitrateMode.UNKNOWN;
121+
}
122+
70123
private static AudioIdent parseIdentificationPacket(byte[] data, int identOffset, int payloadSize) throws CodecMediaException {
71124
if (isVorbisIdentification(data, identOffset, payloadSize)) {
72125
if (payloadSize < 30) {
@@ -156,6 +209,137 @@ private static OggPageHeader parsePageHeader(byte[] data, int offset) {
156209
return new OggPageHeader(version, headerType, granulePosition, serial, sequence, segmentCount, payload, total, headerSize);
157210
}
158211

212+
private static boolean containsCodecCommentSignal(byte[] data, int offset, int payloadSize, String codec) {
213+
if (offset < 0 || payloadSize <= 0 || offset + payloadSize > data.length) {
214+
return false;
215+
}
216+
if ("vorbis".equals(codec)) {
217+
return parseVorbisCommentSignal(data, offset, payloadSize);
218+
}
219+
if ("opus".equals(codec)) {
220+
return parseOpusCommentSignal(data, offset, payloadSize);
221+
}
222+
return false;
223+
}
224+
225+
private static boolean parseVorbisCommentSignal(byte[] data, int offset, int payloadSize) {
226+
if (payloadSize < 11) {
227+
return false;
228+
}
229+
if (data[offset] != 0x03
230+
|| data[offset + 1] != 'v'
231+
|| data[offset + 2] != 'o'
232+
|| data[offset + 3] != 'r'
233+
|| data[offset + 4] != 'b'
234+
|| data[offset + 5] != 'i'
235+
|| data[offset + 6] != 's') {
236+
return false;
237+
}
238+
return parseCommentListForSignals(data, offset + 7, offset + payloadSize, true);
239+
}
240+
241+
private static boolean parseOpusCommentSignal(byte[] data, int offset, int payloadSize) {
242+
if (payloadSize < 12) {
243+
return false;
244+
}
245+
if (data[offset] != 'O'
246+
|| data[offset + 1] != 'p'
247+
|| data[offset + 2] != 'u'
248+
|| data[offset + 3] != 's'
249+
|| data[offset + 4] != 'T'
250+
|| data[offset + 5] != 'a'
251+
|| data[offset + 6] != 'g'
252+
|| data[offset + 7] != 's') {
253+
return false;
254+
}
255+
return parseCommentListForSignals(data, offset + 8, offset + payloadSize, false);
256+
}
257+
258+
private static boolean parseCommentListForSignals(byte[] data, int pos, int end, boolean vorbis) {
259+
int vendorLen = readU32LEAt(data, pos, end);
260+
if (vendorLen < 0) {
261+
return false;
262+
}
263+
pos += 4;
264+
if (pos + vendorLen > end) {
265+
return false;
266+
}
267+
pos += vendorLen;
268+
269+
int commentCount = readU32LEAt(data, pos, end);
270+
if (commentCount < 0) {
271+
return false;
272+
}
273+
pos += 4;
274+
275+
for (int i = 0; i < commentCount; i++) {
276+
int commentLen = readU32LEAt(data, pos, end);
277+
if (commentLen < 0) {
278+
return false;
279+
}
280+
pos += 4;
281+
if (pos + commentLen > end) {
282+
return false;
283+
}
284+
int eq = -1;
285+
for (int j = 0; j < commentLen; j++) {
286+
if (data[pos + j] == '=') {
287+
eq = j;
288+
break;
289+
}
290+
}
291+
if (eq > 0 && hasSignalCommentKey(data, pos, eq, vorbis)) {
292+
return true;
293+
}
294+
pos += commentLen;
295+
}
296+
return false;
297+
}
298+
299+
private static int readU32LEAt(byte[] data, int offset, int endExclusive) {
300+
if (offset < 0 || offset + 4 > endExclusive || offset + 4 > data.length) {
301+
return -1;
302+
}
303+
long value = (data[offset] & 0xFFL)
304+
| ((data[offset + 1] & 0xFFL) << 8)
305+
| ((data[offset + 2] & 0xFFL) << 16)
306+
| ((data[offset + 3] & 0xFFL) << 24);
307+
if (value > Integer.MAX_VALUE) {
308+
return -1;
309+
}
310+
return (int) value;
311+
}
312+
313+
private static boolean hasSignalCommentKey(byte[] data, int keyOffset, int keyLen, boolean vorbis) {
314+
if (startsWithAsciiIgnoreCase(data, keyOffset, keyLen, "REPLAYGAIN_")) {
315+
return true;
316+
}
317+
if (startsWithAsciiIgnoreCase(data, keyOffset, keyLen, "TRACKTOTAL")) {
318+
return true;
319+
}
320+
if (startsWithAsciiIgnoreCase(data, keyOffset, keyLen, "ALBUMGAIN")) {
321+
return true;
322+
}
323+
return !vorbis
324+
&& (startsWithAsciiIgnoreCase(data, keyOffset, keyLen, "R128_TRACK_GAIN")
325+
|| startsWithAsciiIgnoreCase(data, keyOffset, keyLen, "R128_ALBUM_GAIN"));
326+
}
327+
328+
private static boolean startsWithAsciiIgnoreCase(byte[] data, int offset, int len, String prefix) {
329+
if (prefix == null || prefix.isEmpty() || len < prefix.length()) {
330+
return false;
331+
}
332+
for (int i = 0; i < prefix.length(); i++) {
333+
int b = data[offset + i] & 0xFF;
334+
char c = (char) (b >= 'a' && b <= 'z' ? (b - 32) : b);
335+
char p = Character.toUpperCase(prefix.charAt(i));
336+
if (c != p) {
337+
return false;
338+
}
339+
}
340+
return true;
341+
}
342+
159343
private record AudioIdent(
160344
String codec,
161345
int sampleRate,

0 commit comments

Comments
 (0)