From 78568e5d5abe0dae8818ac331ecc46fb0a5b0a0a Mon Sep 17 00:00:00 2001 From: austnwil Date: Fri, 17 Oct 2025 14:41:13 -0700 Subject: [PATCH 01/22] Begin implementation of ByteArrayBytecodeGenerator - Implement readTextReference - Implement readBytesReference - Implement basic refill --- .../amazon/ion/bytecode/BytecodeEmitter.kt | 10 +++ .../bin11/ByteArrayBytecodeGenerator11.kt | 90 +++++++++++++++++-- 2 files changed, 95 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt b/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt index bb320365e..7297fb78b 100644 --- a/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt +++ b/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt @@ -87,4 +87,14 @@ internal object BytecodeEmitter { fun emitShortTimestampReference(destination: BytecodeBuffer, precisionAndOffsetMode: Int, dataPosition: Int) { destination.add2(Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(precisionAndOffsetMode), dataPosition) } + + @JvmStatic + fun emitRefill(destination: BytecodeBuffer) { + destination.add(Instructions.I_REFILL) + } + + @JvmStatic + fun emitEndOfInput(destination: BytecodeBuffer) { + destination.add(Instructions.I_END_OF_INPUT) + } } diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index e28c6ab48..76b0788e9 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -2,14 +2,94 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11 +import com.amazon.ion.Decimal +import com.amazon.ion.IonException +import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.BytecodeEmitter +import com.amazon.ion.bytecode.BytecodeGenerator +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTable +import 
com.amazon.ion.bytecode.util.AppendableConstantPoolView +import com.amazon.ion.bytecode.util.ByteSlice +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.unsignedToInt +import com.amazon.ion.impl.bin.utf8.Utf8StringDecoder +import com.amazon.ion.impl.bin.utf8.Utf8StringDecoderPool import edu.umd.cs.findbugs.annotations.SuppressFBWarnings +import java.math.BigInteger +import java.nio.ByteBuffer @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "constructor does not make a defensive copy of source as a performance optimization") -internal class ByteArrayBytecodeGenerator11 -@SuppressFBWarnings("URF_UNREAD_FIELD", justification = "field will be read once this class is implemented") -constructor( +internal class ByteArrayBytecodeGenerator11( private val source: ByteArray, private var i: Int, -) { - // TODO: This should implement BytecodeGenerator +) : BytecodeGenerator { + private val decoder: Utf8StringDecoder = Utf8StringDecoderPool.getInstance().orCreate + + override fun refill( + destination: BytecodeBuffer, + constantPool: AppendableConstantPoolView, + macroSrc: IntArray, + macroIndices: IntArray, + symTab: Array + ) { + // For now, write a single instruction to the bytecode buffer, plus the refill or EOF instruction. + // The strategy here will need to be revisited. + val opcode = source[i++].unsignedToInt() + val handler = OpcodeHandlerTable.handler(opcode) + i += handler.convertOpcodeToBytecode( + opcode, + source, + i, + destination, + constantPool, + macroSrc, + macroIndices, + symTab + ) + + // Emit the refill or end of input instruction so caller knows what to do once they run out + // of bytecode in the buffer. 
+ if (i < source.size) { + BytecodeEmitter.emitRefill(destination) + } else { + BytecodeEmitter.emitEndOfInput(destination) + } + } + + override fun readBigIntegerReference(position: Int, length: Int): BigInteger { + TODO("Not yet implemented") + } + + override fun readDecimalReference(position: Int, length: Int): Decimal { + TODO("Not yet implemented") + } + + override fun readShortTimestampReference(position: Int, opcode: Int): Timestamp { + TODO("Not yet implemented") + } + + override fun readTimestampReference(position: Int, length: Int): Timestamp { + TODO("Not yet implemented") + } + + override fun readTextReference(position: Int, length: Int): String { + val buffer = ByteBuffer.wrap(source, position, length) + return decoder.decode(buffer, length) + } + + override fun readBytesReference(position: Int, length: Int): ByteSlice { + return ByteSlice(source, position, position + length - 1) + } + + override fun ionMinorVersion(): Int { + return 1 + } + + override fun getGeneratorForMinorVersion(minorVersion: Int): BytecodeGenerator { + return when (minorVersion) { + 1 -> ByteArrayBytecodeGenerator11(source, i) + // TODO: update with ByteArrayBytecodeGenerator10 once it implements BytecodeGenerator + else -> throw IonException("Minor version $minorVersion not yet implemented for ByteArray-backed data sources.") + } + } } From b92126979578ee58efd90ab4e732bcbc5ef06691 Mon Sep 17 00:00:00 2001 From: austnwil Date: Fri, 17 Oct 2025 15:25:51 -0700 Subject: [PATCH 02/22] Implement readBigIntegerReference --- .../bin11/ByteArrayBytecodeGenerator11.kt | 3 +- .../bin11/bytearray/PrimitiveDecoder.kt | 11 +++ .../bin11/bytearray/PrimitiveDecoderTest.kt | 78 ++++--------------- 3 files changed, 28 insertions(+), 64 deletions(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index 76b0788e9..cb408a366 100644 --- 
a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -8,6 +8,7 @@ import com.amazon.ion.Timestamp import com.amazon.ion.bytecode.BytecodeEmitter import com.amazon.ion.bytecode.BytecodeGenerator import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTable +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsBigInteger import com.amazon.ion.bytecode.util.AppendableConstantPoolView import com.amazon.ion.bytecode.util.ByteSlice import com.amazon.ion.bytecode.util.BytecodeBuffer @@ -57,7 +58,7 @@ internal class ByteArrayBytecodeGenerator11( } override fun readBigIntegerReference(position: Int, length: Int): BigInteger { - TODO("Not yet implemented") + return readFixedIntAsBigInteger(source, position, length) } override fun readDecimalReference(position: Int, length: Int): Decimal { diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoder.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoder.kt index ece6e9336..c2f3453aa 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoder.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoder.kt @@ -73,6 +73,17 @@ internal object PrimitiveDecoder { } } + @JvmStatic + fun readFixedIntAsBigInteger(source: ByteArray, start: Int, length: Int): BigInteger { + // TODO: ion-java#1114 + if (source.size < start + length) throw IonException("Incomplete data: start=$start, length=$length, limit=${source.size}") + val bytes = ByteArray(length) + for (i in 0 until length) { + bytes[i] = source[start + length - i - 1] + } + return BigInteger(bytes) + } + @JvmStatic fun readFixedUInt16(source: ByteArray, position: Int): UShort { // TODO: ion-java#1114 diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt 
b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt index 13ae8c482..7ffd79349 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt @@ -16,12 +16,12 @@ import com.amazon.ion.PrimitiveTestCases_1_1.FLEX_INT_READ_WRITE_CASES import com.amazon.ion.PrimitiveTestCases_1_1.FLEX_UINT_READ_ONLY_CASES import com.amazon.ion.PrimitiveTestCases_1_1.FLEX_UINT_READ_WRITE_CASES import com.amazon.ion.TextToBinaryUtils.binaryStringToByteArray -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.lengthOfFlexIntOrUIntAt import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt16 import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt24AsInt import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt32 import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt8AsShort +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsBigInteger import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsLong import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.TestInstance @@ -67,63 +67,11 @@ class PrimitiveDecoderTest { } @ParameterizedTest - @CsvSource( - " 64, 1, 40", - " 3257, 2, B9 0C", - " -3257, 2, 47 F3", - " 78, 1, 4E", - " -6407, 2, F9 E6", - " 0, 1, 00", - " 1, 1, 01", - " 2, 1, 02", - " 3, 1, 03", - " 4, 1, 04", - " 5, 1, 05", - " 14, 1, 0E", - " 127, 1, 7F", - " 128, 2, 80 00", // length boundary - " 729, 2, D9 02", - " 32767, 2, FF 7F", - " 32768, 3, 00 80 00", // length boundary - " 8388607, 3, FF FF 7F", - " 8388608, 4, 00 00 80 00", // length boundary - " ${Int.MAX_VALUE}, 4, FF FF FF 7F", - " 2147483648, 5, 00 00 00 80 00", // length boundary - " 549755813887, 5, FF FF FF FF 7F", - " 549755813888, 6, 00 00 00 00 80 
00", // length boundary - " 140737488355327, 6, FF FF FF FF FF 7F", - " 140737488355328, 7, 00 00 00 00 00 80 00", // length boundary - " 36028797018963967, 7, FF FF FF FF FF FF 7F", - " 36028797018963968, 8, 00 00 00 00 00 00 80 00", // length boundary - " ${Long.MAX_VALUE}, 8, FF FF FF FF FF FF FF 7F", - - " -1, 1, FF", - " -2, 1, FE", - " -3, 1, FD", - " -14, 1, F2", - " -128, 1, 80", - " -129, 2, 7F FF", // length boundary - " -729, 2, 27 FD", - " -32768, 2, 00 80", - " -32769, 3, FF 7F FF", // length boundary - " -8388608, 3, 00 00 80", - " -8388609, 4, FF FF 7F FF", // length boundary - " ${Int.MIN_VALUE}, 4, 00 00 00 80", - " -2147483649, 5, FF FF FF 7F FF", // length boundary - " -549755813888, 5, 00 00 00 00 80", - " -549755813889, 6, FF FF FF FF 7F FF", // length boundary - " -140737488355328, 6, 00 00 00 00 00 80", - " -140737488355329, 7, FF FF FF FF FF 7F FF", // length boundary - " -36028797018963968, 7, 00 00 00 00 00 00 80", - " -36028797018963969, 8, FF FF FF FF FF FF 7F FF", // length boundary - " ${Long.MIN_VALUE}, 8, 00 00 00 00 00 00 00 80", - ) - fun testReadFixedIntAsLong(expectedValue: Long, length: Int, input: String) { - val data = if (input.all { it == '0' || it == '1' }) input.binaryStringToByteArray() else input.hexStringToByteArray() - - val value = readFixedIntAsLong(data, 0, data.size) - - assertEquals(expectedValue, value) + @MethodSource(FIXED_INT_64_CASES) + fun testReadFixedInt64(expected: Long, bits: String) { + val data = bits.binaryStringToByteArray() + val actual = PrimitiveDecoder.readFixedInt64(data, 0) + assertEquals(expected, actual) } @ParameterizedTest @@ -153,11 +101,15 @@ class PrimitiveDecoderTest { } @ParameterizedTest - @MethodSource(FIXED_INT_64_CASES) - fun testReadFixedInt64(expected: Long, bits: String) { - val data = bits.binaryStringToByteArray() - val actual = PrimitiveDecoder.readFixedInt64(data, 0) - assertEquals(expected, actual) + @MethodSource(FIXED_INT_8_CASES, FIXED_INT_16_CASES, FIXED_INT_24_CASES, 
FIXED_INT_32_CASES, FIXED_INT_64_CASES) + @CsvSource( + " 9223372036854775808, 00000000 00000000 00000000 00000000 00000000 00000000 00000000 10000000 00000000", + "-9223372036854775809, 11111111 11111111 11111111 11111111 11111111 11111111 11111111 01111111 11111111", + ) + fun testReadFixedIntAsBigInteger(expectedValue: BigInteger, input: String) { + val data = input.binaryStringToByteArray() + val value = readFixedIntAsBigInteger(data, 0, data.size) + assertEquals(expectedValue, value) } @ParameterizedTest From c56816b833f4de041de8fc438527e6b871d55f7f Mon Sep 17 00:00:00 2001 From: austnwil Date: Mon, 20 Oct 2025 15:09:31 -0700 Subject: [PATCH 03/22] Implement readShortTimestampReference --- .../bin11/ByteArrayBytecodeGenerator11.kt | 11 +- .../bin11/bytearray/ShortTimestampDecoder.kt | 221 ++++++++++++++++++ .../bytearray/ShortTimestampDecoderTest.kt | 37 +++ 3 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt create mode 100644 src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index cb408a366..4ea11e845 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -9,6 +9,7 @@ import com.amazon.ion.bytecode.BytecodeEmitter import com.amazon.ion.bytecode.BytecodeGenerator import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTable import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsBigInteger +import com.amazon.ion.bytecode.bin11.bytearray.ShortTimestampDecoder import com.amazon.ion.bytecode.util.AppendableConstantPoolView import com.amazon.ion.bytecode.util.ByteSlice import com.amazon.ion.bytecode.util.BytecodeBuffer @@ -65,8 
+66,16 @@ internal class ByteArrayBytecodeGenerator11( TODO("Not yet implemented") } + // TODO: right now, this function expects the opcode parameter to be the low nibble of the actual opcode (0x0-0xC). + // This is currently what the ShortTimestampOpcodeHandler writes to the I_SHORT_TIMESTAMP_REF bytecode. This might + // not be correct behavior. If this is acceptable, this parameter should probably be renamed, since it isn't the + // actual opcode of the encoded timestamp. If this isn't, then ShortTimestampOpcodeHandler needs fixed. + // The justification for this behavior is that ShortTimestampOpcodeHandler already separates the low nibble of the + // opcode for use in a lookup table, so we might as well propagate that value to the bytecode instead of the full + // opcode - especially since, in its current implementation, ShortTimestampDecoder.readTimestamp() also uses the + // low nibble in a lookup table. override fun readShortTimestampReference(position: Int, opcode: Int): Timestamp { - TODO("Not yet implemented") + return ShortTimestampDecoder.readTimestamp(source, position, opcode) } override fun readTimestampReference(position: Int, length: Int): Timestamp { diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt new file mode 100644 index 000000000..fa82208c6 --- /dev/null +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt @@ -0,0 +1,221 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt16 +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt32 +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt8AsShort +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsLong +import java.math.BigDecimal + +/** + * Helper class for decoding the various short timestamp encoding variants from a [ByteArray]. + */ +internal object ShortTimestampDecoder { + private const val MASK_4 = 0b1111 + private const val MASK_5 = 0b11111 + private const val MASK_6 = 0b111111 + private const val MASK_7 = 0b1111111 + private const val MASK_4L = 0b1111L + private const val MASK_5L = 0b11111L + private const val MASK_6L = 0b111111L + private const val MASK_7L = 0b1111111L + private const val MASK_10L = 0b1111111111L + private const val MASK_20L = 0b11111111111111111111L + private const val MASK_30L = 0b111111111111111111111111111111L + private const val MASK_UTC_OR_UNKNOWN_BIT = 0b1000_00000000_00000000_00000000 + private const val MASK_UTC_OR_UNKNOWN_BITL = 0b1000_00000000_00000000_00000000L + + private val opcodeToDecoderFunctionTable = arrayOf( + ShortTimestampDecoder::readTimestampToYear, + ShortTimestampDecoder::readTimestampToMonth, + ShortTimestampDecoder::readTimestampToDay, + ShortTimestampDecoder::readTimestampToMinuteUTCOrUnknown, + ShortTimestampDecoder::readTimestampToSecondUTCOrUnknown, + ShortTimestampDecoder::readTimestampToMillisecondUTCOrUnknown, + ShortTimestampDecoder::readTimestampToMicrosecondUTCOrUnknown, + ShortTimestampDecoder::readTimestampToNanosecondUTCOrUnknown, + ShortTimestampDecoder::readTimestampToMinuteWithOffset, + ShortTimestampDecoder::readTimestampToSecondWithOffset, + ShortTimestampDecoder::readTimestampToMillisecondWithOffset, + 
ShortTimestampDecoder::readTimestampToMicrosecondWithOffset, + ShortTimestampDecoder::readTimestampToNanosecondWithOffset, + ) + + fun readTimestampToYear(source: ByteArray, position: Int): Timestamp { + val year = readFixedInt8AsShort(source, position).toInt() + return Timestamp.forYear(year + 1970) + } + + fun readTimestampToMonth(source: ByteArray, position: Int): Timestamp { + val yearAndMonth = readFixedInt16(source, position).toInt() + val year = yearAndMonth.and(MASK_7) + val month = yearAndMonth.shr(7) + + return Timestamp.forMonth(year + 1970, month) + } + + fun readTimestampToDay(source: ByteArray, position: Int): Timestamp { + val yearMonthAndDay = readFixedInt16(source, position).toInt() + val year = yearMonthAndDay.and(MASK_7) + val month = yearMonthAndDay.shr(7).and(MASK_4) + val day = yearMonthAndDay.shr(11) + + return Timestamp.forDay(year + 1970, month, day) + } + + fun readTimestampToMinuteUTCOrUnknown(source: ByteArray, position: Int): Timestamp { + val data = readFixedInt32(source, position) + val year = data.and(MASK_7) + val month = data.shr(7).and(MASK_4) + val day = data.shr(11).and(MASK_5) + val hour = data.shr(16).and(MASK_5) + val minute = data.shr(21).and(MASK_6) + val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BIT) != 0 + + return Timestamp.forMinute(year + 1970, month, day, hour, minute, if (isUTC) 0 else null) + } + + fun readTimestampToSecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 5) + val year = data.and(MASK_7L).toInt() + val month = data.shr(7).and(MASK_4L).toInt() + val day = data.shr(11).and(MASK_5L).toInt() + val hour = data.shr(16).and(MASK_5L).toInt() + val minute = data.shr(21).and(MASK_6L).toInt() + val second = data.shr(28).and(MASK_6L).toInt() + val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BITL) != 0L + + return Timestamp.forSecond(year + 1970, month, day, hour, minute, second, if (isUTC) 0 else null) + } + + fun readTimestampToMillisecondUTCOrUnknown(source: 
ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 6) + val year = data.and(MASK_7L).toInt() + val month = data.shr(7).and(MASK_4L).toInt() + val day = data.shr(11).and(MASK_5L).toInt() + val hour = data.shr(16).and(MASK_5L).toInt() + val minute = data.shr(21).and(MASK_6L).toInt() + val second = data.shr(28).and(MASK_6L) + val fractionalSecond = data.shr(34).and(MASK_10L) + val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BITL) != 0L + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 3) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), if (isUTC) 0 else null) + } + + fun readTimestampToMicrosecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 7) + val year = data.and(MASK_7L).toInt() + val month = data.shr(7).and(MASK_4L).toInt() + val day = data.shr(11).and(MASK_5L).toInt() + val hour = data.shr(16).and(MASK_5L).toInt() + val minute = data.shr(21).and(MASK_6L).toInt() + val second = data.shr(28).and(MASK_6L) + val fractionalSecond = data.shr(34).and(MASK_20L) + val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BITL) != 0L + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 6) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), if (isUTC) 0 else null) + } + + fun readTimestampToNanosecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 8) + val year = data.and(MASK_7L).toInt() + val month = data.shr(7).and(MASK_4L).toInt() + val day = data.shr(11).and(MASK_5L).toInt() + val hour = data.shr(16).and(MASK_5L).toInt() + val minute = data.shr(21).and(MASK_6L).toInt() + val second = data.shr(28).and(MASK_6L) + val fractionalSecond = 
data.ushr(34).and(MASK_30L) + val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BITL) != 0L + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 9) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), if (isUTC) 0 else null) + } + + fun readTimestampToMinuteWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 5) + val year = data.and(MASK_7L).toInt() + val month = data.shr(7).and(MASK_4L).toInt() + val day = data.shr(11).and(MASK_5L).toInt() + val hour = data.shr(16).and(MASK_5L).toInt() + val minute = data.shr(21).and(MASK_6L).toInt() + val offset = data.shr(27).and(MASK_7L).toInt() + + return Timestamp.forMinute(year + 1970, month, day, hour, minute, (offset - 56) * 15) + } + + fun readTimestampToSecondWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 5) + val year = data.and(MASK_7L).toInt() + val month = data.shr(7).and(MASK_4L).toInt() + val day = data.shr(11).and(MASK_5L).toInt() + val hour = data.shr(16).and(MASK_5L).toInt() + val minute = data.shr(21).and(MASK_6L).toInt() + val offset = data.shr(27).and(MASK_7L).toInt() + val second = data.shr(34).and(MASK_6L).toInt() + + return Timestamp.forSecond(year + 1970, month, day, hour, minute, second, (offset - 56) * 15) + } + + fun readTimestampToMillisecondWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 7) + val year = data.and(MASK_7L).toInt() + val month = data.shr(7).and(MASK_4L).toInt() + val day = data.shr(11).and(MASK_5L).toInt() + val hour = data.shr(16).and(MASK_5L).toInt() + val minute = data.shr(21).and(MASK_6L).toInt() + val offset = data.shr(27).and(MASK_7L).toInt() + val second = data.shr(34).and(MASK_6L) + val fractionalSecond = data.shr(40).and(MASK_10L) + + val secondBigDecimal = 
BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 3) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), (offset - 56) * 15) + } + + fun readTimestampToMicrosecondWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 8) + val year = data.and(MASK_7L).toInt() + val month = data.shr(7).and(MASK_4L).toInt() + val day = data.shr(11).and(MASK_5L).toInt() + val hour = data.shr(16).and(MASK_5L).toInt() + val minute = data.shr(21).and(MASK_6L).toInt() + val offset = data.shr(27).and(MASK_7L).toInt() + val second = data.shr(34).and(MASK_6L) + val fractionalSecond = data.shr(40).and(MASK_20L) + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 6) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), (offset - 56) * 15) + } + + fun readTimestampToNanosecondWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 8) + val highFractionalSecondByte = readFixedInt8AsShort(source, position + 8).toLong().and(MASK_6L) + val year = data.and(MASK_7L).toInt() + val month = data.shr(7).and(MASK_4L).toInt() + val day = data.shr(11).and(MASK_5L).toInt() + val hour = data.shr(16).and(MASK_5L).toInt() + val minute = data.shr(21).and(MASK_6L).toInt() + val offset = data.shr(27).and(MASK_7L).toInt() + val second = data.shr(34).and(MASK_6L) + val fractionalSecond = data.ushr(40).or(highFractionalSecondByte.shl(24)) + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 9) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), (offset - 56) * 15) + } + + fun readTimestamp(source: ByteArray, position: Int, 
precisionAndOffsetMode: Int): Timestamp { + // TODO: calling function references like this might be slower than just using a conditional or other solutions. + // Might be worth looking into. + val decoder = opcodeToDecoderFunctionTable[precisionAndOffsetMode] + return decoder(source, position) + } +} diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt new file mode 100644 index 000000000..a051b3719 --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt @@ -0,0 +1,37 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.util.unsignedToInt +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource + +class ShortTimestampDecoderTest { + @ParameterizedTest + @CsvSource( + "80 35, 2023T", + "81 35 05, 2023-10T", + "82 35 7D, 2023-10-15T", + "83 35 7D CB 0A, 2023-10-15T11:22Z", + "84 35 7D CB 1A 02, 2023-10-15T11:22:33Z", + "84 35 7D CB 12 02, 2023-10-15T11:22:33-00:00", + "85 35 7D CB 12 F2 06, 2023-10-15T11:22:33.444-00:00", + "86 35 7D CB 12 2E 22 1B, 2023-10-15T11:22:33.444555-00:00", + "87 35 7D CB 12 4A 86 FD 69, 2023-10-15T11:22:33.444555666-00:00", + "88 35 7D CB EA 01, 2023-10-15T11:22+01:15", + "89 35 7D CB EA 85, 2023-10-15T11:22:33+01:15", + "8A 35 7D CB EA 85 BC 01, 2023-10-15T11:22:33.444+01:15", + "8B 35 7D CB EA 85 8B C8 06, 2023-10-15T11:22:33.444555+01:15", + "8C 35 7D CB EA 85 92 61 7F 1A, 2023-10-15T11:22:33.444555666+01:15", + ) + fun `short timestamps are decoded correctly`(bytecode: String, expectedValue: String) { + val data = 
bytecode.hexStringToByteArray() + val opcode = data[0].unsignedToInt() + val timestamp = ShortTimestampDecoder.readTimestamp(data, 1, opcode and 0xF) + val expectedTimestamp = Timestamp.valueOf(expectedValue.trim()) + assertEquals(expectedTimestamp, timestamp) + } +} From b5b0c29189b715d562943b0c544f50bc272e7a5c Mon Sep 17 00:00:00 2001 From: austnwil Date: Mon, 20 Oct 2025 15:10:25 -0700 Subject: [PATCH 04/22] Rename UTF8 decoder --- .../amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index 4ea11e845..58f236d9a 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -25,7 +25,7 @@ internal class ByteArrayBytecodeGenerator11( private val source: ByteArray, private var i: Int, ) : BytecodeGenerator { - private val decoder: Utf8StringDecoder = Utf8StringDecoderPool.getInstance().orCreate + private val utf8Decoder: Utf8StringDecoder = Utf8StringDecoderPool.getInstance().orCreate override fun refill( destination: BytecodeBuffer, @@ -84,7 +84,7 @@ internal class ByteArrayBytecodeGenerator11( override fun readTextReference(position: Int, length: Int): String { val buffer = ByteBuffer.wrap(source, position, length) - return decoder.decode(buffer, length) + return utf8Decoder.decode(buffer, length) } override fun readBytesReference(position: Int, length: Int): ByteSlice { From d60554c44ce18774209b4684ae46b1e298c5ab33 Mon Sep 17 00:00:00 2001 From: austnwil Date: Wed, 22 Oct 2025 15:04:14 -0700 Subject: [PATCH 05/22] Add tests; fix bug in readBytesReference --- .../bin11/ByteArrayBytecodeGenerator11.kt | 2 +- .../java/com/amazon/ion/TextToBinaryUtils.kt | 43 ++ .../bin11/ByteArrayBytecodeGenerator11Test.kt | 96 ++++ 
.../ion/bytecode/bin11/OpcodeTestCases.kt | 411 ++++++++++++++++++ 4 files changed, 551 insertions(+), 1 deletion(-) create mode 100644 src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt create mode 100644 src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index 58f236d9a..c557e20ea 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -88,7 +88,7 @@ internal class ByteArrayBytecodeGenerator11( } override fun readBytesReference(position: Int, length: Int): ByteSlice { - return ByteSlice(source, position, position + length - 1) + return ByteSlice(source, position, position + length) } override fun ionMinorVersion(): Int { diff --git a/src/test/java/com/amazon/ion/TextToBinaryUtils.kt b/src/test/java/com/amazon/ion/TextToBinaryUtils.kt index fcd4dc9a6..76fb252d4 100644 --- a/src/test/java/com/amazon/ion/TextToBinaryUtils.kt +++ b/src/test/java/com/amazon/ion/TextToBinaryUtils.kt @@ -24,6 +24,23 @@ object TextToBinaryUtils { return bytesAsBytes } + /** + * Converts a string of octets in the given radix to an int array. Octets must be separated by a space. + * @param octetString the string of space-separated octets. + * @param radix the radix of the octets in the string. + * @return a new int array. 
+ */ + @JvmStatic + private fun octetStringToIntArray(octetString: String, radix: Int): IntArray { + if (octetString.isEmpty()) return IntArray(0) + val intsAsStrings = octetString.split(" +".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray() + val intsAsInts = IntArray(intsAsStrings.size) + for (i in intsAsInts.indices) { + intsAsInts[i] = intsAsStrings[i].toInt(radix) + } + return intsAsInts + } + /** * Converts a string of binary octets, such as "10010111 00010011", to a byte array. */ @@ -64,4 +81,30 @@ object TextToBinaryUtils { fun ByteArray.byteArrayToBitString(): String { return this.joinToString(" ") { it.toUByte().toString(2).padStart(8, '0') } } + + /** + * Converts a byte array to a string of hex bytes, such as "A5 0F EC 52". + * The purpose of this method is to make it easier to read and write test assertions. + */ + @JvmStatic + fun ByteArray.byteArrayToHexString(): String { + return this.joinToString(" ") { it.toUByte().toString(16).padStart(2, '0') } + } + + /** + * Converts a string of decimal integers, such as "105 -9349549 0 -12 99999", to an int array. + */ + @JvmStatic + fun String.decimalStringToIntArray(): IntArray { + return octetStringToIntArray(this, 10) + } + + /** + * Helper function for generating FlexUInt hex strings from an unsigned integer. Useful for test + * cases that programmatically generate length-prefixed payloads. + */ + @JvmStatic + fun Int.toSingleHexByte(): String { + return this.toUByte().toString(16).padStart(2, '0') + } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt new file mode 100644 index 000000000..56eb5247c --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -0,0 +1,96 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11 + +import com.amazon.ion.TextToBinaryUtils.decimalStringToIntArray +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.GeneratorTestUtil.shouldGenerate +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.BOOLEAN_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT0_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT16_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT32_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT64_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT16_EMITTING_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT32_EMITTING_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT64_EMITTING_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.LOB_REFERENCE_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.NULL_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.REFERENCE_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.SHORT_TIMESTAMP_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.STRING_REFERENCE_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.TYPED_NULL_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.replacePositionTemplates +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import org.junit.jupiter.api.Assertions.assertArrayEquals +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.MethodSource + +class ByteArrayBytecodeGenerator11Test { + + @ParameterizedTest + @MethodSource( + BOOLEAN_OPCODE_CASES, NULL_OPCODE_CASES, 
TYPED_NULL_OPCODE_CASES, FLOAT0_OPCODE_CASES, + FLOAT16_OPCODE_CASES, FLOAT32_OPCODE_CASES, FLOAT64_OPCODE_CASES, SHORT_TIMESTAMP_OPCODE_CASES, + REFERENCE_OPCODE_CASES, INT16_EMITTING_OPCODE_CASES, INT32_EMITTING_OPCODE_CASES, INT64_EMITTING_OPCODE_CASES, + STRING_REFERENCE_OPCODE_CASES, LOB_REFERENCE_OPCODE_CASES + ) + fun `generator produces correct bytecode for all supported opcodes`(inputBytesString: String, expectedBytecodeString: String) { + val inputData = inputBytesString.hexStringToByteArray() + val generator = ByteArrayBytecodeGenerator11(inputData, 0) + + generator.shouldGenerate( + intArrayOf( + *replacePositionTemplates(expectedBytecodeString, 0).decimalStringToIntArray(), + Instructions.I_END_OF_INPUT + ) + ) + } + + @ParameterizedTest + @MethodSource(SHORT_TIMESTAMP_OPCODE_CASES) + fun `generator can read short timestamp references`(encodedTimestampBytes: String, expectedBytecodeString: String, expectedTimestampString: String) { + val timestampReferenceBytes = encodedTimestampBytes.hexStringToByteArray() + val generator = ByteArrayBytecodeGenerator11(timestampReferenceBytes, 0) + val bytecode = BytecodeBuffer() + generator.refill(bytecode, ConstantPool(), intArrayOf(), intArrayOf(), arrayOf()) + + val timestampPrecisionAndOffsetMode = Instructions.getData(bytecode.get(0)) + val timestampPosition = bytecode.get(1) + val expectedTimestamp = Timestamp.valueOf(expectedTimestampString) + val readTimestamp = generator.readShortTimestampReference(timestampPosition, timestampPrecisionAndOffsetMode) + assertEquals(expectedTimestamp, readTimestamp) + } + + @ParameterizedTest + @MethodSource(STRING_REFERENCE_OPCODE_CASES) + fun `generator can read string references`(encodedStringBytes: String, expectedBytecodeString: String, expectedString: String) { + val stringReferenceBytes = encodedStringBytes.hexStringToByteArray() + val generator = ByteArrayBytecodeGenerator11(stringReferenceBytes, 0) + val bytecode = BytecodeBuffer() + generator.refill(bytecode, 
ConstantPool(), intArrayOf(), intArrayOf(), arrayOf()) + + val stringLength = Instructions.getData(bytecode.get(0)) + val stringPosition = bytecode.get(1) + val readString = generator.readTextReference(stringPosition, stringLength) + assertEquals(expectedString, readString) + } + + @ParameterizedTest + @MethodSource(LOB_REFERENCE_OPCODE_CASES) + fun `generator can read lob references`(encodedLobBytes: String, expectedBytecodeString: String, expectedLobBytes: String) { + val lobReferenceBytes = encodedLobBytes.hexStringToByteArray() + val generator = ByteArrayBytecodeGenerator11(lobReferenceBytes, 0) + val bytecode = BytecodeBuffer() + generator.refill(bytecode, ConstantPool(), intArrayOf(), intArrayOf(), arrayOf()) + + val lobLength = Instructions.getData(bytecode.get(0)) + val lobPosition = bytecode.get(1) + val expectedLob = expectedLobBytes.hexStringToByteArray() + val readLob = generator.readBytesReference(lobPosition, lobLength).newByteArray() + assertArrayEquals(expectedLob, readLob) + } +} diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt new file mode 100644 index 000000000..37b8dcd52 --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt @@ -0,0 +1,411 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11 + +import com.amazon.ion.TextToBinaryUtils.byteArrayToHexString +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.TextToBinaryUtils.toSingleHexByte +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.impl.bin.PrimitiveEncoder +import org.junit.jupiter.params.provider.Arguments +import java.nio.charset.StandardCharsets + +/** + * Test cases for every binary 1.1 opcode supported by the bytecode generator + */ +object OpcodeTestCases { + + private const val THIS_NAME = "com.amazon.ion.bytecode.bin11.OpcodeTestCases" + + @JvmStatic + fun replacePositionTemplates(string: String, position: Int): String { + return Regex("%pos:(\\d+)%").replace(string) { matchResult -> + (matchResult.groups[1]?.value!!.toInt() + position).toString() + } + } + + const val BOOLEAN_OPCODE_CASES = "$THIS_NAME#booleanOpcodeCases" + + @JvmStatic + fun booleanOpcodeCases() = listOf( + "6E, ${Instructions.I_BOOL.packInstructionData(1)}", + "6F, ${Instructions.I_BOOL.packInstructionData(0)}", + ).toArguments() + + const val NULL_OPCODE_CASES = "$THIS_NAME#nullOpcodeCases" + + @JvmStatic + fun nullOpcodeCases() = listOf( + "8E, ${Instructions.I_NULL_NULL}", + ).toArguments() + + const val TYPED_NULL_OPCODE_CASES = "$THIS_NAME#typedNullOpcodeCases" + + @JvmStatic + fun typedNullOpcodeCases() = listOf( + "8F 01, ${Instructions.I_NULL_BOOL}", + "8F 02, ${Instructions.I_NULL_INT}", + "8F 03, ${Instructions.I_NULL_FLOAT}", + "8F 04, ${Instructions.I_NULL_DECIMAL}", + "8F 05, ${Instructions.I_NULL_TIMESTAMP}", + "8F 06, ${Instructions.I_NULL_STRING}", + "8F 07, ${Instructions.I_NULL_SYMBOL}", + "8F 08, ${Instructions.I_NULL_BLOB}", + "8F 09, ${Instructions.I_NULL_CLOB}", + "8F 0a, ${Instructions.I_NULL_LIST}", + "8F 0b, ${Instructions.I_NULL_SEXP}", + "8F 0c, ${Instructions.I_NULL_STRUCT}", + ).toArguments() 
+ + const val SHORT_TIMESTAMP_OPCODE_CASES = "$THIS_NAME#shortTimestampOpcodeCases" + + @JvmStatic + fun shortTimestampOpcodeCases() = listOf( + "80 35, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x0)} %pos:1%, 2023T", + "81 35 05, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x1)} %pos:1%, 2023-10T", + "82 35 7D, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x2)} %pos:1%, 2023-10-15T", + "83 35 7D CB 0A, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x3)} %pos:1%, 2023-10-15T11:22Z", + "84 35 7D CB 1A 02, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x4)} %pos:1%, 2023-10-15T11:22:33Z", + "84 35 7D CB 12 02, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x4)} %pos:1%, 2023-10-15T11:22:33-00:00", + "85 35 7D CB 12 F2 06, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x5)} %pos:1%, 2023-10-15T11:22:33.444-00:00", + "86 35 7D CB 12 2E 22 1B, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x6)} %pos:1%, 2023-10-15T11:22:33.444555-00:00", + "87 35 7D CB 12 4A 86 FD 69, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x7)} %pos:1%, 2023-10-15T11:22:33.444555666-00:00", + "88 35 7D CB EA 01, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x8)} %pos:1%, 2023-10-15T11:22+01:15", + "89 35 7D CB EA 85, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x9)} %pos:1%, 2023-10-15T11:22:33+01:15", + "8A 35 7D CB EA 85 BC 01, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0xA)} %pos:1%, 2023-10-15T11:22:33.444+01:15", + "8B 35 7D CB EA 85 8B C8 06, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0xB)} %pos:1%, 2023-10-15T11:22:33.444555+01:15", + "8C 35 7D CB EA 85 92 61 7F 1A, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0xC)} %pos:1%, 2023-10-15T11:22:33.444555666+01:15", + // TODO: add tests for max/min values, other extremes + ).toArguments() + + const val FLOAT0_OPCODE_CASES = "$THIS_NAME#float0OpcodeCases" + + @JvmStatic + fun 
float0OpcodeCases() = listOf( + "6A, ${Instructions.I_FLOAT_F32} 0", + ).toArguments() + + const val FLOAT16_OPCODE_CASES = "$THIS_NAME#float16OpcodeCases" + + @JvmStatic + fun float16OpcodeCases() = listOf( + "6B 01 00, ${Instructions.I_FLOAT_F32} 864026624", // smallest positive subnormal number + "6B FF 03, ${Instructions.I_FLOAT_F32} 947896320", // largest subnormal number + "6B 00 04, ${Instructions.I_FLOAT_F32} 947912704", // smallest positive normal number + "6B FF 7B, ${Instructions.I_FLOAT_F32} 1199562752", // largest normal number + "6B FF 3B, ${Instructions.I_FLOAT_F32} 1065345024", // largest number less than one + "6B 00 3C, ${Instructions.I_FLOAT_F32} 1065353216", + "6B 01 3C, ${Instructions.I_FLOAT_F32} 1065361408", // smallest number larger than one + + // Same as above, but negative + "6B 01 80, ${Instructions.I_FLOAT_F32} -1283457024", + "6B FF 83, ${Instructions.I_FLOAT_F32} -1199587328", + "6B 00 84, ${Instructions.I_FLOAT_F32} -1199570944", + "6B FF FB, ${Instructions.I_FLOAT_F32} -947920896", + "6B FF BB, ${Instructions.I_FLOAT_F32} -1082138624", + "6B 00 BC, ${Instructions.I_FLOAT_F32} -1082130432", + "6B 01 BC, ${Instructions.I_FLOAT_F32} -1082122240", + + "6B 00 00, ${Instructions.I_FLOAT_F32} 0", + "6B 00 80, ${Instructions.I_FLOAT_F32} -2147483648", + "6B 00 7C, ${Instructions.I_FLOAT_F32} 2139095040", + "6B 00 FC, ${Instructions.I_FLOAT_F32} -8388608", + "6B 01 7E, ${Instructions.I_FLOAT_F32} 2143297536", // quiet NaN + "6B 01 7C, ${Instructions.I_FLOAT_F32} 2139103232", // signaling NaN + "6B 01 FE, ${Instructions.I_FLOAT_F32} -4186112", // negative quiet NaN + "6B 01 FC, ${Instructions.I_FLOAT_F32} -8380416", // negative signaling NaN + "6B 53 7F, ${Instructions.I_FLOAT_F32} 2146066432", // another quiet NaN + "6B 53 FF, ${Instructions.I_FLOAT_F32} -1417216", // another negative quiet NaN + + "6B 00 C0, ${Instructions.I_FLOAT_F32} -1073741824", + "6B 55 35, ${Instructions.I_FLOAT_F32} 1051369472", + "6B 48 42, 
${Instructions.I_FLOAT_F32} 1078525952" + ).toArguments() + + const val FLOAT32_OPCODE_CASES = "$THIS_NAME#float32OpcodeCases" + + @JvmStatic + fun float32OpcodeCases() = listOf( + // TODO: cross-check all this stuff one more time + "6C 01 00 00 00, ${Instructions.I_FLOAT_F32} 1", // smallest positive subnormal number + "6C FF FF 7F 00, ${Instructions.I_FLOAT_F32} 8388607", // largest subnormal number + "6C 00 00 80 00, ${Instructions.I_FLOAT_F32} 8388608", // smallest positive normal number + "6C FF FF 7F 7F, ${Instructions.I_FLOAT_F32} 2139095039", // largest normal number + "6C FF FF 7F 3F, ${Instructions.I_FLOAT_F32} 1065353215", // largest number less than one + "6C 00 00 80 3F, ${Instructions.I_FLOAT_F32} 1065353216", + "6C 01 00 80 3F, ${Instructions.I_FLOAT_F32} 1065353217", // smallest number larger than one + + // Same as above, but negative + "6C 01 00 00 80, ${Instructions.I_FLOAT_F32} -2147483647", + "6C FF FF 7F 80, ${Instructions.I_FLOAT_F32} -2139095041", + "6C 00 00 80 80, ${Instructions.I_FLOAT_F32} -2139095040", + "6C FF FF 7F FF, ${Instructions.I_FLOAT_F32} -8388609", + "6C FF FF 7F BF, ${Instructions.I_FLOAT_F32} -1082130433", + "6C 00 00 80 BF, ${Instructions.I_FLOAT_F32} -1082130432", + "6C 01 00 80 BF, ${Instructions.I_FLOAT_F32} -1082130431", + + "6C 00 00 00 00, ${Instructions.I_FLOAT_F32} 0", + "6C 00 00 00 80, ${Instructions.I_FLOAT_F32} -2147483648", + "6C 00 00 80 7F, ${Instructions.I_FLOAT_F32} 2139095040", + "6C 00 00 80 FF, ${Instructions.I_FLOAT_F32} -8388608", + "6C 01 00 C0 7F, ${Instructions.I_FLOAT_F32} 2143289345", // quiet NaN + "6C 01 00 80 7F, ${Instructions.I_FLOAT_F32} 2139095041", // signaling NaN + "6C 01 00 C0 FF, ${Instructions.I_FLOAT_F32} -4194303", // negative quiet NaN + "6C 01 00 80 FF, ${Instructions.I_FLOAT_F32} -8388607", // negative signaling NaN + + "6C 00 00 00 C0, ${Instructions.I_FLOAT_F32} -1073741824", + "6C AB AA AA 3E, ${Instructions.I_FLOAT_F32} 1051372203", + "6C DB 0F 49 40, 
${Instructions.I_FLOAT_F32} 1078530011" + ).toArguments() + + const val FLOAT64_OPCODE_CASES = "$THIS_NAME#float64OpcodeCases" + + @JvmStatic + fun float64OpcodeCases() = listOf( + "6D 01 00 00 00 00 00 00 00, ${Instructions.I_FLOAT_F64} 0 1", // smallest positive subnormal number + "6D FF FF FF FF FF FF 0F 00, ${Instructions.I_FLOAT_F64} 1048575 -1", // largest subnormal number + "6D 00 00 00 00 00 00 10 00, ${Instructions.I_FLOAT_F64} 1048576 0", // smallest positive normal number + "6D FF FF FF FF FF FF EF 7F, ${Instructions.I_FLOAT_F64} 2146435071 -1", // largest normal number + "6D FF FF FF FF FF FF EF 3F, ${Instructions.I_FLOAT_F64} 1072693247 -1", // largest number less than one + "6D 00 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 0", + "6D 01 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 1", // smallest number larger than one + "6D 02 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 2", // the second smallest number greater than 1 + + // Same as above, but negative + "6D 01 00 00 00 00 00 00 80, ${Instructions.I_FLOAT_F64} -2147483648 1", + "6D FF FF FF FF FF FF 0F 80, ${Instructions.I_FLOAT_F64} -2146435073 -1", + "6D 00 00 00 00 00 00 10 80, ${Instructions.I_FLOAT_F64} -2146435072 0", + "6D FF FF FF FF FF FF EF FF, ${Instructions.I_FLOAT_F64} -1048577 -1", + "6D FF FF FF FF FF FF EF BF, ${Instructions.I_FLOAT_F64} -1074790401 -1", + "6D 00 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 0", + "6D 01 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 1", + "6D 02 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 2", + + "6D 00 00 00 00 00 00 00 00, ${Instructions.I_FLOAT_F64} 0 0", + "6D 00 00 00 00 00 00 00 80, ${Instructions.I_FLOAT_F64} -2147483648 0", + "6D 00 00 00 00 00 00 F0 7F, ${Instructions.I_FLOAT_F64} 2146435072 0", + "6D 00 00 00 00 00 00 F0 FF, ${Instructions.I_FLOAT_F64} -1048576 0", + "6D 01 00 00 00 00 00 F8 7F, ${Instructions.I_FLOAT_F64} 2146959360 1", // 
quiet NaN + "6D 01 00 00 00 00 00 F0 7F, ${Instructions.I_FLOAT_F64} 2146435072 1", // signaling NaN + "6D 01 00 00 00 00 00 F8 FF, ${Instructions.I_FLOAT_F64} -524288 1", // negative quiet NaN + "6D 01 00 00 00 00 00 F0 FF, ${Instructions.I_FLOAT_F64} -1048576 1", // negative signaling NaN + "6D FF FF FF FF FF FF FF 7F, ${Instructions.I_FLOAT_F64} 2147483647 -1", // another quiet NaN + "6D FF FF FF FF FF FF FF FF, ${Instructions.I_FLOAT_F64} -1 -1", // another negative quiet NaN + + "6D 00 00 00 00 00 00 00 C0, ${Instructions.I_FLOAT_F64} -1073741824 0", + "6D 55 55 55 55 55 55 D5 3F, ${Instructions.I_FLOAT_F64} 1070945621 1431655765", + "6D 18 2D 44 54 FB 21 09 40, ${Instructions.I_FLOAT_F64} 1074340347 1413754136" + ).toArguments() + + const val REFERENCE_OPCODE_CASES = "$THIS_NAME#referenceOpcodeCases" + + @JvmStatic + fun referenceOpcodeCases(): List { + val arguments = mutableListOf() + + val instructions = arrayOf( + Pair(Instructions.I_ANNOTATION_REF, 0x59), + Pair(Instructions.I_INT_REF, 0xF5), + Pair(Instructions.I_DECIMAL_REF, 0xF6), + Pair(Instructions.I_TIMESTAMP_REF, 0xF7), + Pair(Instructions.I_STRING_REF, 0xF8), + Pair(Instructions.I_SYMBOL_REF, 0xF9), + Pair(Instructions.I_BLOB_REF, 0xFE), + Pair(Instructions.I_CLOB_REF, 0xFF), + ) + + val testTemplates = listOf( + /* + FlexUInt length prefix for referenced payload + | Decimal payload length + | | Expected payload start position + | | | + | | | */ + "03, 1, 2", + "05, 2, 2", + "07, 3, 2", + "09, 4, 2", + "0B, 5, 2", + "1D, 14, 2", + "7F, 63, 2", + "81, 64, 2", + "FF, 127, 2", + "02 02, 128, 3", + "FE FF, 16383, 3", + "04 00 02, 16384, 4", + "FC FF FF, 2097151, 4", + "08 00 00 02, 2097152, 5", + "F8 FF FF 03, 4194303, 5", // maximum length of a payload + "01, 0, 2", // zero-length payload TODO: is this legal? 
+ "00 18 00 00 00 00 00 00 00 00 00 00, 1, 13", // overlong encoding on the FlexUInt + ) + + instructions.forEach { (instruction, opcode) -> + testTemplates.forEach { + val (flexUIntStr, payloadLengthStr, expectedPayloadStartPosStr) = it.split(',') + val payloadLength = payloadLengthStr.trim().toInt() + val expectedPayloadStartPosition = expectedPayloadStartPosStr.trim().toInt() + val expectedBytecodeString = "${instruction.packInstructionData(payloadLength)} %pos:$expectedPayloadStartPosition%" + + // Create a dummy payload for this value with all bytes set to zeros. + // Not actually looked at by this test, but simulates an encoded value the handler would actually + // encounter during parsing. + val payload = "00 ".repeat(payloadLength) + val inputBytes = "${opcode.toString(16).uppercase().padStart(2, '0')} $flexUIntStr $payload" + arguments.add(Arguments.of(inputBytes, expectedBytecodeString)) + } + } + + return arguments + } + + const val STRING_REFERENCE_OPCODE_CASES = "$THIS_NAME#stringReferenceOpcodeCases" + + @JvmStatic + fun stringReferenceOpcodeCases(): List { + val arguments = mutableListOf() + val testStrings = listOf( + "Hello world", + "\n\nhello\n\n", + "Love it! 
\uD83D\uDE0D❤\uFE0F\uD83D\uDC95\uD83D\uDE3B\uD83D\uDC96", + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`~!@#\$%^&*()-_=+[{]}\\|;:'\",<.>/?", + "Ἀνέβην δέ με σῖτος εὐρυβίοιο Ἰλιάδης τε καὶ Ὀδυσσείας καὶ Φοινικίων", + "", + "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f", + " \tleading and trailing whitespace\u000c\r\n" + ) + + testStrings.forEach { + val utf8Buffer = StandardCharsets.UTF_8.encode(it) + val utf8Bytes = ByteArray(utf8Buffer.remaining()) + utf8Buffer.get(utf8Bytes) + val flexUIntStr = generateFlexUIntHexString(utf8Bytes.size) + val payloadLength = utf8Bytes.size + val expectedPayloadStartPosition = flexUIntStr.hexStringToByteArray().size + 1 + val expectedBytecodeString = "${Instructions.I_STRING_REF.packInstructionData(payloadLength)} %pos:$expectedPayloadStartPosition%" + + val inputBytes = "F8 $flexUIntStr ${utf8Bytes.byteArrayToHexString()}" + arguments.add(Arguments.of(inputBytes, expectedBytecodeString, it)) + } + + return arguments + } + + const val LOB_REFERENCE_OPCODE_CASES = "$THIS_NAME#lobReferenceOpcodeCases" + + @JvmStatic + fun lobReferenceOpcodeCases(): List { + val arguments = mutableListOf() + val testLobBytes = listOf( + "00 00 00 00 00 00 00 00 00 00", + "FF FF FF FF FF FF FF FF FF FF", + "00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F A0 A1 A2 
A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF", + "A5", + "" + ) + + testLobBytes.forEach { + val lobSize = it.hexStringToByteArray().size + val flexUIntStr = generateFlexUIntHexString(lobSize) + val expectedPayloadStartPosition = flexUIntStr.hexStringToByteArray().size + 1 + val expectedBytecodeString = "${Instructions.I_BLOB_REF.packInstructionData(lobSize)} %pos:$expectedPayloadStartPosition%" + + val inputBytes = "${OpCode.VARIABLE_LENGTH_BLOB.toSingleHexByte()} $flexUIntStr $it" + arguments.add(Arguments.of(inputBytes, expectedBytecodeString, it)) + } + + return arguments + } + + const val INT16_EMITTING_OPCODE_CASES = "$THIS_NAME#int16EmittingOpcodeCases" + + @JvmStatic + fun int16EmittingOpcodeCases() = listOf( + "60, ${Instructions.I_INT_I16.packInstructionData(0)}", // 0-byte + "61 32, ${Instructions.I_INT_I16.packInstructionData(50)}", // 1-byte positive + "61 97, ${Instructions.I_INT_I16.packInstructionData(-105)}", // 1-byte negative + "62 26 73, ${Instructions.I_INT_I16.packInstructionData(29478)}", // 2-byte positive + "62 50 FC, ${Instructions.I_INT_I16.packInstructionData(-944)}", // 2-byte negative + "62 00 00, ${Instructions.I_INT_I16.packInstructionData(0)}", // 2-byte overlong 0 + "62 FF FF, ${Instructions.I_INT_I16.packInstructionData(-1)}", // 2-byte overlong -1 + "61 7F, ${Instructions.I_INT_I16.packInstructionData(127)}", + "62 80 00, ${Instructions.I_INT_I16.packInstructionData(128)}", // length boundary + "61 80, ${Instructions.I_INT_I16.packInstructionData(-128)}", + "62 7F FF, ${Instructions.I_INT_I16.packInstructionData(-129)}", // length boundary + "62 FF 7F, ${Instructions.I_INT_I16.packInstructionData(32767)}", // max value + "62 00 80, ${Instructions.I_INT_I16.packInstructionData(-32768)}", // min value 
+ ).toArguments() + + const val INT32_EMITTING_OPCODE_CASES = "$THIS_NAME#int32EmittingOpcodeCases" + + @JvmStatic + fun int32EmittingOpcodeCases() = listOf( + "63 40 42 0F, ${Instructions.I_INT_I32} 1000000", // 3-byte positive + "63 4F 34 8B, ${Instructions.I_INT_I32} -7654321", // 3-byte negative + "64 3B C4 42 7E, ${Instructions.I_INT_I32} 2118304827", // 4-byte positive + "64 57 97 13 E9, ${Instructions.I_INT_I32} -384592041", // 4-byte negative + "64 00 00 00 00, ${Instructions.I_INT_I32} 0", // 4-byte overlong 0 + "64 FF FF FF FF, ${Instructions.I_INT_I32} -1", // 4-byte overlong -1 + + "63 00 80 00, ${Instructions.I_INT_I32} 32768", // min positive, length boundary from i16 + "63 FF FF 7F, ${Instructions.I_INT_I32} 8388607", + "64 00 00 80 00, ${Instructions.I_INT_I32} 8388608", // length boundary + "64 FF FF FF 7F, ${Instructions.I_INT_I32} ${Int.MAX_VALUE}", // max value + + "63 FF 7F FF, ${Instructions.I_INT_I32} -32769", // max negative, length boundary from i16 + "63 00 00 80, ${Instructions.I_INT_I32} -8388608", + "64 FF FF 7F FF, ${Instructions.I_INT_I32} -8388609", // length boundary + "64 00 00 00 80, ${Instructions.I_INT_I32} ${Int.MIN_VALUE}", // min value + ).toArguments() + + const val INT64_EMITTING_OPCODE_CASES = "$THIS_NAME#int64EmittingOpcodeCases" + + @JvmStatic + fun int64EmittingOpcodeCases() = listOf( + "65 6A 22 7C AB 5C, ${Instructions.I_INT_I64} 92 -1417928086, 398014030442", // 5-byte positive + "65 96 DD 83 54 A3, ${Instructions.I_INT_I64} -93 1417928086, -398014030442", // 5-byte negative + "66 C4 87 8F 09 97 5D, ${Instructions.I_INT_I64} 23959 160401348, 102903281846212", // 6-byte positive + "66 3C 78 70 F6 68 A2, ${Instructions.I_INT_I64} -23960 -160401348, -102903281846212", // 6-byte negative + "67 62 9A 42 56 83 77 10, ${Instructions.I_INT_I64} 1079171 1447205474, 4635005598997090", // 7-byte positive + "67 9E 65 BD A9 7C 88 EF, ${Instructions.I_INT_I64} -1079172 -1447205474, -4635005598997090", // 7-byte negative + "68 A4 
F7 64 69 16 27 BF 31, ${Instructions.I_INT_I64} 834610966 1768224676, 3584626805621192612", // 8-byte positive + "68 5C 08 9B 96 E9 D8 40 CE, ${Instructions.I_INT_I64} -834610967 -1768224676, -3584626805621192612", // 8-byte negative + "68 00 00 00 00 00 00 00 00, ${Instructions.I_INT_I64} 0 0, 0", // 8-byte overlong 0 + "68 FF FF FF FF FF FF FF FF, ${Instructions.I_INT_I64} -1 -1, -1", // 8-byte overlong -1 + + "65 00 00 00 80 00, ${Instructions.I_INT_I64} 0 -2147483648, 2147483648", // min positive, length boundary from i32 + "65 FF FF FF FF 7F, ${Instructions.I_INT_I64} 127 -1, 549755813887", + "66 00 00 00 00 80 00, ${Instructions.I_INT_I64} 128 0, 549755813888", // length boundary + "66 FF FF FF FF FF 7F, ${Instructions.I_INT_I64} 32767 -1, 140737488355327", + "67 00 00 00 00 00 80 00, ${Instructions.I_INT_I64} 32768 0, 140737488355328", // length boundary + "67 FF FF FF FF FF FF 7F, ${Instructions.I_INT_I64} 8388607 -1, 36028797018963967", + "68 00 00 00 00 00 00 80 00, ${Instructions.I_INT_I64} 8388608 0, 36028797018963968", // length boundary + "68 FF FF FF FF FF FF FF 7F, ${Instructions.I_INT_I64} 2147483647 -1, ${Long.MAX_VALUE}", // max value + + "65 FF FF FF 7F FF, ${Instructions.I_INT_I64} -1 2147483647, -2147483649", // max negative, length boundary from i32 + "65 00 00 00 00 80, ${Instructions.I_INT_I64} -128 0, -549755813888", + "66 FF FF FF FF 7F FF, ${Instructions.I_INT_I64} -129 -1, -549755813889", // length boundary + "66 00 00 00 00 00 80, ${Instructions.I_INT_I64} -32768 0, -140737488355328", + "67 FF FF FF FF FF 7F FF, ${Instructions.I_INT_I64} -32769 -1, -140737488355329", // length boundary + "67 00 00 00 00 00 00 80, ${Instructions.I_INT_I64} -8388608 0, -36028797018963968", + "68 FF FF FF FF FF FF 7F FF, ${Instructions.I_INT_I64} -8388609 -1, -36028797018963969", // length boundary + "68 00 00 00 00 00 00 00 80, ${Instructions.I_INT_I64} -2147483648 0, ${Long.MIN_VALUE}", // min value + ).toArguments() + + private fun List.toArguments() = 
map { + Arguments.of(*it.split(',').map { it.trim() }.toTypedArray()) + } + + /** + * Helper function for generating FlexUInt hex strings from an unsigned integer. Useful for test + * cases that programmatically generate length-prefixed payloads. + */ + private fun generateFlexUIntHexString(value: Int): String { + val asLong = value.toLong() + val length = PrimitiveEncoder.flexUIntLength(asLong) + val bytes = ByteArray(length) + PrimitiveEncoder.writeFlexIntOrUIntInto(bytes, 0, asLong, length) + return bytes.byteArrayToHexString() + } +} From 115c3f090101628c3528b4656e97687c1aae22b0 Mon Sep 17 00:00:00 2001 From: austnwil Date: Wed, 22 Oct 2025 15:41:50 -0700 Subject: [PATCH 06/22] Add comments --- .../ion/bytecode/bin11/OpcodeTestCases.kt | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt index 37b8dcd52..2014421a3 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt @@ -12,12 +12,31 @@ import org.junit.jupiter.params.provider.Arguments import java.nio.charset.StandardCharsets /** - * Test cases for every binary 1.1 opcode supported by the bytecode generator + * Test cases for every binary 1.1 opcode supported by the bytecode generator. Test cases have the following components: + * - Hex string of input bytes to test + * - Decimal string of expected bytecode after compiling the input bytes + * - String representation of the value encoded by these bytes. This is opcode-specific and up to individual opcode + * handlers to parse and understand. Not every test case supplies this as of yet + * + * Bytecode can contain placeholders in the form `%pos:N%`, which should be replaced with `N` plus the + * index of the first byte of the binary in the input. 
For example, if a bytecode string contains `%pos:30%` and the + * test suite is writing the binary at index 0 of a byte array passed to a + * [ByteArrayBytecodeGenerator11], then the placeholder should be replaced with `30`, and if the binary were written at + * index 5, the placeholder should be replaced with `35`. This allows test cases where the resulting bytecode is + * sensitive to the opcode's position in the input (e.g. `OP_*_REF` codes) to be reused across test cases that use them + * at different offsets. Pass the decimal string to [replacePositionTemplates] to parse these placeholders. */ object OpcodeTestCases { private const val THIS_NAME = "com.amazon.ion.bytecode.bin11.OpcodeTestCases" + /** + * Parse any placeholders in the form `%pos:N%` in [string] to `N` plus [position]. Reveals the + * correct bytecode for opcodes that are sensitive to their position in the input. + * + * [position] should be the index in a BytecodeGenerator's input at which you are writing the corresponding + * binary-encoded value. 
+ */ @JvmStatic fun replacePositionTemplates(string: String, position: Int): String { return Regex("%pos:(\\d+)%").replace(string) { matchResult -> From 7de8637b7ca41e5ca02266adf0e0359a62b4cb8c Mon Sep 17 00:00:00 2001 From: austnwil Date: Thu, 23 Oct 2025 09:50:14 -0700 Subject: [PATCH 07/22] Add test for bytecode generator; refactor opcode handler test cases - Opcode handler tests now use the common test cases shared by the bytecode generator tests - Added test to bytecode generator that tests long inputs containing all supported opcodes --- .../bin11/ByteArrayBytecodeGenerator11Test.kt | 87 +++++- .../ion/bytecode/bin11/OpcodeTestCases.kt | 268 ++++++++++-------- .../bytearray/BooleanOpcodeHandlerTest.kt | 67 ++--- .../bin11/bytearray/FloatOpcodeHandlerTest.kt | 226 ++------------- .../bin11/bytearray/IntOpcodeHandlerTest.kt | 207 ++++---------- .../bin11/bytearray/NullOpcodeHandlerTest.kt | 37 +-- .../bin11/bytearray/OpcodeHandlerTestUtil.kt | 59 ++++ .../bytearray/ReferenceOpcodeHandlerTest.kt | 147 ++-------- .../bytearray/ShortTimestampDecoderTest.kt | 1 + .../ShortTimestampOpcodeHandlerTest.kt | 59 +--- .../bytearray/TypedNullOpcodeHandlerTest.kt | 47 +-- 11 files changed, 419 insertions(+), 786 deletions(-) create mode 100644 src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index 56eb5247c..d86d4b76a 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -5,27 +5,48 @@ package com.amazon.ion.bytecode.bin11 import com.amazon.ion.TextToBinaryUtils.decimalStringToIntArray import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode 
import com.amazon.ion.bytecode.GeneratorTestUtil.shouldGenerate import com.amazon.ion.bytecode.bin11.OpcodeTestCases.BOOLEAN_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT0_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT16_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT32_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT64_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT16_EMITTING_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT32_EMITTING_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT0_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT16_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT24_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT32_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT64_EMITTING_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT8_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.LOB_REFERENCE_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.NULL_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.REFERENCE_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.SHORT_TIMESTAMP_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.STRING_REFERENCE_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.TYPED_NULL_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.booleanOpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.float0OpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.float16OpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.float32OpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.float64OpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.int0OpcodeCases +import 
com.amazon.ion.bytecode.bin11.OpcodeTestCases.int16OpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.int32OpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.int64EmittingOpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.int8OpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.lobReferenceOpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.nullOpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.referenceOpcodeCases import com.amazon.ion.bytecode.bin11.OpcodeTestCases.replacePositionTemplates +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.shortTimestampOpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.stringReferenceOpcodeCases +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.typedNullOpcodeCases import com.amazon.ion.bytecode.ir.Instructions import com.amazon.ion.bytecode.util.BytecodeBuffer import com.amazon.ion.bytecode.util.ConstantPool import org.junit.jupiter.api.Assertions.assertArrayEquals import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource @@ -35,8 +56,8 @@ class ByteArrayBytecodeGenerator11Test { @MethodSource( BOOLEAN_OPCODE_CASES, NULL_OPCODE_CASES, TYPED_NULL_OPCODE_CASES, FLOAT0_OPCODE_CASES, FLOAT16_OPCODE_CASES, FLOAT32_OPCODE_CASES, FLOAT64_OPCODE_CASES, SHORT_TIMESTAMP_OPCODE_CASES, - REFERENCE_OPCODE_CASES, INT16_EMITTING_OPCODE_CASES, INT32_EMITTING_OPCODE_CASES, INT64_EMITTING_OPCODE_CASES, - STRING_REFERENCE_OPCODE_CASES, LOB_REFERENCE_OPCODE_CASES + REFERENCE_OPCODE_CASES, INT0_OPCODE_CASES, INT8_OPCODE_CASES, INT16_OPCODE_CASES, INT24_OPCODE_CASES, + INT32_OPCODE_CASES, INT64_EMITTING_OPCODE_CASES, STRING_REFERENCE_OPCODE_CASES, LOB_REFERENCE_OPCODE_CASES ) fun `generator produces correct bytecode for all supported opcodes`(inputBytesString: String, expectedBytecodeString: String) { val 
inputData = inputBytesString.hexStringToByteArray() @@ -50,6 +71,64 @@ class ByteArrayBytecodeGenerator11Test { ) } + /** + * Concatenates all the tests for all supported opcodes together into a single test string. This tests the REFILL + * behavior and validates that reference instructions that appear in the middle of the input are handled correctly. + */ + @Test + fun `generator produces correct bytecode for sequence of all supported opcodes`() { + var inputData = byteArrayOf() + var expectedBytecode = intArrayOf() + + val opcodeTests = booleanOpcodeCases() + + nullOpcodeCases() + + typedNullOpcodeCases() + + float0OpcodeCases() + + float16OpcodeCases() + + float32OpcodeCases() + + float64OpcodeCases() + + shortTimestampOpcodeCases() + + referenceOpcodeCases() + + int0OpcodeCases() + + int8OpcodeCases() + + int16OpcodeCases() + + int32OpcodeCases() + + int64EmittingOpcodeCases() + + stringReferenceOpcodeCases() + + lobReferenceOpcodeCases() + + // Build up the input bytes and expected bytecode from the individual opcode tests. Each compiled top-level + // value will be separated by I_REFILL. + var bytesRead = 0 + opcodeTests.forEach { args -> + val (inputBytesString: String, expectedBytecodeString) = args.get().map { it as String } + val nextBytes = inputBytesString.hexStringToByteArray() + inputData = inputData.plus(nextBytes) + val nextBytecode = replacePositionTemplates(expectedBytecodeString, bytesRead) + .decimalStringToIntArray() + expectedBytecode = expectedBytecode.plus(nextBytecode.plus(Instructions.I_REFILL)) + bytesRead += nextBytes.size + } + + // Replace the last REFILL added by the loop with an END_OF_INPUT. We don't want a refill followed by nothing + // and then EOF. 
+ expectedBytecode[expectedBytecode.size - 1] = Instructions.I_END_OF_INPUT + + val generator = ByteArrayBytecodeGenerator11(inputData, 0) + val bytecodeBuffer = BytecodeBuffer() + val constantPool = ConstantPool() + val macroSrc = intArrayOf() + val macroIndices = intArrayOf() + val symbolTable = arrayOf() + var isEOF: Boolean + do { + generator.refill(bytecodeBuffer, constantPool, macroSrc, macroIndices, symbolTable) + isEOF = bytecodeBuffer.get(bytecodeBuffer.size() - 1) == Instructions.I_END_OF_INPUT + } while (!isEOF) + + assertEqualBytecode(expectedBytecode, bytecodeBuffer.toArray()) + } + @ParameterizedTest @MethodSource(SHORT_TIMESTAMP_OPCODE_CASES) fun `generator can read short timestamp references`(encodedTimestampBytes: String, expectedBytecodeString: String, expectedTimestampString: String) { diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt index 2014421a3..e18eab9b6 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt @@ -16,7 +16,7 @@ import java.nio.charset.StandardCharsets * - Hex string of input bytes to test * - Decimal string of expected bytecode after compiling the input bytes * - String representation of the value encoded by these bytes. This is opcode-specific and up to individual opcode - * handlers to parse and understand. Not every test case supplies this as of yet + * handlers to parse and understand. Not every test case supplies this. * * Bytecode can contain placeholders in the form `%pos:%`, which should be replaced with `` plus the * index of the first byte of the binary in the input. 
For example, if a bytecode string contains `%pos:30%` and the @@ -48,8 +48,8 @@ object OpcodeTestCases { @JvmStatic fun booleanOpcodeCases() = listOf( - "6E, ${Instructions.I_BOOL.packInstructionData(1)}", - "6F, ${Instructions.I_BOOL.packInstructionData(0)}", + "6E, ${Instructions.I_BOOL.packInstructionData(1)}, true", + "6F, ${Instructions.I_BOOL.packInstructionData(0)}, false", ).toArguments() const val NULL_OPCODE_CASES = "$THIS_NAME#nullOpcodeCases" @@ -102,44 +102,44 @@ object OpcodeTestCases { @JvmStatic fun float0OpcodeCases() = listOf( - "6A, ${Instructions.I_FLOAT_F32} 0", + "6A, ${Instructions.I_FLOAT_F32} 0, 0", ).toArguments() const val FLOAT16_OPCODE_CASES = "$THIS_NAME#float16OpcodeCases" @JvmStatic fun float16OpcodeCases() = listOf( - "6B 01 00, ${Instructions.I_FLOAT_F32} 864026624", // smallest positive subnormal number - "6B FF 03, ${Instructions.I_FLOAT_F32} 947896320", // largest subnormal number - "6B 00 04, ${Instructions.I_FLOAT_F32} 947912704", // smallest positive normal number - "6B FF 7B, ${Instructions.I_FLOAT_F32} 1199562752", // largest normal number - "6B FF 3B, ${Instructions.I_FLOAT_F32} 1065345024", // largest number less than one - "6B 00 3C, ${Instructions.I_FLOAT_F32} 1065353216", - "6B 01 3C, ${Instructions.I_FLOAT_F32} 1065361408", // smallest number larger than one + "6B 01 00, ${Instructions.I_FLOAT_F32} 864026624, 0.000000059604645", // smallest positive subnormal number + "6B FF 03, ${Instructions.I_FLOAT_F32} 947896320, 0.000060975552", // largest subnormal number + "6B 00 04, ${Instructions.I_FLOAT_F32} 947912704, 0.00006103515625", // smallest positive normal number + "6B FF 7B, ${Instructions.I_FLOAT_F32} 1199562752, 65504", // largest normal number + "6B FF 3B, ${Instructions.I_FLOAT_F32} 1065345024, 0.99951172", // largest number less than one + "6B 00 3C, ${Instructions.I_FLOAT_F32} 1065353216, 1", + "6B 01 3C, ${Instructions.I_FLOAT_F32} 1065361408, 1.00097656", // smallest number larger than one // Same as above, 
but negative - "6B 01 80, ${Instructions.I_FLOAT_F32} -1283457024", - "6B FF 83, ${Instructions.I_FLOAT_F32} -1199587328", - "6B 00 84, ${Instructions.I_FLOAT_F32} -1199570944", - "6B FF FB, ${Instructions.I_FLOAT_F32} -947920896", - "6B FF BB, ${Instructions.I_FLOAT_F32} -1082138624", - "6B 00 BC, ${Instructions.I_FLOAT_F32} -1082130432", - "6B 01 BC, ${Instructions.I_FLOAT_F32} -1082122240", - - "6B 00 00, ${Instructions.I_FLOAT_F32} 0", - "6B 00 80, ${Instructions.I_FLOAT_F32} -2147483648", - "6B 00 7C, ${Instructions.I_FLOAT_F32} 2139095040", - "6B 00 FC, ${Instructions.I_FLOAT_F32} -8388608", - "6B 01 7E, ${Instructions.I_FLOAT_F32} 2143297536", // quiet NaN - "6B 01 7C, ${Instructions.I_FLOAT_F32} 2139103232", // signaling NaN - "6B 01 FE, ${Instructions.I_FLOAT_F32} -4186112", // negative quiet NaN - "6B 01 FC, ${Instructions.I_FLOAT_F32} -8380416", // negative signaling NaN - "6B 53 7F, ${Instructions.I_FLOAT_F32} 2146066432", // another quiet NaN - "6B 53 FF, ${Instructions.I_FLOAT_F32} -1417216", // another negative quiet NaN - - "6B 00 C0, ${Instructions.I_FLOAT_F32} -1073741824", - "6B 55 35, ${Instructions.I_FLOAT_F32} 1051369472", - "6B 48 42, ${Instructions.I_FLOAT_F32} 1078525952" + "6B 01 80, ${Instructions.I_FLOAT_F32} -1283457024, -0.000000059604645", + "6B FF 83, ${Instructions.I_FLOAT_F32} -1199587328, -0.000060975552", + "6B 00 84, ${Instructions.I_FLOAT_F32} -1199570944, -0.00006103515625", + "6B FF FB, ${Instructions.I_FLOAT_F32} -947920896, -65504", + "6B FF BB, ${Instructions.I_FLOAT_F32} -1082138624, -0.99951172", + "6B 00 BC, ${Instructions.I_FLOAT_F32} -1082130432, -1", + "6B 01 BC, ${Instructions.I_FLOAT_F32} -1082122240, -1.00097656", + + "6B 00 00, ${Instructions.I_FLOAT_F32} 0, 0", + "6B 00 80, ${Instructions.I_FLOAT_F32} -2147483648, -0", + "6B 00 7C, ${Instructions.I_FLOAT_F32} 2139095040, Infinity", + "6B 00 FC, ${Instructions.I_FLOAT_F32} -8388608, -Infinity", + "6B 01 7E, ${Instructions.I_FLOAT_F32} 2143297536, NaN", // quiet 
NaN + "6B 01 7C, ${Instructions.I_FLOAT_F32} 2139103232, NaN", // signaling NaN + "6B 01 FE, ${Instructions.I_FLOAT_F32} -4186112, NaN", // negative quiet NaN + "6B 01 FC, ${Instructions.I_FLOAT_F32} -8380416, NaN", // negative signaling NaN + "6B 53 7F, ${Instructions.I_FLOAT_F32} 2146066432, NaN", // another quiet NaN + "6B 53 FF, ${Instructions.I_FLOAT_F32} -1417216, NaN", // another negative quiet NaN + + "6B 00 C0, ${Instructions.I_FLOAT_F32} -1073741824, -2", + "6B 55 35, ${Instructions.I_FLOAT_F32} 1051369472, 0.33325195", + "6B 48 42, ${Instructions.I_FLOAT_F32} 1078525952, 3.140625" ).toArguments() const val FLOAT32_OPCODE_CASES = "$THIS_NAME#float32OpcodeCases" @@ -147,78 +147,82 @@ object OpcodeTestCases { @JvmStatic fun float32OpcodeCases() = listOf( // TODO: cross-check all this stuff one more time - "6C 01 00 00 00, ${Instructions.I_FLOAT_F32} 1", // smallest positive subnormal number - "6C FF FF 7F 00, ${Instructions.I_FLOAT_F32} 8388607", // largest subnormal number - "6C 00 00 80 00, ${Instructions.I_FLOAT_F32} 8388608", // smallest positive normal number - "6C FF FF 7F 7F, ${Instructions.I_FLOAT_F32} 2139095039", // largest normal number - "6C FF FF 7F 3F, ${Instructions.I_FLOAT_F32} 1065353215", // largest number less than one - "6C 00 00 80 3F, ${Instructions.I_FLOAT_F32} 1065353216", - "6C 01 00 80 3F, ${Instructions.I_FLOAT_F32} 1065353217", // smallest number larger than one + "6C 01 00 00 00, ${Instructions.I_FLOAT_F32} 1, 1.4012984643e-45", // smallest positive subnormal number + "6C FF FF 7F 00, ${Instructions.I_FLOAT_F32} 8388607, 1.1754942107e-38", // largest subnormal number + "6C 00 00 80 00, ${Instructions.I_FLOAT_F32} 8388608, 1.1754943508e-38", // smallest positive normal number + "6C FF FF 7F 7F, ${Instructions.I_FLOAT_F32} 2139095039, 3.4028234664e38", // largest normal number + "6C FF FF 7F 3F, ${Instructions.I_FLOAT_F32} 1065353215, 0.999999940395355225", // largest number less than one + "6C 00 00 80 3F, 
${Instructions.I_FLOAT_F32} 1065353216, 1", + "6C 01 00 80 3F, ${Instructions.I_FLOAT_F32} 1065353217, 1.00000011920928955", // smallest number larger than one // Same as above, but negative - "6C 01 00 00 80, ${Instructions.I_FLOAT_F32} -2147483647", - "6C FF FF 7F 80, ${Instructions.I_FLOAT_F32} -2139095041", - "6C 00 00 80 80, ${Instructions.I_FLOAT_F32} -2139095040", - "6C FF FF 7F FF, ${Instructions.I_FLOAT_F32} -8388609", - "6C FF FF 7F BF, ${Instructions.I_FLOAT_F32} -1082130433", - "6C 00 00 80 BF, ${Instructions.I_FLOAT_F32} -1082130432", - "6C 01 00 80 BF, ${Instructions.I_FLOAT_F32} -1082130431", - - "6C 00 00 00 00, ${Instructions.I_FLOAT_F32} 0", - "6C 00 00 00 80, ${Instructions.I_FLOAT_F32} -2147483648", - "6C 00 00 80 7F, ${Instructions.I_FLOAT_F32} 2139095040", - "6C 00 00 80 FF, ${Instructions.I_FLOAT_F32} -8388608", - "6C 01 00 C0 7F, ${Instructions.I_FLOAT_F32} 2143289345", // quiet NaN - "6C 01 00 80 7F, ${Instructions.I_FLOAT_F32} 2139095041", // signaling NaN - "6C 01 00 C0 FF, ${Instructions.I_FLOAT_F32} -4194303", // negative quiet NaN - "6C 01 00 80 FF, ${Instructions.I_FLOAT_F32} -8388607", // negative signaling NaN - - "6C 00 00 00 C0, ${Instructions.I_FLOAT_F32} -1073741824", - "6C AB AA AA 3E, ${Instructions.I_FLOAT_F32} 1051372203", - "6C DB 0F 49 40, ${Instructions.I_FLOAT_F32} 1078530011" + "6C 01 00 00 80, ${Instructions.I_FLOAT_F32} -2147483647, -1.4012984643e-45", + "6C FF FF 7F 80, ${Instructions.I_FLOAT_F32} -2139095041, -1.1754942107e-38", + "6C 00 00 80 80, ${Instructions.I_FLOAT_F32} -2139095040, -1.1754943508e-38", + "6C FF FF 7F FF, ${Instructions.I_FLOAT_F32} -8388609, -3.4028234664e38", + "6C FF FF 7F BF, ${Instructions.I_FLOAT_F32} -1082130433, -0.999999940395355225", + "6C 00 00 80 BF, ${Instructions.I_FLOAT_F32} -1082130432, -1", + "6C 01 00 80 BF, ${Instructions.I_FLOAT_F32} -1082130431, -1.00000011920928955", + + "6C 00 00 00 00, ${Instructions.I_FLOAT_F32} 0, 0", + "6C 00 00 00 80, ${Instructions.I_FLOAT_F32} 
-2147483648, -0", + "6C 00 00 80 7F, ${Instructions.I_FLOAT_F32} 2139095040, Infinity", + "6C 00 00 80 FF, ${Instructions.I_FLOAT_F32} -8388608, -Infinity", + "6C 01 00 C0 7F, ${Instructions.I_FLOAT_F32} 2143289345, NaN", // quiet NaN + "6C 01 00 80 7F, ${Instructions.I_FLOAT_F32} 2139095041, NaN", // signaling NaN + "6C 01 00 C0 FF, ${Instructions.I_FLOAT_F32} -4194303, NaN", // negative quiet NaN + "6C 01 00 80 FF, ${Instructions.I_FLOAT_F32} -8388607, NaN", // negative signaling NaN + + "6C 00 00 00 C0, ${Instructions.I_FLOAT_F32} -1073741824, -2", + "6C AB AA AA 3E, ${Instructions.I_FLOAT_F32} 1051372203, 0.333333343267440796", + "6C DB 0F 49 40, ${Instructions.I_FLOAT_F32} 1078530011, 3.14159274101257324" ).toArguments() const val FLOAT64_OPCODE_CASES = "$THIS_NAME#float64OpcodeCases" @JvmStatic fun float64OpcodeCases() = listOf( - "6D 01 00 00 00 00 00 00 00, ${Instructions.I_FLOAT_F64} 0 1", // smallest positive subnormal number - "6D FF FF FF FF FF FF 0F 00, ${Instructions.I_FLOAT_F64} 1048575 -1", // largest subnormal number - "6D 00 00 00 00 00 00 10 00, ${Instructions.I_FLOAT_F64} 1048576 0", // smallest positive normal number - "6D FF FF FF FF FF FF EF 7F, ${Instructions.I_FLOAT_F64} 2146435071 -1", // largest normal number - "6D FF FF FF FF FF FF EF 3F, ${Instructions.I_FLOAT_F64} 1072693247 -1", // largest number less than one - "6D 00 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 0", - "6D 01 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 1", // smallest number larger than one - "6D 02 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 2", // the second smallest number greater than 1 + "6D 01 00 00 00 00 00 00 00, ${Instructions.I_FLOAT_F64} 0 1, 4.9406564584124654e-324", // smallest positive subnormal number + "6D FF FF FF FF FF FF 0F 00, ${Instructions.I_FLOAT_F64} 1048575 -1, 2.2250738585072009e-308", // largest subnormal number + "6D 00 00 00 00 00 00 10 00, ${Instructions.I_FLOAT_F64} 1048576 0, 
2.2250738585072014e-308", // smallest positive normal number + "6D FF FF FF FF FF FF EF 7F, ${Instructions.I_FLOAT_F64} 2146435071 -1, 1.7976931348623157e308", // largest normal number + "6D FF FF FF FF FF FF EF 3F, ${Instructions.I_FLOAT_F64} 1072693247 -1, 0.99999999999999988898", // largest number less than one + "6D 00 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 0, 1", + "6D 01 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 1, 1.0000000000000002220", // smallest number larger than one + "6D 02 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 2, 1.0000000000000004441", // the second smallest number greater than 1 // Same as above, but negative - "6D 01 00 00 00 00 00 00 80, ${Instructions.I_FLOAT_F64} -2147483648 1", - "6D FF FF FF FF FF FF 0F 80, ${Instructions.I_FLOAT_F64} -2146435073 -1", - "6D 00 00 00 00 00 00 10 80, ${Instructions.I_FLOAT_F64} -2146435072 0", - "6D FF FF FF FF FF FF EF FF, ${Instructions.I_FLOAT_F64} -1048577 -1", - "6D FF FF FF FF FF FF EF BF, ${Instructions.I_FLOAT_F64} -1074790401 -1", - "6D 00 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 0", - "6D 01 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 1", - "6D 02 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 2", - - "6D 00 00 00 00 00 00 00 00, ${Instructions.I_FLOAT_F64} 0 0", - "6D 00 00 00 00 00 00 00 80, ${Instructions.I_FLOAT_F64} -2147483648 0", - "6D 00 00 00 00 00 00 F0 7F, ${Instructions.I_FLOAT_F64} 2146435072 0", - "6D 00 00 00 00 00 00 F0 FF, ${Instructions.I_FLOAT_F64} -1048576 0", - "6D 01 00 00 00 00 00 F8 7F, ${Instructions.I_FLOAT_F64} 2146959360 1", // quiet NaN - "6D 01 00 00 00 00 00 F0 7F, ${Instructions.I_FLOAT_F64} 2146435072 1", // signaling NaN - "6D 01 00 00 00 00 00 F8 FF, ${Instructions.I_FLOAT_F64} -524288 1", // negative quiet NaN - "6D 01 00 00 00 00 00 F0 FF, ${Instructions.I_FLOAT_F64} -1048576 1", // negative signaling NaN - "6D FF FF FF FF FF FF FF 7F, 
${Instructions.I_FLOAT_F64} 2147483647 -1", // another quiet NaN - "6D FF FF FF FF FF FF FF FF, ${Instructions.I_FLOAT_F64} -1 -1", // another negative quiet NaN - - "6D 00 00 00 00 00 00 00 C0, ${Instructions.I_FLOAT_F64} -1073741824 0", - "6D 55 55 55 55 55 55 D5 3F, ${Instructions.I_FLOAT_F64} 1070945621 1431655765", - "6D 18 2D 44 54 FB 21 09 40, ${Instructions.I_FLOAT_F64} 1074340347 1413754136" + "6D 01 00 00 00 00 00 00 80, ${Instructions.I_FLOAT_F64} -2147483648 1, -4.9406564584124654e-324", + "6D FF FF FF FF FF FF 0F 80, ${Instructions.I_FLOAT_F64} -2146435073 -1, -2.2250738585072009e-308", + "6D 00 00 00 00 00 00 10 80, ${Instructions.I_FLOAT_F64} -2146435072 0, -2.2250738585072014e-308", + "6D FF FF FF FF FF FF EF FF, ${Instructions.I_FLOAT_F64} -1048577 -1, -1.7976931348623157e308", + "6D FF FF FF FF FF FF EF BF, ${Instructions.I_FLOAT_F64} -1074790401 -1, -0.99999999999999988898", + "6D 00 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 0, -1", + "6D 01 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 1, -1.0000000000000002220", + "6D 02 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 2, -1.0000000000000004441", + + "6D 00 00 00 00 00 00 00 00, ${Instructions.I_FLOAT_F64} 0 0, 0", + "6D 00 00 00 00 00 00 00 80, ${Instructions.I_FLOAT_F64} -2147483648 0, -0", + "6D 00 00 00 00 00 00 F0 7F, ${Instructions.I_FLOAT_F64} 2146435072 0, Infinity", + "6D 00 00 00 00 00 00 F0 FF, ${Instructions.I_FLOAT_F64} -1048576 0, -Infinity", + "6D 01 00 00 00 00 00 F8 7F, ${Instructions.I_FLOAT_F64} 2146959360 1, NaN", // quiet NaN + "6D 01 00 00 00 00 00 F0 7F, ${Instructions.I_FLOAT_F64} 2146435072 1, NaN", // signaling NaN + "6D 01 00 00 00 00 00 F8 FF, ${Instructions.I_FLOAT_F64} -524288 1, NaN", // negative quiet NaN + "6D 01 00 00 00 00 00 F0 FF, ${Instructions.I_FLOAT_F64} -1048576 1, NaN", // negative signaling NaN + "6D FF FF FF FF FF FF FF 7F, ${Instructions.I_FLOAT_F64} 2147483647 -1, NaN", // another quiet NaN + "6D FF 
FF FF FF FF FF FF FF, ${Instructions.I_FLOAT_F64} -1 -1, NaN", // another negative quiet NaN + + "6D 00 00 00 00 00 00 00 C0, ${Instructions.I_FLOAT_F64} -1073741824 0, -2", + "6D 55 55 55 55 55 55 D5 3F, ${Instructions.I_FLOAT_F64} 1070945621 1431655765, 0.33333333333333331483", + "6D 18 2D 44 54 FB 21 09 40, ${Instructions.I_FLOAT_F64} 1074340347 1413754136, 3.141592653589793116" ).toArguments() const val REFERENCE_OPCODE_CASES = "$THIS_NAME#referenceOpcodeCases" + /** + * Generates tests for handlers that emit similar *_REF bytecode (instructions packed with a UInt22 reference length + * and followed by a UInt32 position of the data). + */ @JvmStatic fun referenceOpcodeCases(): List { val arguments = mutableListOf() @@ -337,45 +341,61 @@ object OpcodeTestCases { return arguments } - const val INT16_EMITTING_OPCODE_CASES = "$THIS_NAME#int16EmittingOpcodeCases" + const val INT0_OPCODE_CASES = "$THIS_NAME#int0OpcodeCases" + + @JvmStatic + fun int0OpcodeCases() = listOf( + "60, ${Instructions.I_INT_I16.packInstructionData(0)}, 0", // 0-byte + ).toArguments() + + const val INT8_OPCODE_CASES = "$THIS_NAME#int8OpcodeCases" + + @JvmStatic + fun int8OpcodeCases() = listOf( + "61 32, ${Instructions.I_INT_I16.packInstructionData(50)}, 50", // 1-byte positive + "61 97, ${Instructions.I_INT_I16.packInstructionData(-105)}, -105", // 1-byte negative + "61 7F, ${Instructions.I_INT_I16.packInstructionData(127)}, 127", // max value + "61 80, ${Instructions.I_INT_I16.packInstructionData(-128)}, -128", // min value + ).toArguments() + + const val INT16_OPCODE_CASES = "$THIS_NAME#int16OpcodeCases" + + @JvmStatic + fun int16OpcodeCases() = listOf( + "62 26 73, ${Instructions.I_INT_I16.packInstructionData(29478)}, 29478", // 2-byte positive + "62 50 FC, ${Instructions.I_INT_I16.packInstructionData(-944)}, -944", // 2-byte negative + "62 00 00, ${Instructions.I_INT_I16.packInstructionData(0)}, 0", // 2-byte overlong 0 + "62 FF FF, ${Instructions.I_INT_I16.packInstructionData(-1)}, -1", 
// 2-byte overlong -1 + "62 80 00, ${Instructions.I_INT_I16.packInstructionData(128)}, 128", // min positive + "62 7F FF, ${Instructions.I_INT_I16.packInstructionData(-129)}, -129", // max negative + "62 FF 7F, ${Instructions.I_INT_I16.packInstructionData(32767)}, 32767", // max value + "62 00 80, ${Instructions.I_INT_I16.packInstructionData(-32768)}, -32768", // min value + ).toArguments() + + const val INT24_OPCODE_CASES = "$THIS_NAME#int24OpcodeCases" @JvmStatic - fun int16EmittingOpcodeCases() = listOf( - "60, ${Instructions.I_INT_I16.packInstructionData(0)}", // 0-byte - "61 32, ${Instructions.I_INT_I16.packInstructionData(50)}", // 1-byte positive - "61 97, ${Instructions.I_INT_I16.packInstructionData(-105)}", // 1-byte negative - "62 26 73, ${Instructions.I_INT_I16.packInstructionData(29478)}", // 2-byte positive - "62 50 FC, ${Instructions.I_INT_I16.packInstructionData(-944)}", // 2-byte negative - "62 00 00, ${Instructions.I_INT_I16.packInstructionData(0)}", // 2-byte overlong 0 - "62 FF FF, ${Instructions.I_INT_I16.packInstructionData(-1)}", // 2-byte overlong -1 - "61 7F, ${Instructions.I_INT_I16.packInstructionData(127)}", - "62 80 00, ${Instructions.I_INT_I16.packInstructionData(128)}", // length boundary - "61 80, ${Instructions.I_INT_I16.packInstructionData(-128)}", - "62 7F FF, ${Instructions.I_INT_I16.packInstructionData(-129)}", // length boundary - "62 FF 7F, ${Instructions.I_INT_I16.packInstructionData(32767)}", // max value - "62 00 80, ${Instructions.I_INT_I16.packInstructionData(-32768)}", // min value + fun int24OpcodeCases() = listOf( + "63 40 42 0F, ${Instructions.I_INT_I32} 1000000, 1000000", // 3-byte positive + "63 4F 34 8B, ${Instructions.I_INT_I32} -7654321, -7654321", // 3-byte negative + "63 00 80 00, ${Instructions.I_INT_I32} 32768, 32768", // min positive, length boundary from i16 + "63 FF FF 7F, ${Instructions.I_INT_I32} 8388607, 8388607", // max value + "63 FF 7F FF, ${Instructions.I_INT_I32} -32769, -32769", // max negative, 
length boundary from i16 + "63 00 00 80, ${Instructions.I_INT_I32} -8388608, -8388608", // min value ).toArguments() - const val INT32_EMITTING_OPCODE_CASES = "$THIS_NAME#int32EmittingOpcodeCases" + const val INT32_OPCODE_CASES = "$THIS_NAME#int32OpcodeCases" @JvmStatic - fun int32EmittingOpcodeCases() = listOf( - "63 40 42 0F, ${Instructions.I_INT_I32} 1000000", // 3-byte positive - "63 4F 34 8B, ${Instructions.I_INT_I32} -7654321", // 3-byte negative - "64 3B C4 42 7E, ${Instructions.I_INT_I32} 2118304827", // 4-byte positive - "64 57 97 13 E9, ${Instructions.I_INT_I32} -384592041", // 4-byte negative - "64 00 00 00 00, ${Instructions.I_INT_I32} 0", // 4-byte overlong 0 - "64 FF FF FF FF, ${Instructions.I_INT_I32} -1", // 4-byte overlong -1 - - "63 00 80 00, ${Instructions.I_INT_I32} 32768", // min positive, length boundary from i16 - "63 FF FF 7F, ${Instructions.I_INT_I32} 8388607", - "64 00 00 80 00, ${Instructions.I_INT_I32} 8388608", // length boundary - "64 FF FF FF 7F, ${Instructions.I_INT_I32} ${Int.MAX_VALUE}", // max value - - "63 FF 7F FF, ${Instructions.I_INT_I32} -32769", // max negative, length boundary from i16 - "63 00 00 80, ${Instructions.I_INT_I32} -8388608", - "64 FF FF 7F FF, ${Instructions.I_INT_I32} -8388609", // length boundary - "64 00 00 00 80, ${Instructions.I_INT_I32} ${Int.MIN_VALUE}", // min value + fun int32OpcodeCases() = listOf( + "64 3B C4 42 7E, ${Instructions.I_INT_I32} 2118304827, 2118304827", // 4-byte positive + "64 57 97 13 E9, ${Instructions.I_INT_I32} -384592041, -384592041", // 4-byte negative + "64 00 00 00 00, ${Instructions.I_INT_I32} 0, 0", // 4-byte overlong 0 + "64 FF FF FF FF, ${Instructions.I_INT_I32} -1, -1", // 4-byte overlong -1 + "64 00 00 80 00, ${Instructions.I_INT_I32} 8388608, 8388608", // length boundary + "64 FF FF FF 7F, ${Instructions.I_INT_I32} ${Int.MAX_VALUE}, ${Int.MAX_VALUE}", // max value + "64 FF FF 7F FF, ${Instructions.I_INT_I32} -8388609, -8388609", // length boundary + "64 00 00 00 80, 
${Instructions.I_INT_I32} ${Int.MIN_VALUE}, ${Int.MIN_VALUE}", // min value ).toArguments() const val INT64_EMITTING_OPCODE_CASES = "$THIS_NAME#int64EmittingOpcodeCases" diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/BooleanOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/BooleanOpcodeHandlerTest.kt index f3c7aa215..7077c82a3 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/BooleanOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/BooleanOpcodeHandlerTest.kt @@ -2,61 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.BOOLEAN_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile import com.amazon.ion.bytecode.ir.Instructions -import com.amazon.ion.bytecode.ir.Instructions.packInstructionData -import com.amazon.ion.bytecode.util.BytecodeBuffer -import com.amazon.ion.bytecode.util.ConstantPool -import com.amazon.ion.bytecode.util.unsignedToInt import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test +import org.junit.jupiter.api.fail +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.MethodSource class BooleanOpcodeHandlerTest { - @Test - fun `handler emits true bytecode for true opcode`() { - val byteArray: ByteArray = "6E".hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = byteArray[position++].unsignedToInt() - position += BooleanOpcodeHandler.convertOpcodeToBytecode( - opcode, - byteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - val expectedInstruction = Instructions.I_BOOL.packInstructionData(1) - 
assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) - assertEquals(1, position) - } - - @Test - fun `handler emits false bytecode for false opcode`() { - val byteArray: ByteArray = "6F".hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = byteArray[position++].unsignedToInt() - position += BooleanOpcodeHandler.convertOpcodeToBytecode( - opcode, - byteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - val expectedInstruction = Instructions.I_BOOL.packInstructionData(0) - assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) - assertEquals(1, position) + @ParameterizedTest + @MethodSource(BOOLEAN_OPCODE_CASES) + fun `boolean opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = BooleanOpcodeHandler.shouldCompile(input, bytecode) + val expectedBool = expectedValue.toBoolean() + val representedBool = when (Instructions.getData(buffer.get(0))) { + 1 -> true + 0 -> false + else -> fail("Unexpected packed instruction emitted from boolean opcode compiler: ${buffer.get(0)}") + } + assertEquals(expectedBool, representedBool) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/FloatOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/FloatOpcodeHandlerTest.kt index 0a1bf5818..01eac715b 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/FloatOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/FloatOpcodeHandlerTest.kt @@ -2,228 +2,54 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode -import com.amazon.ion.bytecode.ir.Instructions -import com.amazon.ion.bytecode.util.BytecodeBuffer -import com.amazon.ion.bytecode.util.ConstantPool -import 
com.amazon.ion.bytecode.util.unsignedToInt +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT0_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT16_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT32_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT64_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.MethodSource import kotlin.String class FloatOpcodeHandlerTest { - @Test - fun `float0 opcode handler emits correct bytecode`() { - val inputByteArray: ByteArray = "6A".hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = inputByteArray[position++].unsignedToInt() - position += Float0OpcodeHandler.convertOpcodeToBytecode( - opcode, - inputByteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - val expectedBytecode = intArrayOf(Instructions.I_FLOAT_F32, 0) - assertEqualBytecode(expectedBytecode, buffer.toArray()) - assertEquals(1, position) + @ParameterizedTest + @MethodSource(FLOAT0_OPCODE_CASES) + fun `float0 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = Float0OpcodeHandler.shouldCompile(input, bytecode) + val expectedFloat = expectedValue.toFloat() + val representedFloat = Float.fromBits(buffer.get(1)) + assertEquals(expectedFloat, representedFloat) } @ParameterizedTest - @CsvSource( - "6B 01 00, 0.000000059604645", // smallest positive subnormal number - "6B FF 03, 0.000060975552", // largest subnormal number - "6B 00 04, 0.00006103515625", // smallest positive normal number - "6B FF 7B, 65504", // largest normal number - "6B FF 3B, 0.99951172", // largest number less 
than one - "6B 00 3C, 1", - "6B 01 3C, 1.00097656", // smallest number larger than one - - // Same as above, but negative - "6B 01 80, -0.000000059604645", - "6B FF 83, -0.000060975552", - "6B 00 84, -0.00006103515625", - "6B FF FB, -65504", - "6B FF BB, -0.99951172", - "6B 00 BC, -1", - "6B 01 BC, -1.00097656", - - "6B 00 00, 0", - "6B 00 80, -0", - "6B 00 7C, Infinity", - "6B 00 FC, -Infinity", - "6B 01 7E, NaN", // quiet NaN - "6B 01 7C, NaN", // signaling NaN - "6B 01 FE, NaN", // negative quiet NaN - "6B 01 FC, NaN", // negative signaling NaN - "6B 53 7F, NaN", // another quiet NaN - "6B 53 FF, NaN", // another negative quiet NaN - - "6B 00 C0, -2", - "6B 55 35, 0.33325195", - "6B 48 42, 3.140625" - ) - fun `float16 opcode handler emits correct bytecode`(inputString: String, expectedFloat: Float) { - val inputByteArray: ByteArray = inputString.hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = inputByteArray[position++].unsignedToInt() - position += Float16OpcodeHandler.convertOpcodeToBytecode( - opcode, - inputByteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - // We cannot use assertEqualBytecode here because Java will parse all the NaNs in the test suite as quiet with - // all data bits 0, but the actual generated bytecode will have NaN floats with the signaling semantics and data - // preserved from the input. - // Instead, test that the buffer is the right length, the instruction is correct, and the stored float has - // equal value. 
- assertEquals(2, buffer.size()) - assertEquals(Instructions.I_FLOAT_F32, buffer.get(0)) - + @MethodSource(FLOAT16_OPCODE_CASES) + fun `float16 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = Float16OpcodeHandler.shouldCompile(input, bytecode) + val expectedFloat = expectedValue.toFloat() val representedFloat = Float.fromBits(buffer.get(1)) assertEquals(expectedFloat, representedFloat) - - assertEquals(3, position) } @ParameterizedTest - @CsvSource( - "6C 01 00 00 00, 1.4012984643e-45", // smallest positive subnormal number - "6C FF FF 7F 00, 1.1754942107e-38", // largest subnormal number - "6C 00 00 80 00, 1.1754943508e-38", // smallest positive normal number - "6C FF FF 7F 7F, 3.4028234664e38", // largest normal number - "6C FF FF 7F 3F, 0.999999940395355225", // largest number less than one - "6C 00 00 80 3F, 1", - "6C 01 00 80 3F, 1.00000011920928955", // smallest number larger than one - - // Same as above, but negative - "6C 01 00 00 80, -1.4012984643e-45", - "6C FF FF 7F 80, -1.1754942107e-38", - "6C 00 00 80 80, -1.1754943508e-38", - "6C FF FF 7F FF, -3.4028234664e38", - "6C FF FF 7F BF, -0.999999940395355225", - "6C 00 00 80 BF, -1", - "6C 01 00 80 BF, -1.00000011920928955", - - "6C 00 00 00 00, 0", - "6C 00 00 00 80, -0", - "6C 00 00 80 7F, Infinity", - "6C 00 00 80 FF, -Infinity", - "6C 01 00 C0 7F, NaN", // quiet NaN - "6C 01 00 80 7F, NaN", // signaling NaN - "6C 01 00 C0 FF, NaN", // negative quiet NaN - "6C 01 00 80 FF, NaN", // negative signaling NaN - - "6C 00 00 00 C0, -2", - "6C AB AA AA 3E, 0.333333343267440796", - "6C DB 0F 49 40, 3.14159274101257324" - ) - fun `float32 opcode handler emits correct bytecode`(inputString: String, expectedFloat: Float) { - val inputByteArray: ByteArray = inputString.hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = inputByteArray[position++].unsignedToInt() - position += 
Float32OpcodeHandler.convertOpcodeToBytecode( - opcode, - inputByteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - assertEquals(2, buffer.size()) - assertEquals(Instructions.I_FLOAT_F32, buffer.get(0)) - + @MethodSource(FLOAT32_OPCODE_CASES) + fun `float32 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = Float32OpcodeHandler.shouldCompile(input, bytecode) + val expectedFloat = expectedValue.toFloat() val representedFloat = Float.fromBits(buffer.get(1)) assertEquals(expectedFloat, representedFloat) - - assertEquals(5, position) } @ParameterizedTest - @CsvSource( - "6D 01 00 00 00 00 00 00 00, 4.9406564584124654e-324", // smallest positive subnormal number - "6D FF FF FF FF FF FF 0F 00, 2.2250738585072009e-308", // largest subnormal number - "6D 00 00 00 00 00 00 10 00, 2.2250738585072014e-308", // smallest positive normal number - "6D FF FF FF FF FF FF EF 7F, 1.7976931348623157e308", // largest normal number - "6D FF FF FF FF FF FF EF 3F, 0.99999999999999988898", // largest number less than one - "6D 00 00 00 00 00 00 F0 3F, 1", - "6D 01 00 00 00 00 00 F0 3F, 1.0000000000000002220", // smallest number larger than one - "6D 02 00 00 00 00 00 F0 3F, 1.0000000000000004441", // the second smallest number greater than 1 - - // Same as above, but negative - "6D 01 00 00 00 00 00 00 80, -4.9406564584124654e-324", - "6D FF FF FF FF FF FF 0F 80, -2.2250738585072009e-308", - "6D 00 00 00 00 00 00 10 80, -2.2250738585072014e-308", - "6D FF FF FF FF FF FF EF FF, -1.7976931348623157e308", - "6D FF FF FF FF FF FF EF BF, -0.99999999999999988898", - "6D 00 00 00 00 00 00 F0 BF, -1", - "6D 01 00 00 00 00 00 F0 BF, -1.0000000000000002220", - "6D 02 00 00 00 00 00 F0 BF, -1.0000000000000004441", - - "6D 00 00 00 00 00 00 00 00, 0", - "6D 00 00 00 00 00 00 00 80, -0", - "6D 00 00 00 00 00 00 F0 7F, Infinity", - "6D 00 00 00 00 00 00 F0 FF, -Infinity", - "6D 01 00 00 00 00 
00 F8 7F, NaN", // quiet NaN - "6D 01 00 00 00 00 00 F0 7F, NaN", // signaling NaN - "6D 01 00 00 00 00 00 F8 FF, NaN", // negative quiet NaN - "6D 01 00 00 00 00 00 F0 FF, NaN", // negative signaling NaN - "6D FF FF FF FF FF FF FF 7F, NaN", // another quiet NaN - "6D FF FF FF FF FF FF FF FF, NaN", // another negative quiet NaN - - "6D 00 00 00 00 00 00 00 C0, -2", - "6D 55 55 55 55 55 55 D5 3F, 0.33333333333333331483", - "6D 18 2D 44 54 FB 21 09 40, 3.141592653589793116" - ) - fun `float64 opcode handler emits correct bytecode`(inputString: String, expectedDouble: Double) { - val inputByteArray: ByteArray = inputString.hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = inputByteArray[position++].unsignedToInt() - position += DoubleOpcodeHandler.convertOpcodeToBytecode( - opcode, - inputByteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - assertEquals(3, buffer.size()) - assertEquals(Instructions.I_FLOAT_F64, buffer.get(0)) - + @MethodSource(FLOAT64_OPCODE_CASES) + fun `float64 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = DoubleOpcodeHandler.shouldCompile(input, bytecode) + val expectedFloat = expectedValue.toDouble() val representedFloat = Double.fromBits( buffer.get(1).toLong().shl(32) .or(buffer.get(2).toLong().and(0xFFFF_FFFF)) ) - assertEquals(expectedDouble, representedFloat) - - assertEquals(9, position) + assertEquals(expectedFloat, representedFloat) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt index 460684985..60cf57bea 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt @@ -2,179 +2,72 @@ // SPDX-License-Identifier: Apache-2.0 package 
com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT0_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT16_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT24_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT32_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT64_EMITTING_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT8_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile import com.amazon.ion.bytecode.ir.Instructions -import com.amazon.ion.bytecode.ir.Instructions.packInstructionData -import com.amazon.ion.bytecode.util.BytecodeBuffer -import com.amazon.ion.bytecode.util.ConstantPool -import com.amazon.ion.bytecode.util.unsignedToInt import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.MethodSource import kotlin.String class IntOpcodeHandlerTest { - // These tests need to include the IVM in the test bytecode (or any 4 bytes before the FixedInt) - // because the BinaryPrimitiveReader has logic that expects this to always be the case. 
@ParameterizedTest - @CsvSource( - "60, 0, 0", // 0-byte - "61 32, 1, 50", // 1-byte positive - "61 97, 1, -105", // 1-byte negative - "62 26 73, 2, 29478", // 2-byte positive - "62 50 FC, 2, -944", // 2-byte negative - "62 00 00, 2, 0", // 2-byte overlong 0 - "62 FF FF, 2, -1", // 2-byte overlong -1 - - "61 7F, 1, 127", - "62 80 00, 2, 128", // length boundary - "62 FF 7F, 2, 32767", // max value - - "61 80, 1, -128", - "62 7F FF, 2, -129", // length boundary - "62 00 80, 2, -32768", // min value - ) - fun testI16EmittingIntHandlers( - inputString: String, - expectedBytesRead: Int, - expectedInt16: Short - ) { - val handlersByBytesRead = arrayOf(Int0OpcodeHandler, Int8OpcodeHandler, Int16OpcodeHandler) - - val inputByteArray: ByteArray = "E0 01 01 EA $inputString".hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 4 // skip the IVM - val opcode = inputByteArray[position++].unsignedToInt() - position += handlersByBytesRead[expectedBytesRead].convertOpcodeToBytecode( - opcode, - inputByteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - val expectedInstruction = Instructions.I_INT_I16.packInstructionData(expectedInt16.toInt()) - assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) - assertEquals(5 + expectedBytesRead, position) - - val representedInteger = Instructions.getData(buffer.get(0)).toShort() - assertEquals(expectedInt16, representedInteger) + @MethodSource(INT0_OPCODE_CASES) + fun `int0 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = Int0OpcodeHandler.shouldCompile(input, bytecode) + val expectedShort = expectedValue.toShort() + val representedShort = Instructions.getData(buffer.get(0)).toShort() + assertEquals(expectedShort, representedShort) } @ParameterizedTest - @CsvSource( - "63 40 42 0F, 3, 1000000", // 3-byte positive - "63 4F 34 8B, 3, -7654321", // 3-byte negative - "64 3B C4 42 7E, 4, 
2118304827", // 4-byte positive - "64 57 97 13 E9, 4, -384592041", // 4-byte negative - "64 00 00 00 00, 4, 0", // 4-byte overlong 0 - "64 FF FF FF FF, 4, -1", // 4-byte overlong -1 - - "63 00 80 00, 3, 32768", // min positive, length boundary from i16 - "63 FF FF 7F, 3, 8388607", - "64 00 00 80 00, 4, 8388608", // length boundary - "64 FF FF FF 7F, 4, ${Int.MAX_VALUE}", // max value - - "63 FF 7F FF, 3, -32769", // max negative, length boundary from i16 - "63 00 00 80, 3, -8388608", - "64 FF FF 7F FF, 4, -8388609", // length boundary - "64 00 00 00 80, 4, ${Int.MIN_VALUE}", // min value - ) - fun testI32EmittingIntHandlers( - inputString: String, - expectedBytesRead: Int, - expectedInt32: Int - ) { - val handlersByBytesRead = arrayOf(Int24OpcodeHandler, Int32OpcodeHandler) - - val inputByteArray: ByteArray = "E0 01 01 EA $inputString".hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 4 // skip the IVM - val opcode = inputByteArray[position++].unsignedToInt() - position += handlersByBytesRead[expectedBytesRead - 3].convertOpcodeToBytecode( - opcode, - inputByteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - val expectedBytecode = intArrayOf(Instructions.I_INT_I32, expectedInt32) - assertEqualBytecode(expectedBytecode, buffer.toArray()) - assertEquals(5 + expectedBytesRead, position) + @MethodSource(INT8_OPCODE_CASES) + fun `int8 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = Int8OpcodeHandler.shouldCompile(input, bytecode) + val expectedShort = expectedValue.toShort() + val representedShort = Instructions.getData(buffer.get(0)).toShort() + assertEquals(expectedShort, representedShort) } @ParameterizedTest - @CsvSource( - "65 6A 22 7C AB 5C, 5, 398014030442", // 5-byte positive - "65 96 DD 83 54 A3, 5, -398014030442", // 5-byte negative - "66 C4 87 8F 09 97 5D, 6, 102903281846212", // 6-byte positive - "66 3C 78 70 F6 68 A2, 6, 
-102903281846212", // 6-byte negative - "67 62 9A 42 56 83 77 10, 7, 4635005598997090", // 7-byte positive - "67 9E 65 BD A9 7C 88 EF, 7, -4635005598997090", // 7-byte negative - "68 A4 F7 64 69 16 27 BF 31, 8, 3584626805621192612", // 8-byte positive - "68 5C 08 9B 96 E9 D8 40 CE, 8, -3584626805621192612", // 8-byte negative - "68 00 00 00 00 00 00 00 00, 8, 0", // 8-byte overlong 0 - "68 FF FF FF FF FF FF FF FF, 8, -1", // 8-byte overlong -1 - - "65 00 00 00 80 00, 5, 2147483648", // min positive, length boundary from i32 - "65 FF FF FF FF 7F, 5, 549755813887", - "66 00 00 00 00 80 00, 6, 549755813888", // length boundary - "66 FF FF FF FF FF 7F, 6, 140737488355327", - "67 00 00 00 00 00 80 00, 7, 140737488355328", // length boundary - "67 FF FF FF FF FF FF 7F, 7, 36028797018963967", - "68 00 00 00 00 00 00 80 00, 8, 36028797018963968", // length boundary - "68 FF FF FF FF FF FF FF 7F, 8, ${Long.MAX_VALUE}", // max value - - "65 FF FF FF 7F FF, 5, -2147483649", // max negative, length boundary from i32 - "65 00 00 00 00 80, 5, -549755813888", - "66 FF FF FF FF 7F FF, 6, -549755813889", // length boundary - "66 00 00 00 00 00 80, 6, -140737488355328", - "67 FF FF FF FF FF 7F FF, 7, -140737488355329", // length boundary - "67 00 00 00 00 00 00 80, 7, -36028797018963968", - "68 FF FF FF FF FF FF 7F FF, 8, -36028797018963969", // length boundary - "68 00 00 00 00 00 00 00 80, 8, ${Long.MIN_VALUE}", // min value - ) - fun testI64EmittingIntHandler( - inputString: String, - expectedBytesRead: Int, - expectedInt64: Long - ) { - val inputByteArray: ByteArray = "E0 01 01 EA $inputString".hexStringToByteArray() - val buffer = BytecodeBuffer() + @MethodSource(INT16_OPCODE_CASES) + fun `int16 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = Int16OpcodeHandler.shouldCompile(input, bytecode) + val expectedShort = expectedValue.toShort() + val representedShort = Instructions.getData(buffer.get(0)).toShort() + 
assertEquals(expectedShort, representedShort) + } - var position = 4 // skip the IVM - val opcode = inputByteArray[position++].unsignedToInt() - position += LongIntOpcodeHandler.convertOpcodeToBytecode( - opcode, - inputByteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) + @ParameterizedTest + @MethodSource(INT24_OPCODE_CASES) + fun `int24 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = Int24OpcodeHandler.shouldCompile(input, bytecode) + val expectedInt = expectedValue.toInt() + val representedInt = buffer.get(1) + assertEquals(expectedInt, representedInt) + } - val expectedBytecode = intArrayOf( - Instructions.I_INT_I64, - (expectedInt64 ushr 32).toInt(), - expectedInt64.toInt() - ) - assertEqualBytecode(expectedBytecode, buffer.toArray()) - assertEquals(5 + expectedBytesRead, position) + @ParameterizedTest + @MethodSource(INT32_OPCODE_CASES) + fun `int32 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = Int32OpcodeHandler.shouldCompile(input, bytecode) + val expectedInt = expectedValue.toInt() + val representedInt = buffer.get(1) + assertEquals(expectedInt, representedInt) + } - val representedInteger = (buffer.get(1).toLong() shl 32) or (buffer.get(2).toLong() and 0xFFFF_FFFF) - assertEquals(expectedInt64, representedInteger) + @ParameterizedTest + @MethodSource(INT64_EMITTING_OPCODE_CASES) + fun `long int opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + val buffer = LongIntOpcodeHandler.shouldCompile(input, bytecode) + val expectedLong = expectedValue.toLong() + val representedLong = (buffer.get(1).toLong() shl 32) or (buffer.get(2).toLong() and 0xFFFF_FFFF) + assertEquals(expectedLong, representedLong) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/NullOpcodeHandlerTest.kt 
b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/NullOpcodeHandlerTest.kt index a1a855f80..eaa069dca 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/NullOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/NullOpcodeHandlerTest.kt @@ -2,37 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode -import com.amazon.ion.bytecode.ir.Instructions -import com.amazon.ion.bytecode.util.BytecodeBuffer -import com.amazon.ion.bytecode.util.ConstantPool -import com.amazon.ion.bytecode.util.unsignedToInt -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.NULL_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.MethodSource class NullOpcodeHandlerTest { - @Test - fun `handler emits null bytecode for null opcode`() { - val byteArray: ByteArray = "8E".hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = byteArray[position++].unsignedToInt() - position += NullOpcodeHandler.convertOpcodeToBytecode( - opcode, - byteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - val expectedInstruction = Instructions.I_NULL_NULL - assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) - assertEquals(1, position) + @ParameterizedTest + @MethodSource(NULL_OPCODE_CASES) + fun `null opcode handler emits correct bytecode`(input: String, bytecode: String) { + NullOpcodeHandler.shouldCompile(input, bytecode) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt 
b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt new file mode 100644 index 000000000..850618243 --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt @@ -0,0 +1,59 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.TextToBinaryUtils.decimalStringToIntArray +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.replacePositionTemplates +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.unsignedToInt +import org.junit.jupiter.api.Assertions.assertEquals + +internal object OpcodeHandlerTestUtil { + /** + * Asserts that an opcode handler compiles the given input bytes to the given bytecode and that the position + * returned by the handler points immediately after the last byte in the input. 
+ * + * @return The bytecode buffer containing the bytecode compiled by this handler, for convenience of test cases + * that wish to further validate the compiled bytecode represents a particular value + */ + fun OpcodeToBytecodeHandler.shouldCompile(inputBytes: ByteArray, expectedBytecode: IntArray): BytecodeBuffer { + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = inputBytes[position++].unsignedToInt() + position += this.convertOpcodeToBytecode( + opcode, + inputBytes, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + assertEqualBytecode(expectedBytecode, buffer.toArray()) + assertEquals(inputBytes.size, position) + + return buffer + } + + /** + * Asserts that an opcode handler compiles the given input bytes to the given bytecode and that the position + * returned by the handler points immediately after the last byte in the input. + * + * Takes a hex string for the input bytes and a decimal string for the expected bytecode. 
+ * + * @return The bytecode buffer containing the bytecode compiled by this handler, for convenience of test cases + * that wish to further validate the compiled bytecode represents a particular value + */ + fun OpcodeToBytecodeHandler.shouldCompile(inputBytes: String, expectedBytecode: String): BytecodeBuffer { + return this.shouldCompile( + inputBytes.hexStringToByteArray(), + replacePositionTemplates(expectedBytecode, 0).decimalStringToIntArray() + ) + } +} diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt index 81986bbd2..4d79656f4 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt @@ -2,145 +2,38 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.TextToBinaryUtils.toSingleHexByte +import com.amazon.ion.bytecode.bin11.OpCode +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.REFERENCE_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile import com.amazon.ion.bytecode.ir.Instructions -import com.amazon.ion.bytecode.ir.Instructions.packInstructionData -import com.amazon.ion.bytecode.util.BytecodeBuffer -import com.amazon.ion.bytecode.util.ConstantPool -import com.amazon.ion.bytecode.util.unsignedToInt -import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.TestInstance +import org.junit.jupiter.api.fail import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.MethodSource @TestInstance(TestInstance.Lifecycle.PER_CLASS) internal class 
ReferenceOpcodeHandlerTest { - /** - * Generates tests for handlers that emit similar *_REF bytecode (instructions packed with a UInt22 reference length - * and followed by a UInt32 position of the data). Returns a list of [Arguments] objects in the form: - * - Instance of [ReferenceOpcodeHandler] to test - * - Input [ByteArray] with meaningless bytes for the payload data - * - Expected generated bytecode as [IntArray] - * - Expected end position as [Int] - */ - private fun referenceOpcodeHandlerTestCases(): List { - val arguments = mutableListOf() - - val instructions = arrayOf( - Pair(Instructions.I_ANNOTATION_REF, 0x59), - Pair(Instructions.I_INT_REF, 0xF5), - Pair(Instructions.I_DECIMAL_REF, 0xF6), - Pair(Instructions.I_TIMESTAMP_REF, 0xF7), - Pair(Instructions.I_STRING_REF, 0xF8), - Pair(Instructions.I_SYMBOL_REF, 0xF9), - Pair(Instructions.I_BLOB_REF, 0xFE), - Pair(Instructions.I_CLOB_REF, 0xFF), - ) - - val testTemplates = listOf( - /* - FlexUInt length prefix for referenced payload - | Decimal payload length - | | Expected payload start position - | | | Expected end position after handling - | | | | */ - "03, 1, 2, 3", - "05, 2, 2, 4", - "07, 3, 2, 5", - "09, 4, 2, 6", - "0B, 5, 2, 7", - "1D, 14, 2, 16", - "7F, 63, 2, 65", - "81, 64, 2, 66", - "FF, 127, 2, 129", - "02 02, 128, 3, 131", - "FE FF, 16383, 3, 16386", - "04 00 02, 16384, 4, 16388", - "FC FF FF, 2097151, 4, 2097155", - "08 00 00 02, 2097152, 5, 2097157", - "F8 FF FF 03, 4194303, 5, 4194308", // maximum length of a payload - "01, 0, 2, 2", // zero-length payload TODO: is this legal? - "00 18 00 00 00 00 00 00 00 00 00 00, 1, 13, 14", // overlong encoding on the FlexUInt - ) - - // This loop maps the above templates into a tests for each opcode. 
The templates above consist of some bytecode - // representing the FlexUInt length of the payload, the decimal value of the FlexUInt length prefix, the expected - // start position of the payload, and the expected end position of the generator after executing the handler. - // - // The opcode will be prepended to the bytecode string, and null bytes will be generated to fill the payload. - // - // Output Arguments objects are tuples of: - // (ReferenceOpcodeHandler to test, bytecode to test, expected bytecode, expected end position) - // - // Example - this string: - // "0B, 5, 2, 7" - // will emit the following Arguments objects: - // , F5 0B <5 payload bytes>, , 7 - // , F6 0B <5 payload bytes>, , 7 - // ... - instructions.forEach { (instruction, opcode) -> - testTemplates.forEach { - val (flexUIntStr, payloadLengthStr, expectedPayloadStartPosStr, expectedEndPositionStr) = it.split(',') - val payloadLength = payloadLengthStr.trim().toInt() - val expectedPayloadStartPosition = expectedPayloadStartPosStr.trim().toInt() - val expectedEndPosition = expectedEndPositionStr.trim().toInt() - - // Create a dummy payload for this value with all bytes set to zeros. - // Not actually looked at by this test, but simulates an encoded value the handler would actually - // encounter during parsing. - val payload = ByteArray(payloadLength) - arguments.add( - Arguments.of( - ReferenceOpcodeHandler(instruction), - byteArrayOf( - opcode.toByte(), // write the opcode - *flexUIntStr.trim().hexStringToByteArray(), // then the FlexUInt - *payload // then the payload bytes - ), - intArrayOf( - instruction.packInstructionData(payloadLength), - expectedPayloadStartPosition - ), - expectedEndPosition - ) - ) - } - } - - return arguments - } - /** * Test that variable-length payload opcodes generate the correct *_REF bytecode. * Does not validate the actual payload in any way. 
*/ @ParameterizedTest - @MethodSource("referenceOpcodeHandlerTestCases") - fun `handlers for OP_X_REF opcodes emit correct bytecode`( - handler: ReferenceOpcodeHandler, - inputByteArray: ByteArray, - expectedBytecode: IntArray, - expectedEndPosition: Int - ) { - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = inputByteArray[position++].unsignedToInt() - position += handler.convertOpcodeToBytecode( - opcode, - inputByteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - assertEqualBytecode(expectedBytecode, buffer.toArray()) - assertEquals(expectedEndPosition, position) + @MethodSource(REFERENCE_OPCODE_CASES) + fun `handlers for OP_X_REF opcodes emit correct bytecode`(input: String, bytecode: String) { + val opcode = input.take(2).toInt(16) + val handler = when (opcode) { + OpCode.ANNOTATION_TEXT -> ReferenceOpcodeHandler(Instructions.I_ANNOTATION_REF) + OpCode.VARIABLE_LENGTH_INTEGER -> ReferenceOpcodeHandler(Instructions.I_INT_REF) + OpCode.VARIABLE_LENGTH_DECIMAL -> ReferenceOpcodeHandler(Instructions.I_DECIMAL_REF) + OpCode.VARIABLE_LENGTH_TIMESTAMP -> ReferenceOpcodeHandler(Instructions.I_TIMESTAMP_REF) + OpCode.VARIABLE_LENGTH_STRING -> ReferenceOpcodeHandler(Instructions.I_STRING_REF) + OpCode.VARIABLE_LENGTH_SYMBOL -> ReferenceOpcodeHandler(Instructions.I_SYMBOL_REF) + OpCode.VARIABLE_LENGTH_BLOB -> ReferenceOpcodeHandler(Instructions.I_BLOB_REF) + OpCode.VARIABLE_LENGTH_CLOB -> ReferenceOpcodeHandler(Instructions.I_CLOB_REF) + else -> fail("Opcode is not a variable-length reference opcode: 0x${opcode.toSingleHexByte()}") + } + handler.shouldCompile(input, bytecode) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt index a051b3719..6570adc7c 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt +++ 
b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt @@ -10,6 +10,7 @@ import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource class ShortTimestampDecoderTest { + @ParameterizedTest @CsvSource( "80 35, 2023T", diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandlerTest.kt index b7baa5220..82e73b45a 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandlerTest.kt @@ -2,64 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode -import com.amazon.ion.bytecode.ir.Instructions -import com.amazon.ion.bytecode.ir.Instructions.packInstructionData -import com.amazon.ion.bytecode.util.BytecodeBuffer -import com.amazon.ion.bytecode.util.ConstantPool -import com.amazon.ion.bytecode.util.unsignedToInt -import org.junit.jupiter.api.Assertions.assertEquals +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.SHORT_TIMESTAMP_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.MethodSource import kotlin.String class ShortTimestampOpcodeHandlerTest { @ParameterizedTest - @CsvSource( - "80 35, 0, 2", // 2023T - "81 35 05, 1, 3", // 2023-10T - "82 35 7D, 2, 3", // 2023-10-15T - "83 35 7D CB 0A, 3, 5", // 2023-10-15T11:22Z - "84 35 7D CB 1A 02, 4, 6", // 2023-10-15T11:22:33Z - "84 35 7D CB 12 02, 4, 6", // 2023-10-15T11:22:33-00:00 - "85 35 7D CB 12 F2 06, 5, 7", // 2023-10-15T11:22:33.444-00:00 - 
"86 35 7D CB 12 2E 22 1B, 6, 8", // 2023-10-15T11:22:33.444555-00:00 - "87 35 7D CB 12 4A 86 FD 69, 7, 9", // 2023-10-15T11:22:33.444555666-00:00 - "88 35 7D CB EA 01, 8, 6", // 2023-10-15T11:22+01:15 - "89 35 7D CB EA 85, 9, 6", // 2023-10-15T11:22:33+01:15 - "8A 35 7D CB EA 85 BC 01, 10, 8", // 2023-10-15T11:22:33.444+01:15 - "8B 35 7D CB EA 85 8B C8 06, 11, 9", // 2023-10-15T11:22:33.444555+01:15 - "8C 35 7D CB EA 85 92 61 7F 1A, 12, 10", // 2023-10-15T11:22:33.444555666+01:15 - ) - fun `short timestamp opcode handler emits correct bytecode`( - inputString: String, - expectedPrecisionAndOffsetMode: Int, - expectedEndPosition: Int - ) { - val inputByteArray = inputString.hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = inputByteArray[position++].unsignedToInt() - position += ShortTimestampOpcodeHandler.convertOpcodeToBytecode( - opcode, - inputByteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - val expectedPayloadStartPosition = 1 - val expectedBytecode = intArrayOf( - Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(expectedPrecisionAndOffsetMode), - expectedPayloadStartPosition - ) - assertEqualBytecode(expectedBytecode, buffer.toArray()) - assertEquals(expectedEndPosition, position) + @MethodSource(SHORT_TIMESTAMP_OPCODE_CASES) + fun `short timestamp opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { + ShortTimestampOpcodeHandler.shouldCompile(input, bytecode) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TypedNullOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TypedNullOpcodeHandlerTest.kt index dd5c8cca6..f39ccca86 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TypedNullOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TypedNullOpcodeHandlerTest.kt @@ -2,51 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 package 
com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode -import com.amazon.ion.bytecode.ir.Instructions -import com.amazon.ion.bytecode.util.BytecodeBuffer -import com.amazon.ion.bytecode.util.ConstantPool -import com.amazon.ion.bytecode.util.unsignedToInt -import org.junit.jupiter.api.Assertions.assertEquals +import com.amazon.ion.bytecode.bin11.OpcodeTestCases.TYPED_NULL_OPCODE_CASES +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.MethodSource class TypedNullOpcodeHandlerTest { @ParameterizedTest - @CsvSource( - "8F 01, ${Instructions.I_NULL_BOOL}", - "8F 02, ${Instructions.I_NULL_INT}", - "8F 03, ${Instructions.I_NULL_FLOAT}", - "8F 04, ${Instructions.I_NULL_DECIMAL}", - "8F 05, ${Instructions.I_NULL_TIMESTAMP}", - "8F 06, ${Instructions.I_NULL_STRING}", - "8F 07, ${Instructions.I_NULL_SYMBOL}", - "8F 08, ${Instructions.I_NULL_BLOB}", - "8F 09, ${Instructions.I_NULL_CLOB}", - "8F 0a, ${Instructions.I_NULL_LIST}", - "8F 0b, ${Instructions.I_NULL_SEXP}", - "8F 0c, ${Instructions.I_NULL_STRUCT}", - ) - fun testTypedNull(inputString: String, expectedInstruction: Int) { - val byteArray: ByteArray = inputString.hexStringToByteArray() - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = byteArray[position++].unsignedToInt() - position += TypedNullOpcodeHandler.convertOpcodeToBytecode( - opcode, - byteArray, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) - assertEquals(2, position) + @MethodSource(TYPED_NULL_OPCODE_CASES) + fun `typed null opcode handler emits correct bytecode`(input: String, bytecode: String) { + 
TypedNullOpcodeHandler.shouldCompile(input, bytecode) } } From 6c6e3772201811208eb3d0751edb7711fe69f2f2 Mon Sep 17 00:00:00 2001 From: austnwil Date: Thu, 23 Oct 2025 11:34:59 -0700 Subject: [PATCH 08/22] Fix broken refill logic; adjust comments --- .../bin11/ByteArrayBytecodeGenerator11.kt | 49 +++++++++---------- .../java/com/amazon/ion/TextToBinaryUtils.kt | 3 +- .../bin11/ByteArrayBytecodeGenerator11Test.kt | 13 ++--- 3 files changed, 28 insertions(+), 37 deletions(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index c557e20ea..bd47aa117 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -23,7 +23,7 @@ import java.nio.ByteBuffer @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "constructor does not make a defensive copy of source as a performance optimization") internal class ByteArrayBytecodeGenerator11( private val source: ByteArray, - private var i: Int, + private var currentPosition: Int, ) : BytecodeGenerator { private val utf8Decoder: Utf8StringDecoder = Utf8StringDecoderPool.getInstance().orCreate @@ -34,24 +34,23 @@ internal class ByteArrayBytecodeGenerator11( macroIndices: IntArray, symTab: Array ) { - // For now, write a single instruction to the bytecode buffer, plus the refill or EOF instruction. - // The strategy here will need to be revisited. 
- val opcode = source[i++].unsignedToInt() - val handler = OpcodeHandlerTable.handler(opcode) - i += handler.convertOpcodeToBytecode( - opcode, - source, - i, - destination, - constantPool, - macroSrc, - macroIndices, - symTab - ) + var opcode = 0 + while (currentPosition < source.size && !isSystemValue(opcode)) { + opcode = source[currentPosition++].unsignedToInt() + val handler = OpcodeHandlerTable.handler(opcode) + currentPosition += handler.convertOpcodeToBytecode( + opcode, + source, + currentPosition, + destination, + constantPool, + macroSrc, + macroIndices, + symTab + ) + } - // Emit the refill or end of input instruction so caller knows what to do once they run out - // of bytecode in the buffer. - if (i < source.size) { + if (currentPosition < source.size) { BytecodeEmitter.emitRefill(destination) } else { BytecodeEmitter.emitEndOfInput(destination) @@ -66,14 +65,6 @@ internal class ByteArrayBytecodeGenerator11( TODO("Not yet implemented") } - // TODO: right now, this function expects the opcode parameter to be the low nibble of the actual opcode (0x0-0xC). - // This is currently what the ShortTimestampOpcodeHandler writes to the I_SHORT_TIMESTAMP_REF bytecode. This might - // not be correct behavior. If this is acceptable, this parameter should probably be renamed, since it isn't the - // actual opcode of the encoded timestamp. If this isn't, then ShortTimestampOpcodeHandler needs fixed. - // The justification for this behavior is that ShortTimestampOpcodeHandler already separates the low nibble of the - // opcode for use in a lookup table, so we might as well propagate that value to the bytecode instead of the full - // opcode - especially since, in its current implementation, ShortTimestampDecoder.readTimestamp() also uses the - // low nibble in a lookup table. 
override fun readShortTimestampReference(position: Int, opcode: Int): Timestamp { return ShortTimestampDecoder.readTimestamp(source, position, opcode) } @@ -97,9 +88,13 @@ internal class ByteArrayBytecodeGenerator11( override fun getGeneratorForMinorVersion(minorVersion: Int): BytecodeGenerator { return when (minorVersion) { - 1 -> ByteArrayBytecodeGenerator11(source, i) + 1 -> ByteArrayBytecodeGenerator11(source, currentPosition) // TODO: update with ByteArrayBytecodeGenerator10 once it implements BytecodeGenerator else -> throw IonException("Minor version $minorVersion not yet implemented for ByteArray-backed data sources.") } } + + private fun isSystemValue(opcode: Int): Boolean { + return opcode in 0xE0..0xE8 + } } diff --git a/src/test/java/com/amazon/ion/TextToBinaryUtils.kt b/src/test/java/com/amazon/ion/TextToBinaryUtils.kt index 76fb252d4..623229ae5 100644 --- a/src/test/java/com/amazon/ion/TextToBinaryUtils.kt +++ b/src/test/java/com/amazon/ion/TextToBinaryUtils.kt @@ -100,8 +100,7 @@ object TextToBinaryUtils { } /** - * Helper function for generating FlexUInt hex strings from an unsigned integer. Useful for test - * cases that programmatically generate length-prefixed payloads. + * Prints the hex representation of an integer as a single byte, e.g. FF for -1 */ @JvmStatic fun Int.toSingleHexByte(): String { diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index d86d4b76a..77b7da732 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -72,8 +72,8 @@ class ByteArrayBytecodeGenerator11Test { } /** - * Concatenates all the tests for all supported opcodes together into a single test string. 
This tests the REFILL - * behavior and validates that reference instructions that appear in the middle of the input are handled correctly. + * Concatenates all the tests for all supported non-system opcodes together into a single test string. This + * validates that reference instructions that appear in the middle of the input are handled correctly. */ @Test fun `generator produces correct bytecode for sequence of all supported opcodes`() { @@ -97,8 +97,7 @@ class ByteArrayBytecodeGenerator11Test { stringReferenceOpcodeCases() + lobReferenceOpcodeCases() - // Build up the input bytes and expected bytecode from the individual opcode tests. Each compiled top-level - // value will be separated by I_REFILL. + // Build up the input bytes and expected bytecode from the individual opcode tests. var bytesRead = 0 opcodeTests.forEach { args -> val (inputBytesString: String, expectedBytecodeString) = args.get().map { it as String } @@ -106,13 +105,11 @@ class ByteArrayBytecodeGenerator11Test { inputData = inputData.plus(nextBytes) val nextBytecode = replacePositionTemplates(expectedBytecodeString, bytesRead) .decimalStringToIntArray() - expectedBytecode = expectedBytecode.plus(nextBytecode.plus(Instructions.I_REFILL)) bytesRead += nextBytes.size + expectedBytecode = expectedBytecode.plus(nextBytecode) } - // Replace the last REFILL added by the loop with an END_OF_INPUT. We don't want a refill followed by nothing - // and then EOF. 
- expectedBytecode[expectedBytecode.size - 1] = Instructions.I_END_OF_INPUT + expectedBytecode = expectedBytecode.plus(Instructions.I_END_OF_INPUT) val generator = ByteArrayBytecodeGenerator11(inputData, 0) val bytecodeBuffer = BytecodeBuffer() From b1ae1f441584a269c8422fa6df4ca1ffc33d15d1 Mon Sep 17 00:00:00 2001 From: austnwil Date: Thu, 23 Oct 2025 12:04:37 -0700 Subject: [PATCH 09/22] Minor fixes --- .../ion/bytecode/bin11/OpcodeTestCases.kt | 35 +++++---- .../bin11/bytearray/PrimitiveDecoderTest.kt | 71 +++++++++++++++++-- .../bytearray/ShortTimestampDecoderTest.kt | 4 +- 3 files changed, 85 insertions(+), 25 deletions(-) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt index e18eab9b6..3005f61af 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt @@ -146,7 +146,6 @@ object OpcodeTestCases { @JvmStatic fun float32OpcodeCases() = listOf( - // TODO: cross-check all this stuff one more time "6C 01 00 00 00, ${Instructions.I_FLOAT_F32} 1, 1.4012984643e-45", // smallest positive subnormal number "6C FF FF 7F 00, ${Instructions.I_FLOAT_F32} 8388607, 1.1754942107e-38", // largest subnormal number "6C 00 00 80 00, ${Instructions.I_FLOAT_F32} 8388608, 1.1754943508e-38", // smallest positive normal number @@ -352,9 +351,9 @@ object OpcodeTestCases { @JvmStatic fun int8OpcodeCases() = listOf( - "61 32, ${Instructions.I_INT_I16.packInstructionData(50)}, 50", // 1-byte positive + "61 32, ${Instructions.I_INT_I16.packInstructionData(50)}, 50", // 1-byte positive "61 97, ${Instructions.I_INT_I16.packInstructionData(-105)}, -105", // 1-byte negative - "61 7F, ${Instructions.I_INT_I16.packInstructionData(127)}, 127", // max value + "61 7F, ${Instructions.I_INT_I16.packInstructionData(127)}, 127", // max value "61 80, ${Instructions.I_INT_I16.packInstructionData(-128)}, -128", // min value 
).toArguments() @@ -362,13 +361,13 @@ object OpcodeTestCases { @JvmStatic fun int16OpcodeCases() = listOf( - "62 26 73, ${Instructions.I_INT_I16.packInstructionData(29478)}, 29478", // 2-byte positive - "62 50 FC, ${Instructions.I_INT_I16.packInstructionData(-944)}, -944", // 2-byte negative - "62 00 00, ${Instructions.I_INT_I16.packInstructionData(0)}, 0", // 2-byte overlong 0 - "62 FF FF, ${Instructions.I_INT_I16.packInstructionData(-1)}, -1", // 2-byte overlong -1 - "62 80 00, ${Instructions.I_INT_I16.packInstructionData(128)}, 128", // min positive - "62 7F FF, ${Instructions.I_INT_I16.packInstructionData(-129)}, -129", // max negative - "62 FF 7F, ${Instructions.I_INT_I16.packInstructionData(32767)}, 32767", // max value + "62 26 73, ${Instructions.I_INT_I16.packInstructionData(29478)}, 29478", // 2-byte positive + "62 50 FC, ${Instructions.I_INT_I16.packInstructionData(-944)}, -944", // 2-byte negative + "62 00 00, ${Instructions.I_INT_I16.packInstructionData(0)}, 0", // 2-byte overlong 0 + "62 FF FF, ${Instructions.I_INT_I16.packInstructionData(-1)}, -1", // 2-byte overlong -1 + "62 80 00, ${Instructions.I_INT_I16.packInstructionData(128)}, 128", // min positive + "62 7F FF, ${Instructions.I_INT_I16.packInstructionData(-129)}, -129", // max negative + "62 FF 7F, ${Instructions.I_INT_I16.packInstructionData(32767)}, 32767", // max value "62 00 80, ${Instructions.I_INT_I16.packInstructionData(-32768)}, -32768", // min value ).toArguments() @@ -376,11 +375,11 @@ object OpcodeTestCases { @JvmStatic fun int24OpcodeCases() = listOf( - "63 40 42 0F, ${Instructions.I_INT_I32} 1000000, 1000000", // 3-byte positive + "63 40 42 0F, ${Instructions.I_INT_I32} 1000000, 1000000", // 3-byte positive "63 4F 34 8B, ${Instructions.I_INT_I32} -7654321, -7654321", // 3-byte negative - "63 00 80 00, ${Instructions.I_INT_I32} 32768, 32768", // min positive, length boundary from i16 - "63 FF FF 7F, ${Instructions.I_INT_I32} 8388607, 8388607", // max value - "63 FF 7F FF, 
${Instructions.I_INT_I32} -32769, -32769", // max negative, length boundary from i16 + "63 00 80 00, ${Instructions.I_INT_I32} 32768, 32768", // min positive, length boundary from i16 + "63 FF FF 7F, ${Instructions.I_INT_I32} 8388607, 8388607", // max value + "63 FF 7F FF, ${Instructions.I_INT_I32} -32769, -32769", // max negative, length boundary from i16 "63 00 00 80, ${Instructions.I_INT_I32} -8388608, -8388608", // min value ).toArguments() @@ -388,13 +387,13 @@ object OpcodeTestCases { @JvmStatic fun int32OpcodeCases() = listOf( - "64 3B C4 42 7E, ${Instructions.I_INT_I32} 2118304827, 2118304827", // 4-byte positive + "64 3B C4 42 7E, ${Instructions.I_INT_I32} 2118304827, 2118304827", // 4-byte positive "64 57 97 13 E9, ${Instructions.I_INT_I32} -384592041, -384592041", // 4-byte negative - "64 00 00 00 00, ${Instructions.I_INT_I32} 0, 0", // 4-byte overlong 0 + "64 00 00 00 00, ${Instructions.I_INT_I32} 0, 0", // 4-byte overlong 0 "64 FF FF FF FF, ${Instructions.I_INT_I32} -1, -1", // 4-byte overlong -1 - "64 00 00 80 00, ${Instructions.I_INT_I32} 8388608, 8388608", // length boundary - "64 FF FF FF 7F, ${Instructions.I_INT_I32} ${Int.MAX_VALUE}, ${Int.MAX_VALUE}", // max value + "64 00 00 80 00, ${Instructions.I_INT_I32} 8388608, 8388608", // length boundary "64 FF FF 7F FF, ${Instructions.I_INT_I32} -8388609, -8388609", // length boundary + "64 FF FF FF 7F, ${Instructions.I_INT_I32} ${Int.MAX_VALUE}, ${Int.MAX_VALUE}", // max value "64 00 00 00 80, ${Instructions.I_INT_I32} ${Int.MIN_VALUE}, ${Int.MIN_VALUE}", // min value ).toArguments() diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt index 7ffd79349..6104110dc 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt @@ -16,6 +16,7 @@ import 
com.amazon.ion.PrimitiveTestCases_1_1.FLEX_INT_READ_WRITE_CASES import com.amazon.ion.PrimitiveTestCases_1_1.FLEX_UINT_READ_ONLY_CASES import com.amazon.ion.PrimitiveTestCases_1_1.FLEX_UINT_READ_WRITE_CASES import com.amazon.ion.TextToBinaryUtils.binaryStringToByteArray +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.lengthOfFlexIntOrUIntAt import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt16 import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt24AsInt @@ -67,11 +68,63 @@ class PrimitiveDecoderTest { } @ParameterizedTest - @MethodSource(FIXED_INT_64_CASES) - fun testReadFixedInt64(expected: Long, bits: String) { - val data = bits.binaryStringToByteArray() - val actual = PrimitiveDecoder.readFixedInt64(data, 0) - assertEquals(expected, actual) + @CsvSource( + " 64, 1, 40", + " 3257, 2, B9 0C", + " -3257, 2, 47 F3", + " 78, 1, 4E", + " -6407, 2, F9 E6", + " 0, 1, 00", + " 1, 1, 01", + " 2, 1, 02", + " 3, 1, 03", + " 4, 1, 04", + " 5, 1, 05", + " 14, 1, 0E", + " 127, 1, 7F", + " 128, 2, 80 00", // length boundary + " 729, 2, D9 02", + " 32767, 2, FF 7F", + " 32768, 3, 00 80 00", // length boundary + " 8388607, 3, FF FF 7F", + " 8388608, 4, 00 00 80 00", // length boundary + " ${Int.MAX_VALUE}, 4, FF FF FF 7F", + " 2147483648, 5, 00 00 00 80 00", // length boundary + " 549755813887, 5, FF FF FF FF 7F", + " 549755813888, 6, 00 00 00 00 80 00", // length boundary + " 140737488355327, 6, FF FF FF FF FF 7F", + " 140737488355328, 7, 00 00 00 00 00 80 00", // length boundary + " 36028797018963967, 7, FF FF FF FF FF FF 7F", + " 36028797018963968, 8, 00 00 00 00 00 00 80 00", // length boundary + " ${Long.MAX_VALUE}, 8, FF FF FF FF FF FF FF 7F", + + " -1, 1, FF", + " -2, 1, FE", + " -3, 1, FD", + " -14, 1, F2", + " -128, 1, 80", + " -129, 2, 7F FF", // length boundary + " -729, 2, 27 FD", + " -32768, 2, 00 80", + " -32769, 3, FF 7F FF", // length 
boundary + " -8388608, 3, 00 00 80", + " -8388609, 4, FF FF 7F FF", // length boundary + " ${Int.MIN_VALUE}, 4, 00 00 00 80", + " -2147483649, 5, FF FF FF 7F FF", // length boundary + " -549755813888, 5, 00 00 00 00 80", + " -549755813889, 6, FF FF FF FF 7F FF", // length boundary + " -140737488355328, 6, 00 00 00 00 00 80", + " -140737488355329, 7, FF FF FF FF FF 7F FF", // length boundary + " -36028797018963968, 7, 00 00 00 00 00 00 80", + " -36028797018963969, 8, FF FF FF FF FF FF 7F FF", // length boundary + " ${Long.MIN_VALUE}, 8, 00 00 00 00 00 00 00 80", + ) + fun testReadFixedIntAsLong(expectedValue: Long, length: Int, input: String) { + val data = if (input.all { it == '0' || it == '1' }) input.binaryStringToByteArray() else input.hexStringToByteArray() + + val value = readFixedIntAsLong(data, 0, data.size) + + assertEquals(expectedValue, value) } @ParameterizedTest @@ -100,6 +153,14 @@ class PrimitiveDecoderTest { assertEquals(expectedValue, value) } + @ParameterizedTest + @MethodSource(FIXED_INT_64_CASES) + fun testReadFixedInt64(expected: Long, bits: String) { + val data = bits.binaryStringToByteArray() + val actual = PrimitiveDecoder.readFixedInt64(data, 0) + assertEquals(expected, actual) + } + @ParameterizedTest @MethodSource(FIXED_INT_8_CASES, FIXED_INT_16_CASES, FIXED_INT_24_CASES, FIXED_INT_32_CASES, FIXED_INT_64_CASES) @CsvSource( diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt index 6570adc7c..41346bd05 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt @@ -28,8 +28,8 @@ class ShortTimestampDecoderTest { "8B 35 7D CB EA 85 8B C8 06, 2023-10-15T11:22:33.444555+01:15", "8C 35 7D CB EA 85 92 61 7F 1A, 2023-10-15T11:22:33.444555666+01:15", ) - fun `short timestamps are decoded 
correctly`(bytecode: String, expectedValue: String) { - val data = bytecode.hexStringToByteArray() + fun `short timestamps are decoded correctly`(input: String, expectedValue: String) { + val data = input.hexStringToByteArray() val opcode = data[0].unsignedToInt() val timestamp = ShortTimestampDecoder.readTimestamp(data, 1, opcode and 0xF) val expectedTimestamp = Timestamp.valueOf(expectedValue.trim()) From bdda6f3a0a5b9389d4030a701f997b8c9a270883 Mon Sep 17 00:00:00 2001 From: austnwil Date: Thu, 23 Oct 2025 13:39:34 -0700 Subject: [PATCH 10/22] Remove unnecessary test cases causing OOM errors --- .../bin11/ByteArrayBytecodeGenerator11Test.kt | 73 ------------------- .../ion/bytecode/bin11/OpcodeTestCases.kt | 10 ++- 2 files changed, 7 insertions(+), 76 deletions(-) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index 77b7da732..2b1801a13 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -5,7 +5,6 @@ package com.amazon.ion.bytecode.bin11 import com.amazon.ion.TextToBinaryUtils.decimalStringToIntArray import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray import com.amazon.ion.Timestamp -import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode import com.amazon.ion.bytecode.GeneratorTestUtil.shouldGenerate import com.amazon.ion.bytecode.bin11.OpcodeTestCases.BOOLEAN_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT0_OPCODE_CASES @@ -24,29 +23,12 @@ import com.amazon.ion.bytecode.bin11.OpcodeTestCases.REFERENCE_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.SHORT_TIMESTAMP_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.STRING_REFERENCE_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.TYPED_NULL_OPCODE_CASES -import 
com.amazon.ion.bytecode.bin11.OpcodeTestCases.booleanOpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.float0OpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.float16OpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.float32OpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.float64OpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.int0OpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.int16OpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.int32OpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.int64EmittingOpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.int8OpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.lobReferenceOpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.nullOpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.referenceOpcodeCases import com.amazon.ion.bytecode.bin11.OpcodeTestCases.replacePositionTemplates -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.shortTimestampOpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.stringReferenceOpcodeCases -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.typedNullOpcodeCases import com.amazon.ion.bytecode.ir.Instructions import com.amazon.ion.bytecode.util.BytecodeBuffer import com.amazon.ion.bytecode.util.ConstantPool import org.junit.jupiter.api.Assertions.assertArrayEquals import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource @@ -71,61 +53,6 @@ class ByteArrayBytecodeGenerator11Test { ) } - /** - * Concatenates all the tests for all supported non-system opcodes together into a single test string. This - * validates that reference instructions that appear in the middle of the input are handled correctly. 
- */ - @Test - fun `generator produces correct bytecode for sequence of all supported opcodes`() { - var inputData = byteArrayOf() - var expectedBytecode = intArrayOf() - - val opcodeTests = booleanOpcodeCases() + - nullOpcodeCases() + - typedNullOpcodeCases() + - float0OpcodeCases() + - float16OpcodeCases() + - float32OpcodeCases() + - float64OpcodeCases() + - shortTimestampOpcodeCases() + - referenceOpcodeCases() + - int0OpcodeCases() + - int8OpcodeCases() + - int16OpcodeCases() + - int32OpcodeCases() + - int64EmittingOpcodeCases() + - stringReferenceOpcodeCases() + - lobReferenceOpcodeCases() - - // Build up the input bytes and expected bytecode from the individual opcode tests. - var bytesRead = 0 - opcodeTests.forEach { args -> - val (inputBytesString: String, expectedBytecodeString) = args.get().map { it as String } - val nextBytes = inputBytesString.hexStringToByteArray() - inputData = inputData.plus(nextBytes) - val nextBytecode = replacePositionTemplates(expectedBytecodeString, bytesRead) - .decimalStringToIntArray() - bytesRead += nextBytes.size - expectedBytecode = expectedBytecode.plus(nextBytecode) - } - - expectedBytecode = expectedBytecode.plus(Instructions.I_END_OF_INPUT) - - val generator = ByteArrayBytecodeGenerator11(inputData, 0) - val bytecodeBuffer = BytecodeBuffer() - val constantPool = ConstantPool() - val macroSrc = intArrayOf() - val macroIndices = intArrayOf() - val symbolTable = arrayOf() - var isEOF: Boolean - do { - generator.refill(bytecodeBuffer, constantPool, macroSrc, macroIndices, symbolTable) - isEOF = bytecodeBuffer.get(bytecodeBuffer.size() - 1) == Instructions.I_END_OF_INPUT - } while (!isEOF) - - assertEqualBytecode(expectedBytecode, bytecodeBuffer.toArray()) - } - @ParameterizedTest @MethodSource(SHORT_TIMESTAMP_OPCODE_CASES) fun `generator can read short timestamp references`(encodedTimestampBytes: String, expectedBytecodeString: String, expectedTimestampString: String) { diff --git 
a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt index 3005f61af..f5e7c59e9 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt @@ -258,7 +258,7 @@ object OpcodeTestCases { "04 00 02, 16384, 4", "FC FF FF, 2097151, 4", "08 00 00 02, 2097152, 5", - "F8 FF FF 03, 4194303, 5", // maximum length of a payload + // Testing up to max length causes OOM errors on java 8 "01, 0, 2", // zero-length payload TODO: is this legal? "00 18 00 00 00 00 00 00 00 00 00 00, 1, 13", // overlong encoding on the FlexUInt ) @@ -270,11 +270,15 @@ object OpcodeTestCases { val expectedPayloadStartPosition = expectedPayloadStartPosStr.trim().toInt() val expectedBytecodeString = "${instruction.packInstructionData(payloadLength)} %pos:$expectedPayloadStartPosition%" + val inputBytesSB = StringBuilder(payloadLength * 3 + flexUIntStr.length + 4) + inputBytesSB.append("${opcode.toSingleHexByte()} $flexUIntStr ") // Create a dummy payload for this value with all bytes set to zeros. // Not actually looked at by this test, but simulates an encoded value the handler would actually // encounter during parsing. 
- val payload = "00 ".repeat(payloadLength) - val inputBytes = "${opcode.toString(16).uppercase().padStart(2, '0')} $flexUIntStr $payload" + for (i in 0 until payloadLength) { + inputBytesSB.append("00 ") + } + val inputBytes = inputBytesSB.toString() arguments.add(Arguments.of(inputBytes, expectedBytecodeString)) } } From bcdf13362a5c3378519b8538d47e95f8bd266675 Mon Sep 17 00:00:00 2001 From: Austin Williams Date: Mon, 27 Oct 2025 09:44:08 -0700 Subject: [PATCH 11/22] Apply suggestions from code review Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com> --- .../ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt | 2 ++ .../amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt | 2 ++ .../ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt index fa82208c6..9aa419dfe 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt @@ -11,6 +11,8 @@ import java.math.BigDecimal /** * Helper class for decoding the various short timestamp encoding variants from a [ByteArray]. + * + * TODO(perf): avoid auto-boxing the `0` integer for the offset when constructing the Timestamp instance. 
*/ internal object ShortTimestampDecoder { private const val MASK_4 = 0b1111 diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt index 6104110dc..274b8f95f 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/PrimitiveDecoderTest.kt @@ -166,6 +166,8 @@ class PrimitiveDecoderTest { @CsvSource( " 9223372036854775808, 00000000 00000000 00000000 00000000 00000000 00000000 00000000 10000000 00000000", "-9223372036854775809, 11111111 11111111 11111111 11111111 11111111 11111111 11111111 01111111 11111111", + " 1, 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000", + " -1, 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111", ) fun testReadFixedIntAsBigInteger(expectedValue: BigInteger, input: String) { val data = input.binaryStringToByteArray() diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt index 4d79656f4..e8717a275 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt @@ -32,7 +32,7 @@ internal class ReferenceOpcodeHandlerTest { OpCode.VARIABLE_LENGTH_SYMBOL -> ReferenceOpcodeHandler(Instructions.I_SYMBOL_REF) OpCode.VARIABLE_LENGTH_BLOB -> ReferenceOpcodeHandler(Instructions.I_BLOB_REF) OpCode.VARIABLE_LENGTH_CLOB -> ReferenceOpcodeHandler(Instructions.I_CLOB_REF) - else -> fail("Opcode is not a variable-length reference opcode: 0x${opcode.toSingleHexByte()}") + else -> fail("Opcode is not a variable-length reference opcode: 0x${opcode.toByte().toHexString()}") } handler.shouldCompile(input, bytecode) } From 
5e1983a555343b64d37a83b66d3e4b05860c8be4 Mon Sep 17 00:00:00 2001 From: austnwil Date: Mon, 27 Oct 2025 10:48:45 -0700 Subject: [PATCH 12/22] Implement more PR suggestions --- .../amazon/ion/bytecode/BytecodeEmitter.kt | 14 +- .../bin11/ByteArrayBytecodeGenerator11.kt | 10 +- .../bin11/bytearray/ShortTimestampDecoder.kt | 223 ------------------ .../bytearray/ShortTimestampOpcodeHandler.kt | 4 +- .../bin11/bytearray/TimestampDecoder.kt | 220 +++++++++++++++++ .../ion/impl/bin/Ion_1_1_Constants.java | 42 ++-- .../java/com/amazon/ion/TextToBinaryUtils.kt | 18 +- .../bin11/ByteArrayBytecodeGenerator11Test.kt | 42 ++-- .../ion/bytecode/bin11/OpcodeTestCases.kt | 34 +-- .../bytearray/ReferenceOpcodeHandlerTest.kt | 2 +- ...DecoderTest.kt => TimestampDecoderTest.kt} | 4 +- 11 files changed, 297 insertions(+), 316 deletions(-) delete mode 100644 src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt create mode 100644 src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt rename src/test/java/com/amazon/ion/bytecode/bin11/bytearray/{ShortTimestampDecoderTest.kt => TimestampDecoderTest.kt} (93%) diff --git a/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt b/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt index 7297fb78b..4b75f7ca5 100644 --- a/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt +++ b/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt @@ -84,17 +84,7 @@ internal object BytecodeEmitter { } @JvmStatic - fun emitShortTimestampReference(destination: BytecodeBuffer, precisionAndOffsetMode: Int, dataPosition: Int) { - destination.add2(Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(precisionAndOffsetMode), dataPosition) - } - - @JvmStatic - fun emitRefill(destination: BytecodeBuffer) { - destination.add(Instructions.I_REFILL) - } - - @JvmStatic - fun emitEndOfInput(destination: BytecodeBuffer) { - destination.add(Instructions.I_END_OF_INPUT) + fun 
emitShortTimestampReference(destination: BytecodeBuffer, opcode: Int, dataPosition: Int) { + destination.add2(Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(opcode), dataPosition) } } diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index bd47aa117..af93deb58 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -5,11 +5,11 @@ package com.amazon.ion.bytecode.bin11 import com.amazon.ion.Decimal import com.amazon.ion.IonException import com.amazon.ion.Timestamp -import com.amazon.ion.bytecode.BytecodeEmitter import com.amazon.ion.bytecode.BytecodeGenerator import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTable import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsBigInteger -import com.amazon.ion.bytecode.bin11.bytearray.ShortTimestampDecoder +import com.amazon.ion.bytecode.bin11.bytearray.TimestampDecoder +import com.amazon.ion.bytecode.ir.Instructions import com.amazon.ion.bytecode.util.AppendableConstantPoolView import com.amazon.ion.bytecode.util.ByteSlice import com.amazon.ion.bytecode.util.BytecodeBuffer @@ -51,9 +51,9 @@ internal class ByteArrayBytecodeGenerator11( } if (currentPosition < source.size) { - BytecodeEmitter.emitRefill(destination) + destination.add(Instructions.I_REFILL) } else { - BytecodeEmitter.emitEndOfInput(destination) + destination.add(Instructions.I_END_OF_INPUT) } } @@ -66,7 +66,7 @@ internal class ByteArrayBytecodeGenerator11( } override fun readShortTimestampReference(position: Int, opcode: Int): Timestamp { - return ShortTimestampDecoder.readTimestamp(source, position, opcode) + return TimestampDecoder.readShortTimestamp(source, position, opcode) } override fun readTimestampReference(position: Int, length: Int): Timestamp { diff --git 
a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt deleted file mode 100644 index 9aa419dfe..000000000 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoder.kt +++ /dev/null @@ -1,223 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 -package com.amazon.ion.bytecode.bin11.bytearray - -import com.amazon.ion.Timestamp -import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt16 -import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt32 -import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt8AsShort -import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsLong -import java.math.BigDecimal - -/** - * Helper class for decoding the various short timestamp encoding variants from a [ByteArray]. - * - * TODO(perf): avoid auto-boxing the `0` integer for the offset when constructing the Timestamp instance. 
- */ -internal object ShortTimestampDecoder { - private const val MASK_4 = 0b1111 - private const val MASK_5 = 0b11111 - private const val MASK_6 = 0b111111 - private const val MASK_7 = 0b1111111 - private const val MASK_4L = 0b1111L - private const val MASK_5L = 0b11111L - private const val MASK_6L = 0b111111L - private const val MASK_7L = 0b1111111L - private const val MASK_10L = 0b1111111111L - private const val MASK_20L = 0b11111111111111111111L - private const val MASK_30L = 0b111111111111111111111111111111L - private const val MASK_UTC_OR_UNKNOWN_BIT = 0b1000_00000000_00000000_00000000 - private const val MASK_UTC_OR_UNKNOWN_BITL = 0b1000_00000000_00000000_00000000L - - private val opcodeToDecoderFunctionTable = arrayOf( - ShortTimestampDecoder::readTimestampToYear, - ShortTimestampDecoder::readTimestampToMonth, - ShortTimestampDecoder::readTimestampToDay, - ShortTimestampDecoder::readTimestampToMinuteUTCOrUnknown, - ShortTimestampDecoder::readTimestampToSecondUTCOrUnknown, - ShortTimestampDecoder::readTimestampToMillisecondUTCOrUnknown, - ShortTimestampDecoder::readTimestampToMicrosecondUTCOrUnknown, - ShortTimestampDecoder::readTimestampToNanosecondUTCOrUnknown, - ShortTimestampDecoder::readTimestampToMinuteWithOffset, - ShortTimestampDecoder::readTimestampToSecondWithOffset, - ShortTimestampDecoder::readTimestampToMillisecondWithOffset, - ShortTimestampDecoder::readTimestampToMicrosecondWithOffset, - ShortTimestampDecoder::readTimestampToNanosecondWithOffset, - ) - - fun readTimestampToYear(source: ByteArray, position: Int): Timestamp { - val year = readFixedInt8AsShort(source, position).toInt() - return Timestamp.forYear(year + 1970) - } - - fun readTimestampToMonth(source: ByteArray, position: Int): Timestamp { - val yearAndMonth = readFixedInt16(source, position).toInt() - val year = yearAndMonth.and(MASK_7) - val month = yearAndMonth.shr(7) - - return Timestamp.forMonth(year + 1970, month) - } - - fun readTimestampToDay(source: ByteArray, position: 
Int): Timestamp { - val yearMonthAndDay = readFixedInt16(source, position).toInt() - val year = yearMonthAndDay.and(MASK_7) - val month = yearMonthAndDay.shr(7).and(MASK_4) - val day = yearMonthAndDay.shr(11) - - return Timestamp.forDay(year + 1970, month, day) - } - - fun readTimestampToMinuteUTCOrUnknown(source: ByteArray, position: Int): Timestamp { - val data = readFixedInt32(source, position) - val year = data.and(MASK_7) - val month = data.shr(7).and(MASK_4) - val day = data.shr(11).and(MASK_5) - val hour = data.shr(16).and(MASK_5) - val minute = data.shr(21).and(MASK_6) - val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BIT) != 0 - - return Timestamp.forMinute(year + 1970, month, day, hour, minute, if (isUTC) 0 else null) - } - - fun readTimestampToSecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { - val data = readFixedIntAsLong(source, position, 5) - val year = data.and(MASK_7L).toInt() - val month = data.shr(7).and(MASK_4L).toInt() - val day = data.shr(11).and(MASK_5L).toInt() - val hour = data.shr(16).and(MASK_5L).toInt() - val minute = data.shr(21).and(MASK_6L).toInt() - val second = data.shr(28).and(MASK_6L).toInt() - val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BITL) != 0L - - return Timestamp.forSecond(year + 1970, month, day, hour, minute, second, if (isUTC) 0 else null) - } - - fun readTimestampToMillisecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { - val data = readFixedIntAsLong(source, position, 6) - val year = data.and(MASK_7L).toInt() - val month = data.shr(7).and(MASK_4L).toInt() - val day = data.shr(11).and(MASK_5L).toInt() - val hour = data.shr(16).and(MASK_5L).toInt() - val minute = data.shr(21).and(MASK_6L).toInt() - val second = data.shr(28).and(MASK_6L) - val fractionalSecond = data.shr(34).and(MASK_10L) - val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BITL) != 0L - - val secondBigDecimal = BigDecimal.valueOf(second) - val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 3) - return 
Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), if (isUTC) 0 else null) - } - - fun readTimestampToMicrosecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { - val data = readFixedIntAsLong(source, position, 7) - val year = data.and(MASK_7L).toInt() - val month = data.shr(7).and(MASK_4L).toInt() - val day = data.shr(11).and(MASK_5L).toInt() - val hour = data.shr(16).and(MASK_5L).toInt() - val minute = data.shr(21).and(MASK_6L).toInt() - val second = data.shr(28).and(MASK_6L) - val fractionalSecond = data.shr(34).and(MASK_20L) - val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BITL) != 0L - - val secondBigDecimal = BigDecimal.valueOf(second) - val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 6) - return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), if (isUTC) 0 else null) - } - - fun readTimestampToNanosecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { - val data = readFixedIntAsLong(source, position, 8) - val year = data.and(MASK_7L).toInt() - val month = data.shr(7).and(MASK_4L).toInt() - val day = data.shr(11).and(MASK_5L).toInt() - val hour = data.shr(16).and(MASK_5L).toInt() - val minute = data.shr(21).and(MASK_6L).toInt() - val second = data.shr(28).and(MASK_6L) - val fractionalSecond = data.ushr(34).and(MASK_30L) - val isUTC = data.and(MASK_UTC_OR_UNKNOWN_BITL) != 0L - - val secondBigDecimal = BigDecimal.valueOf(second) - val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 9) - return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), if (isUTC) 0 else null) - } - - fun readTimestampToMinuteWithOffset(source: ByteArray, position: Int): Timestamp { - val data = readFixedIntAsLong(source, position, 5) - val year = data.and(MASK_7L).toInt() - val month = data.shr(7).and(MASK_4L).toInt() - val day = 
data.shr(11).and(MASK_5L).toInt() - val hour = data.shr(16).and(MASK_5L).toInt() - val minute = data.shr(21).and(MASK_6L).toInt() - val offset = data.shr(27).and(MASK_7L).toInt() - - return Timestamp.forMinute(year + 1970, month, day, hour, minute, (offset - 56) * 15) - } - - fun readTimestampToSecondWithOffset(source: ByteArray, position: Int): Timestamp { - val data = readFixedIntAsLong(source, position, 5) - val year = data.and(MASK_7L).toInt() - val month = data.shr(7).and(MASK_4L).toInt() - val day = data.shr(11).and(MASK_5L).toInt() - val hour = data.shr(16).and(MASK_5L).toInt() - val minute = data.shr(21).and(MASK_6L).toInt() - val offset = data.shr(27).and(MASK_7L).toInt() - val second = data.shr(34).and(MASK_6L).toInt() - - return Timestamp.forSecond(year + 1970, month, day, hour, minute, second, (offset - 56) * 15) - } - - fun readTimestampToMillisecondWithOffset(source: ByteArray, position: Int): Timestamp { - val data = readFixedIntAsLong(source, position, 7) - val year = data.and(MASK_7L).toInt() - val month = data.shr(7).and(MASK_4L).toInt() - val day = data.shr(11).and(MASK_5L).toInt() - val hour = data.shr(16).and(MASK_5L).toInt() - val minute = data.shr(21).and(MASK_6L).toInt() - val offset = data.shr(27).and(MASK_7L).toInt() - val second = data.shr(34).and(MASK_6L) - val fractionalSecond = data.shr(40).and(MASK_10L) - - val secondBigDecimal = BigDecimal.valueOf(second) - val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 3) - return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), (offset - 56) * 15) - } - - fun readTimestampToMicrosecondWithOffset(source: ByteArray, position: Int): Timestamp { - val data = readFixedIntAsLong(source, position, 8) - val year = data.and(MASK_7L).toInt() - val month = data.shr(7).and(MASK_4L).toInt() - val day = data.shr(11).and(MASK_5L).toInt() - val hour = data.shr(16).and(MASK_5L).toInt() - val minute = data.shr(21).and(MASK_6L).toInt() 
- val offset = data.shr(27).and(MASK_7L).toInt() - val second = data.shr(34).and(MASK_6L) - val fractionalSecond = data.shr(40).and(MASK_20L) - - val secondBigDecimal = BigDecimal.valueOf(second) - val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 6) - return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), (offset - 56) * 15) - } - - fun readTimestampToNanosecondWithOffset(source: ByteArray, position: Int): Timestamp { - val data = readFixedIntAsLong(source, position, 8) - val highFractionalSecondByte = readFixedInt8AsShort(source, position + 8).toLong().and(MASK_6L) - val year = data.and(MASK_7L).toInt() - val month = data.shr(7).and(MASK_4L).toInt() - val day = data.shr(11).and(MASK_5L).toInt() - val hour = data.shr(16).and(MASK_5L).toInt() - val minute = data.shr(21).and(MASK_6L).toInt() - val offset = data.shr(27).and(MASK_7L).toInt() - val second = data.shr(34).and(MASK_6L) - val fractionalSecond = data.ushr(40).or(highFractionalSecondByte.shl(24)) - - val secondBigDecimal = BigDecimal.valueOf(second) - val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 9) - return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), (offset - 56) * 15) - } - - fun readTimestamp(source: ByteArray, position: Int, precisionAndOffsetMode: Int): Timestamp { - // TODO: calling function references like this might be slower than just using a conditional or other solutions. - // Might be worth looking into. 
- val decoder = opcodeToDecoderFunctionTable[precisionAndOffsetMode] - return decoder(source, position) - } -} diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandler.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandler.kt index 3d27afe8f..d75e9239b 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandler.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandler.kt @@ -40,12 +40,12 @@ internal object ShortTimestampOpcodeHandler : OpcodeToBytecodeHandler { macroIndices: IntArray, symbolTable: Array ): Int { - val precisionAndOffsetMode = opcode and 0xF BytecodeEmitter.emitShortTimestampReference( destination, - precisionAndOffsetMode, + opcode, position ) + val precisionAndOffsetMode = opcode and 0xF return serializedSizeByOpcodeTable[precisionAndOffsetMode] } } diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt new file mode 100644 index 000000000..62f2ab848 --- /dev/null +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt @@ -0,0 +1,220 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.IonException +import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt16 +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt32 +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt8AsShort +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsLong +import com.amazon.ion.impl.bin.Ion_1_1_Constants.* +import java.math.BigDecimal + +/** + * Helper class for decoding the various timestamp encoding variants from a [ByteArray]. 
+ * + * TODO(perf): avoid auto-boxing the `0` integer for the offset when constructing the Timestamp instance. + */ +internal object TimestampDecoder { + private const val MASK_4 = 0b1111 + private const val MASK_5 = 0b11111 + private const val MASK_6 = 0b111111 + private const val MASK_7 = 0b1111111 + private const val MASK_4L = 0b1111L + private const val MASK_5L = 0b11111L + private const val MASK_6L = 0b111111L + private const val MASK_7L = 0b1111111L + private const val MASK_10L = 0b1111111111L + private const val MASK_20L = 0b11111111111111111111L + private const val MASK_30L = 0b111111111111111111111111111111L + + fun readTimestampToYear(source: ByteArray, position: Int): Timestamp { + val year = readFixedInt8AsShort(source, position).toInt() + return Timestamp.forYear(year + 1970) + } + + fun readTimestampToMonth(source: ByteArray, position: Int): Timestamp { + val yearAndMonth = readFixedInt16(source, position).toInt() + val year = yearAndMonth.and(MASK_7) + val month = yearAndMonth.shr(S_TIMESTAMP_MONTH_BIT_OFFSET) + + return Timestamp.forMonth(year + 1970, month) + } + + fun readTimestampToDay(source: ByteArray, position: Int): Timestamp { + val yearMonthAndDay = readFixedInt16(source, position).toInt() + val year = yearMonthAndDay.and(MASK_7) + val month = yearMonthAndDay.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4) + val day = yearMonthAndDay.shr(S_TIMESTAMP_DAY_BIT_OFFSET) + + return Timestamp.forDay(year + 1970, month, day) + } + + fun readTimestampToMinuteUTCOrUnknown(source: ByteArray, position: Int): Timestamp { + val data = readFixedInt32(source, position) + val year = data.and(MASK_7) + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4) + val day = data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5) + val hour = data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5) + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6) + val isUTC = data.and(S_U_TIMESTAMP_UTC_FLAG) != 0 + + return Timestamp.forMinute(year + 1970, month, day, 
hour, minute, if (isUTC) 0 else null) + } + + fun readTimestampToSecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 5) + val year = data.and(MASK_7L).toInt() + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4L).toInt() + val day = data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5L).toInt() + val hour = data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5L).toInt() + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6L).toInt() + val second = data.shr(S_U_TIMESTAMP_SECOND_BIT_OFFSET).and(MASK_6L).toInt() + val isUTC = data.and(S_U_TIMESTAMP_UTC_FLAG_L) != 0L + + return Timestamp.forSecond(year + 1970, month, day, hour, minute, second, if (isUTC) 0 else null) + } + + fun readTimestampToMillisecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 6) + val year = data.and(MASK_7L).toInt() + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4L).toInt() + val day = data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5L).toInt() + val hour = data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5L).toInt() + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6L).toInt() + val second = data.shr(S_U_TIMESTAMP_SECOND_BIT_OFFSET).and(MASK_6L) + val fractionalSecond = data.shr(S_U_TIMESTAMP_FRACTION_BIT_OFFSET).and(MASK_10L) + val isUTC = data.and(S_U_TIMESTAMP_UTC_FLAG_L) != 0L + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 3) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), if (isUTC) 0 else null) + } + + fun readTimestampToMicrosecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 7) + val year = data.and(MASK_7L).toInt() + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4L).toInt() + val day = 
data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5L).toInt() + val hour = data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5L).toInt() + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6L).toInt() + val second = data.shr(S_U_TIMESTAMP_SECOND_BIT_OFFSET).and(MASK_6L) + val fractionalSecond = data.shr(S_U_TIMESTAMP_FRACTION_BIT_OFFSET).and(MASK_20L) + val isUTC = data.and(S_U_TIMESTAMP_UTC_FLAG_L) != 0L + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 6) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), if (isUTC) 0 else null) + } + + fun readTimestampToNanosecondUTCOrUnknown(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 8) + val year = data.and(MASK_7L).toInt() + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4L).toInt() + val day = data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5L).toInt() + val hour = data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5L).toInt() + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6L).toInt() + val second = data.shr(S_U_TIMESTAMP_SECOND_BIT_OFFSET).and(MASK_6L) + val fractionalSecond = data.ushr(S_U_TIMESTAMP_FRACTION_BIT_OFFSET).and(MASK_30L) + val isUTC = data.and(S_U_TIMESTAMP_UTC_FLAG_L) != 0L + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 9) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), if (isUTC) 0 else null) + } + + fun readTimestampToMinuteWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 5) + val year = data.and(MASK_7L).toInt() + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4L).toInt() + val day = data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5L).toInt() + val hour = 
data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5L).toInt() + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6L).toInt() + val offset = data.shr(S_O_TIMESTAMP_OFFSET_BIT_OFFSET).and(MASK_7L).toInt() + + return Timestamp.forMinute(year + 1970, month, day, hour, minute, (offset - 56) * 15) + } + + fun readTimestampToSecondWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 5) + val year = data.and(MASK_7L).toInt() + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4L).toInt() + val day = data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5L).toInt() + val hour = data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5L).toInt() + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6L).toInt() + val offset = data.shr(S_O_TIMESTAMP_OFFSET_BIT_OFFSET).and(MASK_7L).toInt() + val second = data.shr(S_O_TIMESTAMP_SECOND_BIT_OFFSET).and(MASK_6L).toInt() + + return Timestamp.forSecond(year + 1970, month, day, hour, minute, second, (offset - 56) * 15) + } + + fun readTimestampToMillisecondWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 7) + val year = data.and(MASK_7L).toInt() + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4L).toInt() + val day = data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5L).toInt() + val hour = data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5L).toInt() + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6L).toInt() + val offset = data.shr(S_O_TIMESTAMP_OFFSET_BIT_OFFSET).and(MASK_7L).toInt() + val second = data.shr(S_O_TIMESTAMP_SECOND_BIT_OFFSET).and(MASK_6L) + val fractionalSecond = data.shr(S_O_TIMESTAMP_FRACTION_BIT_OFFSET).and(MASK_10L) + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 3) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), (offset - 
56) * 15) + } + + fun readTimestampToMicrosecondWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 8) + val year = data.and(MASK_7L).toInt() + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4L).toInt() + val day = data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5L).toInt() + val hour = data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5L).toInt() + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6L).toInt() + val offset = data.shr(S_O_TIMESTAMP_OFFSET_BIT_OFFSET).and(MASK_7L).toInt() + val second = data.shr(S_O_TIMESTAMP_SECOND_BIT_OFFSET).and(MASK_6L) + val fractionalSecond = data.shr(S_O_TIMESTAMP_FRACTION_BIT_OFFSET).and(MASK_20L) + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 6) + return Timestamp.forSecond(year + 1970, month, day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), (offset - 56) * 15) + } + + fun readTimestampToNanosecondWithOffset(source: ByteArray, position: Int): Timestamp { + val data = readFixedIntAsLong(source, position, 8) + val highFractionalSecondByte = readFixedInt8AsShort(source, position + 8).toLong().and(MASK_6L) + val year = data.and(MASK_7L).toInt() + val month = data.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4L).toInt() + val day = data.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5L).toInt() + val hour = data.shr(S_TIMESTAMP_HOUR_BIT_OFFSET).and(MASK_5L).toInt() + val minute = data.shr(S_TIMESTAMP_MINUTE_BIT_OFFSET).and(MASK_6L).toInt() + val offset = data.shr(S_O_TIMESTAMP_OFFSET_BIT_OFFSET).and(MASK_7L).toInt() + val second = data.shr(S_O_TIMESTAMP_SECOND_BIT_OFFSET).and(MASK_6L) + val fractionalSecond = data.ushr(S_O_TIMESTAMP_FRACTION_BIT_OFFSET).or(highFractionalSecondByte.shl(24)) + + val secondBigDecimal = BigDecimal.valueOf(second) + val fractionalSecondBigDecimal = BigDecimal.valueOf(fractionalSecond, 9) + return Timestamp.forSecond(year + 1970, month, 
day, hour, minute, secondBigDecimal.add(fractionalSecondBigDecimal), (offset - 56) * 15) + } + + @OptIn(ExperimentalStdlibApi::class) // for Byte.toHexString() + fun readShortTimestamp(source: ByteArray, position: Int, opcode: Int): Timestamp { + return when (opcode) { + 0x80 -> readTimestampToYear(source, position) + 0x81 -> readTimestampToMonth(source, position) + 0x82 -> readTimestampToDay(source, position) + 0x83 -> readTimestampToMinuteUTCOrUnknown(source, position) + 0x84 -> readTimestampToSecondUTCOrUnknown(source, position) + 0x85 -> readTimestampToMillisecondUTCOrUnknown(source, position) + 0x86 -> readTimestampToMicrosecondUTCOrUnknown(source, position) + 0x87 -> readTimestampToNanosecondUTCOrUnknown(source, position) + 0x88 -> readTimestampToMinuteWithOffset(source, position) + 0x89 -> readTimestampToSecondWithOffset(source, position) + 0x8a -> readTimestampToMillisecondWithOffset(source, position) + 0x8b -> readTimestampToMicrosecondWithOffset(source, position) + 0x8c -> readTimestampToNanosecondWithOffset(source, position) + else -> throw IonException("Unrecognized short timestamp opcode ${opcode.toByte().toHexString()}") + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java b/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java index 8df501b34..0d360b6ea 100644 --- a/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java +++ b/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java @@ -1,3 +1,5 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; /** @@ -6,34 +8,36 @@ public class Ion_1_1_Constants { private Ion_1_1_Constants() {} - static final int FIRST_2_BYTE_SYMBOL_ADDRESS = 256; - static final int FIRST_MANY_BYTE_SYMBOL_ADDRESS = 65792; + public static final int FIRST_2_BYTE_SYMBOL_ADDRESS = 256; + public static final int FIRST_MANY_BYTE_SYMBOL_ADDRESS = 65792; //////// Timestamp Field Constants //////// // S_TIMESTAMP_* is applicable to all short-form timestamps - static final int S_TIMESTAMP_MONTH_BIT_OFFSET = 7; - static final int S_TIMESTAMP_DAY_BIT_OFFSET = 11; - static final int S_TIMESTAMP_HOUR_BIT_OFFSET = 16; - static final int S_TIMESTAMP_MINUTE_BIT_OFFSET = 21; + public static final int S_TIMESTAMP_MONTH_BIT_OFFSET = 7; + public static final int S_TIMESTAMP_DAY_BIT_OFFSET = 11; + public static final int S_TIMESTAMP_HOUR_BIT_OFFSET = 16; + public static final int S_TIMESTAMP_MINUTE_BIT_OFFSET = 21; // S_U_TIMESTAMP_* is applicable to all short-form timestamps with a `U` bit - static final int S_U_TIMESTAMP_UTC_FLAG = 1 << 27; - static final int S_U_TIMESTAMP_SECOND_BIT_OFFSET = 28; - static final int S_U_TIMESTAMP_FRACTION_BIT_OFFSET = 34; + public static final int S_U_TIMESTAMP_UTC_FLAG = 1 << 27; + public static final long S_U_TIMESTAMP_UTC_FLAG_L = S_U_TIMESTAMP_UTC_FLAG; + public static final int S_U_TIMESTAMP_SECOND_BIT_OFFSET = 28; + public static final int S_U_TIMESTAMP_FRACTION_BIT_OFFSET = 34; // S_O_TIMESTAMP_* is applicable to all short-form timestamps with `o` (offset) bits - static final int S_O_TIMESTAMP_OFFSET_BIT_OFFSET = 27; - static final int S_O_TIMESTAMP_SECOND_BIT_OFFSET = 34; + public static final int S_O_TIMESTAMP_OFFSET_BIT_OFFSET = 27; + public static final int S_O_TIMESTAMP_SECOND_BIT_OFFSET = 34; + public static final int S_O_TIMESTAMP_FRACTION_BIT_OFFSET = 40; // L_TIMESTAMP_* is applicable to all long-form timestamps - static final int L_TIMESTAMP_MONTH_BIT_OFFSET = 14; - static final int 
L_TIMESTAMP_DAY_BIT_OFFSET = 18; - static final int L_TIMESTAMP_HOUR_BIT_OFFSET = 23; - static final int L_TIMESTAMP_MINUTE_BIT_OFFSET = 28; - static final int L_TIMESTAMP_OFFSET_BIT_OFFSET = 34; - static final int L_TIMESTAMP_SECOND_BIT_OFFSET = 46; - static final int L_TIMESTAMP_UNKNOWN_OFFSET_VALUE = 0b111111111111; + public static final int L_TIMESTAMP_MONTH_BIT_OFFSET = 14; + public static final int L_TIMESTAMP_DAY_BIT_OFFSET = 18; + public static final int L_TIMESTAMP_HOUR_BIT_OFFSET = 23; + public static final int L_TIMESTAMP_MINUTE_BIT_OFFSET = 28; + public static final int L_TIMESTAMP_OFFSET_BIT_OFFSET = 34; + public static final int L_TIMESTAMP_SECOND_BIT_OFFSET = 46; + public static final int L_TIMESTAMP_UNKNOWN_OFFSET_VALUE = 0b111111111111; //////// Bit masks //////// - static final long LEAST_SIGNIFICANT_7_BITS = 0b01111111L; + public static final long LEAST_SIGNIFICANT_7_BITS = 0b01111111L; } diff --git a/src/test/java/com/amazon/ion/TextToBinaryUtils.kt b/src/test/java/com/amazon/ion/TextToBinaryUtils.kt index 623229ae5..b530bc420 100644 --- a/src/test/java/com/amazon/ion/TextToBinaryUtils.kt +++ b/src/test/java/com/amazon/ion/TextToBinaryUtils.kt @@ -86,9 +86,17 @@ object TextToBinaryUtils { * Converts a byte array to a string of hex bytes, such as "A5 0F EC 52". * The purpose of this method is to make it easier to read and write test assertions. */ + @OptIn(ExperimentalStdlibApi::class) @JvmStatic fun ByteArray.byteArrayToHexString(): String { - return this.joinToString(" ") { it.toUByte().toString(16).padStart(2, '0') } + return this.toHexString( + HexFormat { + upperCase = true + bytes { + byteSeparator = " " + } + } + ) } /** @@ -98,12 +106,4 @@ object TextToBinaryUtils { fun String.decimalStringToIntArray(): IntArray { return octetStringToIntArray(this, 10) } - - /** - * Prints the hex representation of an integer as a single byte, e.g. 
FF for -1 - */ - @JvmStatic - fun Int.toSingleHexByte(): String { - return this.toUByte().toString(16).padStart(2, '0') - } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index 2b1801a13..a4ce25a07 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -24,9 +24,8 @@ import com.amazon.ion.bytecode.bin11.OpcodeTestCases.SHORT_TIMESTAMP_OPCODE_CASE import com.amazon.ion.bytecode.bin11.OpcodeTestCases.STRING_REFERENCE_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.TYPED_NULL_OPCODE_CASES import com.amazon.ion.bytecode.bin11.OpcodeTestCases.replacePositionTemplates +import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder import com.amazon.ion.bytecode.ir.Instructions -import com.amazon.ion.bytecode.util.BytecodeBuffer -import com.amazon.ion.bytecode.util.ConstantPool import org.junit.jupiter.api.Assertions.assertArrayEquals import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.params.ParameterizedTest @@ -56,44 +55,35 @@ class ByteArrayBytecodeGenerator11Test { @ParameterizedTest @MethodSource(SHORT_TIMESTAMP_OPCODE_CASES) fun `generator can read short timestamp references`(encodedTimestampBytes: String, expectedBytecodeString: String, expectedTimestampString: String) { - val timestampReferenceBytes = encodedTimestampBytes.hexStringToByteArray() - val generator = ByteArrayBytecodeGenerator11(timestampReferenceBytes, 0) - val bytecode = BytecodeBuffer() - generator.refill(bytecode, ConstantPool(), intArrayOf(), intArrayOf(), arrayOf()) - - val timestampPrecisionAndOffsetMode = Instructions.getData(bytecode.get(0)) - val timestampPosition = bytecode.get(1) + val bytes = encodedTimestampBytes.hexStringToByteArray() + val generator = ByteArrayBytecodeGenerator11(bytes, 
0) + val opcode = bytes[0].toInt().and(0xFF) val expectedTimestamp = Timestamp.valueOf(expectedTimestampString) - val readTimestamp = generator.readShortTimestampReference(timestampPosition, timestampPrecisionAndOffsetMode) + val readTimestamp = generator.readShortTimestampReference(1, opcode) assertEquals(expectedTimestamp, readTimestamp) } @ParameterizedTest @MethodSource(STRING_REFERENCE_OPCODE_CASES) fun `generator can read string references`(encodedStringBytes: String, expectedBytecodeString: String, expectedString: String) { - val stringReferenceBytes = encodedStringBytes.hexStringToByteArray() - val generator = ByteArrayBytecodeGenerator11(stringReferenceBytes, 0) - val bytecode = BytecodeBuffer() - generator.refill(bytecode, ConstantPool(), intArrayOf(), intArrayOf(), arrayOf()) - - val stringLength = Instructions.getData(bytecode.get(0)) - val stringPosition = bytecode.get(1) - val readString = generator.readTextReference(stringPosition, stringLength) + val bytes = encodedStringBytes.hexStringToByteArray() + val generator = ByteArrayBytecodeGenerator11(bytes, 0) + // Size of input minus the opcode and FlexUInt length prefix + val length = bytes.size - PrimitiveDecoder.lengthOfFlexIntOrUIntAt(bytes, 1) - 1 + val position = bytes.size - length + val readString = generator.readTextReference(position, length) assertEquals(expectedString, readString) } @ParameterizedTest @MethodSource(LOB_REFERENCE_OPCODE_CASES) fun `generator can read lob references`(encodedLobBytes: String, expectedBytecodeString: String, expectedLobBytes: String) { - val lobReferenceBytes = encodedLobBytes.hexStringToByteArray() - val generator = ByteArrayBytecodeGenerator11(lobReferenceBytes, 0) - val bytecode = BytecodeBuffer() - generator.refill(bytecode, ConstantPool(), intArrayOf(), intArrayOf(), arrayOf()) - - val lobLength = Instructions.getData(bytecode.get(0)) - val lobPosition = bytecode.get(1) + val bytes = encodedLobBytes.hexStringToByteArray() + val generator = 
ByteArrayBytecodeGenerator11(bytes, 0) + val length = bytes.size - PrimitiveDecoder.lengthOfFlexIntOrUIntAt(bytes, 1) - 1 + val position = bytes.size - length val expectedLob = expectedLobBytes.hexStringToByteArray() - val readLob = generator.readBytesReference(lobPosition, lobLength).newByteArray() + val readLob = generator.readBytesReference(position, length).newByteArray() assertArrayEquals(expectedLob, readLob) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt index f5e7c59e9..56c5429cd 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt @@ -4,7 +4,6 @@ package com.amazon.ion.bytecode.bin11 import com.amazon.ion.TextToBinaryUtils.byteArrayToHexString import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.TextToBinaryUtils.toSingleHexByte import com.amazon.ion.bytecode.ir.Instructions import com.amazon.ion.bytecode.ir.Instructions.packInstructionData import com.amazon.ion.impl.bin.PrimitiveEncoder @@ -26,6 +25,7 @@ import java.nio.charset.StandardCharsets * sensitive to the opcode's position in the input (e.g. `OP_*_REF` codes) to be reused across test cases that use them * at different offsets. Pass the decimal string to [replacePositionTemplates] to parse these placeholders. 
*/ +@OptIn(ExperimentalStdlibApi::class) // for Byte.toHexString() object OpcodeTestCases { private const val THIS_NAME = "com.amazon.ion.bytecode.bin11.OpcodeTestCases" @@ -81,20 +81,20 @@ object OpcodeTestCases { @JvmStatic fun shortTimestampOpcodeCases() = listOf( - "80 35, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x0)} %pos:1%, 2023T", - "81 35 05, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x1)} %pos:1%, 2023-10T", - "82 35 7D, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x2)} %pos:1%, 2023-10-15T", - "83 35 7D CB 0A, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x3)} %pos:1%, 2023-10-15T11:22Z", - "84 35 7D CB 1A 02, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x4)} %pos:1%, 2023-10-15T11:22:33Z", - "84 35 7D CB 12 02, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x4)} %pos:1%, 2023-10-15T11:22:33-00:00", - "85 35 7D CB 12 F2 06, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x5)} %pos:1%, 2023-10-15T11:22:33.444-00:00", - "86 35 7D CB 12 2E 22 1B, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x6)} %pos:1%, 2023-10-15T11:22:33.444555-00:00", - "87 35 7D CB 12 4A 86 FD 69, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x7)} %pos:1%, 2023-10-15T11:22:33.444555666-00:00", - "88 35 7D CB EA 01, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x8)} %pos:1%, 2023-10-15T11:22+01:15", - "89 35 7D CB EA 85, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x9)} %pos:1%, 2023-10-15T11:22:33+01:15", - "8A 35 7D CB EA 85 BC 01, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0xA)} %pos:1%, 2023-10-15T11:22:33.444+01:15", - "8B 35 7D CB EA 85 8B C8 06, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0xB)} %pos:1%, 2023-10-15T11:22:33.444555+01:15", - "8C 35 7D CB EA 85 92 61 7F 1A, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0xC)} %pos:1%, 2023-10-15T11:22:33.444555666+01:15", + "80 35, 
${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x80)} %pos:1%, 2023T", + "81 35 05, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x81)} %pos:1%, 2023-10T", + "82 35 7D, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x82)} %pos:1%, 2023-10-15T", + "83 35 7D CB 0A, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x83)} %pos:1%, 2023-10-15T11:22Z", + "84 35 7D CB 1A 02, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x84)} %pos:1%, 2023-10-15T11:22:33Z", + "84 35 7D CB 12 02, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x84)} %pos:1%, 2023-10-15T11:22:33-00:00", + "85 35 7D CB 12 F2 06, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x85)} %pos:1%, 2023-10-15T11:22:33.444-00:00", + "86 35 7D CB 12 2E 22 1B, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x86)} %pos:1%, 2023-10-15T11:22:33.444555-00:00", + "87 35 7D CB 12 4A 86 FD 69, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x87)} %pos:1%, 2023-10-15T11:22:33.444555666-00:00", + "88 35 7D CB EA 01, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x88)} %pos:1%, 2023-10-15T11:22+01:15", + "89 35 7D CB EA 85, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x89)} %pos:1%, 2023-10-15T11:22:33+01:15", + "8A 35 7D CB EA 85 BC 01, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x8A)} %pos:1%, 2023-10-15T11:22:33.444+01:15", + "8B 35 7D CB EA 85 8B C8 06, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x8B)} %pos:1%, 2023-10-15T11:22:33.444555+01:15", + "8C 35 7D CB EA 85 92 61 7F 1A, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x8C)} %pos:1%, 2023-10-15T11:22:33.444555666+01:15", // TODO: add tests for max/min values, other extremes ).toArguments() @@ -271,7 +271,7 @@ object OpcodeTestCases { val expectedBytecodeString = "${instruction.packInstructionData(payloadLength)} %pos:$expectedPayloadStartPosition%" val inputBytesSB = StringBuilder(payloadLength * 3 + 
flexUIntStr.length + 4) - inputBytesSB.append("${opcode.toSingleHexByte()} $flexUIntStr ") + inputBytesSB.append("${opcode.toByte().toHexString()} $flexUIntStr ") // Create a dummy payload for this value with all bytes set to zeros. // Not actually looked at by this test, but simulates an encoded value the handler would actually // encounter during parsing. @@ -337,7 +337,7 @@ object OpcodeTestCases { val expectedPayloadStartPosition = flexUIntStr.hexStringToByteArray().size + 1 val expectedBytecodeString = "${Instructions.I_BLOB_REF.packInstructionData(lobSize)} %pos:$expectedPayloadStartPosition%" - val inputBytes = "${OpCode.VARIABLE_LENGTH_BLOB.toSingleHexByte()} $flexUIntStr $it" + val inputBytes = "${OpCode.VARIABLE_LENGTH_BLOB.toByte().toHexString()} $flexUIntStr $it" arguments.add(Arguments.of(inputBytes, expectedBytecodeString, it)) } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt index e8717a275..50126b143 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.TextToBinaryUtils.toSingleHexByte import com.amazon.ion.bytecode.bin11.OpCode import com.amazon.ion.bytecode.bin11.OpcodeTestCases.REFERENCE_OPCODE_CASES import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile @@ -19,6 +18,7 @@ internal class ReferenceOpcodeHandlerTest { * Test that variable-length payload opcodes generate the correct *_REF bytecode. * Does not validate the actual payload in any way. 
*/ + @OptIn(ExperimentalStdlibApi::class) // for Byte.toHexString() @ParameterizedTest @MethodSource(REFERENCE_OPCODE_CASES) fun `handlers for OP_X_REF opcodes emit correct bytecode`(input: String, bytecode: String) { diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt similarity index 93% rename from src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt rename to src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt index 41346bd05..ddc339e23 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampDecoderTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt @@ -9,7 +9,7 @@ import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource -class ShortTimestampDecoderTest { +class TimestampDecoderTest { @ParameterizedTest @CsvSource( @@ -31,7 +31,7 @@ class ShortTimestampDecoderTest { fun `short timestamps are decoded correctly`(input: String, expectedValue: String) { val data = input.hexStringToByteArray() val opcode = data[0].unsignedToInt() - val timestamp = ShortTimestampDecoder.readTimestamp(data, 1, opcode and 0xF) + val timestamp = TimestampDecoder.readShortTimestamp(data, 1, opcode) val expectedTimestamp = Timestamp.valueOf(expectedValue.trim()) assertEquals(expectedTimestamp, timestamp) } From b95ea51dde8c45300f62895c6dded70b8666bf13 Mon Sep 17 00:00:00 2001 From: austnwil Date: Wed, 29 Oct 2025 16:04:07 -0700 Subject: [PATCH 13/22] PR suggestion: Revert refactoring of opcode handler test cases --- .../bin11/bytearray/TimestampDecoder.kt | 12 +- .../java/com/amazon/ion/TextToBinaryUtils.kt | 42 -- .../bin11/ByteArrayBytecodeGenerator11Test.kt | 131 ++--- .../ion/bytecode/bin11/OpcodeTestCases.kt | 453 ------------------ 
.../bytearray/BooleanOpcodeHandlerTest.kt | 67 ++- .../bin11/bytearray/FloatOpcodeHandlerTest.kt | 226 ++++++++- .../bin11/bytearray/IntOpcodeHandlerTest.kt | 207 ++++++-- .../bin11/bytearray/NullOpcodeHandlerTest.kt | 37 +- .../bin11/bytearray/OpcodeHandlerTestUtil.kt | 59 --- .../bytearray/ReferenceOpcodeHandlerTest.kt | 147 +++++- .../ShortTimestampOpcodeHandlerTest.kt | 58 ++- .../bytearray/TypedNullOpcodeHandlerTest.kt | 47 +- 12 files changed, 740 insertions(+), 746 deletions(-) delete mode 100644 src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt delete mode 100644 src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt index 62f2ab848..0a541df1f 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt @@ -8,7 +8,17 @@ import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt16 import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt32 import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt8AsShort import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsLong -import com.amazon.ion.impl.bin.Ion_1_1_Constants.* +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_O_TIMESTAMP_FRACTION_BIT_OFFSET +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_O_TIMESTAMP_OFFSET_BIT_OFFSET +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_O_TIMESTAMP_SECOND_BIT_OFFSET +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_TIMESTAMP_DAY_BIT_OFFSET +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_TIMESTAMP_HOUR_BIT_OFFSET +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_TIMESTAMP_MINUTE_BIT_OFFSET +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_TIMESTAMP_MONTH_BIT_OFFSET +import 
com.amazon.ion.impl.bin.Ion_1_1_Constants.S_U_TIMESTAMP_FRACTION_BIT_OFFSET +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_U_TIMESTAMP_SECOND_BIT_OFFSET +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_U_TIMESTAMP_UTC_FLAG +import com.amazon.ion.impl.bin.Ion_1_1_Constants.S_U_TIMESTAMP_UTC_FLAG_L import java.math.BigDecimal /** diff --git a/src/test/java/com/amazon/ion/TextToBinaryUtils.kt b/src/test/java/com/amazon/ion/TextToBinaryUtils.kt index b530bc420..fcd4dc9a6 100644 --- a/src/test/java/com/amazon/ion/TextToBinaryUtils.kt +++ b/src/test/java/com/amazon/ion/TextToBinaryUtils.kt @@ -24,23 +24,6 @@ object TextToBinaryUtils { return bytesAsBytes } - /** - * Converts a string of octets in the given radix to an int array. Octets must be separated by a space. - * @param octetString the string of space-separated octets. - * @param radix the radix of the octets in the string. - * @return a new int array. - */ - @JvmStatic - private fun octetStringToIntArray(octetString: String, radix: Int): IntArray { - if (octetString.isEmpty()) return IntArray(0) - val intsAsStrings = octetString.split(" +".toRegex()).dropLastWhile { it.isEmpty() }.toTypedArray() - val intsAsInts = IntArray(intsAsStrings.size) - for (i in intsAsInts.indices) { - intsAsInts[i] = intsAsStrings[i].toInt(radix) - } - return intsAsInts - } - /** * Converts a string of binary octets, such as "10010111 00010011", to a byte array. */ @@ -81,29 +64,4 @@ object TextToBinaryUtils { fun ByteArray.byteArrayToBitString(): String { return this.joinToString(" ") { it.toUByte().toString(2).padStart(8, '0') } } - - /** - * Converts a byte array to a string of hex bytes, such as "A5 0F EC 52". - * The purpose of this method is to make it easier to read and write test assertions. 
- */ - @OptIn(ExperimentalStdlibApi::class) - @JvmStatic - fun ByteArray.byteArrayToHexString(): String { - return this.toHexString( - HexFormat { - upperCase = true - bytes { - byteSeparator = " " - } - } - ) - } - - /** - * Converts a string of decimal integers, such as "105 -9349549 0 -12 99999", to an int array. - */ - @JvmStatic - fun String.decimalStringToIntArray(): IntArray { - return octetStringToIntArray(this, 10) - } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index a4ce25a07..942a86d82 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -2,60 +2,37 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11 -import com.amazon.ion.TextToBinaryUtils.decimalStringToIntArray import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray import com.amazon.ion.Timestamp -import com.amazon.ion.bytecode.GeneratorTestUtil.shouldGenerate -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.BOOLEAN_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT0_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT16_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT32_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT64_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT0_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT16_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT24_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT32_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT64_EMITTING_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT8_OPCODE_CASES -import 
com.amazon.ion.bytecode.bin11.OpcodeTestCases.LOB_REFERENCE_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.NULL_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.REFERENCE_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.SHORT_TIMESTAMP_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.STRING_REFERENCE_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.TYPED_NULL_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.replacePositionTemplates -import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder -import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.impl.bin.PrimitiveEncoder import org.junit.jupiter.api.Assertions.assertArrayEquals import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource +import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.ValueSource +import java.nio.charset.StandardCharsets -class ByteArrayBytecodeGenerator11Test { +internal object ByteArrayBytecodeGenerator11Test { @ParameterizedTest - @MethodSource( - BOOLEAN_OPCODE_CASES, NULL_OPCODE_CASES, TYPED_NULL_OPCODE_CASES, FLOAT0_OPCODE_CASES, - FLOAT16_OPCODE_CASES, FLOAT32_OPCODE_CASES, FLOAT64_OPCODE_CASES, SHORT_TIMESTAMP_OPCODE_CASES, - REFERENCE_OPCODE_CASES, INT0_OPCODE_CASES, INT8_OPCODE_CASES, INT16_OPCODE_CASES, INT24_OPCODE_CASES, - INT32_OPCODE_CASES, INT64_EMITTING_OPCODE_CASES, STRING_REFERENCE_OPCODE_CASES, LOB_REFERENCE_OPCODE_CASES + @CsvSource( + "80 35, 2023T", + "81 35 05, 2023-10T", + "82 35 7D, 2023-10-15T", + "83 35 7D CB 0A, 2023-10-15T11:22Z", + "84 35 7D CB 1A 02, 2023-10-15T11:22:33Z", + "84 35 7D CB 12 02, 2023-10-15T11:22:33-00:00", + "85 35 7D CB 12 F2 06, 2023-10-15T11:22:33.444-00:00", + "86 35 7D CB 12 2E 22 1B, 2023-10-15T11:22:33.444555-00:00", + "87 35 7D CB 12 4A 86 FD 69, 
2023-10-15T11:22:33.444555666-00:00", + "88 35 7D CB EA 01, 2023-10-15T11:22+01:15", + "89 35 7D CB EA 85, 2023-10-15T11:22:33+01:15", + "8A 35 7D CB EA 85 BC 01, 2023-10-15T11:22:33.444+01:15", + "8B 35 7D CB EA 85 8B C8 06, 2023-10-15T11:22:33.444555+01:15", + "8C 35 7D CB EA 85 92 61 7F 1A, 2023-10-15T11:22:33.444555666+01:15", ) - fun `generator produces correct bytecode for all supported opcodes`(inputBytesString: String, expectedBytecodeString: String) { - val inputData = inputBytesString.hexStringToByteArray() - val generator = ByteArrayBytecodeGenerator11(inputData, 0) - - generator.shouldGenerate( - intArrayOf( - *replacePositionTemplates(expectedBytecodeString, 0).decimalStringToIntArray(), - Instructions.I_END_OF_INPUT - ) - ) - } - - @ParameterizedTest - @MethodSource(SHORT_TIMESTAMP_OPCODE_CASES) - fun `generator can read short timestamp references`(encodedTimestampBytes: String, expectedBytecodeString: String, expectedTimestampString: String) { - val bytes = encodedTimestampBytes.hexStringToByteArray() + fun `generator can read short timestamp references`(inputBytesString: String, expectedTimestampString: String) { + val bytes = inputBytesString.hexStringToByteArray() val generator = ByteArrayBytecodeGenerator11(bytes, 0) val opcode = bytes[0].toInt().and(0xFF) val expectedTimestamp = Timestamp.valueOf(expectedTimestampString) @@ -64,26 +41,62 @@ class ByteArrayBytecodeGenerator11Test { } @ParameterizedTest - @MethodSource(STRING_REFERENCE_OPCODE_CASES) - fun `generator can read string references`(encodedStringBytes: String, expectedBytecodeString: String, expectedString: String) { - val bytes = encodedStringBytes.hexStringToByteArray() + @ValueSource( + strings = [ + "Hello world", + "\n\nhello\n\n", + "Love it! 
\uD83D\uDE0D❤\uFE0F\uD83D\uDC95\uD83D\uDE3B\uD83D\uDC96", + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`~!@#\$%^&*()-_=+[{]}\\|;:'\",<.>/?", + "Ἀνέβην δέ με σῖτος εὐρυβίοιο Ἰλιάδης τε καὶ Ὀδυσσείας καὶ Φοινικίων", + "", + "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f", + " \tleading and trailing whitespace\u000c\r\n" + ] + ) + fun `generator can read string references`(expectedString: String) { + val utf8Buffer = StandardCharsets.UTF_8.encode(expectedString) + val utf8Bytes = ByteArray(utf8Buffer.remaining()) + utf8Buffer.get(utf8Bytes) + val flexUIntBytes = generateFlexUIntBytes(utf8Bytes.size) + val bytes = byteArrayOf(0xF8.toByte(), *flexUIntBytes, *utf8Bytes) + val generator = ByteArrayBytecodeGenerator11(bytes, 0) // Size of input minus the opcode and FlexUInt length prefix - val length = bytes.size - PrimitiveDecoder.lengthOfFlexIntOrUIntAt(bytes, 1) - 1 - val position = bytes.size - length - val readString = generator.readTextReference(position, length) + val position = flexUIntBytes.size + 1 + val readString = generator.readTextReference(position, utf8Bytes.size) assertEquals(expectedString, readString) } @ParameterizedTest - @MethodSource(LOB_REFERENCE_OPCODE_CASES) - fun `generator can read lob references`(encodedLobBytes: String, expectedBytecodeString: String, expectedLobBytes: String) { - val bytes = encodedLobBytes.hexStringToByteArray() + @ValueSource( + strings = [ + "00 00 00 00 00 00 00 00 00 00", + "FF FF FF FF FF FF FF FF FF FF", + "00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F 50 51 52 53 
54 55 56 57 58 59 5A 5B 5C 5D 5E 5F 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF", + "A5", + "" + ] + ) + fun `generator can read lob references`(expectedLobBytes: String) { + val lobBytes = expectedLobBytes.hexStringToByteArray() + val flexUIntBytes = generateFlexUIntBytes(lobBytes.size) + val bytes = byteArrayOf(0xFE.toByte(), *flexUIntBytes, *lobBytes) + val generator = ByteArrayBytecodeGenerator11(bytes, 0) - val length = bytes.size - PrimitiveDecoder.lengthOfFlexIntOrUIntAt(bytes, 1) - 1 - val position = bytes.size - length - val expectedLob = expectedLobBytes.hexStringToByteArray() - val readLob = generator.readBytesReference(position, length).newByteArray() - assertArrayEquals(expectedLob, readLob) + val position = flexUIntBytes.size + 1 + val readLob = generator.readBytesReference(position, lobBytes.size).newByteArray() + assertArrayEquals(lobBytes, readLob) + } + + /** + * Helper function for generating FlexUInt bytes from an unsigned integer. Useful for test + * cases that programmatically generate length-prefixed payloads. 
+ */ + private fun generateFlexUIntBytes(value: Int): ByteArray { + val asLong = value.toLong() + val length = PrimitiveEncoder.flexUIntLength(asLong) + val bytes = ByteArray(length) + PrimitiveEncoder.writeFlexIntOrUIntInto(bytes, 0, asLong, length) + return bytes } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt b/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt deleted file mode 100644 index 56c5429cd..000000000 --- a/src/test/java/com/amazon/ion/bytecode/bin11/OpcodeTestCases.kt +++ /dev/null @@ -1,453 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 -package com.amazon.ion.bytecode.bin11 - -import com.amazon.ion.TextToBinaryUtils.byteArrayToHexString -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.bytecode.ir.Instructions -import com.amazon.ion.bytecode.ir.Instructions.packInstructionData -import com.amazon.ion.impl.bin.PrimitiveEncoder -import org.junit.jupiter.params.provider.Arguments -import java.nio.charset.StandardCharsets - -/** - * Test cases for every binary 1.1 opcode supported by the bytecode generator. Test cases have the following components: - * - Hex string of input bytes to test - * - Decimal string of expected bytecode after compiling the input bytes - * - String representation of the value encoded by these bytes. This is opcode-specific and up to individual opcode - * handlers to parse and understand. Not every test case supplies this. - * - * Bytecode can contain placeholders in the form `%pos:%`, which should be replaced with `` plus the - * index of the first byte of the binary in the input. 
For example, if a bytecode string contains `%pos:30%` and the - * test suite is writing the binary at index 0 of a byte array passed to a - * [ByteArrayBytecodeGenerator11], then the placeholder should be replaced with `30`, and if the binary were written at - * index 5, the placeholder should be replaced with `35`. This allows tests cases where the resulting bytecode is - * sensitive to the opcode's position in the input (e.g. `OP_*_REF` codes) to be reused across test cases that use them - * at different offsets. Pass the decimal string to [replacePositionTemplates] to parse these placeholders. - */ -@OptIn(ExperimentalStdlibApi::class) // for Byte.toHexString() -object OpcodeTestCases { - - private const val THIS_NAME = "com.amazon.ion.bytecode.bin11.OpcodeTestCases" - - /** - * Parse any placeholders in the form `%pos:%` in [string] to `` plus [position]. Reveals the - * correct bytecode for opcodes that are sensitive to their position in the input. - * - * [position] should be the index in a BytecodeGenerator's input at which you are writing the corresponding - * binary-encoded value. 
- */ - @JvmStatic - fun replacePositionTemplates(string: String, position: Int): String { - return Regex("%pos:(\\d+)%").replace(string) { matchResult -> - (matchResult.groups[1]?.value!!.toInt() + position).toString() - } - } - - const val BOOLEAN_OPCODE_CASES = "$THIS_NAME#booleanOpcodeCases" - - @JvmStatic - fun booleanOpcodeCases() = listOf( - "6E, ${Instructions.I_BOOL.packInstructionData(1)}, true", - "6F, ${Instructions.I_BOOL.packInstructionData(0)}, false", - ).toArguments() - - const val NULL_OPCODE_CASES = "$THIS_NAME#nullOpcodeCases" - - @JvmStatic - fun nullOpcodeCases() = listOf( - "8E, ${Instructions.I_NULL_NULL}", - ).toArguments() - - const val TYPED_NULL_OPCODE_CASES = "$THIS_NAME#typedNullOpcodeCases" - - @JvmStatic - fun typedNullOpcodeCases() = listOf( - "8F 01, ${Instructions.I_NULL_BOOL}", - "8F 02, ${Instructions.I_NULL_INT}", - "8F 03, ${Instructions.I_NULL_FLOAT}", - "8F 04, ${Instructions.I_NULL_DECIMAL}", - "8F 05, ${Instructions.I_NULL_TIMESTAMP}", - "8F 06, ${Instructions.I_NULL_STRING}", - "8F 07, ${Instructions.I_NULL_SYMBOL}", - "8F 08, ${Instructions.I_NULL_BLOB}", - "8F 09, ${Instructions.I_NULL_CLOB}", - "8F 0a, ${Instructions.I_NULL_LIST}", - "8F 0b, ${Instructions.I_NULL_SEXP}", - "8F 0c, ${Instructions.I_NULL_STRUCT}", - ).toArguments() - - const val SHORT_TIMESTAMP_OPCODE_CASES = "$THIS_NAME#shortTimestampOpcodeCases" - - @JvmStatic - fun shortTimestampOpcodeCases() = listOf( - "80 35, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x80)} %pos:1%, 2023T", - "81 35 05, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x81)} %pos:1%, 2023-10T", - "82 35 7D, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x82)} %pos:1%, 2023-10-15T", - "83 35 7D CB 0A, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x83)} %pos:1%, 2023-10-15T11:22Z", - "84 35 7D CB 1A 02, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x84)} %pos:1%, 2023-10-15T11:22:33Z", - "84 35 7D CB 12 02, 
${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x84)} %pos:1%, 2023-10-15T11:22:33-00:00", - "85 35 7D CB 12 F2 06, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x85)} %pos:1%, 2023-10-15T11:22:33.444-00:00", - "86 35 7D CB 12 2E 22 1B, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x86)} %pos:1%, 2023-10-15T11:22:33.444555-00:00", - "87 35 7D CB 12 4A 86 FD 69, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x87)} %pos:1%, 2023-10-15T11:22:33.444555666-00:00", - "88 35 7D CB EA 01, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x88)} %pos:1%, 2023-10-15T11:22+01:15", - "89 35 7D CB EA 85, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x89)} %pos:1%, 2023-10-15T11:22:33+01:15", - "8A 35 7D CB EA 85 BC 01, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x8A)} %pos:1%, 2023-10-15T11:22:33.444+01:15", - "8B 35 7D CB EA 85 8B C8 06, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x8B)} %pos:1%, 2023-10-15T11:22:33.444555+01:15", - "8C 35 7D CB EA 85 92 61 7F 1A, ${Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x8C)} %pos:1%, 2023-10-15T11:22:33.444555666+01:15", - // TODO: add tests for max/min values, other extremes - ).toArguments() - - const val FLOAT0_OPCODE_CASES = "$THIS_NAME#float0OpcodeCases" - - @JvmStatic - fun float0OpcodeCases() = listOf( - "6A, ${Instructions.I_FLOAT_F32} 0, 0", - ).toArguments() - - const val FLOAT16_OPCODE_CASES = "$THIS_NAME#float16OpcodeCases" - - @JvmStatic - fun float16OpcodeCases() = listOf( - "6B 01 00, ${Instructions.I_FLOAT_F32} 864026624, 0.000000059604645", // smallest positive subnormal number - "6B FF 03, ${Instructions.I_FLOAT_F32} 947896320, 0.000060975552", // largest subnormal number - "6B 00 04, ${Instructions.I_FLOAT_F32} 947912704, 0.00006103515625", // smallest positive normal number - "6B FF 7B, ${Instructions.I_FLOAT_F32} 1199562752, 65504", // largest normal number - "6B FF 3B, ${Instructions.I_FLOAT_F32} 1065345024, 
0.99951172", // largest number less than one - "6B 00 3C, ${Instructions.I_FLOAT_F32} 1065353216, 1", - "6B 01 3C, ${Instructions.I_FLOAT_F32} 1065361408, 1.00097656", // smallest number larger than one - - // Same as above, but negative - "6B 01 80, ${Instructions.I_FLOAT_F32} -1283457024, -0.000000059604645", - "6B FF 83, ${Instructions.I_FLOAT_F32} -1199587328, -0.000060975552", - "6B 00 84, ${Instructions.I_FLOAT_F32} -1199570944, -0.00006103515625", - "6B FF FB, ${Instructions.I_FLOAT_F32} -947920896, -65504", - "6B FF BB, ${Instructions.I_FLOAT_F32} -1082138624, -0.99951172", - "6B 00 BC, ${Instructions.I_FLOAT_F32} -1082130432, -1", - "6B 01 BC, ${Instructions.I_FLOAT_F32} -1082122240, -1.00097656", - - "6B 00 00, ${Instructions.I_FLOAT_F32} 0, 0", - "6B 00 80, ${Instructions.I_FLOAT_F32} -2147483648, -0", - "6B 00 7C, ${Instructions.I_FLOAT_F32} 2139095040, Infinity", - "6B 00 FC, ${Instructions.I_FLOAT_F32} -8388608, -Infinity", - "6B 01 7E, ${Instructions.I_FLOAT_F32} 2143297536, NaN", // quiet NaN - "6B 01 7C, ${Instructions.I_FLOAT_F32} 2139103232, NaN", // signaling NaN - "6B 01 FE, ${Instructions.I_FLOAT_F32} -4186112, NaN", // negative quiet NaN - "6B 01 FC, ${Instructions.I_FLOAT_F32} -8380416, NaN", // negative signaling NaN - "6B 53 7F, ${Instructions.I_FLOAT_F32} 2146066432, NaN", // another quiet NaN - "6B 53 FF, ${Instructions.I_FLOAT_F32} -1417216, NaN", // another negative quiet NaN - - "6B 00 C0, ${Instructions.I_FLOAT_F32} -1073741824, -2", - "6B 55 35, ${Instructions.I_FLOAT_F32} 1051369472, 0.33325195", - "6B 48 42, ${Instructions.I_FLOAT_F32} 1078525952, 3.140625" - ).toArguments() - - const val FLOAT32_OPCODE_CASES = "$THIS_NAME#float32OpcodeCases" - - @JvmStatic - fun float32OpcodeCases() = listOf( - "6C 01 00 00 00, ${Instructions.I_FLOAT_F32} 1, 1.4012984643e-45", // smallest positive subnormal number - "6C FF FF 7F 00, ${Instructions.I_FLOAT_F32} 8388607, 1.1754942107e-38", // largest subnormal number - "6C 00 00 80 00, 
${Instructions.I_FLOAT_F32} 8388608, 1.1754943508e-38", // smallest positive normal number - "6C FF FF 7F 7F, ${Instructions.I_FLOAT_F32} 2139095039, 3.4028234664e38", // largest normal number - "6C FF FF 7F 3F, ${Instructions.I_FLOAT_F32} 1065353215, 0.999999940395355225", // largest number less than one - "6C 00 00 80 3F, ${Instructions.I_FLOAT_F32} 1065353216, 1", - "6C 01 00 80 3F, ${Instructions.I_FLOAT_F32} 1065353217, 1.00000011920928955", // smallest number larger than one - - // Same as above, but negative - "6C 01 00 00 80, ${Instructions.I_FLOAT_F32} -2147483647, -1.4012984643e-45", - "6C FF FF 7F 80, ${Instructions.I_FLOAT_F32} -2139095041, -1.1754942107e-38", - "6C 00 00 80 80, ${Instructions.I_FLOAT_F32} -2139095040, -1.1754943508e-38", - "6C FF FF 7F FF, ${Instructions.I_FLOAT_F32} -8388609, -3.4028234664e38", - "6C FF FF 7F BF, ${Instructions.I_FLOAT_F32} -1082130433, -0.999999940395355225", - "6C 00 00 80 BF, ${Instructions.I_FLOAT_F32} -1082130432, -1", - "6C 01 00 80 BF, ${Instructions.I_FLOAT_F32} -1082130431, -1.00000011920928955", - - "6C 00 00 00 00, ${Instructions.I_FLOAT_F32} 0, 0", - "6C 00 00 00 80, ${Instructions.I_FLOAT_F32} -2147483648, -0", - "6C 00 00 80 7F, ${Instructions.I_FLOAT_F32} 2139095040, Infinity", - "6C 00 00 80 FF, ${Instructions.I_FLOAT_F32} -8388608, -Infinity", - "6C 01 00 C0 7F, ${Instructions.I_FLOAT_F32} 2143289345, NaN", // quiet NaN - "6C 01 00 80 7F, ${Instructions.I_FLOAT_F32} 2139095041, NaN", // signaling NaN - "6C 01 00 C0 FF, ${Instructions.I_FLOAT_F32} -4194303, NaN", // negative quiet NaN - "6C 01 00 80 FF, ${Instructions.I_FLOAT_F32} -8388607, NaN", // negative signaling NaN - - "6C 00 00 00 C0, ${Instructions.I_FLOAT_F32} -1073741824, -2", - "6C AB AA AA 3E, ${Instructions.I_FLOAT_F32} 1051372203, 0.333333343267440796", - "6C DB 0F 49 40, ${Instructions.I_FLOAT_F32} 1078530011, 3.14159274101257324" - ).toArguments() - - const val FLOAT64_OPCODE_CASES = "$THIS_NAME#float64OpcodeCases" - - @JvmStatic - fun 
float64OpcodeCases() = listOf( - "6D 01 00 00 00 00 00 00 00, ${Instructions.I_FLOAT_F64} 0 1, 4.9406564584124654e-324", // smallest positive subnormal number - "6D FF FF FF FF FF FF 0F 00, ${Instructions.I_FLOAT_F64} 1048575 -1, 2.2250738585072009e-308", // largest subnormal number - "6D 00 00 00 00 00 00 10 00, ${Instructions.I_FLOAT_F64} 1048576 0, 2.2250738585072014e-308", // smallest positive normal number - "6D FF FF FF FF FF FF EF 7F, ${Instructions.I_FLOAT_F64} 2146435071 -1, 1.7976931348623157e308", // largest normal number - "6D FF FF FF FF FF FF EF 3F, ${Instructions.I_FLOAT_F64} 1072693247 -1, 0.99999999999999988898", // largest number less than one - "6D 00 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 0, 1", - "6D 01 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 1, 1.0000000000000002220", // smallest number larger than one - "6D 02 00 00 00 00 00 F0 3F, ${Instructions.I_FLOAT_F64} 1072693248 2, 1.0000000000000004441", // the second smallest number greater than 1 - - // Same as above, but negative - "6D 01 00 00 00 00 00 00 80, ${Instructions.I_FLOAT_F64} -2147483648 1, -4.9406564584124654e-324", - "6D FF FF FF FF FF FF 0F 80, ${Instructions.I_FLOAT_F64} -2146435073 -1, -2.2250738585072009e-308", - "6D 00 00 00 00 00 00 10 80, ${Instructions.I_FLOAT_F64} -2146435072 0, -2.2250738585072014e-308", - "6D FF FF FF FF FF FF EF FF, ${Instructions.I_FLOAT_F64} -1048577 -1, -1.7976931348623157e308", - "6D FF FF FF FF FF FF EF BF, ${Instructions.I_FLOAT_F64} -1074790401 -1, -0.99999999999999988898", - "6D 00 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 0, -1", - "6D 01 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 1, -1.0000000000000002220", - "6D 02 00 00 00 00 00 F0 BF, ${Instructions.I_FLOAT_F64} -1074790400 2, -1.0000000000000004441", - - "6D 00 00 00 00 00 00 00 00, ${Instructions.I_FLOAT_F64} 0 0, 0", - "6D 00 00 00 00 00 00 00 80, ${Instructions.I_FLOAT_F64} -2147483648 0, -0", - "6D 00 00 00 
00 00 00 F0 7F, ${Instructions.I_FLOAT_F64} 2146435072 0, Infinity", - "6D 00 00 00 00 00 00 F0 FF, ${Instructions.I_FLOAT_F64} -1048576 0, -Infinity", - "6D 01 00 00 00 00 00 F8 7F, ${Instructions.I_FLOAT_F64} 2146959360 1, NaN", // quiet NaN - "6D 01 00 00 00 00 00 F0 7F, ${Instructions.I_FLOAT_F64} 2146435072 1, NaN", // signaling NaN - "6D 01 00 00 00 00 00 F8 FF, ${Instructions.I_FLOAT_F64} -524288 1, NaN", // negative quiet NaN - "6D 01 00 00 00 00 00 F0 FF, ${Instructions.I_FLOAT_F64} -1048576 1, NaN", // negative signaling NaN - "6D FF FF FF FF FF FF FF 7F, ${Instructions.I_FLOAT_F64} 2147483647 -1, NaN", // another quiet NaN - "6D FF FF FF FF FF FF FF FF, ${Instructions.I_FLOAT_F64} -1 -1, NaN", // another negative quiet NaN - - "6D 00 00 00 00 00 00 00 C0, ${Instructions.I_FLOAT_F64} -1073741824 0, -2", - "6D 55 55 55 55 55 55 D5 3F, ${Instructions.I_FLOAT_F64} 1070945621 1431655765, 0.33333333333333331483", - "6D 18 2D 44 54 FB 21 09 40, ${Instructions.I_FLOAT_F64} 1074340347 1413754136, 3.141592653589793116" - ).toArguments() - - const val REFERENCE_OPCODE_CASES = "$THIS_NAME#referenceOpcodeCases" - - /** - * Generates tests for handlers that emit similar *_REF bytecode (instructions packed with a UInt22 reference length - * and followed by a UInt32 position of the data). 
- */ - @JvmStatic - fun referenceOpcodeCases(): List { - val arguments = mutableListOf() - - val instructions = arrayOf( - Pair(Instructions.I_ANNOTATION_REF, 0x59), - Pair(Instructions.I_INT_REF, 0xF5), - Pair(Instructions.I_DECIMAL_REF, 0xF6), - Pair(Instructions.I_TIMESTAMP_REF, 0xF7), - Pair(Instructions.I_STRING_REF, 0xF8), - Pair(Instructions.I_SYMBOL_REF, 0xF9), - Pair(Instructions.I_BLOB_REF, 0xFE), - Pair(Instructions.I_CLOB_REF, 0xFF), - ) - - val testTemplates = listOf( - /* - FlexUInt length prefix for referenced payload - | Decimal payload length - | | Expected payload start position - | | | - | | | */ - "03, 1, 2", - "05, 2, 2", - "07, 3, 2", - "09, 4, 2", - "0B, 5, 2", - "1D, 14, 2", - "7F, 63, 2", - "81, 64, 2", - "FF, 127, 2", - "02 02, 128, 3", - "FE FF, 16383, 3", - "04 00 02, 16384, 4", - "FC FF FF, 2097151, 4", - "08 00 00 02, 2097152, 5", - // Testing up to max length causes OOM errors on java 8 - "01, 0, 2", // zero-length payload TODO: is this legal? - "00 18 00 00 00 00 00 00 00 00 00 00, 1, 13", // overlong encoding on the FlexUInt - ) - - instructions.forEach { (instruction, opcode) -> - testTemplates.forEach { - val (flexUIntStr, payloadLengthStr, expectedPayloadStartPosStr) = it.split(',') - val payloadLength = payloadLengthStr.trim().toInt() - val expectedPayloadStartPosition = expectedPayloadStartPosStr.trim().toInt() - val expectedBytecodeString = "${instruction.packInstructionData(payloadLength)} %pos:$expectedPayloadStartPosition%" - - val inputBytesSB = StringBuilder(payloadLength * 3 + flexUIntStr.length + 4) - inputBytesSB.append("${opcode.toByte().toHexString()} $flexUIntStr ") - // Create a dummy payload for this value with all bytes set to zeros. - // Not actually looked at by this test, but simulates an encoded value the handler would actually - // encounter during parsing. 
- for (i in 0 until payloadLength) { - inputBytesSB.append("00 ") - } - val inputBytes = inputBytesSB.toString() - arguments.add(Arguments.of(inputBytes, expectedBytecodeString)) - } - } - - return arguments - } - - const val STRING_REFERENCE_OPCODE_CASES = "$THIS_NAME#stringReferenceOpcodeCases" - - @JvmStatic - fun stringReferenceOpcodeCases(): List { - val arguments = mutableListOf() - val testStrings = listOf( - "Hello world", - "\n\nhello\n\n", - "Love it! \uD83D\uDE0D❤\uFE0F\uD83D\uDC95\uD83D\uDE3B\uD83D\uDC96", - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`~!@#\$%^&*()-_=+[{]}\\|;:'\",<.>/?", - "Ἀνέβην δέ με σῖτος εὐρυβίοιο Ἰλιάδης τε καὶ Ὀδυσσείας καὶ Φοινικίων", - "", - "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f", - " \tleading and trailing whitespace\u000c\r\n" - ) - - testStrings.forEach { - val utf8Buffer = StandardCharsets.UTF_8.encode(it) - val utf8Bytes = ByteArray(utf8Buffer.remaining()) - utf8Buffer.get(utf8Bytes) - val flexUIntStr = generateFlexUIntHexString(utf8Bytes.size) - val payloadLength = utf8Bytes.size - val expectedPayloadStartPosition = flexUIntStr.hexStringToByteArray().size + 1 - val expectedBytecodeString = "${Instructions.I_STRING_REF.packInstructionData(payloadLength)} %pos:$expectedPayloadStartPosition%" - - val inputBytes = "F8 $flexUIntStr ${utf8Bytes.byteArrayToHexString()}" - arguments.add(Arguments.of(inputBytes, expectedBytecodeString, it)) - } - - return arguments - } - - const val LOB_REFERENCE_OPCODE_CASES = "$THIS_NAME#lobReferenceOpcodeCases" - - @JvmStatic - fun lobReferenceOpcodeCases(): List { - val arguments = mutableListOf() - val testLobBytes = listOf( - "00 00 00 00 00 00 00 00 00 00", - "FF FF FF FF FF FF FF FF FF FF", - "00 01 02 03 04 05 06 
07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF", - "A5", - "" - ) - - testLobBytes.forEach { - val lobSize = it.hexStringToByteArray().size - val flexUIntStr = generateFlexUIntHexString(lobSize) - val expectedPayloadStartPosition = flexUIntStr.hexStringToByteArray().size + 1 - val expectedBytecodeString = "${Instructions.I_BLOB_REF.packInstructionData(lobSize)} %pos:$expectedPayloadStartPosition%" - - val inputBytes = "${OpCode.VARIABLE_LENGTH_BLOB.toByte().toHexString()} $flexUIntStr $it" - arguments.add(Arguments.of(inputBytes, expectedBytecodeString, it)) - } - - return arguments - } - - const val INT0_OPCODE_CASES = "$THIS_NAME#int0OpcodeCases" - - @JvmStatic - fun int0OpcodeCases() = listOf( - "60, ${Instructions.I_INT_I16.packInstructionData(0)}, 0", // 0-byte - ).toArguments() - - const val INT8_OPCODE_CASES = "$THIS_NAME#int8OpcodeCases" - - @JvmStatic - fun int8OpcodeCases() = listOf( - "61 32, ${Instructions.I_INT_I16.packInstructionData(50)}, 50", // 1-byte positive - "61 97, ${Instructions.I_INT_I16.packInstructionData(-105)}, -105", // 1-byte negative - "61 7F, ${Instructions.I_INT_I16.packInstructionData(127)}, 127", // max value - "61 80, ${Instructions.I_INT_I16.packInstructionData(-128)}, -128", // min value - ).toArguments() - - const val INT16_OPCODE_CASES = 
"$THIS_NAME#int16OpcodeCases" - - @JvmStatic - fun int16OpcodeCases() = listOf( - "62 26 73, ${Instructions.I_INT_I16.packInstructionData(29478)}, 29478", // 2-byte positive - "62 50 FC, ${Instructions.I_INT_I16.packInstructionData(-944)}, -944", // 2-byte negative - "62 00 00, ${Instructions.I_INT_I16.packInstructionData(0)}, 0", // 2-byte overlong 0 - "62 FF FF, ${Instructions.I_INT_I16.packInstructionData(-1)}, -1", // 2-byte overlong -1 - "62 80 00, ${Instructions.I_INT_I16.packInstructionData(128)}, 128", // min positive - "62 7F FF, ${Instructions.I_INT_I16.packInstructionData(-129)}, -129", // max negative - "62 FF 7F, ${Instructions.I_INT_I16.packInstructionData(32767)}, 32767", // max value - "62 00 80, ${Instructions.I_INT_I16.packInstructionData(-32768)}, -32768", // min value - ).toArguments() - - const val INT24_OPCODE_CASES = "$THIS_NAME#int24OpcodeCases" - - @JvmStatic - fun int24OpcodeCases() = listOf( - "63 40 42 0F, ${Instructions.I_INT_I32} 1000000, 1000000", // 3-byte positive - "63 4F 34 8B, ${Instructions.I_INT_I32} -7654321, -7654321", // 3-byte negative - "63 00 80 00, ${Instructions.I_INT_I32} 32768, 32768", // min positive, length boundary from i16 - "63 FF FF 7F, ${Instructions.I_INT_I32} 8388607, 8388607", // max value - "63 FF 7F FF, ${Instructions.I_INT_I32} -32769, -32769", // max negative, length boundary from i16 - "63 00 00 80, ${Instructions.I_INT_I32} -8388608, -8388608", // min value - ).toArguments() - - const val INT32_OPCODE_CASES = "$THIS_NAME#int32OpcodeCases" - - @JvmStatic - fun int32OpcodeCases() = listOf( - "64 3B C4 42 7E, ${Instructions.I_INT_I32} 2118304827, 2118304827", // 4-byte positive - "64 57 97 13 E9, ${Instructions.I_INT_I32} -384592041, -384592041", // 4-byte negative - "64 00 00 00 00, ${Instructions.I_INT_I32} 0, 0", // 4-byte overlong 0 - "64 FF FF FF FF, ${Instructions.I_INT_I32} -1, -1", // 4-byte overlong -1 - "64 00 00 80 00, ${Instructions.I_INT_I32} 8388608, 8388608", // length boundary - "64 FF FF 
7F FF, ${Instructions.I_INT_I32} -8388609, -8388609", // length boundary - "64 FF FF FF 7F, ${Instructions.I_INT_I32} ${Int.MAX_VALUE}, ${Int.MAX_VALUE}", // max value - "64 00 00 00 80, ${Instructions.I_INT_I32} ${Int.MIN_VALUE}, ${Int.MIN_VALUE}", // min value - ).toArguments() - - const val INT64_EMITTING_OPCODE_CASES = "$THIS_NAME#int64EmittingOpcodeCases" - - @JvmStatic - fun int64EmittingOpcodeCases() = listOf( - "65 6A 22 7C AB 5C, ${Instructions.I_INT_I64} 92 -1417928086, 398014030442", // 5-byte positive - "65 96 DD 83 54 A3, ${Instructions.I_INT_I64} -93 1417928086, -398014030442", // 5-byte negative - "66 C4 87 8F 09 97 5D, ${Instructions.I_INT_I64} 23959 160401348, 102903281846212", // 6-byte positive - "66 3C 78 70 F6 68 A2, ${Instructions.I_INT_I64} -23960 -160401348, -102903281846212", // 6-byte negative - "67 62 9A 42 56 83 77 10, ${Instructions.I_INT_I64} 1079171 1447205474, 4635005598997090", // 7-byte positive - "67 9E 65 BD A9 7C 88 EF, ${Instructions.I_INT_I64} -1079172 -1447205474, -4635005598997090", // 7-byte negative - "68 A4 F7 64 69 16 27 BF 31, ${Instructions.I_INT_I64} 834610966 1768224676, 3584626805621192612", // 8-byte positive - "68 5C 08 9B 96 E9 D8 40 CE, ${Instructions.I_INT_I64} -834610967 -1768224676, -3584626805621192612", // 8-byte negative - "68 00 00 00 00 00 00 00 00, ${Instructions.I_INT_I64} 0 0, 0", // 8-byte overlong 0 - "68 FF FF FF FF FF FF FF FF, ${Instructions.I_INT_I64} -1 -1, -1", // 8-byte overlong -1 - - "65 00 00 00 80 00, ${Instructions.I_INT_I64} 0 -2147483648, 2147483648", // min positive, length boundary from i32 - "65 FF FF FF FF 7F, ${Instructions.I_INT_I64} 127 -1, 549755813887", - "66 00 00 00 00 80 00, ${Instructions.I_INT_I64} 128 0, 549755813888", // length boundary - "66 FF FF FF FF FF 7F, ${Instructions.I_INT_I64} 32767 -1, 140737488355327", - "67 00 00 00 00 00 80 00, ${Instructions.I_INT_I64} 32768 0, 140737488355328", // length boundary - "67 FF FF FF FF FF FF 7F, ${Instructions.I_INT_I64} 
8388607 -1, 36028797018963967", - "68 00 00 00 00 00 00 80 00, ${Instructions.I_INT_I64} 8388608 0, 36028797018963968", // length boundary - "68 FF FF FF FF FF FF FF 7F, ${Instructions.I_INT_I64} 2147483647 -1, ${Long.MAX_VALUE}", // max value - - "65 FF FF FF 7F FF, ${Instructions.I_INT_I64} -1 2147483647, -2147483649", // max negative, length boundary from i32 - "65 00 00 00 00 80, ${Instructions.I_INT_I64} -128 0, -549755813888", - "66 FF FF FF FF 7F FF, ${Instructions.I_INT_I64} -129 -1, -549755813889", // length boundary - "66 00 00 00 00 00 80, ${Instructions.I_INT_I64} -32768 0, -140737488355328", - "67 FF FF FF FF FF 7F FF, ${Instructions.I_INT_I64} -32769 -1, -140737488355329", // length boundary - "67 00 00 00 00 00 00 80, ${Instructions.I_INT_I64} -8388608 0, -36028797018963968", - "68 FF FF FF FF FF FF 7F FF, ${Instructions.I_INT_I64} -8388609 -1, -36028797018963969", // length boundary - "68 00 00 00 00 00 00 00 80, ${Instructions.I_INT_I64} -2147483648 0, ${Long.MIN_VALUE}", // min value - ).toArguments() - - private fun List.toArguments() = map { - Arguments.of(*it.split(',').map { it.trim() }.toTypedArray()) - } - - /** - * Helper function for generating FlexUInt hex strings from an unsigned integer. Useful for test - * cases that programmatically generate length-prefixed payloads. 
- */ - private fun generateFlexUIntHexString(value: Int): String { - val asLong = value.toLong() - val length = PrimitiveEncoder.flexUIntLength(asLong) - val bytes = ByteArray(length) - PrimitiveEncoder.writeFlexIntOrUIntInto(bytes, 0, asLong, length) - return bytes.byteArrayToHexString() - } -} diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/BooleanOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/BooleanOpcodeHandlerTest.kt index 7077c82a3..f3c7aa215 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/BooleanOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/BooleanOpcodeHandlerTest.kt @@ -2,26 +2,61 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.BOOLEAN_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.unsignedToInt import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.fail -import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource +import org.junit.jupiter.api.Test class BooleanOpcodeHandlerTest { - @ParameterizedTest - @MethodSource(BOOLEAN_OPCODE_CASES) - fun `boolean opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = BooleanOpcodeHandler.shouldCompile(input, bytecode) - val expectedBool = expectedValue.toBoolean() - val representedBool = when (Instructions.getData(buffer.get(0))) { - 1 -> true - 0 -> false - else 
-> fail("Unexpected packed instruction emitted from boolean opcode compiler: ${buffer.get(0)}") - } - assertEquals(expectedBool, representedBool) + @Test + fun `handler emits true bytecode for true opcode`() { + val byteArray: ByteArray = "6E".hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = byteArray[position++].unsignedToInt() + position += BooleanOpcodeHandler.convertOpcodeToBytecode( + opcode, + byteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + val expectedInstruction = Instructions.I_BOOL.packInstructionData(1) + assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) + assertEquals(1, position) + } + + @Test + fun `handler emits false bytecode for false opcode`() { + val byteArray: ByteArray = "6F".hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = byteArray[position++].unsignedToInt() + position += BooleanOpcodeHandler.convertOpcodeToBytecode( + opcode, + byteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + val expectedInstruction = Instructions.I_BOOL.packInstructionData(0) + assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) + assertEquals(1, position) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/FloatOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/FloatOpcodeHandlerTest.kt index 01eac715b..0a1bf5818 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/FloatOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/FloatOpcodeHandlerTest.kt @@ -2,54 +2,228 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT0_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT16_OPCODE_CASES -import 
com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT32_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.FLOAT64_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.unsignedToInt import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource +import org.junit.jupiter.params.provider.CsvSource import kotlin.String class FloatOpcodeHandlerTest { - @ParameterizedTest - @MethodSource(FLOAT0_OPCODE_CASES) - fun `float0 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = Float0OpcodeHandler.shouldCompile(input, bytecode) - val expectedFloat = expectedValue.toFloat() - val representedFloat = Float.fromBits(buffer.get(1)) - assertEquals(expectedFloat, representedFloat) + @Test + fun `float0 opcode handler emits correct bytecode`() { + val inputByteArray: ByteArray = "6A".hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = inputByteArray[position++].unsignedToInt() + position += Float0OpcodeHandler.convertOpcodeToBytecode( + opcode, + inputByteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + val expectedBytecode = intArrayOf(Instructions.I_FLOAT_F32, 0) + assertEqualBytecode(expectedBytecode, buffer.toArray()) + assertEquals(1, position) } @ParameterizedTest - @MethodSource(FLOAT16_OPCODE_CASES) - fun `float16 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = 
Float16OpcodeHandler.shouldCompile(input, bytecode) - val expectedFloat = expectedValue.toFloat() + @CsvSource( + "6B 01 00, 0.000000059604645", // smallest positive subnormal number + "6B FF 03, 0.000060975552", // largest subnormal number + "6B 00 04, 0.00006103515625", // smallest positive normal number + "6B FF 7B, 65504", // largest normal number + "6B FF 3B, 0.99951172", // largest number less than one + "6B 00 3C, 1", + "6B 01 3C, 1.00097656", // smallest number larger than one + + // Same as above, but negative + "6B 01 80, -0.000000059604645", + "6B FF 83, -0.000060975552", + "6B 00 84, -0.00006103515625", + "6B FF FB, -65504", + "6B FF BB, -0.99951172", + "6B 00 BC, -1", + "6B 01 BC, -1.00097656", + + "6B 00 00, 0", + "6B 00 80, -0", + "6B 00 7C, Infinity", + "6B 00 FC, -Infinity", + "6B 01 7E, NaN", // quiet NaN + "6B 01 7C, NaN", // signaling NaN + "6B 01 FE, NaN", // negative quiet NaN + "6B 01 FC, NaN", // negative signaling NaN + "6B 53 7F, NaN", // another quiet NaN + "6B 53 FF, NaN", // another negative quiet NaN + + "6B 00 C0, -2", + "6B 55 35, 0.33325195", + "6B 48 42, 3.140625" + ) + fun `float16 opcode handler emits correct bytecode`(inputString: String, expectedFloat: Float) { + val inputByteArray: ByteArray = inputString.hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = inputByteArray[position++].unsignedToInt() + position += Float16OpcodeHandler.convertOpcodeToBytecode( + opcode, + inputByteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + // We cannot use assertEqualBytecode here because Java will parse all the NaNs in the test suite as quiet with + // all data bits 0, but the actual generated bytecode will have NaN floats with the signaling semantics and data + // preserved from the input. + // Instead, test that the buffer is the right length, the instruction is correct, and the stored float has + // equal value. 
+ assertEquals(2, buffer.size()) + assertEquals(Instructions.I_FLOAT_F32, buffer.get(0)) + val representedFloat = Float.fromBits(buffer.get(1)) assertEquals(expectedFloat, representedFloat) + + assertEquals(3, position) } @ParameterizedTest - @MethodSource(FLOAT32_OPCODE_CASES) - fun `float32 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = Float32OpcodeHandler.shouldCompile(input, bytecode) - val expectedFloat = expectedValue.toFloat() + @CsvSource( + "6C 01 00 00 00, 1.4012984643e-45", // smallest positive subnormal number + "6C FF FF 7F 00, 1.1754942107e-38", // largest subnormal number + "6C 00 00 80 00, 1.1754943508e-38", // smallest positive normal number + "6C FF FF 7F 7F, 3.4028234664e38", // largest normal number + "6C FF FF 7F 3F, 0.999999940395355225", // largest number less than one + "6C 00 00 80 3F, 1", + "6C 01 00 80 3F, 1.00000011920928955", // smallest number larger than one + + // Same as above, but negative + "6C 01 00 00 80, -1.4012984643e-45", + "6C FF FF 7F 80, -1.1754942107e-38", + "6C 00 00 80 80, -1.1754943508e-38", + "6C FF FF 7F FF, -3.4028234664e38", + "6C FF FF 7F BF, -0.999999940395355225", + "6C 00 00 80 BF, -1", + "6C 01 00 80 BF, -1.00000011920928955", + + "6C 00 00 00 00, 0", + "6C 00 00 00 80, -0", + "6C 00 00 80 7F, Infinity", + "6C 00 00 80 FF, -Infinity", + "6C 01 00 C0 7F, NaN", // quiet NaN + "6C 01 00 80 7F, NaN", // signaling NaN + "6C 01 00 C0 FF, NaN", // negative quiet NaN + "6C 01 00 80 FF, NaN", // negative signaling NaN + + "6C 00 00 00 C0, -2", + "6C AB AA AA 3E, 0.333333343267440796", + "6C DB 0F 49 40, 3.14159274101257324" + ) + fun `float32 opcode handler emits correct bytecode`(inputString: String, expectedFloat: Float) { + val inputByteArray: ByteArray = inputString.hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = inputByteArray[position++].unsignedToInt() + position += 
Float32OpcodeHandler.convertOpcodeToBytecode( + opcode, + inputByteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + assertEquals(2, buffer.size()) + assertEquals(Instructions.I_FLOAT_F32, buffer.get(0)) + val representedFloat = Float.fromBits(buffer.get(1)) assertEquals(expectedFloat, representedFloat) + + assertEquals(5, position) } @ParameterizedTest - @MethodSource(FLOAT64_OPCODE_CASES) - fun `float64 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = DoubleOpcodeHandler.shouldCompile(input, bytecode) - val expectedFloat = expectedValue.toDouble() + @CsvSource( + "6D 01 00 00 00 00 00 00 00, 4.9406564584124654e-324", // smallest positive subnormal number + "6D FF FF FF FF FF FF 0F 00, 2.2250738585072009e-308", // largest subnormal number + "6D 00 00 00 00 00 00 10 00, 2.2250738585072014e-308", // smallest positive normal number + "6D FF FF FF FF FF FF EF 7F, 1.7976931348623157e308", // largest normal number + "6D FF FF FF FF FF FF EF 3F, 0.99999999999999988898", // largest number less than one + "6D 00 00 00 00 00 00 F0 3F, 1", + "6D 01 00 00 00 00 00 F0 3F, 1.0000000000000002220", // smallest number larger than one + "6D 02 00 00 00 00 00 F0 3F, 1.0000000000000004441", // the second smallest number greater than 1 + + // Same as above, but negative + "6D 01 00 00 00 00 00 00 80, -4.9406564584124654e-324", + "6D FF FF FF FF FF FF 0F 80, -2.2250738585072009e-308", + "6D 00 00 00 00 00 00 10 80, -2.2250738585072014e-308", + "6D FF FF FF FF FF FF EF FF, -1.7976931348623157e308", + "6D FF FF FF FF FF FF EF BF, -0.99999999999999988898", + "6D 00 00 00 00 00 00 F0 BF, -1", + "6D 01 00 00 00 00 00 F0 BF, -1.0000000000000002220", + "6D 02 00 00 00 00 00 F0 BF, -1.0000000000000004441", + + "6D 00 00 00 00 00 00 00 00, 0", + "6D 00 00 00 00 00 00 00 80, -0", + "6D 00 00 00 00 00 00 F0 7F, Infinity", + "6D 00 00 00 00 00 00 F0 FF, -Infinity", + "6D 01 00 00 00 00 
00 F8 7F, NaN", // quiet NaN + "6D 01 00 00 00 00 00 F0 7F, NaN", // signaling NaN + "6D 01 00 00 00 00 00 F8 FF, NaN", // negative quiet NaN + "6D 01 00 00 00 00 00 F0 FF, NaN", // negative signaling NaN + "6D FF FF FF FF FF FF FF 7F, NaN", // another quiet NaN + "6D FF FF FF FF FF FF FF FF, NaN", // another negative quiet NaN + + "6D 00 00 00 00 00 00 00 C0, -2", + "6D 55 55 55 55 55 55 D5 3F, 0.33333333333333331483", + "6D 18 2D 44 54 FB 21 09 40, 3.141592653589793116" + ) + fun `float64 opcode handler emits correct bytecode`(inputString: String, expectedDouble: Double) { + val inputByteArray: ByteArray = inputString.hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = inputByteArray[position++].unsignedToInt() + position += DoubleOpcodeHandler.convertOpcodeToBytecode( + opcode, + inputByteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + assertEquals(3, buffer.size()) + assertEquals(Instructions.I_FLOAT_F64, buffer.get(0)) + val representedFloat = Double.fromBits( buffer.get(1).toLong().shl(32) .or(buffer.get(2).toLong().and(0xFFFF_FFFF)) ) - assertEquals(expectedFloat, representedFloat) + assertEquals(expectedDouble, representedFloat) + + assertEquals(9, position) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt index 60cf57bea..460684985 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt @@ -2,72 +2,179 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT0_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT16_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT24_OPCODE_CASES -import 
com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT32_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT64_EMITTING_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.INT8_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.unsignedToInt import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource +import org.junit.jupiter.params.provider.CsvSource import kotlin.String class IntOpcodeHandlerTest { + // These tests need to include the IVM in the test bytecode (or any 4 bytes before the FixedInt) + // because the BinaryPrimitiveReader has logic that expects this to always be the case. 
@ParameterizedTest - @MethodSource(INT0_OPCODE_CASES) - fun `int0 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = Int0OpcodeHandler.shouldCompile(input, bytecode) - val expectedShort = expectedValue.toShort() - val representedShort = Instructions.getData(buffer.get(0)).toShort() - assertEquals(expectedShort, representedShort) - } + @CsvSource( + "60, 0, 0", // 0-byte + "61 32, 1, 50", // 1-byte positive + "61 97, 1, -105", // 1-byte negative + "62 26 73, 2, 29478", // 2-byte positive + "62 50 FC, 2, -944", // 2-byte negative + "62 00 00, 2, 0", // 2-byte overlong 0 + "62 FF FF, 2, -1", // 2-byte overlong -1 - @ParameterizedTest - @MethodSource(INT8_OPCODE_CASES) - fun `int8 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = Int8OpcodeHandler.shouldCompile(input, bytecode) - val expectedShort = expectedValue.toShort() - val representedShort = Instructions.getData(buffer.get(0)).toShort() - assertEquals(expectedShort, representedShort) - } + "61 7F, 1, 127", + "62 80 00, 2, 128", // length boundary + "62 FF 7F, 2, 32767", // max value - @ParameterizedTest - @MethodSource(INT16_OPCODE_CASES) - fun `int16 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = Int16OpcodeHandler.shouldCompile(input, bytecode) - val expectedShort = expectedValue.toShort() - val representedShort = Instructions.getData(buffer.get(0)).toShort() - assertEquals(expectedShort, representedShort) - } + "61 80, 1, -128", + "62 7F FF, 2, -129", // length boundary + "62 00 80, 2, -32768", // min value + ) + fun testI16EmittingIntHandlers( + inputString: String, + expectedBytesRead: Int, + expectedInt16: Short + ) { + val handlersByBytesRead = arrayOf(Int0OpcodeHandler, Int8OpcodeHandler, Int16OpcodeHandler) - @ParameterizedTest - @MethodSource(INT24_OPCODE_CASES) - fun `int24 opcode handler emits correct 
bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = Int24OpcodeHandler.shouldCompile(input, bytecode) - val expectedInt = expectedValue.toInt() - val representedInt = buffer.get(1) - assertEquals(expectedInt, representedInt) + val inputByteArray: ByteArray = "E0 01 01 EA $inputString".hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 4 // skip the IVM + val opcode = inputByteArray[position++].unsignedToInt() + position += handlersByBytesRead[expectedBytesRead].convertOpcodeToBytecode( + opcode, + inputByteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + val expectedInstruction = Instructions.I_INT_I16.packInstructionData(expectedInt16.toInt()) + assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) + assertEquals(5 + expectedBytesRead, position) + + val representedInteger = Instructions.getData(buffer.get(0)).toShort() + assertEquals(expectedInt16, representedInteger) } @ParameterizedTest - @MethodSource(INT32_OPCODE_CASES) - fun `int32 opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = Int32OpcodeHandler.shouldCompile(input, bytecode) - val expectedInt = expectedValue.toInt() - val representedInt = buffer.get(1) - assertEquals(expectedInt, representedInt) + @CsvSource( + "63 40 42 0F, 3, 1000000", // 3-byte positive + "63 4F 34 8B, 3, -7654321", // 3-byte negative + "64 3B C4 42 7E, 4, 2118304827", // 4-byte positive + "64 57 97 13 E9, 4, -384592041", // 4-byte negative + "64 00 00 00 00, 4, 0", // 4-byte overlong 0 + "64 FF FF FF FF, 4, -1", // 4-byte overlong -1 + + "63 00 80 00, 3, 32768", // min positive, length boundary from i16 + "63 FF FF 7F, 3, 8388607", + "64 00 00 80 00, 4, 8388608", // length boundary + "64 FF FF FF 7F, 4, ${Int.MAX_VALUE}", // max value + + "63 FF 7F FF, 3, -32769", // max negative, length boundary from i16 + "63 00 00 80, 3, -8388608", + "64 FF FF 7F FF, 
4, -8388609", // length boundary + "64 00 00 00 80, 4, ${Int.MIN_VALUE}", // min value + ) + fun testI32EmittingIntHandlers( + inputString: String, + expectedBytesRead: Int, + expectedInt32: Int + ) { + val handlersByBytesRead = arrayOf(Int24OpcodeHandler, Int32OpcodeHandler) + + val inputByteArray: ByteArray = "E0 01 01 EA $inputString".hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 4 // skip the IVM + val opcode = inputByteArray[position++].unsignedToInt() + position += handlersByBytesRead[expectedBytesRead - 3].convertOpcodeToBytecode( + opcode, + inputByteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + val expectedBytecode = intArrayOf(Instructions.I_INT_I32, expectedInt32) + assertEqualBytecode(expectedBytecode, buffer.toArray()) + assertEquals(5 + expectedBytesRead, position) } @ParameterizedTest - @MethodSource(INT64_EMITTING_OPCODE_CASES) - fun `long int opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - val buffer = LongIntOpcodeHandler.shouldCompile(input, bytecode) - val expectedLong = expectedValue.toLong() - val representedLong = (buffer.get(1).toLong() shl 32) or (buffer.get(2).toLong() and 0xFFFF_FFFF) - assertEquals(expectedLong, representedLong) + @CsvSource( + "65 6A 22 7C AB 5C, 5, 398014030442", // 5-byte positive + "65 96 DD 83 54 A3, 5, -398014030442", // 5-byte negative + "66 C4 87 8F 09 97 5D, 6, 102903281846212", // 6-byte positive + "66 3C 78 70 F6 68 A2, 6, -102903281846212", // 6-byte negative + "67 62 9A 42 56 83 77 10, 7, 4635005598997090", // 7-byte positive + "67 9E 65 BD A9 7C 88 EF, 7, -4635005598997090", // 7-byte negative + "68 A4 F7 64 69 16 27 BF 31, 8, 3584626805621192612", // 8-byte positive + "68 5C 08 9B 96 E9 D8 40 CE, 8, -3584626805621192612", // 8-byte negative + "68 00 00 00 00 00 00 00 00, 8, 0", // 8-byte overlong 0 + "68 FF FF FF FF FF FF FF FF, 8, -1", // 8-byte overlong -1 + + "65 00 00 00 
80 00, 5, 2147483648", // min positive, length boundary from i32 + "65 FF FF FF FF 7F, 5, 549755813887", + "66 00 00 00 00 80 00, 6, 549755813888", // length boundary + "66 FF FF FF FF FF 7F, 6, 140737488355327", + "67 00 00 00 00 00 80 00, 7, 140737488355328", // length boundary + "67 FF FF FF FF FF FF 7F, 7, 36028797018963967", + "68 00 00 00 00 00 00 80 00, 8, 36028797018963968", // length boundary + "68 FF FF FF FF FF FF FF 7F, 8, ${Long.MAX_VALUE}", // max value + + "65 FF FF FF 7F FF, 5, -2147483649", // max negative, length boundary from i32 + "65 00 00 00 00 80, 5, -549755813888", + "66 FF FF FF FF 7F FF, 6, -549755813889", // length boundary + "66 00 00 00 00 00 80, 6, -140737488355328", + "67 FF FF FF FF FF 7F FF, 7, -140737488355329", // length boundary + "67 00 00 00 00 00 00 80, 7, -36028797018963968", + "68 FF FF FF FF FF FF 7F FF, 8, -36028797018963969", // length boundary + "68 00 00 00 00 00 00 00 80, 8, ${Long.MIN_VALUE}", // min value + ) + fun testI64EmittingIntHandler( + inputString: String, + expectedBytesRead: Int, + expectedInt64: Long + ) { + val inputByteArray: ByteArray = "E0 01 01 EA $inputString".hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 4 // skip the IVM + val opcode = inputByteArray[position++].unsignedToInt() + position += LongIntOpcodeHandler.convertOpcodeToBytecode( + opcode, + inputByteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + val expectedBytecode = intArrayOf( + Instructions.I_INT_I64, + (expectedInt64 ushr 32).toInt(), + expectedInt64.toInt() + ) + assertEqualBytecode(expectedBytecode, buffer.toArray()) + assertEquals(5 + expectedBytesRead, position) + + val representedInteger = (buffer.get(1).toLong() shl 32) or (buffer.get(2).toLong() and 0xFFFF_FFFF) + assertEquals(expectedInt64, representedInteger) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/NullOpcodeHandlerTest.kt 
b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/NullOpcodeHandlerTest.kt index eaa069dca..a1a855f80 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/NullOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/NullOpcodeHandlerTest.kt @@ -2,16 +2,37 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.NULL_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile -import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.unsignedToInt +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test class NullOpcodeHandlerTest { - @ParameterizedTest - @MethodSource(NULL_OPCODE_CASES) - fun `null opcode handler emits correct bytecode`(input: String, bytecode: String) { - NullOpcodeHandler.shouldCompile(input, bytecode) + @Test + fun `handler emits null bytecode for null opcode`() { + val byteArray: ByteArray = "8E".hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = byteArray[position++].unsignedToInt() + position += NullOpcodeHandler.convertOpcodeToBytecode( + opcode, + byteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + val expectedInstruction = Instructions.I_NULL_NULL + assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) + assertEquals(1, position) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt 
b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt deleted file mode 100644 index 850618243..000000000 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 -package com.amazon.ion.bytecode.bin11.bytearray - -import com.amazon.ion.TextToBinaryUtils.decimalStringToIntArray -import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray -import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.replacePositionTemplates -import com.amazon.ion.bytecode.util.BytecodeBuffer -import com.amazon.ion.bytecode.util.ConstantPool -import com.amazon.ion.bytecode.util.unsignedToInt -import org.junit.jupiter.api.Assertions.assertEquals - -internal object OpcodeHandlerTestUtil { - /** - * Asserts that an opcode handler compiles the given input bytes to the given bytecode and that the position - * returned by the handler points immediately after the last byte in the input. 
- * - * @return The bytecode buffer containing the bytecode compiled by this handler, for convenience of test cases - * that wish to further validate the compiled bytecode represents a particular value - */ - fun OpcodeToBytecodeHandler.shouldCompile(inputBytes: ByteArray, expectedBytecode: IntArray): BytecodeBuffer { - val buffer = BytecodeBuffer() - - var position = 0 - val opcode = inputBytes[position++].unsignedToInt() - position += this.convertOpcodeToBytecode( - opcode, - inputBytes, - position, - buffer, - ConstantPool(0), - intArrayOf(), - intArrayOf(), - arrayOf() - ) - - assertEqualBytecode(expectedBytecode, buffer.toArray()) - assertEquals(inputBytes.size, position) - - return buffer - } - - /** - * Asserts that an opcode handler compiles the given input bytes to the given bytecode and that the position - * returned by the handler points immediately after the last byte in the input. - * - * Takes a hex string for the input bytes and a decimal string for the expected bytecode. 
- * - * @return The bytecode buffer containing the bytecode compiled by this handler, for convenience of test cases - * that wish to further validate the compiled bytecode represents a particular value - */ - fun OpcodeToBytecodeHandler.shouldCompile(inputBytes: String, expectedBytecode: String): BytecodeBuffer { - return this.shouldCompile( - inputBytes.hexStringToByteArray(), - replacePositionTemplates(expectedBytecode, 0).decimalStringToIntArray() - ) - } -} diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt index 50126b143..81986bbd2 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ReferenceOpcodeHandlerTest.kt @@ -2,38 +2,145 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.bytecode.bin11.OpCode -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.REFERENCE_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.unsignedToInt +import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.TestInstance -import org.junit.jupiter.api.fail import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments import org.junit.jupiter.params.provider.MethodSource @TestInstance(TestInstance.Lifecycle.PER_CLASS) internal class ReferenceOpcodeHandlerTest { + /** + * Generates tests for handlers that 
emit similar *_REF bytecode (instructions packed with a UInt22 reference length + * and followed by a UInt32 position of the data). Returns a list of [Arguments] objects in the form: + * - Instance of [ReferenceOpcodeHandler] to test + * - Input [ByteArray] with meaningless bytes for the payload data + * - Expected generated bytecode as [IntArray] + * - Expected end position as [Int] + */ + private fun referenceOpcodeHandlerTestCases(): List { + val arguments = mutableListOf() + + val instructions = arrayOf( + Pair(Instructions.I_ANNOTATION_REF, 0x59), + Pair(Instructions.I_INT_REF, 0xF5), + Pair(Instructions.I_DECIMAL_REF, 0xF6), + Pair(Instructions.I_TIMESTAMP_REF, 0xF7), + Pair(Instructions.I_STRING_REF, 0xF8), + Pair(Instructions.I_SYMBOL_REF, 0xF9), + Pair(Instructions.I_BLOB_REF, 0xFE), + Pair(Instructions.I_CLOB_REF, 0xFF), + ) + + val testTemplates = listOf( + /* + FlexUInt length prefix for referenced payload + | Decimal payload length + | | Expected payload start position + | | | Expected end position after handling + | | | | */ + "03, 1, 2, 3", + "05, 2, 2, 4", + "07, 3, 2, 5", + "09, 4, 2, 6", + "0B, 5, 2, 7", + "1D, 14, 2, 16", + "7F, 63, 2, 65", + "81, 64, 2, 66", + "FF, 127, 2, 129", + "02 02, 128, 3, 131", + "FE FF, 16383, 3, 16386", + "04 00 02, 16384, 4, 16388", + "FC FF FF, 2097151, 4, 2097155", + "08 00 00 02, 2097152, 5, 2097157", + "F8 FF FF 03, 4194303, 5, 4194308", // maximum length of a payload + "01, 0, 2, 2", // zero-length payload TODO: is this legal? + "00 18 00 00 00 00 00 00 00 00 00 00, 1, 13, 14", // overlong encoding on the FlexUInt + ) + + // This loop maps the above templates into a tests for each opcode. The templates above consist of some bytecode + // representing the FlexUInt length of the payload, the decimal value of the FlexUInt length prefix, the expected + // start position of the payload, and the expected end position of the generator after executing the handler. 
+ // + // The opcode will be prepended to the bytecode string, and null bytes will be generated to fill the payload. + // + // Output Arguments objects are tuples of: + // (ReferenceOpcodeHandler to test, bytecode to test, expected bytecode, expected end position) + // + // Example - this string: + // "0B, 5, 2, 7" + // will emit the following Arguments objects: + // , F5 0B <5 payload bytes>, , 7 + // , F6 0B <5 payload bytes>, , 7 + // ... + instructions.forEach { (instruction, opcode) -> + testTemplates.forEach { + val (flexUIntStr, payloadLengthStr, expectedPayloadStartPosStr, expectedEndPositionStr) = it.split(',') + val payloadLength = payloadLengthStr.trim().toInt() + val expectedPayloadStartPosition = expectedPayloadStartPosStr.trim().toInt() + val expectedEndPosition = expectedEndPositionStr.trim().toInt() + + // Create a dummy payload for this value with all bytes set to zeros. + // Not actually looked at by this test, but simulates an encoded value the handler would actually + // encounter during parsing. + val payload = ByteArray(payloadLength) + arguments.add( + Arguments.of( + ReferenceOpcodeHandler(instruction), + byteArrayOf( + opcode.toByte(), // write the opcode + *flexUIntStr.trim().hexStringToByteArray(), // then the FlexUInt + *payload // then the payload bytes + ), + intArrayOf( + instruction.packInstructionData(payloadLength), + expectedPayloadStartPosition + ), + expectedEndPosition + ) + ) + } + } + + return arguments + } + /** * Test that variable-length payload opcodes generate the correct *_REF bytecode. * Does not validate the actual payload in any way. 
*/ - @OptIn(ExperimentalStdlibApi::class) // for Byte.toHexString() @ParameterizedTest - @MethodSource(REFERENCE_OPCODE_CASES) - fun `handlers for OP_X_REF opcodes emit correct bytecode`(input: String, bytecode: String) { - val opcode = input.take(2).toInt(16) - val handler = when (opcode) { - OpCode.ANNOTATION_TEXT -> ReferenceOpcodeHandler(Instructions.I_ANNOTATION_REF) - OpCode.VARIABLE_LENGTH_INTEGER -> ReferenceOpcodeHandler(Instructions.I_INT_REF) - OpCode.VARIABLE_LENGTH_DECIMAL -> ReferenceOpcodeHandler(Instructions.I_DECIMAL_REF) - OpCode.VARIABLE_LENGTH_TIMESTAMP -> ReferenceOpcodeHandler(Instructions.I_TIMESTAMP_REF) - OpCode.VARIABLE_LENGTH_STRING -> ReferenceOpcodeHandler(Instructions.I_STRING_REF) - OpCode.VARIABLE_LENGTH_SYMBOL -> ReferenceOpcodeHandler(Instructions.I_SYMBOL_REF) - OpCode.VARIABLE_LENGTH_BLOB -> ReferenceOpcodeHandler(Instructions.I_BLOB_REF) - OpCode.VARIABLE_LENGTH_CLOB -> ReferenceOpcodeHandler(Instructions.I_CLOB_REF) - else -> fail("Opcode is not a variable-length reference opcode: 0x${opcode.toByte().toHexString()}") - } - handler.shouldCompile(input, bytecode) + @MethodSource("referenceOpcodeHandlerTestCases") + fun `handlers for OP_X_REF opcodes emit correct bytecode`( + handler: ReferenceOpcodeHandler, + inputByteArray: ByteArray, + expectedBytecode: IntArray, + expectedEndPosition: Int + ) { + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = inputByteArray[position++].unsignedToInt() + position += handler.convertOpcodeToBytecode( + opcode, + inputByteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + assertEqualBytecode(expectedBytecode, buffer.toArray()) + assertEquals(expectedEndPosition, position) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandlerTest.kt index 82e73b45a..a55c34d53 100644 --- 
a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ShortTimestampOpcodeHandlerTest.kt @@ -2,17 +2,63 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.SHORT_TIMESTAMP_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.unsignedToInt +import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource +import org.junit.jupiter.params.provider.CsvSource import kotlin.String class ShortTimestampOpcodeHandlerTest { @ParameterizedTest - @MethodSource(SHORT_TIMESTAMP_OPCODE_CASES) - fun `short timestamp opcode handler emits correct bytecode`(input: String, bytecode: String, expectedValue: String) { - ShortTimestampOpcodeHandler.shouldCompile(input, bytecode) + @CsvSource( + "80 35, 2", // 2023T + "81 35 05, 3", // 2023-10T + "82 35 7D, 3", // 2023-10-15T + "83 35 7D CB 0A, 5", // 2023-10-15T11:22Z + "84 35 7D CB 1A 02, 6", // 2023-10-15T11:22:33Z + "84 35 7D CB 12 02, 6", // 2023-10-15T11:22:33-00:00 + "85 35 7D CB 12 F2 06, 7", // 2023-10-15T11:22:33.444-00:00 + "86 35 7D CB 12 2E 22 1B, 8", // 2023-10-15T11:22:33.444555-00:00 + "87 35 7D CB 12 4A 86 FD 69, 9", // 2023-10-15T11:22:33.444555666-00:00 + "88 35 7D CB EA 01, 6", // 2023-10-15T11:22+01:15 + "89 35 7D CB EA 85, 6", // 2023-10-15T11:22:33+01:15 + "8A 35 7D CB EA 85 BC 01, 8", // 2023-10-15T11:22:33.444+01:15 
+ "8B 35 7D CB EA 85 8B C8 06, 9", // 2023-10-15T11:22:33.444555+01:15 + "8C 35 7D CB EA 85 92 61 7F 1A, 10", // 2023-10-15T11:22:33.444555666+01:15 + ) + fun `short timestamp opcode handler emits correct bytecode`( + inputString: String, + expectedEndPosition: Int + ) { + val inputByteArray = inputString.hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = inputByteArray[position++].unsignedToInt() + position += ShortTimestampOpcodeHandler.convertOpcodeToBytecode( + opcode, + inputByteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + val expectedPayloadStartPosition = 1 + val expectedBytecode = intArrayOf( + Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(inputByteArray[0].unsignedToInt()), + expectedPayloadStartPosition + ) + assertEqualBytecode(expectedBytecode, buffer.toArray()) + assertEquals(expectedEndPosition, position) } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TypedNullOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TypedNullOpcodeHandlerTest.kt index f39ccca86..dd5c8cca6 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TypedNullOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TypedNullOpcodeHandlerTest.kt @@ -2,16 +2,51 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray -import com.amazon.ion.bytecode.bin11.OpcodeTestCases.TYPED_NULL_OPCODE_CASES -import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.unsignedToInt +import org.junit.jupiter.api.Assertions.assertEquals import 
org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource +import org.junit.jupiter.params.provider.CsvSource class TypedNullOpcodeHandlerTest { @ParameterizedTest - @MethodSource(TYPED_NULL_OPCODE_CASES) - fun `typed null opcode handler emits correct bytecode`(input: String, bytecode: String) { - TypedNullOpcodeHandler.shouldCompile(input, bytecode) + @CsvSource( + "8F 01, ${Instructions.I_NULL_BOOL}", + "8F 02, ${Instructions.I_NULL_INT}", + "8F 03, ${Instructions.I_NULL_FLOAT}", + "8F 04, ${Instructions.I_NULL_DECIMAL}", + "8F 05, ${Instructions.I_NULL_TIMESTAMP}", + "8F 06, ${Instructions.I_NULL_STRING}", + "8F 07, ${Instructions.I_NULL_SYMBOL}", + "8F 08, ${Instructions.I_NULL_BLOB}", + "8F 09, ${Instructions.I_NULL_CLOB}", + "8F 0a, ${Instructions.I_NULL_LIST}", + "8F 0b, ${Instructions.I_NULL_SEXP}", + "8F 0c, ${Instructions.I_NULL_STRUCT}", + ) + fun testTypedNull(inputString: String, expectedInstruction: Int) { + val byteArray: ByteArray = inputString.hexStringToByteArray() + val buffer = BytecodeBuffer() + + var position = 0 + val opcode = byteArray[position++].unsignedToInt() + position += TypedNullOpcodeHandler.convertOpcodeToBytecode( + opcode, + byteArray, + position, + buffer, + ConstantPool(0), + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + assertEqualBytecode(intArrayOf(expectedInstruction), buffer.toArray()) + assertEquals(2, position) } } From 2cac7a2bae2b0a089ef5dd22b8f7897cf30a1a04 Mon Sep 17 00:00:00 2001 From: austnwil Date: Wed, 29 Oct 2025 16:53:10 -0700 Subject: [PATCH 14/22] Don't emit I_REFILL in generator --- .../amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index af93deb58..5744516c9 100644 --- 
a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -50,9 +50,7 @@ internal class ByteArrayBytecodeGenerator11( ) } - if (currentPosition < source.size) { - destination.add(Instructions.I_REFILL) - } else { + if (currentPosition >= source.size) { destination.add(Instructions.I_END_OF_INPUT) } } From 39294f354528661e99ff67eb054df3918c87aab0 Mon Sep 17 00:00:00 2001 From: austnwil Date: Wed, 29 Oct 2025 16:53:26 -0700 Subject: [PATCH 15/22] Add generator tests --- .../bin11/ByteArrayBytecodeGenerator11Test.kt | 36 +++++++++++++++++++ .../bin11/bytearray/TimestampDecoderTest.kt | 1 + 2 files changed, 37 insertions(+) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index 942a86d82..25a126057 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -4,6 +4,9 @@ package com.amazon.ion.bytecode.bin11 import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.GeneratorTestUtil.shouldGenerate +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData import com.amazon.ion.impl.bin.PrimitiveEncoder import org.junit.jupiter.api.Assertions.assertArrayEquals import org.junit.jupiter.api.Assertions.assertEquals @@ -14,6 +17,39 @@ import java.nio.charset.StandardCharsets internal object ByteArrayBytecodeGenerator11Test { + @ParameterizedTest + @ValueSource( + strings = [ + "64 4F 97 21 C5 " + // int32 -987654321 + "86 35 7D CB 12 2E 22 1B " + // short TS reference to 2023-10-15T11:22:33.444555-00:00 + "8F 0C " + // null struct + "6A " + // float 0e0 + "6D 18 2D 44 54 FB 21 09 40 " + // float64 
3.141592653589793 + "FE 31 49 20 61 70 70 6c 61 75 64 20 79 6f 75 72 20 63 75 72 69 6f 73 69 74 79 " + // 24-byte blob + "6F " // false + ] + ) + fun `generator can compile input containing multiple simple opcodes`(inputBytesString: String) { + val f64pi = 3.141592653589793 + val expectedBytecode = intArrayOf( + Instructions.I_INT_I32, -987654321, + Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(0x86), 6, + Instructions.I_NULL_STRUCT, + Instructions.I_FLOAT_F32, 0, + Instructions.I_FLOAT_F64, f64pi.toRawBits().ushr(32).and(0xFFFFFFFF).toInt(), f64pi.toRawBits().toInt(), + Instructions.I_BLOB_REF.packInstructionData(24), 27, + Instructions.I_BOOL.packInstructionData(0), + Instructions.I_END_OF_INPUT + ) + + val bytes = inputBytesString.hexStringToByteArray() + val generator = ByteArrayBytecodeGenerator11(bytes, 0) + generator.shouldGenerate(expectedBytecode) + } + + // TODO: add tests cases for more complicated cases like nested containers, macro compilation, annots., etc. + // once those features are implemented + @ParameterizedTest @CsvSource( "80 35, 2023T", diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt index ddc339e23..36d6bbab8 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt @@ -27,6 +27,7 @@ class TimestampDecoderTest { "8A 35 7D CB EA 85 BC 01, 2023-10-15T11:22:33.444+01:15", "8B 35 7D CB EA 85 8B C8 06, 2023-10-15T11:22:33.444555+01:15", "8C 35 7D CB EA 85 92 61 7F 1A, 2023-10-15T11:22:33.444555666+01:15", + // TODO: add min/max values, other extremes ) fun `short timestamps are decoded correctly`(input: String, expectedValue: String) { val data = input.hexStringToByteArray() From a6960eb4f7fc2c63852f43afcc555599a4083d78 Mon Sep 17 00:00:00 2001 From: austnwil Date: Thu, 30 Oct 2025 11:11:41 -0700 
Subject: [PATCH 16/22] Replace UTF-8 test string with known source --- .../ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index 25a126057..0ae25a282 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -83,7 +83,8 @@ internal object ByteArrayBytecodeGenerator11Test { "\n\nhello\n\n", "Love it! \uD83D\uDE0D❤\uFE0F\uD83D\uDC95\uD83D\uDE3B\uD83D\uDC96", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`~!@#\$%^&*()-_=+[{]}\\|;:'\",<.>/?", - "Ἀνέβην δέ με σῖτος εὐρυβίοιο Ἰλιάδης τε καὶ Ὀδυσσείας καὶ Φοινικίων", + // A line of the Odyssey, CC BY-SA 3.0 US, from https://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.01.0135:book=1:card=1 + "τῶν ἁμόθεν γε, θεά, θύγατερ Διός, εἰπὲ καὶ ἡμῖν.", "", "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f", " \tleading and trailing whitespace\u000c\r\n" From 6f571dc48e875db55d9b61c699cecdbc882f265e Mon Sep 17 00:00:00 2001 From: Austin Williams Date: Thu, 30 Oct 2025 11:20:06 -0700 Subject: [PATCH 17/22] Apply suggestion from @popematt Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com> --- .../bin11/ByteArrayBytecodeGenerator11Test.kt | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index 
0ae25a282..1e93c37a7 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -17,19 +17,20 @@ import java.nio.charset.StandardCharsets internal object ByteArrayBytecodeGenerator11Test { - @ParameterizedTest - @ValueSource( - strings = [ - "64 4F 97 21 C5 " + // int32 -987654321 - "86 35 7D CB 12 2E 22 1B " + // short TS reference to 2023-10-15T11:22:33.444555-00:00 - "8F 0C " + // null struct - "6A " + // float 0e0 - "6D 18 2D 44 54 FB 21 09 40 " + // float64 3.141592653589793 - "FE 31 49 20 61 70 70 6c 61 75 64 20 79 6f 75 72 20 63 75 72 69 6f 73 69 74 79 " + // 24-byte blob - "6F " // false - ] - ) - fun `generator can compile input containing multiple simple opcodes`(inputBytesString: String) { + @Test + fun `generator can compile input containing multiple simple opcodes`() { + val inputBytesString = """ + 64 4F 97 21 C5 | int -987654321 + 86 35 7D CB 12 2E 22 1B | timestamp 2023-10-15T11:22:33.444555-00:00 + 8F 0C | null.struct + 6A | float 0e0 + 6D 18 2D 44 54 FB 21 09 40 | float 3.141592653589793 + FE 31 | 24-byte blob + 49 20 61 70 70 6c 61 75 | + 64 20 79 6f 75 72 20 63 | + 75 72 69 6f 73 69 74 79 | + 6F | false + """.cleanCommentedHexBytes() val f64pi = 3.141592653589793 val expectedBytecode = intArrayOf( Instructions.I_INT_I32, -987654321, From 50045b78cccd0759af5b202687b5bc0ca007c612 Mon Sep 17 00:00:00 2001 From: Austin Williams Date: Thu, 30 Oct 2025 11:20:35 -0700 Subject: [PATCH 18/22] Apply suggestion from @popematt Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com> --- .../amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index 5744516c9..12ab90109 100644 --- 
a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -80,9 +80,7 @@ internal class ByteArrayBytecodeGenerator11( return ByteSlice(source, position, position + length) } - override fun ionMinorVersion(): Int { - return 1 - } + override fun ionMinorVersion(): Int = 1 override fun getGeneratorForMinorVersion(minorVersion: Int): BytecodeGenerator { return when (minorVersion) { From ad746caa8becf6f347de4f105306bbf8a5abf00f Mon Sep 17 00:00:00 2001 From: Austin Williams Date: Thu, 30 Oct 2025 11:20:42 -0700 Subject: [PATCH 19/22] Apply suggestion from @popematt Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com> --- .../amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index 12ab90109..8ad30d423 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -34,7 +34,7 @@ internal class ByteArrayBytecodeGenerator11( macroIndices: IntArray, symTab: Array ) { - var opcode = 0 + var opcode = -1 while (currentPosition < source.size && !isSystemValue(opcode)) { opcode = source[currentPosition++].unsignedToInt() val handler = OpcodeHandlerTable.handler(opcode) From 785e1dd5f50853e6884deef53da581a572438fcf Mon Sep 17 00:00:00 2001 From: austnwil Date: Thu, 30 Oct 2025 13:13:51 -0700 Subject: [PATCH 20/22] Fix broken bytecode generator test --- .../ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt 
b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index 1e93c37a7..39e73ce92 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -2,12 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11 +import com.amazon.ion.TextToBinaryUtils.cleanCommentedHexBytes import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray import com.amazon.ion.Timestamp import com.amazon.ion.bytecode.GeneratorTestUtil.shouldGenerate import com.amazon.ion.bytecode.ir.Instructions import com.amazon.ion.bytecode.ir.Instructions.packInstructionData import com.amazon.ion.impl.bin.PrimitiveEncoder import org.junit.jupiter.api.Assertions.assertArrayEquals import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest @@ -15,7 +17,7 @@ import org.junit.jupiter.params.provider.CsvSource import org.junit.jupiter.params.provider.ValueSource import java.nio.charset.StandardCharsets -internal object ByteArrayBytecodeGenerator11Test { +internal class ByteArrayBytecodeGenerator11Test { @Test fun `generator can compile input containing multiple simple opcodes`() { From 0ad98d69cfc45130174eced8200b8261d1f325ce Mon Sep 17 00:00:00 2001 From: austnwil Date: Thu, 30 Oct 2025 13:14:17 -0700 Subject: [PATCH 21/22] Add various test cases to timestamp decoder --- .../bin11/bytearray/TimestampDecoderTest.kt | 124 +++++++++++++++--- 1 file changed, 109 insertions(+), 15 deletions(-) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt index 36d6bbab8..45d74be6d 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoderTest.kt @@ -13,21
+13,115 @@ class TimestampDecoderTest { @ParameterizedTest @CsvSource( - "80 35, 2023T", - "81 35 05, 2023-10T", - "82 35 7D, 2023-10-15T", - "83 35 7D CB 0A, 2023-10-15T11:22Z", - "84 35 7D CB 1A 02, 2023-10-15T11:22:33Z", - "84 35 7D CB 12 02, 2023-10-15T11:22:33-00:00", - "85 35 7D CB 12 F2 06, 2023-10-15T11:22:33.444-00:00", - "86 35 7D CB 12 2E 22 1B, 2023-10-15T11:22:33.444555-00:00", - "87 35 7D CB 12 4A 86 FD 69, 2023-10-15T11:22:33.444555666-00:00", - "88 35 7D CB EA 01, 2023-10-15T11:22+01:15", - "89 35 7D CB EA 85, 2023-10-15T11:22:33+01:15", - "8A 35 7D CB EA 85 BC 01, 2023-10-15T11:22:33.444+01:15", - "8B 35 7D CB EA 85 8B C8 06, 2023-10-15T11:22:33.444555+01:15", - "8C 35 7D CB EA 85 92 61 7F 1A, 2023-10-15T11:22:33.444555666+01:15", - // TODO: add min/max values, other extremes + // UTC offset + "83 35 7D 01 08, 2023-10-15T01:00Z", + "83 35 7D 61 0F, 2023-10-15T01:59Z", + "83 35 7D CB 0A, 2023-10-15T11:22Z", + "83 35 7D 17 08, 2023-10-15T23:00Z", + "83 35 7D 77 0F, 2023-10-15T23:59Z", + "84 35 7D CB 0A 00, 2023-10-15T11:22:00Z", + "84 35 7D CB 1A 02, 2023-10-15T11:22:33Z", + "84 35 7D CB BA 03, 2023-10-15T11:22:59Z", + "85 35 7D CB 1A 02 00, 2023-10-15T11:22:33.000Z", + "85 35 7D CB 1A F2 06, 2023-10-15T11:22:33.444Z", + "85 35 7D CB 1A 9E 0F, 2023-10-15T11:22:33.999Z", + "86 35 7D CB 1A 02 00 00, 2023-10-15T11:22:33.000000Z", + "86 35 7D CB 1A 2E 22 1B, 2023-10-15T11:22:33.444555Z", + "86 35 7D CB 1A FE 08 3D, 2023-10-15T11:22:33.999999Z", + "87 35 7D CB 1A 02 00 00 00, 2023-10-15T11:22:33.000000000Z", + "87 35 7D CB 1A 4A 86 FD 69, 2023-10-15T11:22:33.444555666Z", + "87 35 7D CB 1A FE 27 6B EE, 2023-10-15T11:22:33.999999999Z", + + // Unknown offset + "80 35, 2023T", + "81 B5 00, 2023-01T", + "81 35 05, 2023-10T", + "81 35 06, 2023-12T", + "82 35 0D, 2023-10-01T", + "82 35 7D, 2023-10-15T", + "82 35 FD, 2023-10-31T", + "83 35 7D 01 00, 2023-10-15T01:00-00:00", + "83 35 7D 61 07, 2023-10-15T01:59-00:00", + "83 35 7D CB 02, 2023-10-15T11:22-00:00", + 
"83 35 7D 17 00, 2023-10-15T23:00-00:00", + "83 35 7D 77 07, 2023-10-15T23:59-00:00", + "84 35 7D CB 02 00, 2023-10-15T11:22:00-00:00", + "84 35 7D CB 12 02, 2023-10-15T11:22:33-00:00", + "84 35 7D CB B2 03, 2023-10-15T11:22:59-00:00", + "85 35 7D CB 12 02 00, 2023-10-15T11:22:33.000-00:00", + "85 35 7D CB 12 F2 06, 2023-10-15T11:22:33.444-00:00", + "85 35 7D CB 12 9E 0F, 2023-10-15T11:22:33.999-00:00", + "86 35 7D CB 12 02 00 00, 2023-10-15T11:22:33.000000-00:00", + "86 35 7D CB 12 2E 22 1B, 2023-10-15T11:22:33.444555-00:00", + "86 35 7D CB 12 FE 08 3D, 2023-10-15T11:22:33.999999-00:00", + "87 35 7D CB 12 02 00 00 00, 2023-10-15T11:22:33.000000000-00:00", + "87 35 7D CB 12 4A 86 FD 69, 2023-10-15T11:22:33.444555666-00:00", + "87 35 7D CB 12 FE 27 6B EE, 2023-10-15T11:22:33.999999999-00:00", + + // Known offset + "88 35 7D 01 00 00, 2023-10-15T01:00-14:00", // min offset + "88 35 7D 01 80 03, 2023-10-15T01:00+14:00", // max offset + "88 35 7D 01 98 01, 2023-10-15T01:00-01:15", + "88 35 7D 01 E8 01, 2023-10-15T01:00+01:15", + "88 35 7D 61 EF 01, 2023-10-15T01:59+01:15", + "88 35 7D CB EA 01, 2023-10-15T11:22+01:15", + "88 35 7D 17 E8 01, 2023-10-15T23:00+01:15", + "88 35 7D 77 EF 01, 2023-10-15T23:59+01:15", + "89 35 7D CB EA 01, 2023-10-15T11:22:00+01:15", + "89 35 7D CB EA 85, 2023-10-15T11:22:33+01:15", + "89 35 7D CB EA ED, 2023-10-15T11:22:59+01:15", + "8A 35 7D CB EA 85 00 00, 2023-10-15T11:22:33.000+01:15", + "8A 35 7D CB EA 85 BC 01, 2023-10-15T11:22:33.444+01:15", + "8A 35 7D CB EA 85 E7 03, 2023-10-15T11:22:33.999+01:15", + "8B 35 7D CB EA 85 00 00 00, 2023-10-15T11:22:33.000000+01:15", + "8B 35 7D CB EA 85 8B C8 06, 2023-10-15T11:22:33.444555+01:15", + "8B 35 7D CB EA 85 3F 42 0F, 2023-10-15T11:22:33.999999+01:15", + "8C 35 7D CB EA 85 00 00 00 00, 2023-10-15T11:22:33.000000000+01:15", + "8C 35 7D CB EA 85 92 61 7F 1A, 2023-10-15T11:22:33.444555666+01:15", + "8C 35 7D CB EA 85 FF C9 9A 3B, 2023-10-15T11:22:33.999999999+01:15", + + // Earliest possible 
moments in time + "80 00, 1970T", + "81 80 00, 1970-01T", + "82 80 08, 1970-01-01T", + "83 80 08 00 00, 1970-01-01T00:00-00:00", + "84 80 08 00 00 00, 1970-01-01T00:00:00-00:00", + "85 80 08 00 00 00 00, 1970-01-01T00:00:00.000-00:00", + "86 80 08 00 00 00 00 00, 1970-01-01T00:00:00.000000-00:00", + "87 80 08 00 00 00 00 00 00, 1970-01-01T00:00:00.000000000-00:00", + "88 80 08 00 80 03, 1970-01-01T00:00+14:00", + "89 80 08 00 80 03, 1970-01-01T00:00:00+14:00", + "8A 80 08 00 80 03 00 00, 1970-01-01T00:00:00.000+14:00", + "8B 80 08 00 80 03 00 00 00, 1970-01-01T00:00:00.000000+14:00", + "8C 80 08 00 80 03 00 00 00 00, 1970-01-01T00:00:00.000000000+14:00", + + // Latest possible moments in time + "80 7F, 2097T", + "81 7F 06, 2097-12T", + "82 7F FE, 2097-12-31T", + "83 7F FE 77 07, 2097-12-31T23:59-00:00", + "84 7F FE 77 B7 03, 2097-12-31T23:59:59-00:00", + "85 7F FE 77 B7 9F 0F, 2097-12-31T23:59:59.999-00:00", + "86 7F FE 77 B7 FF 08 3D, 2097-12-31T23:59:59.999999-00:00", + "87 7F FE 77 B7 FF 27 6B EE, 2097-12-31T23:59:59.999999999-00:00", + "88 7F FE 77 07 00, 2097-12-31T23:59-14:00", + "89 7F FE 77 07 EC, 2097-12-31T23:59:59-14:00", + "8A 7F FE 77 07 EC E7 03, 2097-12-31T23:59:59.999-14:00", + "8B 7F FE 77 07 EC 3F 42 0F, 2097-12-31T23:59:59.999999-14:00", + "8C 7F FE 77 07 EC FF C9 9A 3B, 2097-12-31T23:59:59.999999999-14:00", + + // Leap days + "82 3A E9, 2028-02-29T", + "83 3A E9 CA 0B, 2028-02-29T10:30Z", + "84 3A E9 CA DB 02, 2028-02-29T10:30:45Z", + "85 3A E9 CA DB EE 01, 2028-02-29T10:30:45.123Z", + "86 3A E9 CA DB 02 89 07, 2028-02-29T10:30:45.123456Z", + "87 3A E9 CA DB 56 34 6F 1D, 2028-02-29T10:30:45.123456789Z", + "88 3A E9 CA 9B 01, 2028-02-29T10:30-01:15", + "89 3A E9 CA 9B B5, 2028-02-29T10:30:45-01:15", + "8A 3A E9 CA 9B B5 7B 00, 2028-02-29T10:30:45.123-01:15", + "8B 3A E9 CA 9B B5 40 E2 01, 2028-02-29T10:30:45.123456-01:15", + "8C 3A E9 CA 9B B5 15 CD 5B 07, 2028-02-29T10:30:45.123456789-01:15", ) fun `short timestamps are decoded correctly`(input: 
String, expectedValue: String) { val data = input.hexStringToByteArray() From 558a2b6fa93a8dc1429123a5b79163c9a8118acc Mon Sep 17 00:00:00 2001 From: austnwil Date: Thu, 30 Oct 2025 13:14:34 -0700 Subject: [PATCH 22/22] Fix bug in day-precision timestamp decoder when day has high bit set --- .../com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt index 0a541df1f..d2c77f763 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/TimestampDecoder.kt @@ -56,7 +56,7 @@ internal object TimestampDecoder { val yearMonthAndDay = readFixedInt16(source, position).toInt() val year = yearMonthAndDay.and(MASK_7) val month = yearMonthAndDay.shr(S_TIMESTAMP_MONTH_BIT_OFFSET).and(MASK_4) - val day = yearMonthAndDay.shr(S_TIMESTAMP_DAY_BIT_OFFSET) + val day = yearMonthAndDay.shr(S_TIMESTAMP_DAY_BIT_OFFSET).and(MASK_5) return Timestamp.forDay(year + 1970, month, day) }