From 070ce67f206759d300e99f2e6bfcf53cf55501ad Mon Sep 17 00:00:00 2001 From: austnwil Date: Wed, 5 Nov 2025 11:31:17 -0800 Subject: [PATCH 1/8] Implement list, Symbol/AnnotationSID, SingleCharSymbol opcode handlers --- .../amazon/ion/bytecode/BytecodeEmitter.kt | 15 + .../bin11/ByteArrayBytecodeGenerator11.kt | 24 +- .../bytearray/AnnotationSIDOpcodeHandler.kt | 33 + .../bin11/bytearray/IntOpcodeHandlers.kt | 31 + .../bin11/bytearray/ListOpcodeHandlers.kt | 218 ++++++ ...HandlerTable.kt => OpcodeHandlerTables.kt} | 48 ++ .../bin11/bytearray/SymbolOpcodeHandlers.kt | 58 ++ .../com/amazon/ion/bytecode/BytecodeUtils.kt | 21 + .../amazon/ion/bytecode/GeneratorTestUtil.kt | 13 + .../com/amazon/ion/bytecode/PrimitiveUtils.kt | 22 + .../bin11/ByteArrayBytecodeGenerator11Test.kt | 36 +- .../bytearray/AnnotationOpcodeHandlerTests.kt | 37 + .../bin11/bytearray/IntOpcodeHandlerTest.kt | 42 + .../bin11/bytearray/ListOpcodeHandlerTests.kt | 725 ++++++++++++++++++ .../bin11/bytearray/OpcodeHandlerTestUtil.kt | 50 ++ .../bytearray/SymbolOpcodeHandlerTests.kt | 64 ++ 16 files changed, 1416 insertions(+), 21 deletions(-) create mode 100644 src/main/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationSIDOpcodeHandler.kt create mode 100644 src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt rename src/main/java/com/amazon/ion/bytecode/bin11/bytearray/{OpcodeHandlerTable.kt => OpcodeHandlerTables.kt} (67%) create mode 100644 src/main/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlers.kt create mode 100644 src/test/java/com/amazon/ion/bytecode/BytecodeUtils.kt create mode 100644 src/test/java/com/amazon/ion/bytecode/PrimitiveUtils.kt create mode 100644 src/test/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationOpcodeHandlerTests.kt create mode 100644 src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt create mode 100644 src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt create mode 
100644 src/test/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlerTests.kt diff --git a/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt b/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt index 4b75f7ca5..8060a7a83 100644 --- a/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt +++ b/src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt @@ -87,4 +87,19 @@ internal object BytecodeEmitter { fun emitShortTimestampReference(destination: BytecodeBuffer, opcode: Int, dataPosition: Int) { destination.add2(Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(opcode), dataPosition) } + + /** + * Writes a list to the destination, automatically handling the calculation of the generated bytecode + * length and reserving the space in the buffer for the list start instruction. + * + * @param contentWriter Callback function that should write children's bytecode to `destination` + */ + @JvmStatic + inline fun emitList(destination: BytecodeBuffer, contentWriter: () -> Unit) { + val containerStartIndex = destination.reserve() + contentWriter() + destination.add(Instructions.I_END_CONTAINER) + val containerBytecodeSize = destination.size() - containerStartIndex - 1 // excludes the container start instruction + destination[containerStartIndex] = Instructions.I_LIST_START.packInstructionData(containerBytecodeSize) + } } diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index 8ad30d423..9c3df3cfb 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -38,16 +38,20 @@ internal class ByteArrayBytecodeGenerator11( while (currentPosition < source.size && !isSystemValue(opcode)) { opcode = source[currentPosition++].unsignedToInt() val handler = OpcodeHandlerTable.handler(opcode) - currentPosition += 
handler.convertOpcodeToBytecode( - opcode, - source, - currentPosition, - destination, - constantPool, - macroSrc, - macroIndices, - symTab - ) + try { + currentPosition += handler.convertOpcodeToBytecode( + opcode, + source, + currentPosition, + destination, + constantPool, + macroSrc, + macroIndices, + symTab + ) + } catch (e: StackOverflowError) { + throw IonException("Ion data nested too deeply", e) + } } if (currentPosition >= source.size) { diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationSIDOpcodeHandler.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationSIDOpcodeHandler.kt new file mode 100644 index 000000000..d215c0e0a --- /dev/null +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationSIDOpcodeHandler.kt @@ -0,0 +1,33 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.util.AppendableConstantPoolView +import com.amazon.ion.bytecode.util.BytecodeBuffer + +/** + * Writes an annotation with symbol address to the bytecode buffer. Handles opcode `0x58`. 
+ */ +internal object AnnotationSIDOpcodeHandler : OpcodeToBytecodeHandler { + @OptIn(ExperimentalStdlibApi::class) + override fun convertOpcodeToBytecode( + opcode: Int, + source: ByteArray, + position: Int, + destination: BytecodeBuffer, + constantPool: AppendableConstantPoolView, + macroSrc: IntArray, + macroIndices: IntArray, + symbolTable: Array + ): Int { + assert(opcode == 0x58) { "Handler cannot compile opcode ${opcode.toHexString()}" } + + val sidValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, position) + val sid = sidValueAndLength.toInt() + val length = sidValueAndLength.shr(Int.SIZE_BITS).toInt() + destination.add(Instructions.I_ANNOTATION_SID.packInstructionData(sid)) + return length + } +} diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt index 8408dcd79..fef075eb5 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt @@ -8,6 +8,8 @@ import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt24As import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt32 import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt8AsShort import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsLong +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData import com.amazon.ion.bytecode.util.AppendableConstantPoolView import com.amazon.ion.bytecode.util.BytecodeBuffer @@ -143,3 +145,32 @@ internal object LongIntOpcodeHandler : OpcodeToBytecodeHandler { return fixedIntLength } } + +/** Writes a variable-length integer in a tagless context to the bytecode buffer. Handles tagless opcode `0x60`. 
+ * For simplicity this only ever emits `I_INT_I32`, `I_INT_I64`, and `I_INT_CP` bytecode, even if the integer could fit + * in `I_INT_I16`. + * */ +@OptIn(ExperimentalStdlibApi::class) +internal val TAGLESS_FLEX_INT = OpcodeToBytecodeHandler { opcode, src, pos, dest, cp, _, _, _ -> + assert(opcode == 0x60) { "Handler cannot compile opcode ${opcode.toHexString()}" } + + val flexIntLength = PrimitiveDecoder.lengthOfFlexIntOrUIntAt(src, pos) + when (flexIntLength) { + 1, 2, 3, 4 -> { + val valueAndLength = PrimitiveDecoder.readFlexIntValueAndLength(src, pos) + val value = valueAndLength.toInt() + dest.add2(Instructions.I_INT_I32, value) + } + 5, 6, 7, 8, 9 -> { + val longValue = PrimitiveDecoder.readFlexIntAsLong(src, pos) + BytecodeEmitter.emitInt64Value(dest, longValue) + } + else -> { + val bigInt = PrimitiveDecoder.readFlexIntAsBigInteger(src, pos) + val cpIndex = cp.size + cp.add(bigInt) + dest.add(Instructions.I_INT_CP.packInstructionData(cpIndex)) + } + } + flexIntLength +} diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt new file mode 100644 index 000000000..e34151c8f --- /dev/null +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt @@ -0,0 +1,218 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.bytecode.BytecodeEmitter +import com.amazon.ion.bytecode.bin11.OpCode +import com.amazon.ion.bytecode.util.AppendableConstantPoolView +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.unsignedToInt + +// TODO: much of the logic here is shared between lists and sexps. 
It might be worthwhile to do something like +// "SequenceOpcodeHandlers" and pass the start instruction (Instructions.I_LIST_START vs .I_SEXP_START) to a helper +// BytecodeEmitter.emitSequence() or similar so this logic is not duplicated in a set of `*SexpOpcodeHandler`s. + +/** + * Writes a list to the bytecode buffer. Handles opcode `0xE0`. + */ +internal object EmptyListOpcodeHandler : OpcodeToBytecodeHandler { + @OptIn(ExperimentalStdlibApi::class) + override fun convertOpcodeToBytecode( + opcode: Int, + source: ByteArray, + position: Int, + destination: BytecodeBuffer, + constantPool: AppendableConstantPoolView, + macroSrc: IntArray, + macroIndices: IntArray, + symbolTable: Array + ): Int { + assert(opcode == 0xe0) { "Handler cannot compile opcode ${opcode.toHexString()}" } + BytecodeEmitter.emitList(destination) {} + return 0 + } +} + +/** + * Writes a length prefixed list to the bytecode buffer. Handles opcode `0xB0`-`0xBF`. + */ +internal object ShortLengthPrefixedListOpcodeHandler : OpcodeToBytecodeHandler { + @OptIn(ExperimentalStdlibApi::class) + override fun convertOpcodeToBytecode( + opcode: Int, + source: ByteArray, + position: Int, + destination: BytecodeBuffer, + constantPool: AppendableConstantPoolView, + macroSrc: IntArray, + macroIndices: IntArray, + symbolTable: Array + ): Int { + assert(opcode in 0xb0..0xbf) { "Handler cannot compile opcode ${opcode.toHexString()}" } + + val length = opcode and 0xF + BytecodeEmitter.emitList(destination) { + var p = position + val end = p + length + while (p < end) { + val opcode = source[p++].unsignedToInt() + p += OpcodeHandlerTable.handler(opcode).convertOpcodeToBytecode( + opcode, + source, + p, + destination, + constantPool, + macroSrc, + macroIndices, + symbolTable, + ) + } + } + return length + } +} + +/** + * Writes a length prefixed list to the bytecode buffer. Handles opcode `0xFA`. 
+ */ +internal object LongLengthPrefixedListOpcodeHandler : OpcodeToBytecodeHandler { + @OptIn(ExperimentalStdlibApi::class) + override fun convertOpcodeToBytecode( + opcode: Int, + source: ByteArray, + position: Int, + destination: BytecodeBuffer, + constantPool: AppendableConstantPoolView, + macroSrc: IntArray, + macroIndices: IntArray, + symbolTable: Array + ): Int { + assert(opcode == 0xfa) { "Handler cannot compile opcode ${opcode.toHexString()}" } + + val containerSizeUIntValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, position) + val containerLength = containerSizeUIntValueAndLength.toInt() + val prefixLength = containerSizeUIntValueAndLength.shr(Int.SIZE_BITS).toInt() + BytecodeEmitter.emitList(destination) { + var p = position + prefixLength + val end = p + containerLength + while (p < end) { + val opcode = source[p++].unsignedToInt() + p += OpcodeHandlerTable.handler(opcode).convertOpcodeToBytecode( + opcode, + source, + p, + destination, + constantPool, + macroSrc, + macroIndices, + symbolTable, + ) + } + } + return containerLength + prefixLength + } +} + +/** + * Writes a delimited list to the bytecode buffer. Handles opcode `0xF0`. 
+ */ +internal object DelimitedListOpcodeHandler : OpcodeToBytecodeHandler { + @OptIn(ExperimentalStdlibApi::class) + override fun convertOpcodeToBytecode( + opcode: Int, + source: ByteArray, + position: Int, + destination: BytecodeBuffer, + constantPool: AppendableConstantPoolView, + macroSrc: IntArray, + macroIndices: IntArray, + symbolTable: Array + ): Int { + assert(opcode == 0xf0) { "Handler cannot compile opcode ${opcode.toHexString()}" } + + var p = position + BytecodeEmitter.emitList(destination) { + while (true) { + val opcode = if (p < source.size) source[p++].unsignedToInt() else throw com.amazon.ion.IonException("Delimited list is missing its end delimiter") + if (opcode == OpCode.DELIMITED_CONTAINER_END) { + break + } + p += OpcodeHandlerTable.handler(opcode).convertOpcodeToBytecode( + opcode, + source, + p, + destination, + constantPool, + macroSrc, + macroIndices, + symbolTable, + ) + } + } + val bytesRead = p - position + return bytesRead + } +} + +/** + * Writes a tagless-element list to the bytecode buffer. Handles opcode `0x5B`. + */ +internal object TaglessElementListOpcodeHandler : OpcodeToBytecodeHandler { + @OptIn(ExperimentalStdlibApi::class) + override fun convertOpcodeToBytecode( + opcode: Int, + source: ByteArray, + position: Int, + destination: BytecodeBuffer, + constantPool: AppendableConstantPoolView, + macroSrc: IntArray, + macroIndices: IntArray, + symbolTable: Array + ): Int { + assert(opcode == 0x5b) { "Handler cannot compile opcode ${opcode.toHexString()}" } + + var p = position + val childOpcode = source[p++].unsignedToInt() + val macroAddress = when (childOpcode) { + in 0x00..0x47 -> childOpcode + in 0x48..0x4f, 0xf4 -> { + val addressValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, p) + val addressValue = addressValueAndLength.toInt() + val addressLength = addressValueAndLength.shr(Int.SIZE_BITS).toInt() + p += addressLength + addressValue + } + else -> -1 + } + + val containerSizeValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, p) + val containerLength = containerSizeValueAndLength.toInt() +
val prefixLength = containerSizeValueAndLength.shr(Int.SIZE_BITS).toInt() + p += prefixLength + + // If macroAddress > -1, then it is the address of the macro-shaped values, + // and childOpcode should be ignored. + // If macroAddress is -1, then childOpcode is the opcode of the values. + if (macroAddress < 0) { + val handler = TaglessOpcodeHandlerTable.handler(childOpcode) + BytecodeEmitter.emitList(destination) { + for (i in 0 until containerLength) { + p += handler.convertOpcodeToBytecode( + childOpcode, + source, + p, + destination, + constantPool, + macroSrc, + macroIndices, + symbolTable, + ) + } + } + } else { + TODO("Macro evaluation not yet implemented") + } + + return p - position + } +} diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTable.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTables.kt similarity index 67% rename from src/main/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTable.kt rename to src/main/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTables.kt index 2e2f38fc3..b3dfbcdfa 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTable.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTables.kt @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray +import com.amazon.ion.IonException import com.amazon.ion.bytecode.ir.Instructions import com.amazon.ion.bytecode.util.AppendableConstantPoolView import com.amazon.ion.bytecode.util.BytecodeBuffer @@ -57,7 +58,10 @@ internal fun interface OpcodeToBytecodeHandler { internal object OpcodeHandlerTable { private val table = Array(256) { opcode -> when (opcode) { + in 0x50..0x57 -> SymbolSIDOpcodeHandler + 0x58 -> AnnotationSIDOpcodeHandler 0x59 -> ReferenceOpcodeHandler(Instructions.I_ANNOTATION_REF) + 0x5b -> TaglessElementListOpcodeHandler 0x60 -> Int0OpcodeHandler 0x61 -> Int8OpcodeHandler 0x62 -> Int16OpcodeHandler @@ 
-72,11 +76,15 @@ internal object OpcodeHandlerTable { 0x8e -> NullOpcodeHandler 0x8f -> TypedNullOpcodeHandler 0x6e, 0x6f -> BooleanOpcodeHandler + 0xa1 -> SingleCharSymbolOpcodeHandler + in 0xb0..0xbf -> ShortLengthPrefixedListOpcodeHandler + 0xf0 -> DelimitedListOpcodeHandler 0xf5 -> ReferenceOpcodeHandler(Instructions.I_INT_REF) 0xf6 -> ReferenceOpcodeHandler(Instructions.I_DECIMAL_REF) 0xf7 -> ReferenceOpcodeHandler(Instructions.I_TIMESTAMP_REF) 0xf8 -> ReferenceOpcodeHandler(Instructions.I_STRING_REF) 0xf9 -> ReferenceOpcodeHandler(Instructions.I_SYMBOL_REF) + 0xfa -> LongLengthPrefixedListOpcodeHandler 0xfe -> ReferenceOpcodeHandler(Instructions.I_BLOB_REF) 0xff -> ReferenceOpcodeHandler(Instructions.I_CLOB_REF) else -> OpcodeToBytecodeHandler { _, _, _, _, _, _, _, _ -> @@ -93,3 +101,43 @@ internal object OpcodeHandlerTable { */ fun handler(opcode: Int): OpcodeToBytecodeHandler = table[opcode] } + +/** + * Table mapping numeric opcodes to the appropriate [OpcodeToBytecodeHandler] in a tagless context, allowing for + * array-based access to the appropriate handler. 
+ */ +internal object TaglessOpcodeHandlerTable { + @OptIn(ExperimentalStdlibApi::class) + private val table = Array(256) { opcode -> + when (opcode) { + 0x60 -> TAGLESS_FLEX_INT + 0x61 -> Int8OpcodeHandler + 0x62 -> Int16OpcodeHandler + 0x64 -> Int32OpcodeHandler + 0x68 -> LongIntOpcodeHandler + 0x6b -> Float16OpcodeHandler + 0x6c -> Float32OpcodeHandler + 0x6d -> DoubleOpcodeHandler + in 0x82..0x87 -> ShortTimestampOpcodeHandler + 0xe0 -> TAGLESS_FLEX_UINT + 0xe1 -> TAGLESS_FIXED_UINT_8 + 0xe2 -> TAGLESS_FIXED_UINT_16 + 0xe4 -> TAGLESS_FIXED_UINT_32 + 0xe8 -> TAGLESS_FIXED_UINT_64 + 0x70, 0xee -> OpcodeToBytecodeHandler { _, _, _, _, _, _, _, _ -> + TODO("Tagless opcode $opcode not yet implemented") + } + else -> OpcodeToBytecodeHandler { _, _, _, _, _, _, _, _ -> + throw IonException("Opcode 0x${opcode.toHexString(HexFormat { upperCase = true })} is not legal in a tagless context") + } + } + } + + /** + * Retrieves the appropriate [OpcodeToBytecodeHandler] for a given opcode. + * + * TODO: this costs an unnecessary function call for every opcode handled. The performance of this + * vs. exposing the lookup table itself and accessing directly by index should be investigated. + */ + fun handler(opcode: Int): OpcodeToBytecodeHandler = table[opcode] +} diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlers.kt new file mode 100644 index 000000000..f567432cc --- /dev/null +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlers.kt @@ -0,0 +1,58 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.util.AppendableConstantPoolView +import com.amazon.ion.bytecode.util.BytecodeBuffer + +/** + * Writes a symbol with symbol address to the bytecode buffer. Handles opcodes `0x50`-`0x57`. + */ +internal object SymbolSIDOpcodeHandler : OpcodeToBytecodeHandler { + @OptIn(ExperimentalStdlibApi::class) + override fun convertOpcodeToBytecode( + opcode: Int, + source: ByteArray, + position: Int, + destination: BytecodeBuffer, + constantPool: AppendableConstantPoolView, + macroSrc: IntArray, + macroIndices: IntArray, + symbolTable: Array + ): Int { + assert(opcode in 0x50..0x57) { "Handler cannot compile opcode ${opcode.toHexString()}" } + + val lsb = opcode and 0b111 + val msbValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, position) + val msb = msbValueAndLength.toInt().shl(3) + val msbLength = msbValueAndLength.shr(Int.SIZE_BITS).toInt() + val sid = msb or lsb + destination.add(Instructions.I_SYMBOL_SID.packInstructionData(sid)) + return msbLength + } +} + +/** + * Writes a single-char symbol to the bytecode buffer. Handles opcode `0xA1`. 
+ */ +internal object SingleCharSymbolOpcodeHandler : OpcodeToBytecodeHandler { + @OptIn(ExperimentalStdlibApi::class) + override fun convertOpcodeToBytecode( + opcode: Int, + source: ByteArray, + position: Int, + destination: BytecodeBuffer, + constantPool: AppendableConstantPoolView, + macroSrc: IntArray, + macroIndices: IntArray, + symbolTable: Array + ): Int { + assert(opcode == 0xa1) { "Handler cannot compile opcode ${opcode.toHexString()}" } + + val char = source[position].toInt() and 0xFF // read the byte as unsigned; Byte.toInt() sign-extends + destination.add(Instructions.I_SYMBOL_CHAR.packInstructionData(char)) + return 1 + } +} diff --git a/src/test/java/com/amazon/ion/bytecode/BytecodeUtils.kt b/src/test/java/com/amazon/ion/bytecode/BytecodeUtils.kt new file mode 100644 index 000000000..69d4b5467 --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/BytecodeUtils.kt @@ -0,0 +1,21 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode + +import com.amazon.ion.bytecode.ir.Instructions + +/** + * Helpers for generating bytecode in test cases. + */ +internal object BytecodeUtils { + /** + * Helper function for generating I_INT_I64 bytecode for a given [Long]. 
+ */ + fun I64(value: Long): IntArray { + return intArrayOf( + Instructions.I_INT_I64, + value.shr(Int.SIZE_BITS).toInt(), + value.toInt() + ) + } +} diff --git a/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt b/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt index 530517b35..b1020cb78 100644 --- a/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt +++ b/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode +import com.amazon.ion.IonException import com.amazon.ion.SystemSymbols import com.amazon.ion.bytecode.ir.Debugger import com.amazon.ion.bytecode.ir.Instructions @@ -9,6 +10,7 @@ import com.amazon.ion.bytecode.util.BytecodeBuffer import com.amazon.ion.bytecode.util.ConstantPool import org.junit.jupiter.api.Assertions.assertArrayEquals import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.assertThrows object GeneratorTestUtil { @@ -48,6 +50,17 @@ object GeneratorTestUtil { } } + /** + * Asserts that the generator throws an [IonException] when compiling its input (as opposed to a [Throwable] of + * an incorrect type). + */ + internal fun BytecodeGenerator.shouldThrowIonException() { + assertThrows { + val generator = this + generator.refill(BytecodeBuffer(), ConstantPool(), EMPTY_MACRO_TABLE, intArrayOf(), DEFAULT_SYMBOL_TABLE) + } + } + val DEFAULT_SYMBOL_TABLE = arrayOf( null, SystemSymbols.ION, diff --git a/src/test/java/com/amazon/ion/bytecode/PrimitiveUtils.kt b/src/test/java/com/amazon/ion/bytecode/PrimitiveUtils.kt new file mode 100644 index 000000000..d801f5354 --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/PrimitiveUtils.kt @@ -0,0 +1,22 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode + +import com.amazon.ion.impl.bin.PrimitiveEncoder + +/** + * Helpers for working with binary primitives in test cases. + */ +internal object PrimitiveUtils { + /** + * Helper function for generating FlexUInt bytes from an unsigned integer. Useful for test + * cases that programmatically generate length-prefixed payloads. + */ + fun generateFlexUIntBytes(value: Int): ByteArray { + val asLong = value.toLong() + val length = PrimitiveEncoder.flexUIntLength(asLong) + val bytes = ByteArray(length) + PrimitiveEncoder.writeFlexIntOrUIntInto(bytes, 0, asLong, length) + return bytes + } +} diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index 39e73ce92..9e2dd7d62 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -6,12 +6,13 @@ import com.amazon.ion.TextToBinaryUtils.cleanCommentedHexBytes import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray import com.amazon.ion.Timestamp import com.amazon.ion.bytecode.GeneratorTestUtil.shouldGenerate +import com.amazon.ion.bytecode.GeneratorTestUtil.shouldThrowIonException +import com.amazon.ion.bytecode.PrimitiveUtils.generateFlexUIntBytes import com.amazon.ion.bytecode.ir.Instructions import com.amazon.ion.bytecode.ir.Instructions.packInstructionData -import com.amazon.ion.impl.bin.PrimitiveEncoder -import org.junit.Test import org.junit.jupiter.api.Assertions.assertArrayEquals import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource import org.junit.jupiter.params.provider.ValueSource @@ -128,15 +129,28 @@ internal class ByteArrayBytecodeGenerator11Test { 
assertArrayEquals(lobBytes, readLob) } - /** - * Helper function for generating FlexUInt bytes from an unsigned integer. Useful for test - * cases that programmatically generate length-prefixed payloads. + /* + * ================================================ + * == EXCEPTION HANDLING == + * == == + * == Tests that validate that IonException is == + * == thrown when the generator encounters an == + * == invalid input. == + * ================================================ */ - private fun generateFlexUIntBytes(value: Int): ByteArray { - val asLong = value.toLong() - val length = PrimitiveEncoder.flexUIntLength(asLong) - val bytes = ByteArray(length) - PrimitiveEncoder.writeFlexIntOrUIntInto(bytes, 0, asLong, length) - return bytes + + @Test + fun `generator catches stack overflow`() { + val testDepth = 60000 + val bytes = mutableListOf() + for (i in 0 until testDepth) { + bytes.add(OpCode.DELIMITED_LIST.toByte()) + } + for (i in 0 until testDepth) { + bytes.add(OpCode.DELIMITED_CONTAINER_END.toByte()) + } + + val generator = ByteArrayBytecodeGenerator11(bytes.toByteArray(), 0) + generator.shouldThrowIonException() } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationOpcodeHandlerTests.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationOpcodeHandlerTests.kt new file mode 100644 index 000000000..f3b042ea3 --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationOpcodeHandlerTests.kt @@ -0,0 +1,37 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import org.junit.jupiter.api.Nested +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource + +class AnnotationOpcodeHandlerTests { + @Nested + inner class AnnotationSIDOpcodeHandlerTest { + + // NOTE: These test annotations don't actually annotate anything, which is technically invalid. + // This however allows us to use shouldCompile(). + @ParameterizedTest + @CsvSource( + "58 01, 0", + "58 03, 1", + "58 05, 2", + "58 07, 3", + "58 FF, 127", + "58 02 02, 128", + "58 04 00 02, 16384", + "58 08 00 00 02, 2097152", + "58 F8 FF FF 03, 4194303", // Max value that can be packed into data of I_ANNOTATION_SID + ) + fun `handler compiles annotations with SID`(bytes: String, sid: Int) { + val bytes = bytes.hexStringToByteArray() + val expectedBytecode = intArrayOf(Instructions.I_ANNOTATION_SID.packInstructionData(sid)) + AnnotationSIDOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + } +} diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt index 460684985..b8d536abc 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlerTest.kt @@ -2,8 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.bytecode.bin11.bytearray +import com.amazon.ion.PrimitiveTestCases_1_1 +import com.amazon.ion.TextToBinaryUtils.binaryStringToByteArray import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.BytecodeUtils 
import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.bin11.OpCode import com.amazon.ion.bytecode.ir.Instructions import com.amazon.ion.bytecode.ir.Instructions.packInstructionData import com.amazon.ion.bytecode.util.BytecodeBuffer @@ -12,6 +16,8 @@ import com.amazon.ion.bytecode.util.unsignedToInt import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.MethodSource +import java.math.BigInteger import kotlin.String class IntOpcodeHandlerTest { @@ -177,4 +183,40 @@ class IntOpcodeHandlerTest { val representedInteger = (buffer.get(1).toLong() shl 32) or (buffer.get(2).toLong() and 0xFFFF_FFFF) assertEquals(expectedInt64, representedInteger) } + + @ParameterizedTest + @MethodSource(PrimitiveTestCases_1_1.FLEX_INT_READ_WRITE_CASES, PrimitiveTestCases_1_1.FLEX_INT_READ_ONLY_CASES) + fun testTaglessFlexIntHandler(expectedBigInt: BigInteger, bits: String) { + val source = bits.binaryStringToByteArray() + val dest = BytecodeBuffer() + val cp = ConstantPool() + cp.add("dummy value") + + val bytesRead = TAGLESS_FLEX_INT.convertOpcodeToBytecode(OpCode.TE_FLEX_INT, source, 0, dest, cp, intArrayOf(), intArrayOf(), arrayOf()) + + when (bytesRead) { + 1, 2, 3, 4 -> { + assertEqualBytecode( + intArrayOf(Instructions.I_INT_I32, expectedBigInt.toInt()), + dest.toArray() + ) + } + 5, 6, 7, 8, 9 -> { + val expectedLong = expectedBigInt.toLong() + assertEqualBytecode( + BytecodeUtils.I64(expectedLong), + dest.toArray() + ) + } + else -> { + assertEqualBytecode( + intArrayOf(Instructions.I_INT_CP.packInstructionData(1)), + dest.toArray() + ) + assertEquals(expectedBigInt, cp.toArray()[1]) + } + } + + assertEquals(source.size, bytesRead) + } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt 
b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt new file mode 100644 index 000000000..e3b5e2fdf --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt @@ -0,0 +1,725 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.TextToBinaryUtils.cleanCommentedHexBytes +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.BytecodeUtils +import com.amazon.ion.bytecode.PrimitiveUtils.generateFlexUIntBytes +import com.amazon.ion.bytecode.bin11.OpCode +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.asHalfToFloat +import org.junit.jupiter.api.Disabled +import org.junit.jupiter.api.Nested +import org.junit.jupiter.api.Test +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.ValueSource +import java.math.BigInteger + +class ListOpcodeHandlerTests { + + /* + * ================================================ + * == SHORT PREFIXED LISTS == + * ================================================ + */ + + @Nested + inner class `short length-prefixed lists` { + @Test + fun `handler compiles lists of simple elements`() { + val bytes = "B6 61 01 61 02 61 03".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(4), + Instructions.I_INT_I16.packInstructionData(1), + Instructions.I_INT_I16.packInstructionData(2), + Instructions.I_INT_I16.packInstructionData(3), + Instructions.I_END_CONTAINER + ) + ShortLengthPrefixedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles lists 
of every supported length`() { + for (length in 0..0xF) { // inclusive: opcodes 0xB0..0xBF encode lengths 0..15 + // List of null.null of size `length` + val bytes = byteArrayOf( + OpCode.LIST_LENGTH_0.or(length).toByte(), + *Array(length) { OpCode.NULL_NULL.toByte() }.toByteArray() + ) + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(length + 1), + *Array(length) { Instructions.I_NULL_NULL }.toIntArray(), + Instructions.I_END_CONTAINER + ) + ShortLengthPrefixedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + } + + @Test + fun `handler compiles nested lists`() { + val bytes = """ + BF + B6 8E B3 B2 8E 8E 8E | [null, [[null, null]], null] + B3 61 01 6E | [1, true] + 8E | null + B0 | empty list + 61 03 | int 3 + """.cleanCommentedHexBytes().hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(19), + + Instructions.I_LIST_START.packInstructionData(9), + Instructions.I_NULL_NULL, + Instructions.I_LIST_START.packInstructionData(5), + Instructions.I_LIST_START.packInstructionData(3), + Instructions.I_NULL_NULL, + Instructions.I_NULL_NULL, + Instructions.I_END_CONTAINER, + Instructions.I_END_CONTAINER, + Instructions.I_NULL_NULL, + Instructions.I_END_CONTAINER, + + Instructions.I_LIST_START.packInstructionData(3), + Instructions.I_INT_I16.packInstructionData(1), + Instructions.I_BOOL.packInstructionData(1), + Instructions.I_END_CONTAINER, + + Instructions.I_NULL_NULL, + + Instructions.I_LIST_START.packInstructionData(1), + Instructions.I_END_CONTAINER, + + Instructions.I_INT_I16.packInstructionData(3), + + Instructions.I_END_CONTAINER + ) + ShortLengthPrefixedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles deeply nested lists`() { + val bytes = "BF BE BD BC BB BA B9 B8 B7 B6 B5 B4 B3 B2 B1 B0".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(31), + Instructions.I_LIST_START.packInstructionData(29), + 
Instructions.I_LIST_START.packInstructionData(27), + Instructions.I_LIST_START.packInstructionData(25), + Instructions.I_LIST_START.packInstructionData(23), + Instructions.I_LIST_START.packInstructionData(21), + Instructions.I_LIST_START.packInstructionData(19), + Instructions.I_LIST_START.packInstructionData(17), + Instructions.I_LIST_START.packInstructionData(15), + Instructions.I_LIST_START.packInstructionData(13), + Instructions.I_LIST_START.packInstructionData(11), + Instructions.I_LIST_START.packInstructionData(9), + Instructions.I_LIST_START.packInstructionData(7), + Instructions.I_LIST_START.packInstructionData(5), + Instructions.I_LIST_START.packInstructionData(3), + Instructions.I_LIST_START.packInstructionData(1), + *Array(16) { Instructions.I_END_CONTAINER }.toIntArray() + ) + ShortLengthPrefixedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + } + + /* + * ================================================ + * == LONG PREFIXED LISTS == + * ================================================ + */ + + @Nested + inner class `long length-prefixed lists` { + @Test + fun `handler compiles lists of simple elements`() { + val bytes = "FA 0D 61 01 61 02 61 03".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(4), + Instructions.I_INT_I16.packInstructionData(1), + Instructions.I_INT_I16.packInstructionData(2), + Instructions.I_INT_I16.packInstructionData(3), + Instructions.I_END_CONTAINER + ) + LongLengthPrefixedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles empty lists`() { + val bytes = "FA 01".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(1), + Instructions.I_END_CONTAINER + ) + LongLengthPrefixedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles large lists`() { + val testLength = 10_000_000 // Much larger causes test crash + val bytes = 
byteArrayOf( + OpCode.VARIABLE_LENGTH_LIST.toByte(), + *generateFlexUIntBytes(testLength), + *Array(testLength) { 0x6E /* true */ }.toByteArray() + ) + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(testLength + 1), + *Array(testLength) { Instructions.I_BOOL.packInstructionData(1) }.toIntArray(), + Instructions.I_END_CONTAINER + ) + LongLengthPrefixedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles nested lists`() { + val bytes = """ + FA 63 + FA 11 | [null, [[null, null]], null] + 8E |--| | | + FA 09 FA 05 8E 8E |--------| | + 8E |------------------------| + FA 41 | [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + 61 01 61 02 61 03 61 04 + 61 05 61 06 61 07 61 08 + 61 09 61 0A 61 0B 61 0C + 61 0D 61 0E 61 0F 61 10 + 8E | null + FA 01 | empty list + 61 03 | int 3 + """.cleanCommentedHexBytes().hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(33), + + Instructions.I_LIST_START.packInstructionData(9), + Instructions.I_NULL_NULL, + Instructions.I_LIST_START.packInstructionData(5), + Instructions.I_LIST_START.packInstructionData(3), + Instructions.I_NULL_NULL, + Instructions.I_NULL_NULL, + Instructions.I_END_CONTAINER, + Instructions.I_END_CONTAINER, + Instructions.I_NULL_NULL, + Instructions.I_END_CONTAINER, + + Instructions.I_LIST_START.packInstructionData(17), + *Array(16) { Instructions.I_INT_I16.packInstructionData(it + 1) }.toIntArray(), + Instructions.I_END_CONTAINER, + + Instructions.I_NULL_NULL, + + Instructions.I_LIST_START.packInstructionData(1), + Instructions.I_END_CONTAINER, + + Instructions.I_INT_I16.packInstructionData(3), + + Instructions.I_END_CONTAINER + ) + LongLengthPrefixedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + // TODO: this does stack overflow on very deep lists + @Test + fun `handler compiles deeply nested lists`() { + fun wrapBytesWithList(bytes: ByteArray): ByteArray { + 
return byteArrayOf( + OpCode.VARIABLE_LENGTH_LIST.toByte(), + *generateFlexUIntBytes(bytes.size), + *bytes + ) + } + + fun wrapBytecodeWithList(bytecode: IntArray): IntArray { + return intArrayOf( + Instructions.I_LIST_START.packInstructionData(bytecode.size + 1), + *bytecode, + Instructions.I_END_CONTAINER + ) + } + + val testDepth = 500 + var bytes = byteArrayOf() + var expectedBytecode = intArrayOf() + for (i in 0 until testDepth) { + bytes = wrapBytesWithList(bytes) + expectedBytecode = wrapBytecodeWithList(expectedBytecode) + } + LongLengthPrefixedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + } + + /* + * ================================================ + * == DELIMITED LISTS == + * ================================================ + */ + + @Nested + inner class `delimited lists` { + @Test + fun `handler compiles lists of simple elements`() { + val bytes = "F0 61 01 61 02 61 03 EF".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(4), + Instructions.I_INT_I16.packInstructionData(1), + Instructions.I_INT_I16.packInstructionData(2), + Instructions.I_INT_I16.packInstructionData(3), + Instructions.I_END_CONTAINER + ) + DelimitedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles empty lists`() { + val bytes = "F0 EF".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(1), + Instructions.I_END_CONTAINER + ) + DelimitedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles large lists`() { + val testLength = 10_000_000 // Much larger causes test crash + val bytes = byteArrayOf( + OpCode.DELIMITED_LIST.toByte(), + *Array(testLength) { 0x6E /* true */ }.toByteArray(), + OpCode.DELIMITED_CONTAINER_END.toByte() + ) + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(testLength + 1), + *Array(testLength) { 
Instructions.I_BOOL.packInstructionData(1) }.toIntArray(), + Instructions.I_END_CONTAINER + ) + DelimitedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles nested lists`() { + val bytes = """ + F0 + F0 8E F0 8E F0 8E 8E EF EF 8E EF | [null, [null, [null, null]], null] + F0 61 01 61 02 61 03 EF | [1, 2, 3] + 8E | null + F0 EF | empty list + 61 03 | int 3 + EF + """.cleanCommentedHexBytes().hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(21), + + Instructions.I_LIST_START.packInstructionData(10), + Instructions.I_NULL_NULL, + Instructions.I_LIST_START.packInstructionData(6), + Instructions.I_NULL_NULL, + Instructions.I_LIST_START.packInstructionData(3), + Instructions.I_NULL_NULL, + Instructions.I_NULL_NULL, + Instructions.I_END_CONTAINER, + Instructions.I_END_CONTAINER, + Instructions.I_NULL_NULL, + Instructions.I_END_CONTAINER, + + Instructions.I_LIST_START.packInstructionData(4), + Instructions.I_INT_I16.packInstructionData(1), + Instructions.I_INT_I16.packInstructionData(2), + Instructions.I_INT_I16.packInstructionData(3), + Instructions.I_END_CONTAINER, + + Instructions.I_NULL_NULL, + + Instructions.I_LIST_START.packInstructionData(1), + Instructions.I_END_CONTAINER, + + Instructions.I_INT_I16.packInstructionData(3), + + Instructions.I_END_CONTAINER + ) + DelimitedListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + // TODO: this does stack overflow on very deep lists + @Test + fun `handler compiles deeply nested lists`() { + val testDepth = 500 + val bytes = mutableListOf<Byte>() + val expectedBytecode = mutableListOf<Int>() + for (i in 0 until testDepth) { + val childCount = testDepth - i - 1 + bytes.add(OpCode.DELIMITED_LIST.toByte()) + expectedBytecode.add(Instructions.I_LIST_START.packInstructionData(childCount * 2 + 1)) + } + for (i in 0 until testDepth) { + bytes.add(OpCode.DELIMITED_CONTAINER_END.toByte()) + 
expectedBytecode.add(Instructions.I_END_CONTAINER) + } + DelimitedListOpcodeHandler.shouldCompile(bytes.toByteArray(), expectedBytecode.toIntArray()) + } + } + + /* + * ================================================ + * == TAGLESS ELEMENT LISTS == + * ================================================ + */ + + @Nested + inner class `tagless element lists` { + @Test + fun `handler compiles simple int TE lists`() { + val bytes = """ + 5B 60 15 + 01 + 03 + FF + 02 01 + FE FE + 04 00 01 + 08 00 00 01 + 10 00 00 00 10 + 00 FE FF FF FF FF FF FF FF 01 + 00 02 00 00 00 00 00 00 00 02 + """.cleanCommentedHexBytes().hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(20), + Instructions.I_INT_I32, 0, + Instructions.I_INT_I32, 1, + Instructions.I_INT_I32, -1, + Instructions.I_INT_I32, 64, + Instructions.I_INT_I32, -65, + Instructions.I_INT_I32, 8192, + Instructions.I_INT_I32, 1048576, + *BytecodeUtils.I64(Int.MAX_VALUE.toLong() + 1), + // The tagless FlexInt handler will always compile 10-byte FlexInts to I_INT_CP even though some of them + // can fit into a Long. 
+ Instructions.I_INT_CP.packInstructionData(0), + Instructions.I_INT_CP.packInstructionData(1), + Instructions.I_END_CONTAINER + ) + val expectedConstantPool = ConstantPool().apply { + add(BigInteger.valueOf(Long.MAX_VALUE)) + add(BigInteger.valueOf(Long.MAX_VALUE) + BigInteger.ONE) + } + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode, expectedConstantPool) + } + + @Test + fun `handler compiles simple int8 TE lists`() { + val bytes = "5B 61 0B 00 01 FF 7F 80".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(6), + Instructions.I_INT_I16.packInstructionData(0), + Instructions.I_INT_I16.packInstructionData(1), + Instructions.I_INT_I16.packInstructionData(-1), + Instructions.I_INT_I16.packInstructionData(Byte.MAX_VALUE.toInt()), + Instructions.I_INT_I16.packInstructionData(Byte.MIN_VALUE.toInt()), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles simple int16 TE lists`() { + val bytes = "5B 62 0B 00 00 01 00 FF FF FF 7F 00 80".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(6), + Instructions.I_INT_I16.packInstructionData(0), + Instructions.I_INT_I16.packInstructionData(1), + Instructions.I_INT_I16.packInstructionData(-1), + Instructions.I_INT_I16.packInstructionData(Short.MAX_VALUE.toInt()), + Instructions.I_INT_I16.packInstructionData(Short.MIN_VALUE.toInt()), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles simple int32 TE lists`() { + val bytes = """ + 5B 64 0B + 00 00 00 00 + 01 00 00 00 + FF FF FF FF + FF FF FF 7F + 00 00 00 80 + """.cleanCommentedHexBytes().hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(11), + Instructions.I_INT_I32, 0, + Instructions.I_INT_I32, 1, + 
Instructions.I_INT_I32, -1, + Instructions.I_INT_I32, Int.MAX_VALUE, + Instructions.I_INT_I32, Int.MIN_VALUE, + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles simple int64 TE lists`() { + val bytes = """ + 5B 68 0B + 00 00 00 00 00 00 00 00 + 01 00 00 00 00 00 00 00 + FF FF FF FF FF FF FF FF + FF FF FF FF FF FF FF 7F + 00 00 00 00 00 00 00 80 + """.cleanCommentedHexBytes().hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(16), + *BytecodeUtils.I64(0), + *BytecodeUtils.I64(1), + *BytecodeUtils.I64(-1), + *BytecodeUtils.I64(Long.MAX_VALUE), + *BytecodeUtils.I64(Long.MIN_VALUE), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple uint TE lists`() { + TODO("Test not yet implemented") + } + + @Test + fun `handler compiles simple uint8 TE lists`() { + val bytes = "5B E1 09 00 01 7F FF".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(5), + Instructions.I_INT_I16.packInstructionData(0), + Instructions.I_INT_I16.packInstructionData(1), + Instructions.I_INT_I16.packInstructionData(Byte.MAX_VALUE.toInt()), + Instructions.I_INT_I16.packInstructionData(0xFF), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles simple uint16 TE lists`() { + val bytes = "5B E2 09 00 00 01 00 FF 7F FF FF".hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(9), + Instructions.I_INT_I32, 0, + Instructions.I_INT_I32, 1, + Instructions.I_INT_I32, Short.MAX_VALUE.toInt(), + Instructions.I_INT_I32, 0xFFFF, + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, 
expectedBytecode) + } + + @Test + fun `handler compiles simple uint32 TE lists`() { + val bytes = """ + 5B E4 09 + 00 00 00 00 + 01 00 00 00 + FF FF FF 7F + FF FF FF FF + """.cleanCommentedHexBytes().hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(10), + Instructions.I_INT_I32, 0, + Instructions.I_INT_I32, 1, + Instructions.I_INT_I32, Int.MAX_VALUE, + *BytecodeUtils.I64(0xFFFFFFFFL), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles simple uint64 TE lists`() { + val bytes = """ + 5B E8 09 + 00 00 00 00 00 00 00 00 + 01 00 00 00 00 00 00 00 + FF FF FF FF FF FF FF 7F + FF FF FF FF FF FF FF FF + """.cleanCommentedHexBytes().hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(11), + *BytecodeUtils.I64(0), + *BytecodeUtils.I64(1), + *BytecodeUtils.I64(Long.MAX_VALUE), + Instructions.I_INT_CP.packInstructionData(0), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles simple float16 TE lists`() { + val bytes = """ + 5B 6B 11 + 00 00 | 0 + 00 80 | -0 + 00 3C | 1 + 00 BC | -1 + FF 7B | 65504 + 00 04 | 0.00006103515625 + 01 80 | -0.000000059604645 + 01 7E | NaN + """.cleanCommentedHexBytes().hexStringToByteArray() + // We need this because the NaN compiled by the handler will have a different bit layout than Float.NaN + val expectedNaN = 0x7E01.toShort().asHalfToFloat() + assert(expectedNaN.isNaN()) // Sanity check on the NaN + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(17), + Instructions.I_FLOAT_F32, 0f.toRawBits(), + Instructions.I_FLOAT_F32, (-0f).toRawBits(), + Instructions.I_FLOAT_F32, 1f.toRawBits(), + Instructions.I_FLOAT_F32, (-1f).toRawBits(), + Instructions.I_FLOAT_F32, 65504f.toRawBits(), + Instructions.I_FLOAT_F32, 
6.1035156E-5f.toRawBits(), + Instructions.I_FLOAT_F32, (-0.000000059604645f).toRawBits(), + Instructions.I_FLOAT_F32, expectedNaN.toRawBits(), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles simple float32 TE lists`() { + val bytes = """ + 5B 6C 11 + 00 00 00 00 | 0 + 00 00 00 80 | -0 + 00 00 80 3F | 1 + 00 00 80 BF | -1 + FF FF 7F 7F | 3.4028234664e38 + 00 00 80 00 | 1.1754943508e-38 + 01 00 00 80 | -1.4012984643e-45 + 01 00 C0 7F | NaN + """.cleanCommentedHexBytes().hexStringToByteArray() + // We need this because the NaN compiled by the handler will have a different bit layout than Float.NaN + val expectedNaN = Float.fromBits(0x7FC00001) + assert(expectedNaN.isNaN()) // Sanity check on the NaN + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(17), + Instructions.I_FLOAT_F32, 0f.toRawBits(), + Instructions.I_FLOAT_F32, (-0f).toRawBits(), + Instructions.I_FLOAT_F32, 1f.toRawBits(), + Instructions.I_FLOAT_F32, (-1f).toRawBits(), + Instructions.I_FLOAT_F32, 3.4028234664e38f.toRawBits(), + Instructions.I_FLOAT_F32, 1.1754943508e-38f.toRawBits(), + Instructions.I_FLOAT_F32, (-1.4012984643e-45f).toRawBits(), + Instructions.I_FLOAT_F32, expectedNaN.toRawBits(), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple float64 TE lists`() { + TODO("Test not yet implemented") + } + + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple decimal TE lists`() { + TODO("Test not yet implemented") + } + + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple timestamp_day TE lists`() { + TODO("Test not yet implemented") + } + + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple timestamp_min TE lists`() { + TODO("Test not yet implemented") + } 
+ + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple timestamp_s TE lists`() { + TODO("Test not yet implemented") + } + + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple timestamp_ms TE lists`() { + TODO("Test not yet implemented") + } + + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple timestamp_us TE lists`() { + TODO("Test not yet implemented") + } + + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple timestamp_ns TE lists`() { + TODO("Test not yet implemented") + } + + @Disabled("Test not yet implemented") + @Test + fun `handler compiles simple flexsym TE lists`() { + TODO("Test not yet implemented") + } + + @ParameterizedTest + @ValueSource( + strings = [ + "5B 60 01", + "5B 61 01", + "5B 62 01", + "5B 64 01", + "5B 68 01", + "5B E0 01", + "5B E1 01", + "5B E2 01", + "5B E4 01", + "5B E8 01", + "5B 6B 01", + "5B 6C 01", + "5B 6D 01", + "5B 70 01", + "5B 82 01", + "5B 83 01", + "5B 84 01", + "5B 85 01", + "5B 86 01", + "5B 87 01", + "5B EE 01", + ] + ) + fun `handler compiles empty lists`(bytes: String) { + val bytes = bytes.hexStringToByteArray() + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(1), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + + @Test + fun `handler compiles large lists`() { + val testLength = 10_000_000 // Much larger causes test crash + val bytes = byteArrayOf( + OpCode.TAGLESS_ELEMENT_LIST.toByte(), + OpCode.INT_8.toByte(), + *generateFlexUIntBytes(testLength), + *Array(testLength) { 0x01 }.toByteArray() + ) + val expectedBytecode = intArrayOf( + Instructions.I_LIST_START.packInstructionData(testLength + 1), + *Array(testLength) { Instructions.I_INT_I16.packInstructionData(1) }.toIntArray(), + Instructions.I_END_CONTAINER + ) + TaglessElementListOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + } +} diff --git 
a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt new file mode 100644 index 000000000..292eff928 --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/OpcodeHandlerTestUtil.kt @@ -0,0 +1,50 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.bytecode.util.unsignedToInt +import org.junit.jupiter.api.Assertions.assertArrayEquals +import org.junit.jupiter.api.Assertions.assertEquals + +internal object OpcodeHandlerTestUtil { + /** + * Asserts that an opcode handler compiles the given input bytes to the given bytecode and that the position + * returned by the handler points immediately after the last byte in the input. + * + * @return The bytecode buffer containing the bytecode compiled by this handler, for convenience of test cases + * that wish to further validate the compiled bytecode represents a particular value + */ + fun OpcodeToBytecodeHandler.shouldCompile( + inputBytes: ByteArray, + expectedBytecode: IntArray, + expectedConstantPool: ConstantPool? 
= null, + ): BytecodeBuffer { + val buffer = BytecodeBuffer() + val constantPool = ConstantPool() + + var position = 0 + val opcode = inputBytes[position++].unsignedToInt() + position += this.convertOpcodeToBytecode( + opcode, + inputBytes, + position, + buffer, + constantPool, + intArrayOf(), + intArrayOf(), + arrayOf() + ) + + assertEqualBytecode(expectedBytecode, buffer.toArray()) + assertEquals(inputBytes.size, position) + + if (expectedConstantPool != null) { + assertArrayEquals(expectedConstantPool.toArray(), constantPool.toArray()) + } + + return buffer + } +} diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlerTests.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlerTests.kt new file mode 100644 index 000000000..6cdd6ec3a --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlerTests.kt @@ -0,0 +1,64 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin11.bytearray + +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.bytecode.bin11.OpCode +import com.amazon.ion.bytecode.bin11.bytearray.OpcodeHandlerTestUtil.shouldCompile +import com.amazon.ion.bytecode.ir.Instructions +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import org.junit.jupiter.api.Nested +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.ValueSource + +class SymbolOpcodeHandlerTests { + @Nested + inner class SingleCharSymbolOpcodeHandlerTest { + + @ParameterizedTest + @ValueSource( + strings = [ + "a", + "Z", + "5", + "~", + " ", + "\n", + "\u007F", + "\u0000", + ] + ) + fun `handler compiles single-char symbols`(char: String) { + val char = char.single() + val bytes = byteArrayOf(OpCode.SYMBOL_LENGTH_0.or(1).toByte(), char.code.toByte()) + val 
expectedBytecode = intArrayOf(Instructions.I_SYMBOL_CHAR.packInstructionData(char.code)) + SingleCharSymbolOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + } + + @Nested + inner class SymbolSIDOpcodeHandlerTest { + + @ParameterizedTest + @CsvSource( + "50 01, 0", + "51 01, 1", + "52 03, 10", + "53 03, 11", + "54 03, 12", + "55 03, 13", + "56 03, 14", + "57 03, 15", + "57 FF, 1023", + "50 02 02, 1024", + "57 FE FF, 131071", + "50 04 00 02, 131072" + ) + fun `handler compiles symbols with SID`(bytes: String, sid: Int) { + val bytes = bytes.hexStringToByteArray() + val expectedBytecode = intArrayOf(Instructions.I_SYMBOL_SID.packInstructionData(sid)) + SymbolSIDOpcodeHandler.shouldCompile(bytes, expectedBytecode) + } + } +} From 513467a76210f2c33df917c009615c46a8e42633 Mon Sep 17 00:00:00 2001 From: austnwil Date: Wed, 5 Nov 2025 11:48:35 -0800 Subject: [PATCH 2/8] Delete unused object --- .../bin11/bytearray/ListOpcodeHandlers.kt | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt index e34151c8f..d4871e6d8 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt @@ -12,27 +12,6 @@ import com.amazon.ion.bytecode.util.unsignedToInt // "SequenceOpcodeHandlers" and pass the start instruction (Instructions.I_LIST_START vs .I_SEXP_START) to a helper // BytecodeEmitter.emitSequence() or similar so this logic is not duplicated in a set of `*SexpOpcodeHandler`s. -/** - * Writes a list to the bytecode buffer. Handles opcode `0xE0`. 
- */ -internal object EmptyListOpcodeHandler : OpcodeToBytecodeHandler { - @OptIn(ExperimentalStdlibApi::class) - override fun convertOpcodeToBytecode( - opcode: Int, - source: ByteArray, - position: Int, - destination: BytecodeBuffer, - constantPool: AppendableConstantPoolView, - macroSrc: IntArray, - macroIndices: IntArray, - symbolTable: Array - ): Int { - assert(opcode == 0xe0) { "Handler cannot compile opcode ${opcode.toHexString()}" } - BytecodeEmitter.emitList(destination) {} - return 0 - } -} - /** * Writes a length prefixed list to the bytecode buffer. Handles opcode `0xB0`-`0xBF`. */ From b462486747ff0009f9433beae2a35d03040ad217 Mon Sep 17 00:00:00 2001 From: Austin Williams Date: Wed, 5 Nov 2025 14:50:19 -0800 Subject: [PATCH 3/8] Apply suggestion from @popematt Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com> --- .../com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt index fef075eb5..5d21188fb 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt @@ -156,6 +156,7 @@ internal val TAGLESS_FLEX_INT = OpcodeToBytecodeHandler { opcode, src, pos, dest val flexIntLength = PrimitiveDecoder.lengthOfFlexIntOrUIntAt(src, pos) when (flexIntLength) { + // TODO(perf): See if there's any performance benefit to having a separate case for length=1|2 and using INT_I16 instruction 1, 2, 3, 4 -> { val valueAndLength = PrimitiveDecoder.readFlexIntValueAndLength(src, pos) val value = valueAndLength.toInt() From 15ac400bfb6e38d64d85a56182f8cecb3cfdac23 Mon Sep 17 00:00:00 2001 From: Austin Williams Date: Wed, 5 Nov 2025 14:50:42 -0800 Subject: [PATCH 4/8] Apply suggestion from @popematt Co-authored-by: Matthew Pope 
<81593196+popematt@users.noreply.github.com> --- .../ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt index d4871e6d8..5f1501548 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt @@ -154,7 +154,15 @@ internal object TaglessElementListOpcodeHandler : OpcodeToBytecodeHandler { val childOpcode = source[p++].unsignedToInt() val macroAddress = when (childOpcode) { in 0x00..0x47 -> childOpcode - in 0x48..0x4f, 0xf4 -> { + in 0x48..0x4f -> { + val flexUIntValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, p) + val addressLength = flexUIntValueAndLength.shr(Int.SIZE_BITS).toInt() + p += flexUIntValueAndLength + val lsb = childOpcode - 0x48 + val msb = flexUIntValueAndLength.toInt() * 8 + msb + lsb + 72 + } + 0xf4 -> { val addressValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, p) val addressValue = addressValueAndLength.toInt() val addressLength = addressValueAndLength.shr(Int.SIZE_BITS).toInt() From ea3381ae0e5ebbea0bfb0b72799084fa64d10ba6 Mon Sep 17 00:00:00 2001 From: Austin Williams Date: Wed, 5 Nov 2025 14:50:58 -0800 Subject: [PATCH 5/8] Apply suggestion from @popematt Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com> --- .../ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt index 5f1501548..3ab57ac25 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt +++ 
b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt @@ -172,10 +172,10 @@ internal object TaglessElementListOpcodeHandler : OpcodeToBytecodeHandler { else -> -1 } - val containerSizeValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, p) - val containerLength = containerSizeValueAndLength.toInt() - val prefixLength = containerSizeValueAndLength.shr(Int.SIZE_BITS).toInt() - p += prefixLength + val childCountValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, p) + val childCount = childCountValueAndLength.toInt() + val prefixSize = childCountValueAndLength.shr(Int.SIZE_BITS).toInt() + p += prefixSize // If macroAddress > -1, then it is the address of the macro-shaped values, // and childOpcode should be ignored. From ab2a111094b84fd053e57548d18f7c8a41905835 Mon Sep 17 00:00:00 2001 From: Austin Williams Date: Wed, 5 Nov 2025 14:51:13 -0800 Subject: [PATCH 6/8] Apply suggestion from @popematt Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com> --- .../bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt index e3b5e2fdf..4754928da 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt @@ -177,10 +177,11 @@ class ListOpcodeHandlerTests { fun `handler compiles nested lists`() { val bytes = """ FA 63 - FA 11 | [null, [[null, null]], null] - 8E |--| | | - FA 09 FA 05 8E 8E |--------| | - 8E |------------------------| + FA 11 | [ + 8E | null, + FA 09 FA 05 8E 8E | [[null, null]], + 8E | null, + | ] FA 41 | [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 61 01 61 02 61 03 61 04 61 05 61 06 61 07 61 08 From 
11bed3fc3dcd923a2e3bd4540a86aac59f747697 Mon Sep 17 00:00:00 2001 From: Austin Williams Date: Wed, 5 Nov 2025 14:51:27 -0800 Subject: [PATCH 7/8] Apply suggestion from @popematt Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com> --- .../ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt index 4754928da..0a7116e94 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt @@ -365,6 +365,8 @@ class ListOpcodeHandlerTests { * ================================================ * == TAGLESS ELEMENT LISTS == * ================================================ + * + * TODO: Test cases for TE Lists using macro shapes. */ @Nested From 85ea14042c5987a805073b165cd140a8dfb769e6 Mon Sep 17 00:00:00 2001 From: austnwil Date: Wed, 5 Nov 2025 17:00:54 -0800 Subject: [PATCH 8/8] Implement PR suggestions --- .../bytecode/bin11/ByteArrayBytecodeGenerator11.kt | 1 + .../bin11/bytearray/AnnotationSIDOpcodeHandler.kt | 2 -- .../bytecode/bin11/bytearray/IntOpcodeHandlers.kt | 2 -- .../bytecode/bin11/bytearray/ListOpcodeHandlers.kt | 12 ++---------- .../bytecode/bin11/bytearray/SymbolOpcodeHandlers.kt | 4 ---- .../com/amazon/ion/bytecode/GeneratorTestUtil.kt | 2 +- .../bin11/ByteArrayBytecodeGenerator11Test.kt | 4 ++-- .../bin11/bytearray/ListOpcodeHandlerTests.kt | 6 +++--- 8 files changed, 9 insertions(+), 24 deletions(-) diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt index 9c3df3cfb..510e8e3d3 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt +++ 
b/src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt @@ -50,6 +50,7 @@ internal class ByteArrayBytecodeGenerator11( symTab ) } catch (e: StackOverflowError) { + // TODO: implement recursion limit instead of catching StackOverflowError throw IonException("Ion data nested too deeply", e) } } diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationSIDOpcodeHandler.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationSIDOpcodeHandler.kt index d215c0e0a..51329a1a1 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationSIDOpcodeHandler.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/AnnotationSIDOpcodeHandler.kt @@ -22,8 +22,6 @@ internal object AnnotationSIDOpcodeHandler : OpcodeToBytecodeHandler { macroIndices: IntArray, symbolTable: Array ): Int { - assert(opcode == 0x58) { "Handler cannot compile opcode ${opcode.toHexString()}" } - val sidValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, position) val sid = sidValueAndLength.toInt() val length = sidValueAndLength.shr(Int.SIZE_BITS).toInt() diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt index 5d21188fb..aea5e9e76 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt @@ -152,8 +152,6 @@ internal object LongIntOpcodeHandler : OpcodeToBytecodeHandler { * */ @OptIn(ExperimentalStdlibApi::class) internal val TAGLESS_FLEX_INT = OpcodeToBytecodeHandler { opcode, src, pos, dest, cp, _, _, _ -> - assert(opcode == 0x60) { "Handler cannot compile opcode ${opcode.toHexString()}" } - val flexIntLength = PrimitiveDecoder.lengthOfFlexIntOrUIntAt(src, pos) when (flexIntLength) { // TODO(perf): See if there's any performance benefit to having a separate case for length=1|2 and using INT_I16 
instruction diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt index 3ab57ac25..2cbc16259 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlers.kt @@ -27,8 +27,6 @@ internal object ShortLengthPrefixedListOpcodeHandler : OpcodeToBytecodeHandler { macroIndices: IntArray, symbolTable: Array ): Int { - assert(opcode in 0xb0..0xbf) { "Handler cannot compile opcode ${opcode.toHexString()}" } - val length = opcode and 0xF BytecodeEmitter.emitList(destination) { var p = position @@ -66,8 +64,6 @@ internal object LongLengthPrefixedListOpcodeHandler : OpcodeToBytecodeHandler { macroIndices: IntArray, symbolTable: Array ): Int { - assert(opcode == 0xfa) { "Handler cannot compile opcode ${opcode.toHexString()}" } - val containerSizeUIntValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, position) val containerLength = containerSizeUIntValueAndLength.toInt() val prefixLength = containerSizeUIntValueAndLength.shr(Int.SIZE_BITS).toInt() @@ -107,8 +103,6 @@ internal object DelimitedListOpcodeHandler : OpcodeToBytecodeHandler { macroIndices: IntArray, symbolTable: Array ): Int { - assert(opcode == 0xf0) { "Handler cannot compile opcode ${opcode.toHexString()}" } - var p = position BytecodeEmitter.emitList(destination) { while (true) { @@ -148,8 +142,6 @@ internal object TaglessElementListOpcodeHandler : OpcodeToBytecodeHandler { macroIndices: IntArray, symbolTable: Array ): Int { - assert(opcode == 0x5b) { "Handler cannot compile opcode ${opcode.toHexString()}" } - var p = position val childOpcode = source[p++].unsignedToInt() val macroAddress = when (childOpcode) { @@ -157,7 +149,7 @@ internal object TaglessElementListOpcodeHandler : OpcodeToBytecodeHandler { in 0x48..0x4f -> { val flexUIntValueAndLength = 
PrimitiveDecoder.readFlexUIntValueAndLength(source, p) val addressLength = flexUIntValueAndLength.shr(Int.SIZE_BITS).toInt() - p += flexUIntValueAndLength + p += addressLength val lsb = childOpcode - 0x48 val msb = flexUIntValueAndLength.toInt() * 8 msb + lsb + 72 @@ -183,7 +175,7 @@ internal object TaglessElementListOpcodeHandler : OpcodeToBytecodeHandler { if (macroAddress < 0) { val handler = TaglessOpcodeHandlerTable.handler(childOpcode) BytecodeEmitter.emitList(destination) { - for (i in 0 until containerLength) { + for (i in 0 until childCount) { p += handler.convertOpcodeToBytecode( childOpcode, source, diff --git a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlers.kt b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlers.kt index f567432cc..ffcd75eb5 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlers.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin11/bytearray/SymbolOpcodeHandlers.kt @@ -22,8 +22,6 @@ internal object SymbolSIDOpcodeHandler : OpcodeToBytecodeHandler { macroIndices: IntArray, symbolTable: Array ): Int { - assert(opcode in 0x50..0x57) { "Handler cannot compile opcode ${opcode.toHexString()}" } - val lsb = opcode and 0b111 val msbValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, position) val msb = msbValueAndLength.toInt().shl(3) @@ -49,8 +47,6 @@ internal object SingleCharSymbolOpcodeHandler : OpcodeToBytecodeHandler { macroIndices: IntArray, symbolTable: Array ): Int { - assert(opcode == 0xa1) { "Handler cannot compile opcode ${opcode.toHexString()}" } - val char = source[position].toInt() destination.add(Instructions.I_SYMBOL_CHAR.packInstructionData(char)) return 1 diff --git a/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt b/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt index b1020cb78..b14dff212 100644 --- a/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt +++ 
b/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt @@ -54,7 +54,7 @@ object GeneratorTestUtil { * Asserts that the generator throws an [IonException] when compiling its input (as opposed to a [Throwable] of * an incorrect type). */ - internal fun BytecodeGenerator.shouldThrowIonException() { + internal fun BytecodeGenerator.refillShouldThrowIonException() { assertThrows { val generator = this generator.refill(BytecodeBuffer(), ConstantPool(), EMPTY_MACRO_TABLE, intArrayOf(), DEFAULT_SYMBOL_TABLE) diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt index 9e2dd7d62..a7eedd339 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11Test.kt @@ -5,8 +5,8 @@ package com.amazon.ion.bytecode.bin11 import com.amazon.ion.TextToBinaryUtils.cleanCommentedHexBytes import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.GeneratorTestUtil.refillShouldThrowIonException import com.amazon.ion.bytecode.GeneratorTestUtil.shouldGenerate -import com.amazon.ion.bytecode.GeneratorTestUtil.shouldThrowIonException import com.amazon.ion.bytecode.PrimitiveUtils.generateFlexUIntBytes import com.amazon.ion.bytecode.ir.Instructions import com.amazon.ion.bytecode.ir.Instructions.packInstructionData @@ -151,6 +151,6 @@ internal class ByteArrayBytecodeGenerator11Test { } val generator = ByteArrayBytecodeGenerator11(bytes.toByteArray(), 0) - generator.shouldThrowIonException() + generator.refillShouldThrowIonException() } } diff --git a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt index 0a7116e94..936cf7261 100644 --- 
a/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin11/bytearray/ListOpcodeHandlerTests.kt @@ -159,7 +159,7 @@ class ListOpcodeHandlerTests { @Test fun `handler compiles large lists`() { - val testLength = 10_000_000 // Much larger causes test crash + val testLength = 10_000_000 // Much larger causes OutOfMemoryError in gradle test executor val bytes = byteArrayOf( OpCode.VARIABLE_LENGTH_LIST.toByte(), *generateFlexUIntBytes(testLength), @@ -284,7 +284,7 @@ class ListOpcodeHandlerTests { @Test fun `handler compiles large lists`() { - val testLength = 10_000_000 // Much larger causes test crash + val testLength = 10_000_000 // Much larger causes OutOfMemoryError in gradle test executor val bytes = byteArrayOf( OpCode.DELIMITED_LIST.toByte(), *Array(testLength) { 0x6E /* true */ }.toByteArray(), @@ -710,7 +710,7 @@ class ListOpcodeHandlerTests { @Test fun `handler compiles large lists`() { - val testLength = 10_000_000 // Much larger causes test crash + val testLength = 10_000_000 // Much larger causes OutOfMemoryError in gradle test executor val bytes = byteArrayOf( OpCode.TAGLESS_ELEMENT_LIST.toByte(), OpCode.INT_8.toByte(),