diff --git a/src/main/java/com/amazon/ion/bytecode/bin10/ByteArrayBytecodeGenerator10.kt b/src/main/java/com/amazon/ion/bytecode/bin10/ByteArrayBytecodeGenerator10.kt
index 73964cbd4..19c06cd43 100644
--- a/src/main/java/com/amazon/ion/bytecode/bin10/ByteArrayBytecodeGenerator10.kt
+++ b/src/main/java/com/amazon/ion/bytecode/bin10/ByteArrayBytecodeGenerator10.kt
@@ -2,14 +2,473 @@
 // SPDX-License-Identifier: Apache-2.0
 package com.amazon.ion.bytecode.bin10
 
+import com.amazon.ion.IonException
+import com.amazon.ion.SystemSymbols
+import com.amazon.ion.bytecode.BytecodeEmitter
+import com.amazon.ion.bytecode.BytecodeGenerator
+import com.amazon.ion.bytecode.ir.Instructions
+import com.amazon.ion.bytecode.ir.Instructions.I_BLOB_REF
+import com.amazon.ion.bytecode.ir.Instructions.I_CLOB_REF
+import com.amazon.ion.bytecode.ir.Instructions.I_DECIMAL_REF
+import com.amazon.ion.bytecode.ir.Instructions.I_END_OF_INPUT
+import com.amazon.ion.bytecode.ir.Instructions.I_STRING_REF
+import com.amazon.ion.bytecode.ir.Instructions.I_TIMESTAMP_REF
+import com.amazon.ion.bytecode.ir.Instructions.packInstructionData
+import com.amazon.ion.bytecode.ir.OperationKind
+import com.amazon.ion.bytecode.util.AppendableConstantPoolView
+import com.amazon.ion.bytecode.util.ByteSlice
+import com.amazon.ion.bytecode.util.BytecodeBuffer
+import com.amazon.ion.bytecode.util.unsignedToInt
+import com.amazon.ion.impl.bin.utf8.Utf8StringDecoderPool
 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
+import java.lang.IllegalStateException
+import java.math.BigInteger
+import java.nio.Buffer
+import java.nio.ByteBuffer
+import java.util.Arrays
+import kotlin.math.min
 
+/**
+ * A bytecode generator for Ion 1.0 binary encoding.
+ *
+ * #### Note on integer values
+ *
+ * Because there are separate positive/negative opcodes for integers in Ion 1.0, the data referenced in an INT_REF
+ * instruction only includes the magnitude and not the sign. To avoid this ambiguity, this implementation eagerly
+ * materializes all big integers and places them in the constant pool.
+ */
 @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "constructor does not make a defensive copy of source as a performance optimization")
-internal class ByteArrayBytecodeGenerator10
-@SuppressFBWarnings("URF_UNREAD_FIELD", justification = "field will be read once this class is implemented")
-constructor(
+internal class ByteArrayBytecodeGenerator10(
     private val source: ByteArray,
     private var i: Int,
-) {
-    // TODO: This should implement BytecodeGenerator
+) : BytecodeGenerator {
+
+    private val decoder = Utf8StringDecoderPool.getInstance().getOrCreate()
+    private val scratchBuffer = ByteBuffer.wrap(source)
+    private var scratchArray = ByteArray(32)
+    private val symbolTableHelper = SymbolTableHelper
+
+    /**
+     * Returns a scratch array of at least [minCapacity] bytes, replacing the cached array with a larger one if needed.
+     */
+    private fun getScratchArray(minCapacity: Int): ByteArray {
+        if (scratchArray.size < minCapacity) {
+            scratchArray = ByteArray(minCapacity)
+        }
+        return scratchArray
+    }
+
+    /**
+     * Compiles top-level values starting at the current position into [destination], advancing the position by the
+     * number of bytes consumed. Appends [I_END_OF_INPUT] once the position reaches the end of the source array.
+     */
+    override fun refill(
+        destination: BytecodeBuffer,
+        constantPool: AppendableConstantPoolView,
+        macroSrc: IntArray,
+        macroIndices: IntArray,
+        // NOTE(review): the type argument of `Array` looks lost in extraction; restore it from BytecodeGenerator.refill.
+        symTab: Array
+    ) {
+        val source = source
+        i += compileTopLevelValues(source, i, destination, constantPool, source.size)
+
+        if (i == source.size) {
+            destination.add(I_END_OF_INPUT)
+        }
+    }
+
+    override fun readBigIntegerReference(position: Int, length: Int): BigInteger {
+        throw IllegalStateException("Should be unreachable. All BigIntegers are eagerly parsed.")
+    }
+
+    override fun readDecimalReference(position: Int, length: Int) = readDecimalReference(source, position, position + length)
+
+    override fun readShortTimestampReference(position: Int, opcode: Int) = throw IllegalStateException("Should be unreachable. Not supported for Ion 1.0")
+
+    override fun readTimestampReference(position: Int, length: Int) = readTimestampReference(source, position, length)
+
+    override fun readTextReference(position: Int, length: Int): String {
+        val b = scratchBuffer
+        // We have to cast to `Buffer` here because JDK 17 added an override that returns `ByteBuffer`.
+        // The compiler seems to prefer that version, rather than the base method (which returns `Buffer`), and so
+        // running the tests with JDK 8 fails without this cast.
+        (b as Buffer).limit(position + length)
+        (b as Buffer).position(position)
+        return decoder.decode(b, length)
+    }
+
+    override fun readBytesReference(position: Int, length: Int): ByteSlice = ByteSlice(source, position, position + length)
+
+    override fun ionMinorVersion(): Int = 0
+
+    override fun getGeneratorForMinorVersion(minorVersion: Int): BytecodeGenerator {
+        return when (minorVersion) {
+            0 -> this
+            // TODO:
+            // 1 -> ByteArrayBytecodeGenerator11(source, i)
+            else -> throw IonException("Unknown Ion version: 1.$minorVersion")
+        }
+    }
+
+    /**
+     * Compiles top-level values from [src], starting at [pos], into [dest].
+     *
+     * Compilation stops at the end of the input, after an IVM, after a local symbol table, or once [dest] reaches
+     * its soft limit. At least one value is consumed per call when input is available, and an annotation wrapper
+     * is never separated from the value that it wraps.
+     *
+     * @return the number of bytes consumed.
+     */
+    private fun compileTopLevelValues(
+        src: ByteArray,
+        pos: Int,
+        dest: BytecodeBuffer,
+        cp: AppendableConstantPoolView,
+        limit: Int,
+    ): Int {
+        var p = pos
+        val end = min(pos + limit, src.size)
+
+        // Try to avoid causing the destination buffer to resize by stopping once it is about 90% full.
+        // This is a good enough heuristic as long as the sizes of top-level values are fairly consistent.
+        // TODO(perf): Benchmark with and without this for large and small streams.
+        val instructionSoftLimit = (dest.capacity() * 9L / 10).toInt()
+
+        var firstAnnotationIndex = -1
+        var firstAnnotationSid = -1
+        var typeId: Int
+
+        // Always make progress (p == pos on the first pass), and never stop between an annotation wrapper and its
+        // value, because the annotation state below does not survive across calls.
+        while (p < end && (p == pos || firstAnnotationIndex >= 0 || dest.size() < instructionSoftLimit)) {
+
+            typeId = src[p++].unsignedToInt()
+
+            // Length includes only the bytes after the typeId.
+            val lengthAndValue = getLengthForTypeId(typeId, src, p)
+            val length = lengthAndValue.shr(8).toInt()
+            p += lengthAndValue.and(0xFF).toInt()
+
+            if (TypeIdHelper.isNull(typeId)) {
+                compileNullValue(typeId, dest)
+            } else when (TypeIdHelper.operationKindForTypeId(typeId)) {
+                OperationKind.UNSET -> if (firstAnnotationIndex >= 0) throw IonException("Invalid annotation wrapper: NOP pad may not occur inside an annotation wrapper.")
+                OperationKind.IVM -> {
+                    if (firstAnnotationIndex >= 0) throw IonException("Invalid annotation wrapper: IVM may not occur inside an annotation wrapper.")
+                    val major = src[p++].unsignedToInt()
+                    val minor = src[p++].unsignedToInt()
+                    val lastByte = src[p++].unsignedToInt()
+                    if (lastByte != 0xEA) throw IonException("Invalid IVM encountered. Ended with $lastByte instead of 0xEA.")
+                    dest.add(Instructions.I_IVM.or(major.shl(8).or(minor)))
+                    break
+                }
+                OperationKind.ANNOTATIONS -> {
+                    if (firstAnnotationIndex >= 0) throw IonException("Invalid annotation wrapper: annotations may not occur inside an annotation wrapper.")
+                    val varUIntValueAndLength = VarIntHelper.readVarUIntValueAndLength(src, p)
+                    val annotationStart = p + varUIntValueAndLength.and(0xFF).toInt()
+                    val valueStart = annotationStart + varUIntValueAndLength.shr(8).toInt()
+                    firstAnnotationIndex = dest.size()
+                    firstAnnotationSid = compileAnnotations(src, annotationStart, valueStart, dest)
+                    p = valueStart
+                    continue
+                }
+                OperationKind.BOOL -> compileBoolValue(typeId, dest)
+                OperationKind.INT -> compileIntValue(typeId, src, p, length, dest, cp)
+                OperationKind.FLOAT -> compileFloatValue(typeId, src, p, dest)
+                OperationKind.DECIMAL -> dest.add2(I_DECIMAL_REF.or(length), p)
+                OperationKind.TIMESTAMP -> dest.add2(I_TIMESTAMP_REF.or(length), p)
+                OperationKind.SYMBOL -> compileSymbolValue(src, p, length, dest)
+                OperationKind.STRING -> dest.add2(I_STRING_REF.or(length), p)
+                OperationKind.CLOB -> dest.add2(I_CLOB_REF.or(length), p)
+                OperationKind.BLOB -> dest.add2(I_BLOB_REF.or(length), p)
+                OperationKind.LIST -> compileList(src, p, length, dest, cp)
+                OperationKind.SEXP -> compileSExp(src, p, length, dest, cp)
+                OperationKind.STRUCT -> {
+                    if (firstAnnotationSid == SystemSymbols.ION_SYMBOL_TABLE_SID) {
+                        dest.truncate(firstAnnotationIndex)
+                        symbolTableHelper.compileSymbolTable(src, p, length, dest, cp)
+                        p += length
+                        break
+                    } else {
+                        compileStruct(src, p, length, dest, cp)
+                    }
+                }
+                // Handled earlier.
+                OperationKind.NULL -> throw IllegalStateException("Unreachable!")
+                else -> throw IonException("Invalid Type Id: ${typeId.toString(16)}")
+            }
+            p += length
+            firstAnnotationIndex = -1
+            firstAnnotationSid = -1
+        }
+        return p - pos
+    }
+
+    /**
+     * Emits one annotation instruction for each VarUInt SID found in [source] between [start] and [end].
+     * Returns the SID of the first annotation. (To enable easy checks for a local symbol table.)
+     */
+    private fun compileAnnotations(source: ByteArray, start: Int, end: Int, bytecode: BytecodeBuffer): Int {
+        var p = start
+        val firstSidValueAndLength = VarIntHelper.readVarUIntValueAndLength(source, p)
+        val firstSid = (firstSidValueAndLength ushr 8).toInt()
+        val firstSidLength = firstSidValueAndLength.toInt() and 0xFF
+        p += firstSidLength
+        bytecode.add(Instructions.I_ANNOTATION_SID.packInstructionData(firstSid))
+
+        while (p < end) {
+            val valueAndLength = VarIntHelper.readVarUIntValueAndLength(source, p)
+            val sid = (valueAndLength ushr 8).toInt()
+            val length = valueAndLength.toInt() and 0xFF
+            p += length
+            bytecode.add(Instructions.I_ANNOTATION_SID.packInstructionData(sid))
+        }
+        return firstSid
+    }
+
+    /** Emits the typed null instruction that corresponds to [typeId]. */
+    private fun compileNullValue(typeId: Int, dest: BytecodeBuffer) {
+        // TODO: Make something that's a little less brittle for this line.
+        val operationKind = TypeIdHelper.ionTypeForTypeId(typeId)!!.ordinal + 1
+        dest.add(Instructions.typedNullFromOperationKind(operationKind))
+    }
+
+    /** Emits a bool instruction whose data is the low nibble of [typeId]. */
+    private fun compileBoolValue(typeId: Int, dest: BytecodeBuffer) {
+        dest.add(Instructions.I_BOOL.packInstructionData(typeId and 0xF))
+    }
+
+    /**
+     * Emits the narrowest int instruction that can hold the value: `I_INT_I16`, `I_INT_I32`, an int64 (via
+     * [BytecodeEmitter.emitInt64Value]), or `I_INT_CP` with the value added to the constant pool.
+     *
+     * [source] holds the [length]-byte big-endian magnitude at [position]; the sign comes from [typeId].
+     *
+     * @throws IonException if the encoded value is negative zero.
+     */
+    private fun compileIntValue(typeId: Int, source: ByteArray, position: Int, length: Int, dest: BytecodeBuffer, cp: AppendableConstantPoolView) {
+        val sign = signForIntTypeId(typeId)
+
+        when (length) {
+            0 -> {
+                if (sign == -1) throw IonException("Int zero may not be negative")
+                dest.add(Instructions.I_INT_I16)
+            }
+            1 -> {
+                val value = source[position].toInt().and(0xFF).times(sign)
+                if (value == 0 && sign == -1) throw IonException("Int zero may not be negative")
+                dest.add(Instructions.I_INT_I16.packInstructionData(value))
+            }
+            2 -> {
+                val msb = source[position].toInt().and(0xFF).shl(8)
+                val lsb = source[position + 1].toInt() and 0xFF
+                val value = (msb or lsb) * sign
+                if (value == 0 && sign == -1) throw IonException("Int zero may not be negative")
+                val numLeadingSignBits = Integer.numberOfLeadingZeros(value.shr(31).xor(value))
+                if (numLeadingSignBits > 16) {
+                    dest.add(Instructions.I_INT_I16.packInstructionData(value))
+                } else {
+                    dest.add2(Instructions.I_INT_I32, value)
+                }
+            }
+            3 -> {
+                var p = position
+                var absoluteValue = 0
+                absoluteValue = absoluteValue.shl(8) or source[p++].toInt().and(0xFF)
+                absoluteValue = absoluteValue.shl(8) or source[p++].toInt().and(0xFF)
+                absoluteValue = absoluteValue.shl(8) or source[p++].toInt().and(0xFF)
+                val value = absoluteValue * sign
+                if (value == 0 && sign == -1) throw IonException("Int zero may not be negative")
+                val numLeadingSignBits = Integer.numberOfLeadingZeros(value.shr(31).xor(value))
+                if (numLeadingSignBits > 16) {
+                    dest.add(Instructions.I_INT_I16.packInstructionData(value))
+                } else {
+                    dest.add2(Instructions.I_INT_I32, value)
+                }
+            }
+            4, 5, 6, 7 -> {
+                val absoluteValue = readUInt(source, position, length)
+                val value = absoluteValue * sign
+                if (value == 0L && sign == -1) throw IonException("Int zero may not be negative")
+                // Number of bits needed for the magnitude, excluding the sign bit, so a value only fits in a signed
+                // N-bit integer when this is strictly less than N.
+                val minRequiredBits = Long.SIZE_BITS - java.lang.Long.numberOfLeadingZeros(value.shr(63).xor(value))
+                if (minRequiredBits < Short.SIZE_BITS) {
+                    dest.add(Instructions.I_INT_I16.packInstructionData(value.toInt()))
+                } else if (minRequiredBits < Int.SIZE_BITS) {
+                    dest.add2(Instructions.I_INT_I32, value.toInt())
+                } else {
+                    BytecodeEmitter.emitInt64Value(dest, value)
+                }
+            }
+            else -> {
+                // Right-align the magnitude in the scratch array. The leading bytes are zero because the used region
+                // is cleared again below.
+                val scratch = getScratchArray(length)
+                val scratchSize = scratch.size
+                val scratchPosition = scratchSize - length
+                System.arraycopy(source, position, scratch, scratchPosition, length)
+                val value = BigInteger(sign, scratch)
+                Arrays.fill(scratch, scratchPosition, scratchSize, 0)
+
+                if (value == BigInteger.ZERO && sign == -1) throw IonException("Int zero may not be negative")
+
+                val minimumBytes = value.bitLength() / 8
+                when (minimumBytes) {
+                    0, 1 -> {
+                        dest.add(Instructions.I_INT_I16.packInstructionData(value.toInt()))
+                    }
+                    2, 3 -> {
+                        dest.add2(Instructions.I_INT_I32, value.toInt())
+                    }
+                    4, 5, 6, 7 -> {
+                        BytecodeEmitter.emitInt64Value(dest, value.toLong())
+                    }
+                    else -> {
+                        dest.add(Instructions.I_INT_CP.packInstructionData(cp.add(value)))
+                    }
+                }
+            }
+        }
+    }
+
+    /** Emits a float instruction for a 0-, 4-, or 8-byte float; any other length nibble is rejected. */
+    private fun compileFloatValue(typeId: Int, source: ByteArray, position: Int, dest: BytecodeBuffer) {
+        var p = position
+        when (typeId and 0xF) {
+            0 -> dest.add2(Instructions.I_FLOAT_F32, 0.0f.toRawBits())
+            4 -> {
+                // TODO(perf): See if there's any difference between this, a for loop, or manually unrolling the loop
+                var bits = 0
+                repeat(4) { bits = bits.shl(8) or source[p++].toInt().and(0xFF) }
+                dest.add2(Instructions.I_FLOAT_F32, bits)
+            }
+            8 -> {
+                // TODO(perf): See if there's any difference between this, a for loop, or manually unrolling the loop
+                var bits = 0L
+                repeat(8) { bits = bits.shl(8) or source[p++].toLong().and(0xFF) }
+                BytecodeEmitter.emitDoubleValue(dest, Double.fromBits(bits))
+            }
+            else -> throw IonException("Encountered an illegal typeId; not a valid float length: $typeId")
+        }
+    }
+
+    /** Emits a symbol instruction for the [length]-byte unsigned SID at [position]. */
+    private fun compileSymbolValue(source: ByteArray, position: Int, length: Int, dest: BytecodeBuffer) {
+        val sid = readUInt(source, position, length).toInt()
+        dest.add(Instructions.I_SYMBOL_SID.packInstructionData(sid))
+    }
+
+    /** Compiles the [length] bytes of list content at [position] as a list container. */
+    private fun compileList(source: ByteArray, position: Int, length: Int, dest: BytecodeBuffer, cp: AppendableConstantPoolView) {
+        compileContainer(Instructions.I_LIST_START, dest) {
+            var p = position
+            val end = position + length
+            while (p < end) p += compileChildValue(source, p, dest, cp)
+        }
+    }
+
+    /** Compiles the [length] bytes of s-expression content at [position] as a sexp container. */
+    private fun compileSExp(source: ByteArray, position: Int, length: Int, dest: BytecodeBuffer, cp: AppendableConstantPoolView) {
+        compileContainer(Instructions.I_SEXP_START, dest) {
+            var p = position
+            val end = position + length
+            while (p < end) p += compileChildValue(source, p, dest, cp)
+        }
+    }
+
+    /** Compiles the [length] bytes of struct content at [position]: a VarUInt field name SID before each value. */
+    private fun compileStruct(source: ByteArray, position: Int, length: Int, dest: BytecodeBuffer, cp: AppendableConstantPoolView) {
+        compileContainer(Instructions.I_STRUCT_START, dest) {
+            var p = position
+            val end = position + length
+            while (p < end) {
+                val sidValueAndLength = VarIntHelper.readVarUIntValueAndLength(source, p)
+                val sid = sidValueAndLength.ushr(8).toInt()
+                p += sidValueAndLength.and(0xFF).toInt()
+                val fieldNameIndex = dest.size()
+                dest.add(Instructions.I_FIELD_NAME_SID.packInstructionData(sid))
+                p += compileChildValue(source, p, dest, cp)
+                // A NOP pad emits nothing; drop its field name so that it is not attached to the next field's value.
+                if (dest.size() == fieldNameIndex + 1) dest.truncate(fieldNameIndex)
+            }
+        }
+    }
+
+    /**
+     * Reserves a slot for the container start instruction, runs [content], appends the end-container instruction,
+     * and then fills the reserved slot with [instruction] packed with the number of instructions that follow it.
+     */
+    private inline fun compileContainer(instruction: Int, dest: BytecodeBuffer, content: () -> Unit) {
+        val containerStartIndex = dest.reserve()
+        val start = containerStartIndex + 1
+        content()
+        dest.add(Instructions.I_END_CONTAINER)
+        val end = dest.size()
+        dest[containerStartIndex] = instruction.packInstructionData(end - start)
+    }
+
+    /**
+     * Compiles the single (possibly annotated) value that starts at [position] and returns the number of bytes it
+     * occupies, including the type id and any length bytes.
+     */
+    private fun compileChildValue(source: ByteArray, position: Int, dest: BytecodeBuffer, cp: AppendableConstantPoolView, isAnnotated: Boolean = false): Int {
+        var p = position
+
+        val typeId = source[p++].unsignedToInt()
+
+        // Length counts only the bytes after the typeId.
+        val valueAndLength = getLengthForTypeId(typeId, source, p)
+        p += valueAndLength.toByte()
+        val length = valueAndLength.ushr(8).toInt()
+
+        if (TypeIdHelper.isNull(typeId)) {
+            compileNullValue(typeId, dest)
+        } else when (TypeIdHelper.operationKindForTypeId(typeId)) {
+            OperationKind.UNSET -> if (isAnnotated) throw IonException("Invalid annotation wrapper: NOP pad may not occur inside an annotation wrapper.")
+            OperationKind.IVM -> throw IonException("Found IVM illegally nested in a container.")
+            OperationKind.ANNOTATIONS -> {
+                if (isAnnotated) throw IonException("Invalid annotation wrapper: annotations may not occur inside an annotation wrapper.")
+                val varUIntValueAndLength = VarIntHelper.readVarUIntValueAndLength(source, p)
+                val annotationStart = p + varUIntValueAndLength.and(0xFF).toInt()
+                val valueStart = annotationStart + varUIntValueAndLength.shr(8).toInt()
+                compileAnnotations(source, annotationStart, valueStart, dest)
+                compileChildValue(source, valueStart, dest, cp, isAnnotated = true)
+            }
+            OperationKind.BOOL -> compileBoolValue(typeId, dest)
+            OperationKind.INT -> compileIntValue(typeId, source, p, length, dest, cp)
+            OperationKind.FLOAT -> compileFloatValue(typeId, source, p, dest)
+            OperationKind.DECIMAL -> dest.add2(I_DECIMAL_REF.or(length), p)
+            OperationKind.TIMESTAMP -> dest.add2(I_TIMESTAMP_REF.or(length), p)
+            OperationKind.SYMBOL -> compileSymbolValue(source, p, length, dest)
+            OperationKind.STRING -> dest.add2(I_STRING_REF.or(length), p)
+            OperationKind.CLOB -> dest.add2(I_CLOB_REF.or(length), p)
+            OperationKind.BLOB -> dest.add2(I_BLOB_REF.or(length), p)
+            OperationKind.LIST -> compileList(source, p, length, dest, cp)
+            OperationKind.SEXP -> compileSExp(source, p, length, dest, cp)
+            OperationKind.STRUCT -> compileStruct(source, p, length, dest, cp)
+            // Handled earlier.
+            OperationKind.NULL -> throw IllegalStateException("Unreachable!")
+            else -> throw IonException("Invalid Type Id: ${typeId.toString(16)}")
+        }
+        p += length
+        return p - position
+    }
+
+    /**
+     * Gets the length for the given TypeId, reading a VarUInt length if needed.
+     * Returns 7 bytes with the length and 1 byte containing the number of bytes consumed to read the length.
+     *
+     * See [VarIntHelper.readVarUIntValueAndLength].
+     *
+     * @throws IonException if the typeId is not a legal typeId in Ion 1.0
+     */
+    private fun getLengthForTypeId(typeId: Int, source: ByteArray, position: Int): Long {
+        return when (val l = TypeIdHelper.TYPE_LENGTHS[typeId]) {
+            -1 -> VarIntHelper.readVarUIntValueAndLength(source, position)
+            -2 -> throw IonException("Invalid Type ID: $typeId")
+            else -> l.toLong() shl 8
+        }
+    }
 }
diff --git a/src/main/java/com/amazon/ion/bytecode/bin10/TypeIdHelper.kt b/src/main/java/com/amazon/ion/bytecode/bin10/TypeIdHelper.kt
index 4f4817a9d..617090166 100644
--- a/src/main/java/com/amazon/ion/bytecode/bin10/TypeIdHelper.kt
+++ b/src/main/java/com/amazon/ion/bytecode/bin10/TypeIdHelper.kt
@@ -80,11 +80,11 @@ internal object TypeIdHelper {
             in 0xA0..0xAF -> OperationKind.BLOB
             in 0xB0..0xBF -> OperationKind.LIST
             in 0xC0..0xCF -> OperationKind.SEXP
-            0xD0, in 0xD2..0xDF -> OperationKind.STRUCT
+            in 0xD0..0xDF -> OperationKind.STRUCT
             0xE0 -> OperationKind.IVM
             in 0xE3..0xEE -> OperationKind.ANNOTATIONS
             // Everything else: 12..1E, 30, D1, EF, F0..FF, illegal timestamp, float, and annotations sizes
-            else -> OperationKind.UNSET
+            else -> -1
         }
     }
 
@@ -107,28 +107,31 @@ internal object TypeIdHelper {
             // The length of all of these is determined entirely by the low-nibble of the typeId.
0x00, 0x10, 0x20, 0x30, 0x40, 0x50, /* */ 0x70, 0x80, 0x90, 0xA0, 0xB0, 0xC0, 0xD0 /* */ -> 0 0x01, /* */ 0x21, 0x31, /* */ 0x51, /* */ 0x71, 0x81, 0x91, 0xA1, 0xB1, 0xC1 /* */ /* */ -> 1 - 0x02, 0x12, 0x22, 0x32, /* */ 0x52, 0x62, 0x72, 0x82, 0x92, 0xA2, 0xB2, 0xC2, 0xD2 /* */ -> 2 - 0x03, 0x13, 0x23, 0x33, /* */ 0x53, 0x63, 0x73, 0x83, 0x93, 0xA3, 0xB3, 0xC3, 0xD3, 0xE3 -> 3 - 0x04, 0x14, 0x24, 0x34, 0x44, 0x54, 0x64, 0x74, 0x84, 0x94, 0xA4, 0xB4, 0xC4, 0xD4, 0xE4 -> 4 - 0x05, 0x15, 0x25, 0x35, /* */ 0x55, 0x65, 0x75, 0x85, 0x95, 0xA5, 0xB5, 0xC5, 0xD5, 0xE5 -> 5 - 0x06, 0x16, 0x26, 0x36, /* */ 0x56, 0x66, 0x76, 0x86, 0x96, 0xA6, 0xB6, 0xC6, 0xD6, 0xE6 -> 6 - 0x07, 0x17, 0x27, 0x37, /* */ 0x57, 0x67, 0x77, 0x87, 0x97, 0xA7, 0xB7, 0xC7, 0xD7, 0xE7 -> 7 - 0x08, 0x18, 0x28, 0x38, 0x48, 0x58, 0x68, 0x78, 0x88, 0x98, 0xA8, 0xB8, 0xC8, 0xD8, 0xE8 -> 8 - 0x09, 0x19, 0x29, 0x39, /* */ 0x59, 0x69, 0x79, 0x89, 0x99, 0xA9, 0xB9, 0xC9, 0xD9, 0xE9 -> 9 - 0x0A, 0x1A, 0x2A, 0x3A, /* */ 0x5A, 0x6A, 0x7A, 0x8A, 0x9A, 0xAA, 0xBA, 0xCA, 0xDA, 0xEA -> 10 - 0x0B, 0x1B, 0x2B, 0x3B, /* */ 0x5B, 0x6B, 0x7B, 0x8B, 0x9B, 0xAB, 0xBB, 0xCB, 0xDB, 0xEB -> 11 - 0x0C, 0x1C, 0x2C, 0x3C, /* */ 0x5C, 0x6C, 0x7C, 0x8C, 0x9C, 0xAC, 0xBC, 0xCC, 0xDC, 0xEC -> 12 - 0x0D, 0x1D, 0x2D, 0x3D, /* */ 0x5D, 0x6D, 0x7D, 0x8D, 0x9D, 0xAD, 0xBD, 0xCD, 0xDD, 0xED -> 13 - 0x0E, 0x1E, 0x2E, 0x3E, /* */ 0x5E, 0x6E, 0x7E, 0x8E, 0x9E, 0xAE, 0xBE, 0xCE, 0xDE, 0xEE -> -1 + 0x02, /* */ 0x22, 0x32, /* */ 0x52, 0x62, 0x72, 0x82, 0x92, 0xA2, 0xB2, 0xC2, 0xD2 /* */ -> 2 + 0x03, /* */ 0x23, 0x33, /* */ 0x53, 0x63, 0x73, 0x83, 0x93, 0xA3, 0xB3, 0xC3, 0xD3, 0xE3 -> 3 + 0x04, /* */ 0x24, 0x34, 0x44, 0x54, 0x64, 0x74, 0x84, 0x94, 0xA4, 0xB4, 0xC4, 0xD4, 0xE4 -> 4 + 0x05, /* */ 0x25, 0x35, /* */ 0x55, 0x65, 0x75, 0x85, 0x95, 0xA5, 0xB5, 0xC5, 0xD5, 0xE5 -> 5 + 0x06, /* */ 0x26, 0x36, /* */ 0x56, 0x66, 0x76, 0x86, 0x96, 0xA6, 0xB6, 0xC6, 0xD6, 0xE6 -> 6 + 0x07, /* */ 0x27, 0x37, /* */ 0x57, 0x67, 0x77, 0x87, 0x97, 0xA7, 0xB7, 
0xC7, 0xD7, 0xE7 -> 7 + 0x08, /* */ 0x28, 0x38, 0x48, 0x58, 0x68, 0x78, 0x88, 0x98, 0xA8, 0xB8, 0xC8, 0xD8, 0xE8 -> 8 + 0x09, /* */ 0x29, 0x39, /* */ 0x59, 0x69, 0x79, 0x89, 0x99, 0xA9, 0xB9, 0xC9, 0xD9, 0xE9 -> 9 + 0x0A, /* */ 0x2A, 0x3A, /* */ 0x5A, 0x6A, 0x7A, 0x8A, 0x9A, 0xAA, 0xBA, 0xCA, 0xDA, 0xEA -> 10 + 0x0B, /* */ 0x2B, 0x3B, /* */ 0x5B, 0x6B, 0x7B, 0x8B, 0x9B, 0xAB, 0xBB, 0xCB, 0xDB, 0xEB -> 11 + 0x0C, /* */ 0x2C, 0x3C, /* */ 0x5C, 0x6C, 0x7C, 0x8C, 0x9C, 0xAC, 0xBC, 0xCC, 0xDC, 0xEC -> 12 + 0x0D, /* */ 0x2D, 0x3D, /* */ 0x5D, 0x6D, 0x7D, 0x8D, 0x9D, 0xAD, 0xBD, 0xCD, 0xDD, 0xED -> 13 + 0x0E, /* */ 0x2E, 0x3E, /* */ 0x5E, 0x6E, 0x7E, 0x8E, 0x9E, 0xAE, 0xBE, 0xCE, 0xDE, 0xEE -> -1 // Bool True 0x11 -> 0 // Nulls 0x0F, 0x1F, 0x2F, 0x3F, 0x4F, 0x5F, 0x6F, 0x7F, 0x8F, 0x9F, 0xAF, 0xBF, 0xCF, 0xDF -> 0 // IVM 0xE0 -> 3 // ...3 more than the typeId byte. + // Sorted struct + 0xD1 -> -1 // Reserved and/or illegal typeIds - 0xEF, 0x60, 0x61, 0xD1, 0xE1, 0xE2, + 0xEF, 0x60, 0x61, 0xE1, 0xE2, + in 0x12..0x1E, in 0x41..0x4E, // Illegal float lengths. 0x44 and 0x48 are trapped in the earlier condition. in 0xF0..0xFF -> -2 else -> TODO("This should be unreachable: ${typeId.toHexString()}") diff --git a/src/main/java/com/amazon/ion/bytecode/bin10/ValueHelpers.kt b/src/main/java/com/amazon/ion/bytecode/bin10/ValueHelpers.kt index cce6bf574..6570ce732 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin10/ValueHelpers.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin10/ValueHelpers.kt @@ -121,6 +121,7 @@ internal fun readTimestampReference(valueBytes: ByteArray, position: Int, length * Reads a Decimal value from the given byte array. 
*/ internal fun readDecimalReference(valueBytes: ByteArray, position: Int, end: Int): Decimal { + if (position >= end) return Decimal.ZERO var p = position val exponentValueAndLength = VarIntHelper.readVarIntValueAndLength(valueBytes, p) p += exponentValueAndLength.toInt() and 0xFF diff --git a/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt b/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt index b14dff212..4dd743b14 100644 --- a/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt +++ b/src/test/java/com/amazon/ion/bytecode/GeneratorTestUtil.kt @@ -14,6 +14,8 @@ import org.junit.jupiter.api.assertThrows object GeneratorTestUtil { + internal fun BytecodeGenerator.shouldGenerate(vararg expectedBytecode: Int) = shouldGenerate(expectedBytecode) + internal fun BytecodeGenerator.shouldGenerate( expectedBytecode: IntArray, expectedConstantPool: ConstantPool? = null, @@ -23,7 +25,7 @@ object GeneratorTestUtil { val generator = this - val outputBytecode = BytecodeBuffer() + val outputBytecode = BytecodeBuffer(256) val constantPool = ConstantPool(32) val macroIndices = mutableListOf(0) diff --git a/src/test/java/com/amazon/ion/bytecode/bin10/ByteArrayBytecodeGenerator10Test.kt b/src/test/java/com/amazon/ion/bytecode/bin10/ByteArrayBytecodeGenerator10Test.kt new file mode 100644 index 000000000..40cd9df85 --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin10/ByteArrayBytecodeGenerator10Test.kt @@ -0,0 +1,1047 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin10 + +import com.amazon.ion.Decimal +import com.amazon.ion.TextToBinaryUtils.cleanCommentedHexBytes +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.Timestamp +import com.amazon.ion.bytecode.GeneratorTestUtil.refillShouldThrowIonException +import com.amazon.ion.bytecode.GeneratorTestUtil.shouldGenerate +import com.amazon.ion.bytecode.ir.Instructions.I_ANNOTATION_SID +import com.amazon.ion.bytecode.ir.Instructions.I_BLOB_REF +import com.amazon.ion.bytecode.ir.Instructions.I_BOOL +import com.amazon.ion.bytecode.ir.Instructions.I_CLOB_REF +import com.amazon.ion.bytecode.ir.Instructions.I_DECIMAL_REF +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_SET_SYMBOLS +import com.amazon.ion.bytecode.ir.Instructions.I_END_CONTAINER +import com.amazon.ion.bytecode.ir.Instructions.I_END_OF_INPUT +import com.amazon.ion.bytecode.ir.Instructions.I_FIELD_NAME_SID +import com.amazon.ion.bytecode.ir.Instructions.I_FLOAT_F32 +import com.amazon.ion.bytecode.ir.Instructions.I_FLOAT_F64 +import com.amazon.ion.bytecode.ir.Instructions.I_INT_CP +import com.amazon.ion.bytecode.ir.Instructions.I_INT_I16 +import com.amazon.ion.bytecode.ir.Instructions.I_INT_I32 +import com.amazon.ion.bytecode.ir.Instructions.I_INT_I64 +import com.amazon.ion.bytecode.ir.Instructions.I_IVM +import com.amazon.ion.bytecode.ir.Instructions.I_LIST_START +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_BLOB +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_BOOL +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_CLOB +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_DECIMAL +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_FLOAT +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_INT +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_LIST +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_NULL +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_SEXP +import 
com.amazon.ion.bytecode.ir.Instructions.I_NULL_STRING +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_STRUCT +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_SYMBOL +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_TIMESTAMP +import com.amazon.ion.bytecode.ir.Instructions.I_SEXP_START +import com.amazon.ion.bytecode.ir.Instructions.I_STRING_REF +import com.amazon.ion.bytecode.ir.Instructions.I_STRUCT_START +import com.amazon.ion.bytecode.ir.Instructions.I_SYMBOL_CP +import com.amazon.ion.bytecode.ir.Instructions.I_SYMBOL_SID +import com.amazon.ion.bytecode.ir.Instructions.I_TIMESTAMP_REF +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.util.ByteSlice +import com.amazon.ion.bytecode.util.ConstantPool +import org.junit.jupiter.api.Assertions.assertArrayEquals +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertSame +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource +import java.math.BigInteger + +class ByteArrayBytecodeGenerator10Test { + + @Test + fun `getMinorVersion is 0`() { + assertEquals(0, bytecodeGeneratorFor("").ionMinorVersion()) + } + + @Test + fun `getGeneratorForMinorVersion 0 returns self`() { + val generator = bytecodeGeneratorFor("") + assertSame(generator, generator.getGeneratorForMinorVersion(0)) + } + + @Test + fun `an IVM`() = assertIon10BinaryProducesBytecode( + "E0 01 00 EA", + intArrayOf( + I_IVM withData 0x0100, + I_END_OF_INPUT, + ) + ) + + @Test + fun `stops refilling at an IVM, even if more data is available`() { + + val generator = bytecodeGeneratorFor("E0 01 00 EA 0F 0F") + + with(generator) { + shouldGenerate(I_IVM.withData(0x0100)) + shouldGenerate( + I_NULL_NULL, + I_NULL_NULL, + I_END_OF_INPUT, + ) + } + } + + @Test + fun `a null`() = assertIon10BinaryProducesBytecode( + "0F", + intArrayOf( 
+ I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @Test + fun `typed nulls`() = assertIon10BinaryProducesBytecode( + // All the other valid typeIds for a null value. + "1F 2F 3F 4F 5F 6F 7F 8F 9F AF BF CF DF", + intArrayOf( + I_NULL_BOOL, + I_NULL_INT, + I_NULL_INT, + I_NULL_FLOAT, + I_NULL_DECIMAL, + I_NULL_TIMESTAMP, + I_NULL_STRING, + I_NULL_SYMBOL, + I_NULL_CLOB, + I_NULL_BLOB, + I_NULL_LIST, + I_NULL_SEXP, + I_NULL_STRUCT, + I_END_OF_INPUT, + ) + ) + + @ParameterizedTest + @CsvSource( + "00 0F", + "01 FF 0F", + "02 FF FF 0F", + "03 FF FF FF 0F", + "04 FF FF FF FF 0F", + "05 FF FF FF FF FF 0F", + "06 FF FF FF FF FF FF 0F", + "07 FF FF FF FF FF FF FF 0F", + "08 FF FF FF FF FF FF FF FF 0F", + "09 FF FF FF FF FF FF FF FF FF 0F", + "0A FF FF FF FF FF FF FF FF FF FF 0F", + "0B FF FF FF FF FF FF FF FF FF FF FF 0F", + "0C FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "0D FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "0E 81 FF 0F", + "0E 87 FF FF FF FF FF FF FF 0F", + "0E 8E FF FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + ) + fun `generator can skip NOPs`(bytes: String) = assertIon10BinaryProducesBytecode(bytes, intArrayOf(I_NULL_NULL, I_END_OF_INPUT)) + + @Test + fun `boolean values`() = assertIon10BinaryProducesBytecode( + "10 11", + intArrayOf( + I_BOOL withData 0, + I_BOOL withData 1, + I_END_OF_INPUT, + ) + ) + + @ParameterizedTest + @CsvSource( + // Positive integers + " 0, 20", + " 1, 21 01", + " 123, 21 7B", + " 2, 22 00 02", + " 3, 23 00 00 03", + " 4, 24 00 00 00 04", + " 5, 25 00 00 00 00 05", + " 6, 26 00 00 00 00 00 06", + " 7, 27 00 00 00 00 00 00 07", + " 8, 28 00 00 00 00 00 00 00 08", + " 9, 29 00 00 00 00 00 00 00 00 09", + " 10, 2A 00 00 00 00 00 00 00 00 00 0A", + " 11, 2B 00 00 00 00 00 00 00 00 00 00 0B", + " 12, 2C 00 00 00 00 00 00 00 00 00 00 00 0C", + " 13, 2D 00 00 00 00 00 00 00 00 00 00 00 00 0D", + " 65538, 23 01 00 02", + " 65539, 25 00 00 01 00 03", + " 65540, 29 00 00 00 00 00 00 01 00 04", + " 4294967298, 25 01 00 00 00 02", + " 4294967299, 
29 00 00 00 00 01 00 00 00 03", + " 18446744073709551618, 29 01 00 00 00 00 00 00 00 02", + // Var length positive integers + " 0, 2E 80", + " 17, 2E 81 11", + " 18, 2E 82 00 12", + " 19, 2E 84 00 00 00 13", + " 20, 2E 88 00 00 00 00 00 00 00 14", + " 21, 2E 8C 00 00 00 00 00 00 00 00 00 00 00 15", + " 22, 2E 90 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 16", + // Negative Integers + " -1, 31 01", + " -2, 32 00 02", + " -3, 33 00 00 03", + " -4, 34 00 00 00 04", + " -5, 35 00 00 00 00 05", + " -6, 36 00 00 00 00 00 06", + " -7, 37 00 00 00 00 00 00 07", + " -8, 38 00 00 00 00 00 00 00 08", + " -9, 39 00 00 00 00 00 00 00 00 09", + " -10, 3A 00 00 00 00 00 00 00 00 00 0A", + " -11, 3B 00 00 00 00 00 00 00 00 00 00 0B", + " -12, 3C 00 00 00 00 00 00 00 00 00 00 00 0C", + " -13, 3D 00 00 00 00 00 00 00 00 00 00 00 00 0D", + " -65538, 33 01 00 02", + " -65539, 35 00 00 01 00 03", + " -65540, 39 00 00 00 00 00 00 01 00 04", + " -4294967298, 35 01 00 00 00 02", + " -4294967299, 39 00 00 00 00 01 00 00 00 03", + "-18446744073709551618, 39 01 00 00 00 00 00 00 00 02", + // Var length negative integers + " -17, 3E 81 11", + " -18, 3E 82 00 12", + " -19, 3E 84 00 00 00 13", + " -20, 3E 88 00 00 00 00 00 00 00 14", + " -21, 3E 8C 00 00 00 00 00 00 00 00 00 00 00 15", + " -22, 3E 90 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 16", + ) + fun `int values`(expectedValue: BigInteger, bytes: String) { + val bits = expectedValue.bitLength() + val expectedConstantPool = ConstantPool() + val expectedBytecode = when { + bits < 16 -> intArrayOf(I_INT_I16 withData expectedValue.toInt(), I_END_OF_INPUT) + bits < 32 -> intArrayOf(I_INT_I32, expectedValue.toInt(), I_END_OF_INPUT) + bits < 64 -> intArrayOf(I_INT_I64, expectedValue.toLong().shr(Int.SIZE_BITS).toInt(), expectedValue.toInt(), I_END_OF_INPUT) + else -> { + expectedConstantPool.add(expectedValue) + intArrayOf(I_INT_CP withData 0, I_END_OF_INPUT) + } + } + bytecodeGeneratorFor(bytes).shouldGenerate(expectedBytecode, 
expectedConstantPool) + } + + @ParameterizedTest + @CsvSource( + "30", + "31 00", + "3E 80", + "3E 81 00", + ) + fun `int negative zero is illegal`(bytes: String) = bytecodeGeneratorFor(bytes).refillShouldThrowIonException() + + @ParameterizedTest + @CsvSource( + "40, 0.0", + "44 00 00 00 01, 1.4012984643e-45", + "44 00 7F FF FF, 1.1754942107e-38", + "44 00 80 00 00, 1.1754943508e-38", + "44 7F 7F FF FF, 3.4028234664e38", + "44 3F 7F FF FF, 0.999999940395355225", + "44 3F 80 00 00, 1", + "44 3F 80 00 01, 1.00000011920928955", + "44 80 00 00 01, -1.4012984643e-45", + "44 80 7F FF FF, -1.1754942107e-38", + "44 80 80 00 00, -1.1754943508e-38", + "44 FF 7F FF FF, -3.4028234664e38", + "44 BF 7F FF FF, -0.999999940395355225", + "44 BF 80 00 00, -1", + "44 BF 80 00 01, -1.00000011920928955", + "44 00 00 00 00, 0", + "44 80 00 00 00, -0", + "44 7F 80 00 00, Infinity", + "44 FF 80 00 00, -Infinity", + "44 7F C0 00 00, NaN", + "44 C0 00 00 00, -2", + "44 3E AA AA AB, 0.333333343267440796", + "44 40 49 0F DB, 3.14159274101257324", + "48 00 00 00 00 00 00 00 01, 4.9406564584124654e-324", + "48 00 0F FF FF FF FF FF FF, 2.2250738585072009e-308", + "48 00 10 00 00 00 00 00 00, 2.2250738585072014e-308", + "48 7F EF FF FF FF FF FF FF, 1.7976931348623157e308", + "48 3F EF FF FF FF FF FF FF, 0.99999999999999988898", + "48 3F F0 00 00 00 00 00 00, 1", + "48 3F F0 00 00 00 00 00 01, 1.0000000000000002220", + "48 3F F0 00 00 00 00 00 02, 1.0000000000000004441", + "48 80 00 00 00 00 00 00 01, -4.9406564584124654e-324", + "48 80 0F FF FF FF FF FF FF, -2.2250738585072009e-308", + "48 80 10 00 00 00 00 00 00, -2.2250738585072014e-308", + "48 FF EF FF FF FF FF FF FF, -1.7976931348623157e308", + "48 BF EF FF FF FF FF FF FF, -0.99999999999999988898", + "48 BF F0 00 00 00 00 00 00, -1", + "48 BF F0 00 00 00 00 00 01, -1.0000000000000002220", + "48 BF F0 00 00 00 00 00 02, -1.0000000000000004441", + "48 00 00 00 00 00 00 00 00, 0", + "48 80 00 00 00 00 00 00 00, -0", + "48 7F F0 00 00 00 00 00 
00, Infinity", + "48 FF F0 00 00 00 00 00 00, -Infinity", + "48 7F F8 00 00 00 00 00 00, NaN", + "48 C0 00 00 00 00 00 00 00, -2", + "48 3F D5 55 55 55 55 55 55, 0.33333333333333331483", + "48 40 09 21 FB 54 44 2D 18, 3.141592653589793116" + ) + fun `float values`(bytes: String, expectedValue: Double) { + val byteArray = bytes.hexStringToByteArray() + val expectedBytecode = if (byteArray.size == 9) { + intArrayOf( + I_FLOAT_F64, + expectedValue.toRawBits().shr(Int.SIZE_BITS).toInt(), + expectedValue.toRawBits().toInt(), + I_END_OF_INPUT, + ) + } else { + intArrayOf( + I_FLOAT_F32, + expectedValue.toFloat().toRawBits(), + I_END_OF_INPUT, + ) + } + bytecodeGeneratorFor(bytes).shouldGenerate(expectedBytecode) + } + + @ParameterizedTest + @CsvSource( + "41 00", + "42 00 00", + "43 00 00 00 00", + "45 00 00 00 00 00", + "46 00 00 00 00 00 00", + "47 00 00 00 00 00 00 00", + "49 00 00 00 00 00 00 00 00 00", + "4A 00 00 00 00 00 00 00 00 00 00", + "4B 00 00 00 00 00 00 00 00 00 00 00", + "4C 00 00 00 00 00 00 00 00 00 00 00 00", + "4D 00 00 00 00 00 00 00 00 00 00 00 00 00", + "4E 84 3F 80 00 00", + ) + fun `other float sizes are illegal`(bytes: String) = bytecodeGeneratorFor(bytes).refillShouldThrowIonException() + + @ParameterizedTest + @CsvSource( + // Actual content bytes do not matter in this test because it should always produce a reference without having to inspect the value content. 
+ // pos, len, bytes + "1, 0, 50 0F", + "3, 0, 01 FF 50 01 FF 0F", + "4, 0, 02 FF FF 50 01 FF 0F", + "4, 1, 02 FF FF 51 FF 01 FF 0F", + "1, 2, 52 FF FF 0F", + "1, 3, 53 FF FF FF 0F", + "1, 4, 54 FF FF FF FF 0F", + "1, 5, 55 FF FF FF FF FF 0F", + "1, 6, 56 FF FF FF FF FF FF 0F", + "1, 7, 57 FF FF FF FF FF FF FF 0F", + "1, 8, 58 FF FF FF FF FF FF FF FF 0F", + "1, 9, 59 FF FF FF FF FF FF FF FF FF 0F", + "1, 10, 5A FF FF FF FF FF FF FF FF FF FF 0F", + "1, 11, 5B FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 12, 5C FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 13, 5D FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "2, 1, 5E 81 FF 0F", + "2, 7, 5E 87 FF FF FF FF FF FF FF 0F", + "2, 14, 5E 8E FF FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + ) + fun `decimal values`(position: Int, length: Int, bytes: String) = assertIon10BinaryProducesBytecode( + bytes, + intArrayOf( + I_DECIMAL_REF withData length, position, + I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @ParameterizedTest + @CsvSource( + // Expected, pos, len, bytes + " 0., 1, 0, 50", + " 0.0, 1, 1, 51 C1", + " 0.00, 1, 1, 51 C2", + " -0.0, 1, 2, 52 C1 80", + " 1.8, 1, 2, 52 C1 12", + " 180, 1, 2, 52 81 12", + " 18e129, 1, 3, 53 01 81 12", + " 2.58, 0, 3, C2 01 02", + " 2.58, 1, 3, 53 C2 01 02", + " 2.58, 2, 3, FF 53 C2 01 02", + " 2.58, 3, 3, FF FF 53 C2 01 02", + ) + fun `read decimal references`(expectedValue: String, position: Int, length: Int, bytes: String) { + val expected = Decimal.valueOf(expectedValue) + val generator = bytecodeGeneratorFor(bytes) + // We're comparing the string value because that will give us Ion equivalence rather than mathematical equivalence. + assertEquals(expected.toEngineeringString(), generator.readDecimalReference(position, length).toEngineeringString()) + } + + @ParameterizedTest + @CsvSource( + // Actual content bytes do not matter in this test because it should always produce a reference without having to inspect the value content. 
+ // pos, len, bytes + "1, 2, 62 FF FF 0F", + "1, 3, 63 FF FF FF 0F", + "1, 4, 64 FF FF FF FF 0F", + "1, 5, 65 FF FF FF FF FF 0F", + "1, 6, 66 FF FF FF FF FF FF 0F", + "1, 7, 67 FF FF FF FF FF FF FF 0F", + "1, 8, 68 FF FF FF FF FF FF FF FF 0F", + "1, 9, 69 FF FF FF FF FF FF FF FF FF 0F", + "1, 10, 6A FF FF FF FF FF FF FF FF FF FF 0F", + "1, 11, 6B FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 12, 6C FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 13, 6D FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "2, 1, 6E 81 FF 0F", + "2, 7, 6E 87 FF FF FF FF FF FF FF 0F", + "2, 14, 6E 8E FF FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + ) + fun `timestamp values`(position: Int, length: Int, bytes: String) = assertIon10BinaryProducesBytecode( + bytes, + intArrayOf( + I_TIMESTAMP_REF withData length, position, + I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @ParameterizedTest + @CsvSource( + // Expected, pos, len, bytes + "2000T, 0, 3, 80 0F D0", + "2000T, 1, 3, 63 80 0F D0", + "2000T, 2, 3, 00 63 80 0F D0", + "2001-02T, 0, 4, 80 0F D1 82", + "2002-03-01T, 0, 5, 80 0F D2 83 81", + "2003-04-02T03:04Z, 0, 7, 80 0F D3 84 82 83 84", + "2004-05-03T04:05:01Z, 0, 8, 80 0F D4 85 83 84 85 81", + "2005-06-04T05:06:02Z, 0, 9, 80 0F D5 86 84 85 86 82 80", + "2006-07-05T06:07:03Z, 0, 10, 80 0F D6 87 85 86 87 83 80 00", + "2007-08-06T07:08:04Z, 0, 9, 80 0F D7 88 86 87 88 84 C0", + "2008-09-07T08:09:05Z, 0, 9, 80 0F D8 89 87 88 89 85 81", + "2009-10-08T09:10:06.00Z, 0, 9, 80 0F D9 8A 88 89 8A 86 C2", + "2010-11-09T10:11:07.114Z, 0, 10, 80 0F DA 8B 89 8A 8B 87 C3 72", + "2010-01-01T00:15:00+00:15, 0, 8, 8F 0F DA 81 81 80 80 80", + "2010-01-01T00:00:00-00:00, 0, 8, C0 0F DA 81 81 80 80 80", + "2010-01-01T00:45:00-00:15, 0, 8, CF 0F DA 81 81 81 80 80", + ) + fun `read timestamp references`(expectedValue: String, position: Int, length: Int, bytes: String) { + val expected = Timestamp.valueOf(expectedValue) + val generator = bytecodeGeneratorFor(bytes) + assertEquals(expected, 
generator.readTimestampReference(position, length)) + } + + @ParameterizedTest + @CsvSource("60", "61 FF") + fun `illegal timestamp type ids`(bytes: String) = bytecodeGeneratorFor(bytes).refillShouldThrowIonException() + + @Test + fun `readShortTimestamp throws IllegalStateException`() { + val generator = bytecodeGeneratorFor("") + assertThrows<IllegalStateException> { generator.readShortTimestampReference(0, 1) } + } + + @ParameterizedTest + @CsvSource( + " 0, 70 0F", + " 1, 01 FF 71 01 0F", + " 1, 02 FF FF 71 01 0F", + " 4, 71 04 0F", + " 255, 71 FF 0F", + " 258, 72 01 02 0F", + " 515, 73 00 02 03 0F", + " 4, 7E 81 04 0F", + " 259, 7E 82 01 03 0F", + "66051, 7E 83 01 02 03 0F", + ) + fun `symbol values`(sid: Int, bytes: String) = assertIon10BinaryProducesBytecode( + bytes, + intArrayOf( + I_SYMBOL_SID withData sid, + I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @ParameterizedTest + @CsvSource( + // Actual content bytes do not matter in this test because it should always produce a reference without having to inspect the value content. 
+ // pos, len, bytes + "1, 0, 80 0F", + "3, 0, 01 FF 80 01 FF 0F", + "4, 0, 02 FF FF 80 01 FF 0F", + "4, 1, 02 FF FF 81 FF 01 FF 0F", + "1, 2, 82 FF FF 0F", + "1, 3, 83 FF FF FF 0F", + "1, 4, 84 FF FF FF FF 0F", + "1, 5, 85 FF FF FF FF FF 0F", + "1, 6, 86 FF FF FF FF FF FF 0F", + "1, 7, 87 FF FF FF FF FF FF FF 0F", + "1, 8, 88 FF FF FF FF FF FF FF FF 0F", + "1, 9, 89 FF FF FF FF FF FF FF FF FF 0F", + "1, 10, 8A FF FF FF FF FF FF FF FF FF FF 0F", + "1, 11, 8B FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 12, 8C FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 13, 8D FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "2, 1, 8E 81 FF 0F", + "2, 7, 8E 87 FF FF FF FF FF FF FF 0F", + "2, 14, 8E 8E FF FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + ) + fun `string values`(position: Int, length: Int, bytes: String) = assertIon10BinaryProducesBytecode( + bytes, + intArrayOf( + I_STRING_REF withData length, position, + I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @ParameterizedTest + @CsvSource( + // Expected, pos, len, bytes + "'', 0, 0, 99", + "a, 0, 1, 61", + "ab, 0, 2, 61 62", + "abcd, 0, 4, 61 62 63 64", + "abcdefgh, 0, 8, 61 62 63 64 65 66 67 68", + "abcd, 2, 4, 99 99 61 62 63 64 65 66 99 99", + ) + fun `read string references`(expectedValue: String, position: Int, length: Int, bytes: String) { + val generator = bytecodeGeneratorFor(bytes) + assertEquals(expectedValue, generator.readTextReference(position, length)) + } + + @ParameterizedTest + @CsvSource( + // Actual content bytes do not matter in this test because it should always produce a reference without having to inspect the value content. 
+ // pos, len, bytes + "1, 0, 90 0F", + "3, 0, 01 FF 90 01 FF 0F", + "4, 0, 02 FF FF 90 01 FF 0F", + "4, 1, 02 FF FF 91 FF 01 FF 0F", + "1, 2, 92 FF FF 0F", + "1, 3, 93 FF FF FF 0F", + "1, 4, 94 FF FF FF FF 0F", + "1, 5, 95 FF FF FF FF FF 0F", + "1, 6, 96 FF FF FF FF FF FF 0F", + "1, 7, 97 FF FF FF FF FF FF FF 0F", + "1, 8, 98 FF FF FF FF FF FF FF FF 0F", + "1, 9, 99 FF FF FF FF FF FF FF FF FF 0F", + "1, 10, 9A FF FF FF FF FF FF FF FF FF FF 0F", + "1, 11, 9B FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 12, 9C FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 13, 9D FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "2, 1, 9E 81 FF 0F", + "2, 7, 9E 87 FF FF FF FF FF FF FF 0F", + "2, 14, 9E 8E FF FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + ) + fun `clob values`(position: Int, length: Int, bytes: String) = assertIon10BinaryProducesBytecode( + bytes, + intArrayOf( + I_CLOB_REF withData length, position, + I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @ParameterizedTest + @CsvSource( + // Actual content bytes do not matter in this test because it should always produce a reference without having to inspect the value content. 
+ // pos, len, bytes + "1, 0, A0 0F", + "3, 0, 01 FF A0 01 FF 0F", + "4, 0, 02 FF FF A0 01 FF 0F", + "4, 1, 02 FF FF A1 FF 01 FF 0F", + "1, 2, A2 FF FF 0F", + "1, 3, A3 FF FF FF 0F", + "1, 4, A4 FF FF FF FF 0F", + "1, 5, A5 FF FF FF FF FF 0F", + "1, 6, A6 FF FF FF FF FF FF 0F", + "1, 7, A7 FF FF FF FF FF FF FF 0F", + "1, 8, A8 FF FF FF FF FF FF FF FF 0F", + "1, 9, A9 FF FF FF FF FF FF FF FF FF 0F", + "1, 10, AA FF FF FF FF FF FF FF FF FF FF 0F", + "1, 11, AB FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 12, AC FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "1, 13, AD FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + "2, 1, AE 81 FF 0F", + "2, 7, AE 87 FF FF FF FF FF FF FF 0F", + "2, 14, AE 8E FF FF FF FF FF FF FF FF FF FF FF FF FF FF 0F", + ) + fun `blob values`(position: Int, length: Int, bytes: String) = assertIon10BinaryProducesBytecode( + bytes, + intArrayOf( + I_BLOB_REF withData length, position, + I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @ParameterizedTest + @CsvSource( + // pos, len, bytes + "0, 0, 99", + "0, 1, 61", + "0, 2, 61 62", + "0, 4, 61 62 63 64", + "0, 8, 61 62 63 64 65 66 67 68", + "2, 4, 99 99 61 62 63 64 65 66 99 99", + ) + fun `read bytes references`(position: Int, length: Int, bytes: String) { + val expected = ByteSlice(bytes.hexStringToByteArray(), position, position + length) + val generator = bytecodeGeneratorFor(bytes) + assertArrayEquals(expected.newByteArray(), generator.readBytesReference(position, length).newByteArray()) + } + + @Test + fun `an empty list`() = assertIon10BinaryProducesBytecode( + """ + B0 + BE 80 + """.trimIndent(), + intArrayOf( + I_LIST_START withData 1, + I_END_CONTAINER, + I_LIST_START withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a list with one value`() = assertIon10BinaryProducesBytecode( + """ + B1 0F + """.trimIndent(), + intArrayOf( + I_LIST_START withData 2, + I_NULL_NULL, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a list with multiple values`() = 
assertIon10BinaryProducesBytecode( + """ + B3 0F 10 11 + """.trimIndent(), + intArrayOf( + I_LIST_START withData 4, + I_NULL_NULL, + I_BOOL withData 0, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a list with a nop value`() = assertIon10BinaryProducesBytecode( + """ + B3 0F 00 11 + """.trimIndent(), + intArrayOf( + I_LIST_START withData 3, + I_NULL_NULL, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a var length list`() = assertIon10BinaryProducesBytecode( + """ + BE 83 0F 10 11 + """.trimIndent(), + intArrayOf( + I_LIST_START withData 4, + I_NULL_NULL, + I_BOOL withData 0, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `an empty sexp`() = assertIon10BinaryProducesBytecode( + """ + C0 + CE 80 + """.trimIndent(), + intArrayOf( + I_SEXP_START withData 1, + I_END_CONTAINER, + I_SEXP_START withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a sexp with one value`() = assertIon10BinaryProducesBytecode( + """ + C1 0F + """.trimIndent(), + intArrayOf( + I_SEXP_START withData 2, + I_NULL_NULL, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a sexp with multiple values`() = assertIon10BinaryProducesBytecode( + """ + C3 0F 10 11 + """.trimIndent(), + intArrayOf( + I_SEXP_START withData 4, + I_NULL_NULL, + I_BOOL withData 0, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a sexp with a nop value`() = assertIon10BinaryProducesBytecode( + """ + C3 0F 00 11 + """.trimIndent(), + intArrayOf( + I_SEXP_START withData 3, + I_NULL_NULL, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a var length sexp`() = assertIon10BinaryProducesBytecode( + """ + CE 83 0F 10 11 + """.trimIndent(), + intArrayOf( + I_SEXP_START withData 4, + I_NULL_NULL, + I_BOOL withData 0, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `empty structs`() = 
assertIon10BinaryProducesBytecode( + """ + D0 + D1 80 + DE 80 + """.trimIndent(), + intArrayOf( + I_STRUCT_START withData 1, + I_END_CONTAINER, + I_STRUCT_START withData 1, + I_END_CONTAINER, + I_STRUCT_START withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a struct with one field`() = assertIon10BinaryProducesBytecode( + """ + D2 84 0F + """.trimIndent(), + intArrayOf( + I_STRUCT_START withData 3, + I_FIELD_NAME_SID withData 4, + I_NULL_NULL, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a struct with multiple fields`() = assertIon10BinaryProducesBytecode( + """ + D6 84 0F 85 10 86 11 + """.trimIndent(), + intArrayOf( + I_STRUCT_START withData 7, + I_FIELD_NAME_SID withData 4, + I_NULL_NULL, + I_FIELD_NAME_SID withData 5, + I_BOOL withData 0, + I_FIELD_NAME_SID withData 6, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a struct with a nop field`() = assertIon10BinaryProducesBytecode( + """ + D6 84 0F 85 00 86 11 + """.trimIndent(), + intArrayOf( + I_STRUCT_START withData 6, + I_FIELD_NAME_SID withData 4, + I_NULL_NULL, + I_FIELD_NAME_SID withData 5, + I_FIELD_NAME_SID withData 6, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a var length struct`() = assertIon10BinaryProducesBytecode( + """ + DE 86 84 0F 85 10 86 11 + """.trimIndent(), + intArrayOf( + I_STRUCT_START withData 7, + I_FIELD_NAME_SID withData 4, + I_NULL_NULL, + I_FIELD_NAME_SID withData 5, + I_BOOL withData 0, + I_FIELD_NAME_SID withData 6, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + @Test + fun `a var length struct using opcode D1`() = assertIon10BinaryProducesBytecode( + """ + D1 86 84 0F 85 10 86 11 + """.trimIndent(), + intArrayOf( + I_STRUCT_START withData 7, + I_FIELD_NAME_SID withData 4, + I_NULL_NULL, + I_FIELD_NAME_SID withData 5, + I_BOOL withData 0, + I_FIELD_NAME_SID withData 6, + I_BOOL withData 1, + I_END_CONTAINER, + I_END_OF_INPUT, + ) + ) + + 
@Test + fun `one annotation`() = assertIon10BinaryProducesBytecode( + """ + E3 | Annotations L=3 + 81 | Inner annotation length = 1 + 84 | $4:: + 0F | null + """.trimIndent(), + intArrayOf( + I_ANNOTATION_SID withData 4, + I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @Test + fun `multiple annotations`() = assertIon10BinaryProducesBytecode( + """ + E5 | Annotations L=5 + 83 | Inner annotation length = 3 + 84 | $4:: + 85 | $5:: + 86 | $6:: + 0F | null + """.trimIndent(), + intArrayOf( + I_ANNOTATION_SID withData 4, + I_ANNOTATION_SID withData 5, + I_ANNOTATION_SID withData 6, + I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @Test + fun `var length annotations`() = assertIon10BinaryProducesBytecode( + """ + EE | Annotations L=14 (var length) + 85 | Outer annotation length = 5 + 83 | Inner annotation length = 3 + 84 | $4:: + 85 | $5:: + 86 | $6:: + 0F | null + """.trimIndent(), + intArrayOf( + I_ANNOTATION_SID withData 4, + I_ANNOTATION_SID withData 5, + I_ANNOTATION_SID withData 6, + I_NULL_NULL, + I_END_OF_INPUT, + ) + ) + + @ParameterizedTest + @CsvSource( + "E3 81 84 00", + "B4 E3 81 84 00", + "C4 E3 81 84 00", + "D5 84 E3 81 84 00", + ) + fun `nop after annotations is illegal`(bytes: String) = bytecodeGeneratorFor(bytes).refillShouldThrowIonException() + + @ParameterizedTest + @CsvSource( + "E6 81 84 E0 01 10 EA", + "B7 E6 81 84 E0 01 10 EA", + "C7 E6 81 84 E0 01 10 EA", + "D8 83 E6 81 84 E0 01 10 EA", + ) + fun `ivm after annotations is illegal`(bytes: String) = bytecodeGeneratorFor(bytes).refillShouldThrowIonException() + + @ParameterizedTest + @CsvSource( + "E6 81 84 E3 81 84 0F", + "B7 E6 81 84 E3 81 84 0F", + "C7 E6 81 84 E3 81 84 0F", + "D8 84 E6 81 84 E3 81 84 0F", + ) + fun `annotations inside annotations wrapper is illegal`(bytes: String) = bytecodeGeneratorFor(bytes).refillShouldThrowIonException() + + @ParameterizedTest + @CsvSource( + "B4 E0 01 10 EA", + "C4 E0 01 10 EA", + "D5 83 E0 01 10 EA", + ) + fun `ivm in container is illegal`(bytes: String) = 
bytecodeGeneratorFor(bytes).refillShouldThrowIonException() + + @ParameterizedTest + @CsvSource( + "12", "13", "14", "15", "16", "17", "18", "19", "1A", "1B", "1C", "1D", "1E", + "E1", "E2", "EF", + "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "FA", "FB", "FC", "FD", "FE", "FF" + ) + /** Illegal TypeIds not covered by other tests: 12..1E, EF, F0..FF, illegal annotations sizes */ + fun `other illegal type ids`(bytes: String) = bytecodeGeneratorFor(bytes).refillShouldThrowIonException() + + @Test + fun `all values types inside a container`() = assertIon10BinaryProducesBytecode( + """ + BE 94 | [ + 00 0F 10 20 40 50 | null, false, 0, 0e0, 0d0, + 62 80 80 | 0000T, + 70 80 90 A0 | $0, "", {{""}}, {{}}, + B0 C0 D0 | [], (), {}, + E3 81 84 0F | $4::null, + | ] + """, + intArrayOf( + I_LIST_START withData 25, + I_NULL_NULL, + I_BOOL, + I_INT_I16, + I_FLOAT_F32, 0.0f.toRawBits(), + I_DECIMAL_REF, 8, + I_TIMESTAMP_REF withData 2, 9, + I_SYMBOL_SID, + I_STRING_REF, 13, + I_CLOB_REF, 14, + I_BLOB_REF, 15, + I_LIST_START withData 1, I_END_CONTAINER, + I_SEXP_START withData 1, I_END_CONTAINER, + I_STRUCT_START withData 1, I_END_CONTAINER, + I_ANNOTATION_SID withData 4, + I_NULL_NULL, + I_END_CONTAINER, + I_END_OF_INPUT, + ), + ) + + @Test + fun `complex data with symbol table`() { + /* + { + name: "Fido", + age: years::4, + birthday: 2012-03-01T, + toys: [ ball, rope ], + weight: pounds::41.2, + } + */ + val generator = bytecodeGeneratorFor( + """ + E0 01 00 EA EE B7 81 83 DE B3 87 BE B0 83 61 67 65 85 79 65 61 72 73 88 62 69 72 74 68 64 61 79 + 84 74 6F 79 73 84 62 61 6C 6C 84 72 6F 70 65 86 77 65 69 67 68 74 86 70 6F 75 6E 64 73 DE A1 84 + 84 46 69 64 6F 8A E4 81 8B 21 04 8C 65 C0 0F DC 83 81 8D B4 71 0E 71 0F 90 E6 81 91 53 C1 01 9C + """.trimIndent() + ) + + with(generator) { + shouldGenerate( + I_IVM.withData(0x0100) + ) + shouldGenerate( + I_DIRECTIVE_SET_SYMBOLS, + I_SYMBOL_CP withData 0, + I_SYMBOL_CP withData 1, + I_SYMBOL_CP withData 2, + I_SYMBOL_CP 
withData 3, + I_SYMBOL_CP withData 4, + I_SYMBOL_CP withData 5, + I_SYMBOL_CP withData 6, + I_SYMBOL_CP withData 7, + I_END_CONTAINER, + ) + shouldGenerate( + *buildStruct( + I_FIELD_NAME_SID withData 4, + I_STRING_REF withData 4, 65, + I_FIELD_NAME_SID withData 10, + I_ANNOTATION_SID withData 11, + I_INT_I16 withData 4, + I_FIELD_NAME_SID withData 12, + I_TIMESTAMP_REF withData 5, 77, + I_FIELD_NAME_SID withData 13, + *buildList( + I_SYMBOL_SID withData 14, + I_SYMBOL_SID withData 15, + ), + I_FIELD_NAME_SID withData 16, + I_ANNOTATION_SID withData 17, + I_DECIMAL_REF withData 3, 93, + ), + I_END_OF_INPUT, + ) + } + } + + private fun assertIon10BinaryProducesBytecode(commentedHexBytes: String, expectedBytecode: IntArray) { + bytecodeGeneratorFor(commentedHexBytes).shouldGenerate(expectedBytecode) + } + + private fun bytecodeGeneratorFor(commentedHexBytes: String) = ByteArrayBytecodeGenerator10(commentedHexBytes.cleanCommentedHexBytes().hexStringToByteArray(), 0) + + // Helper functions to build bytecode a little more easily + private fun buildStruct(vararg instructions: Int): IntArray = intArrayOf(I_STRUCT_START.withData(instructions.size + 1), *instructions, I_END_CONTAINER) + private fun buildList(vararg instructions: Int): IntArray = intArrayOf(I_LIST_START.withData(instructions.size + 1), *instructions, I_END_CONTAINER) + private infix fun Int.withData(data: Int): Int = this.packInstructionData(data) +} diff --git a/src/test/java/com/amazon/ion/bytecode/bin10/TypeIdHelperTest.kt b/src/test/java/com/amazon/ion/bytecode/bin10/TypeIdHelperTest.kt index c7f8c8535..5294440a8 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin10/TypeIdHelperTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin10/TypeIdHelperTest.kt @@ -176,7 +176,6 @@ class TypeIdHelperTest { "0x20, INT", "0x2E, INT", "0x2F, INT", - "0x30, UNSET", "0x31, INT", "0x3E, INT", "0x3F, INT", @@ -208,7 +207,7 @@ class TypeIdHelperTest { "0xCE, SEXP", "0xCF, SEXP", "0xD0, STRUCT", - "0xD1, UNSET", + 
"0xD1, STRUCT", "0xDE, STRUCT", "0xDF, STRUCT", "0xE0, IVM", @@ -230,8 +229,8 @@ class TypeIdHelperTest { "0x0F, 0", // NULL (length 0) "0x10, 0", // BOOL false (length 0) "0x11, 0", // BOOL true (length 0) - "0x12, 2", // Length 2 - "0x1E, -1", // VarUInt follows + "0x12, -2", // invalid + "0x1E, -2", // invalid "0x1F, 0", // NULL (length 0) "0x20, 0", // Length 0 "0x21, 1", // Length 1