diff --git a/src/main/java/com/amazon/ion/bytecode/bin10/SymbolTableHelper.kt b/src/main/java/com/amazon/ion/bytecode/bin10/SymbolTableHelper.kt new file mode 100644 index 000000000..afe138b3b --- /dev/null +++ b/src/main/java/com/amazon/ion/bytecode/bin10/SymbolTableHelper.kt @@ -0,0 +1,376 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin10 + +import com.amazon.ion.IonException +import com.amazon.ion.SystemSymbols +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_ADD_SYMBOLS +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_SET_SYMBOLS +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_USE +import com.amazon.ion.bytecode.ir.Instructions.I_END_CONTAINER +import com.amazon.ion.bytecode.ir.Instructions.I_INT_I32 +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_NULL +import com.amazon.ion.bytecode.ir.Instructions.I_STRING_CP +import com.amazon.ion.bytecode.ir.Instructions.I_SYMBOL_CP +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.ir.OperationKind +import com.amazon.ion.bytecode.util.AppendableConstantPoolView +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.unsignedToInt +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings + +/** + * Helper to generate Bytecode instructions for Ion 1.0 style symbol tables. + * + * We cannot meaningfully read a partial symbol table, so it seems a reasonable requirement that the entire symbol + * table must be buffered before we generate bytecode for it. Therefore, this is re-usable for both continuable and + * non-continuable bytecode generators. + * + * The Bytecode uses Ion 1.1 style directives, which don't quite align with Ion 1.0 directives. + * So, we have two options: + * 1. We can output the bytecode that is functionally equivalent to "classic" symbol tables + * 2. 
We can add a "classic" symbol table directive to the bytecode + * + * For option 1, the generated bytecode is as follows: + * - if there's no LST append, and no imports, then we can generate a SET_SYMBOLS instruction + * - if there's LST append, and no imports, then we can generate an ADD_SYMBOLS instruction + * - if there are imports, there cannot also be LST append. + * - if there are imports, and it's not LST append, we can generate an empty SET_SYMBOLS, followed by a USE for all the + * imports, followed by ADD_SYMBOLS with the local symbols. + * - if there's LST append with no local symbols added, the directive would be a no-op, so we elide it completely + * and emit no bytecode for the symbol table at all. + * - if there are imports and no local symbols, we can generate an empty SET_SYMBOLS, followed by USE for all the imports. + * - if there are no imports, no LST append, and no local symbols, we can generate an empty SET_SYMBOLS. + * + * This means we need to buffer some data before emitting the bytecode, but that's okay. + * We can put the strings into the constant pool, and keep track of the min and max, since nothing else is added to + * the constant pool while the `symbols` list is being read (import names also go into the constant pool, but `imports` is a separate field, so the range of symbol entries stays contiguous). + * + * It's actually beneficial to put the symbol text strings in the constant pool now. They can be added to the symbol + * table from the constant pool comparatively cheaply, and we can decode the strings eagerly without having as much + * overhead from the control flow of calling `readTextReference()` over and over. 
+ * + * Logic is roughly this: + * + * ```pseudocode + * let symbolsStartInclusive = 0 + * let symbolsEndExclusive = 0 + * let isAppend = false + * while hasMoreFields(): + * let fieldName = readFieldName() + * switch(fieldName): + * "imports": + * let valueType = readValueType() + * switch(valueType): + * symbol: + * readAndValidate "$ion_symbol_table" + * isAppend = true + * list: + * bytecode.add2(SET_SYMBOLS, END_CONTAINER) + * bytecode.add(USE) + * while hasMoreListElements(): + * compileImport() + * bytecode.add(END_CONTAINER) + * isAppend = true + * "symbols": + * symbolsStartInclusive = sizeOf(constantPool) + * for value in list: + * if value is string: + * constantPool.add(readText()) + * else: + * constantPool.add(null) + * symbolsEndExclusive = sizeOf(constantPool) + * + * if (isAppend): + * bytecode.add(ADD_SYMBOLS) // skipped (return early, emitting nothing more) when no symbols were read + * else: + * bytecode.add(SET_SYMBOLS) + * for i in symbolsStartInclusive until symbolsEndExclusive: + * bytecode.add(SYMBOL_CP(i)) + * bytecode.add(END_CONTAINER) + * ``` + */ +internal object SymbolTableHelper { + + private const val ONE_BYTE_MASK = 0xFF + private const val ONE_BYTE_SHIFT = 8 + + /** + * Compiles an Ion 1.0 symbol table to bytecode instructions. See class documentation for details. 
+ */ + @JvmStatic + @SuppressFBWarnings("SF_SWITCH_NO_DEFAULT") + fun compileSymbolTable(source: ByteArray, position: Int, structLength: Int, dest: BytecodeBuffer, cp: AppendableConstantPoolView) { + var symbolsCpIndexStartInclusive = 0 + var symbolsCpIndexEndExclusive = 0 + + var hasSeenImports = false + var isAppendRequired = false + var hasSeenSymbols = false + + iterateStruct(source, position, structLength) { fieldSid, fieldTid, pos, length -> + val operationKind = TypeIdHelper.operationKindForTypeId(fieldTid) + when (fieldSid) { + SystemSymbols.IMPORTS_SID -> { + if (hasSeenImports) throw IonException("Multiple imports fields found within a single local symbol table.") + hasSeenImports = true + when (operationKind) { + OperationKind.SYMBOL -> { + val sid = readUInt(source, pos, length).toInt() + if (sid == SystemSymbols.ION_SYMBOL_TABLE_SID) isAppendRequired = true + } + OperationKind.LIST -> { + readImportsList(source, pos, length, dest, cp) + isAppendRequired = true + } + } + } + SystemSymbols.SYMBOLS_SID -> { + if (hasSeenSymbols) throw IonException("Multiple symbols fields found within a single local symbol table.") + hasSeenSymbols = true + when (operationKind) { + OperationKind.LIST -> { + symbolsCpIndexStartInclusive = cp.size + readSymbolsList(source, pos, length, cp) + symbolsCpIndexEndExclusive = cp.size + } + } + } + } + } + + val directiveOperation = if (isAppendRequired) { + // The new local symbols are "appended" (to the imports, or to the current table for LST append) using ADD_SYMBOLS; with no new symbols there is nothing to append, so we return without emitting a directive + if (symbolsCpIndexEndExclusive - symbolsCpIndexStartInclusive == 0) return + I_DIRECTIVE_ADD_SYMBOLS + } else { + I_DIRECTIVE_SET_SYMBOLS + } + dest.add(directiveOperation) + for (i in symbolsCpIndexStartInclusive until symbolsCpIndexEndExclusive) { + dest.add(I_SYMBOL_CP.packInstructionData(i)) + } + dest.add(I_END_CONTAINER) + } + + /** + * Reads a list of import structs. Always emits an empty SET_SYMBOLS directive; a USE directive is kept only if at least one import produced bytecode. 
+ */ + @JvmStatic + private fun readImportsList( + source: ByteArray, + listStart: Int, + listLength: Int, + dest: BytecodeBuffer, + cp: AppendableConstantPoolView + ) { + // Clear default module symbols and start adding the imports in a USE directive + dest.add3(I_DIRECTIVE_SET_SYMBOLS, I_END_CONTAINER, I_DIRECTIVE_USE) + + val checkpoint = dest.size() + + iterateList(source, listStart, listLength) { childTid: Int, childStart: Int, length: Int -> + if (TypeIdHelper.isNonNullStruct(childTid)) readImportStruct(source, childStart, length, dest, cp) + } + + if (dest.size() == checkpoint) { + // Truncate to remove the USE directive + dest.truncate(checkpoint - 1) + } else { + // Close the USE directive + dest.add(I_END_CONTAINER) + } + } + + /** + * Reads an import struct according to https://amazon-ion.github.io/ion-docs/docs/symbols.html#imports + */ + @JvmStatic + @SuppressFBWarnings("SF_SWITCH_NO_DEFAULT") + private fun readImportStruct( + source: ByteArray, + contentStart: Int, + contentLength: Int, + dest: BytecodeBuffer, + cp: AppendableConstantPoolView + ) { + + var catalogName: String? 
= null + var catalogVersion: Int = -1 + var maxId: Int = -1 + + var hasSeenCatalogName = false + var hasSeenCatalogVersion = false + var hasSeenMaxId = false + + iterateStruct(source, contentStart, contentLength) { fieldSid, fieldTid, pos, length -> + when (fieldSid) { + SystemSymbols.NAME_SID -> { + if (hasSeenCatalogName) throw IonException("Multiple name fields found within a single import.") + hasSeenCatalogName = true + if (TypeIdHelper.isNonNullString(fieldTid)) { + catalogName = String(source, pos, length, Charsets.UTF_8) + } + } + SystemSymbols.VERSION_SID -> { + if (hasSeenCatalogVersion) throw IonException("Multiple version fields found within a single import.") + hasSeenCatalogVersion = true + if (TypeIdHelper.isNonNullPositiveInt(fieldTid)) { + catalogVersion = readUInt(source, pos, length).toInt() + } + } + SystemSymbols.MAX_ID_SID -> { + if (hasSeenMaxId) throw IonException("Multiple max_id fields found within a single import.") + hasSeenMaxId = true + if (TypeIdHelper.isNonNullPositiveInt(fieldTid)) { + maxId = readUInt(source, pos, length).toInt() + } + } + } + } + + // No name, empty name, or $ion, so we ignore the import clause + if (catalogName == null || catalogName == "\$ion" || catalogName == "") return + val cpIndex = cp.add(catalogName) + dest.add(I_STRING_CP.packInstructionData(cpIndex)) + if (catalogVersion < 1) catalogVersion = 1 + dest.add2(I_INT_I32, catalogVersion) + if (maxId < 0) { + dest.add(I_NULL_NULL) + } else { + dest.add2(I_INT_I32, maxId) + } + } + + /** + * Reads all symbols in the symbols lists, adding them to the constant pool. Any values that are not a non-null + * string result in a symbol with unknown text, so a `null` is added to the constant pool. 
+ */ + @JvmStatic + private fun readSymbolsList(source: ByteArray, position: Int, listLength: Int, cp: AppendableConstantPoolView) { + iterateList(source, position, listLength) { typeId, p, length -> + if (TypeIdHelper.isNonNullString(typeId)) { + cp.add(String(source, p, length, Charsets.UTF_8)) + } else { + cp.add(null) + } + } + } + + // ==== General helpers for traversing through the symbol table struct ==== + + /** + * Iterates over all fields in a struct. + * For each field that is not NOP padding, it calls [fieldHandler] with the field name SID, the value's type id, and the position and length of the value's content. + * Annotations are ignored in symbol table and import structs, so this handles skipping the annotations. + */ + @JvmStatic + @SuppressFBWarnings("SF_SWITCH_NO_DEFAULT") + private inline fun iterateStruct( + source: ByteArray, + start: Int, + length: Int, + fieldHandler: (fieldSid: Int, valueTid: Int, pos: Int, len: Int) -> Unit + ) { + var p = start + + val end = p + length + + while (p < end) { + val fieldSidValueAndLength = VarIntHelper.readVarUIntValueAndLength(source, p) + val fieldSid = fieldSidValueAndLength.shr(ONE_BYTE_SHIFT).toInt() + p += fieldSidValueAndLength.toInt().and(ONE_BYTE_MASK) + + var typeId = source[p++].unsignedToInt() + + when (TypeIdHelper.operationKindForTypeId(typeId)) { + // This is a nop, so we skip this field (its length bytes and its content) without calling the handler + OperationKind.UNSET -> { + val childLengthAndLengthSize = getLengthForTypeId(typeId, source, p) + p += childLengthAndLengthSize.toInt().and(ONE_BYTE_MASK) + p += childLengthAndLengthSize.shr(ONE_BYTE_SHIFT).toInt() + continue + } + // We ignore annotations inside all symbol table structs and import structs: skip the wrapper header and continue with the wrapped value's type id + OperationKind.ANNOTATIONS -> { + p += skipAnnotations(typeId, source, p) + typeId = source[p++].unsignedToInt() + } + } + + val childLengthAndLengthSize = getLengthForTypeId(typeId, source, p) + p += childLengthAndLengthSize.toInt().and(ONE_BYTE_MASK) + val l = childLengthAndLengthSize.shr(ONE_BYTE_SHIFT).toInt() + fieldHandler(fieldSid, typeId, p, l) + p += l + } + } + + /** + * Iterates over all values in a 
list. + * For each value that is not NOP padding (null values included), it calls [valueHandler] with the value's type id and the position and length of its content. + * Annotations are ignored in symbols and imports lists, so this handles skipping the annotations. + */ + @JvmStatic + private inline fun iterateList( + source: ByteArray, + position: Int, + length: Int, + valueHandler: (typeId: Int, position: Int, length: Int) -> Unit + ) { + var p = position + val end = position + length + while (p < end) { + val typeId = source[p++].unsignedToInt() + + when (TypeIdHelper.operationKindForTypeId(typeId)) { + // This is a nop, so we skip this value (its length bytes and its content) without calling the handler + OperationKind.UNSET -> { + val childLengthAndLengthSize = getLengthForTypeId(typeId, source, p) + p += childLengthAndLengthSize.toInt().and(ONE_BYTE_MASK) + p += childLengthAndLengthSize.shr(ONE_BYTE_SHIFT).toInt() + continue + } + // We ignore annotations on anything inside a local symbol table. Only the wrapper header is skipped here; the wrapped value is handled by the next loop iteration. + OperationKind.ANNOTATIONS -> { + p += skipAnnotations(typeId, source, p) + continue + } + } + + val childLengthAndLengthSize = getLengthForTypeId(typeId, source, p) + p += childLengthAndLengthSize.toInt().and(ONE_BYTE_MASK) + val l = childLengthAndLengthSize.shr(ONE_BYTE_SHIFT).toInt() + valueHandler(typeId, p, l) + p += l + } + } + + /** Returns the number of bytes needed to skip the annotation wrapper's lengths and annotation SIDs, starting at [position], and go to the annotated value. */ + @JvmStatic + private fun skipAnnotations(typeId: Int, source: ByteArray, position: Int): Int { + var p = position + // Skip the annotations and do nothing with them, but don't skip the annotated value. + if (TypeIdHelper.isVariableLength(typeId)) { + p += VarIntHelper.readVarUIntValueAndLength(source, p).toInt().and(ONE_BYTE_MASK) + } + val innerAnnotationLength = VarIntHelper.readVarUIntValueAndLength(source, p) + p += innerAnnotationLength.toInt().and(ONE_BYTE_MASK) + innerAnnotationLength.shr(ONE_BYTE_SHIFT).toInt() + return p - position + } + + /** + * Gets the length for the given TypeId, reading a VarUInt length if needed. 
+ * The returned Long packs the content length in the bits above the low byte and the number of bytes taken by the VarUInt length field in the low byte (0 when the length comes from the type id itself). The original note says this returns -1 if there is not enough data available to read the full VarUInt length; NOTE(review): no such check is visible in this function, so that would have to come from VarIntHelper - confirm. + * + * @throws IonException if the typeId is not a legal typeId in Ion 1.0 + */ + @JvmStatic + private fun getLengthForTypeId(typeId: Int, source: ByteArray, position: Int): Long { + return when (val l = TypeIdHelper.TYPE_LENGTHS[typeId]) { + -1 -> VarIntHelper.readVarUIntValueAndLength(source, position) + -2 -> throw IonException("Invalid Type ID: $typeId") + else -> l.toLong().shl(ONE_BYTE_SHIFT) + } + } +} diff --git a/src/main/java/com/amazon/ion/bytecode/bin10/TypeIdHelper.kt b/src/main/java/com/amazon/ion/bytecode/bin10/TypeIdHelper.kt index 832be1f21..4f4817a9d 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin10/TypeIdHelper.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin10/TypeIdHelper.kt @@ -8,6 +8,29 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings internal object TypeIdHelper { + private const val POSITIVE_INT_HIGH_NIBBLE = 0x20 + private const val STRING_HIGH_NIBBLE = 0x80 + private const val STRUCT_HIGH_NIBBLE = 0xD0 + private const val HIGH_NIBBLE_MASK = 0xF0 + private const val LOW_NIBBLE_MASK = 0xF + private const val NULL_LOW_NIBBLE = 0xF + private const val VAR_LENGTH_LOW_NIBBLE = 0xE + + @JvmStatic + fun isNonNullPositiveInt(typeId: Int): Boolean = typeId.and(HIGH_NIBBLE_MASK) == POSITIVE_INT_HIGH_NIBBLE && typeId.and(LOW_NIBBLE_MASK) != NULL_LOW_NIBBLE + + @JvmStatic + fun isNonNullString(typeId: Int): Boolean = typeId.and(HIGH_NIBBLE_MASK) == STRING_HIGH_NIBBLE && typeId.and(LOW_NIBBLE_MASK) != NULL_LOW_NIBBLE + + @JvmStatic + fun isNonNullStruct(typeId: Int): Boolean = typeId.and(HIGH_NIBBLE_MASK) == STRUCT_HIGH_NIBBLE && typeId.and(LOW_NIBBLE_MASK) != NULL_LOW_NIBBLE + + @JvmStatic + fun isNull(typeId: Int): Boolean = typeId.and(LOW_NIBBLE_MASK) == NULL_LOW_NIBBLE + + @JvmStatic + fun isVariableLength(typeId: Int): Boolean = typeId.and(LOW_NIBBLE_MASK) == VAR_LENGTH_LOW_NIBBLE + /** * Returns the IonType for a legal Ion 1.0 typeId. 
* diff --git a/src/main/java/com/amazon/ion/bytecode/bin10/ValueHelpers.kt b/src/main/java/com/amazon/ion/bytecode/bin10/ValueHelpers.kt index d3b48329e..cce6bf574 100644 --- a/src/main/java/com/amazon/ion/bytecode/bin10/ValueHelpers.kt +++ b/src/main/java/com/amazon/ion/bytecode/bin10/ValueHelpers.kt @@ -24,6 +24,19 @@ internal fun signForIntTypeId(typeId: Int): Int = (((typeId shr 4) shl 31) shr 3 */ internal fun getSignumValueFromLeadingSignBit(byte: Byte): Int = byte.toInt().shr(7).shl(1) + 1 +/** + * Reads an unsigned integer from a [ByteArray]. Does not perform array bounds checking. + */ +internal fun readUInt(source: ByteArray, startIndex: Int, length: Int): Long { + var result: Long = 0 + // TODO(perf): See if it's better to branch on the length and unroll the loops vs having a variable-length loop. + val endIndex = startIndex + length + for (i in startIndex until endIndex) { + result = (result shl 8) or (source[i].toInt() and 0xFF).toLong() + } + return result +} + /** * Reads a timestamp value from the given byte array. */ diff --git a/src/test/java/com/amazon/ion/bytecode/bin10/SymbolTableHelperTest.kt b/src/test/java/com/amazon/ion/bytecode/bin10/SymbolTableHelperTest.kt new file mode 100644 index 000000000..ff08a305d --- /dev/null +++ b/src/test/java/com/amazon/ion/bytecode/bin10/SymbolTableHelperTest.kt @@ -0,0 +1,584 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.bytecode.bin10 + +import com.amazon.ion.IonException +import com.amazon.ion.bytecode.GeneratorTestUtil.assertEqualBytecode +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_ADD_SYMBOLS +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_SET_SYMBOLS +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_USE +import com.amazon.ion.bytecode.ir.Instructions.I_END_CONTAINER +import com.amazon.ion.bytecode.ir.Instructions.I_INT_I32 +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_NULL +import com.amazon.ion.bytecode.ir.Instructions.I_STRING_CP +import com.amazon.ion.bytecode.ir.Instructions.I_SYMBOL_CP +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData +import com.amazon.ion.bytecode.util.BytecodeBuffer +import com.amazon.ion.bytecode.util.ConstantPool +import com.amazon.ion.system.IonBinaryWriterBuilder +import com.amazon.ion.system.IonSystemBuilder +import org.junit.jupiter.api.Assertions.assertArrayEquals +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import java.io.ByteArrayOutputStream +import com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE as ion_symbol_table + +/** + * TODO: Tests for edge cases such as + * - Repeated field name with NOP; i.e.: { symbols: [], symbols: } should be okay + * - Repeated field names with nonsense values should always throw an exception, even though the value will be discarded. 
+ */ +class SymbolTableHelperTest { + + @Test + fun `symbol table with one symbol`() = expectBytecodeForLst( + lstText = """ { symbols: [ "a" ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_SYMBOL_CP.packInstructionData(0), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("a") + ) + + @Test + fun `symbol table with multiple symbols`() = expectBytecodeForLst( + lstText = """ { symbols: [ "a", "b", "c", "d" ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_SYMBOL_CP.packInstructionData(0), + I_SYMBOL_CP.packInstructionData(1), + I_SYMBOL_CP.packInstructionData(2), + I_SYMBOL_CP.packInstructionData(3), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("a", "b", "c", "d"), + ) + + @Test + fun `import field that is not a list or '$ion_symbol_table' should be ignored`() = expectBytecodeForLst( + lstText = """ { imports: name } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ), + ) + + @Test + fun `symbol table with LST append and no symbols`() = expectBytecodeForLst( + lstText = """ { imports: $ion_symbol_table } """, + // An empty append is a no-op, so no bytecode is expected. It would also work to have I_DIRECTIVE_ADD_SYMBOLS, I_END_CONTAINER + expectedBytecode = intArrayOf(), + ) + + @Test + fun `symbol table with LST append and one symbol`() = expectBytecodeForLst( + lstText = """ { imports: $ion_symbol_table, symbols: [ "a" ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_ADD_SYMBOLS, + I_SYMBOL_CP.packInstructionData(0), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("a") + ) + + @Test + fun `symbol table with one symbol followed by LST append`() = expectBytecodeForLst( + lstText = """ { symbols: [ "a" ], imports: $ion_symbol_table } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_ADD_SYMBOLS, + I_SYMBOL_CP.packInstructionData(0), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("a") + ) + + @Test + fun `symbol table with LST append and multiple symbols`() = expectBytecodeForLst( + lstText = """ { 
imports: $ion_symbol_table, symbols: [ "a", "b", "c", "d" ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_ADD_SYMBOLS, + I_SYMBOL_CP.packInstructionData(0), + I_SYMBOL_CP.packInstructionData(1), + I_SYMBOL_CP.packInstructionData(2), + I_SYMBOL_CP.packInstructionData(3), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("a", "b", "c", "d"), + ) + + @Test + fun `symbol table with imports and no symbols`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:"foo",version:1} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_NULL_NULL, + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo"), + ) + + @Test + fun `symbol table with import followed by one symbol`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:"foo",version:1} ], symbols: [ "a" ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_NULL_NULL, + I_END_CONTAINER, + I_DIRECTIVE_ADD_SYMBOLS, + I_SYMBOL_CP.packInstructionData(1), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo", "a") + ) + + @Test + fun `symbol table with one symbol followed by import`() = expectBytecodeForLst( + lstText = """ { symbols: [ "a" ], imports: [ {name:"foo",version:1} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(1), + I_INT_I32, 1, + I_NULL_NULL, + I_END_CONTAINER, + I_DIRECTIVE_ADD_SYMBOLS, + I_SYMBOL_CP.packInstructionData(0), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("a", "foo") + ) + + @Test + fun `symbol table with imports and multiple symbols`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:"foo",version:1} ], symbols: [ "a", "b", "c", "d" ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + 
I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_NULL_NULL, + I_END_CONTAINER, + I_DIRECTIVE_ADD_SYMBOLS, + I_SYMBOL_CP.packInstructionData(1), + I_SYMBOL_CP.packInstructionData(2), + I_SYMBOL_CP.packInstructionData(3), + I_SYMBOL_CP.packInstructionData(4), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo", "a", "b", "c", "d"), + ) + + @Test + fun `symbol table with multiple imports and multiple symbols`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:"foo",version:1}, {name:"bar",version:2} ], symbols: [ "a", "b", "c", "d" ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + // First import + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_NULL_NULL, + // Second import + I_STRING_CP.packInstructionData(1), + I_INT_I32, 2, + I_NULL_NULL, + I_END_CONTAINER, + // Symbols + I_DIRECTIVE_ADD_SYMBOLS, + I_SYMBOL_CP.packInstructionData(2), + I_SYMBOL_CP.packInstructionData(3), + I_SYMBOL_CP.packInstructionData(4), + I_SYMBOL_CP.packInstructionData(5), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo", "bar", "a", "b", "c", "d"), + ) + + @Test + fun `annotations on symbol table fields should be ignored`() = expectBytecodeForLst( + lstText = """ { imports: name::[], symbols: version::[] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ), + ) + + @Test + fun `null symbol table struct`() = expectBytecodeForLst( + lstText = """ null.struct """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ) + ) + + @Test + fun `empty symbol table struct`() = expectBytecodeForLst( + lstText = """ { } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ) + ) + + @Test + fun `unspecified fields in symbol table should be ignored`() = expectBytecodeForLst( + lstText = """ { name: 1 } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + 
I_END_CONTAINER, + ) + ) + + @Test + fun `symbol table with empty symbols list`() = expectBytecodeForLst( + lstText = """ { symbols: [] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ) + ) + + @Test + fun `symbol table with symbols field that is not a list`() = expectBytecodeForLst( + lstText = """ { symbols: 0 } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ) + ) + + @Test + fun `symbol table with symbols field that is not a list and one that is a list`() = assertFails( + lstText = """ { symbols: 0, symbols: [] } """, + reason = "Multiple symbols fields" + ) + + @Test + fun `symbol table with two symbols lists`() = assertFails( + lstText = """ { symbols: [], symbols: [] } """, + reason = "Multiple symbols fields" + ) + + @Test + fun `symbol table with two imports lists`() = assertFails( + lstText = """ { imports: [], imports: [] } """, + reason = "Multiple imports fields" + ) + + @Test + fun `symbol table with imports list and LST append`() = assertFails( + lstText = """ { imports: [], imports: $ion_symbol_table } """, + reason = "Multiple imports fields" + ) + + @Test + fun `symbol table with LST append and import list`() = assertFails( + lstText = """ { imports: $ion_symbol_table, imports: [] } """, + reason = "Multiple imports fields" + ) + + @Test + fun `symbol table with two LST append`() = assertFails( + lstText = """ { imports: $ion_symbol_table, imports: $ion_symbol_table } """, + reason = "Multiple imports fields" + ) + + // Symbol specific tests + + @Test + fun `annotations on symbols should be ignored`() = expectBytecodeForLst( + lstText = """ { symbols: [version::"a"] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_SYMBOL_CP.packInstructionData(0), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("a") + ) + + @Test + fun `non-string values in symbol list should result in a symbol with unknown text`() = expectBytecodeForLst( + lstText = """ { 
symbols: [1] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_SYMBOL_CP.packInstructionData(0), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf(null) + ) + + @Test + fun `null values in symbol list should result in a symbol with unknown text`() = expectBytecodeForLst( + lstText = """ { symbols: [null, null.string] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_SYMBOL_CP.packInstructionData(0), + I_SYMBOL_CP.packInstructionData(1), + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf(null, null) + ) + + // Import-specific tests + + @Test + fun `imports that are null or not structs should be ignored`() = expectBytecodeForLst( + lstText = """ { imports: [ 1, 2e0, null, null.struct ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ), + ) + + @Test + fun `import with null name should be ignored`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:null, version:1} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ), + ) + + @Test + fun `import with no name should be ignored`() = expectBytecodeForLst( + lstText = """ { imports: [ {version:2} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ), + ) + + @Test + fun `import with name '$ion' should be ignored`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:$1, version:1} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + ), + ) + + @Test + fun `import with no version defaults to 1`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:"foo"} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_NULL_NULL, + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo") + ) + + @Test + fun `import with non-default version`() = expectBytecodeForLst( + lstText = """ { imports: 
[ {name:"foo", version: 2} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 2, + I_NULL_NULL, + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo") + ) + + @Test + fun `import with max_id specified`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:"foo", max_id: 10} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_INT_I32, 10, + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo") + ) + + @Test + fun `import with max_id unspecified`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:"foo", version: 1} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_NULL_NULL, + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo") + ) + + @Test + fun `max_id that is not a non-negative integer should be interpreted as null`() = expectBytecodeForLst( + lstText = """ + { + imports: [ + {name:"foo", version:2, max_id:-10 }, + {name:"bar", version:3, max_id:"a" }, + {name:"baz", version:4, max_id:2.0 } + ] + } + """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 2, + I_NULL_NULL, + I_STRING_CP.packInstructionData(1), + I_INT_I32, 3, + I_NULL_NULL, + I_STRING_CP.packInstructionData(2), + I_INT_I32, 4, + I_NULL_NULL, + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo", "bar", "baz") + ) + + @Test + fun `version that is not a positive integer should be interpreted as 1`() = expectBytecodeForLst( + lstText = """ + { + imports: [ + {name:"a", version:0 }, + {name:"b", version:"a" }, + {name:"c", version:-10 }, + {name:"d", version:2.0 } , + ] + } + """, + expectedBytecode = intArrayOf( + 
I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_NULL_NULL, + I_STRING_CP.packInstructionData(1), + I_INT_I32, 1, + I_NULL_NULL, + I_STRING_CP.packInstructionData(2), + I_INT_I32, 1, + I_NULL_NULL, + I_STRING_CP.packInstructionData(3), + I_INT_I32, 1, + I_NULL_NULL, + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("a", "b", "c", "d") + ) + + @Test + fun `unspecified fields in import should be ignored`() = expectBytecodeForLst( + lstText = """ { imports: [ {name:"foo", symbols: 99, version: 1, imports: 100, max_id: 1} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_INT_I32, 1, + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo") + ) + + @Test + fun `annotations in import should be ignored`() = expectBytecodeForLst( + lstText = """ { imports: [ {name: symbols::"foo", version: imports::1, max_id: $ion_symbol_table::1} ] } """, + expectedBytecode = intArrayOf( + I_DIRECTIVE_SET_SYMBOLS, + I_END_CONTAINER, + I_DIRECTIVE_USE, + I_STRING_CP.packInstructionData(0), + I_INT_I32, 1, + I_INT_I32, 1, + I_END_CONTAINER, + ), + expectedConstantPool = arrayOf("foo") + ) + + @Test + fun `import with two name fields`() = assertFails( + lstText = """ { imports: [ {name:"foo", name:"foo", max_id: 99, version: 1} ]} """, + reason = "Multiple name fields" + ) + + @Test + fun `import with two version fields`() = assertFails( + lstText = """ { imports: [ {name:"foo", max_id: 99, version: 1, version: 1} ]} """, + reason = "Multiple version fields" + ) + + @Test + fun `import with two max_id`() = assertFails( + lstText = """ { imports: [ {name:"foo", max_id: 99, max_id: 99, version: 1} ]} """, + reason = "Multiple max_id fields" + ) + + /** + * [lstText] is the Ion text of the symbol table struct, _without_ any annotations. 
Don't use any user symbols + * in the text, or the stream positioning logic in [expectBytecodeForLst] will get all messed up. Passes only if an IonException whose message contains [reason] is thrown; a null message counts as a failure. + */ + private fun assertFails(lstText: String, reason: String) { + val e = assertThrows<com.amazon.ion.IonException> { expectBytecodeForLst(lstText, intArrayOf(/* It should fail before we make this comparison */)) } + assertTrue(e.message?.contains(reason) == true, "Exception message \"${e.message}\" does not contain the expected substring \"$reason\"") + } + + /** + * [lstText] is the Ion text of the symbol table struct, _without_ any annotations. Don't use any user symbols + * in the text, or the stream positioning logic in this method will get all messed up. The struct body is located right after the 4-byte IVM and the struct type descriptor; its length comes from a VarUInt when the descriptor is 0xDE, is 0 for 0xDF, and is the low nibble otherwise. [expectedConstantPool] defaults to an empty array. + */ + private fun expectBytecodeForLst(lstText: String, expectedBytecode: IntArray, expectedConstantPool: Array<out Any?> = emptyArray()) { + val source = IonSystemBuilder.standard().build().singleValue(lstText).let { + val baos = ByteArrayOutputStream() + val writer = IonBinaryWriterBuilder.standard().build(baos) + it.writeTo(writer) + writer.close() + baos.toByteArray() + } + + var position = 4 // After the IVM + val structTid = source[position++].toInt().and(0xFF) + val length = when (structTid) { + 0xDE -> { + val lengthAndSizeOfLength = VarIntHelper.readVarUIntValueAndLength(source, position) + position += lengthAndSizeOfLength.and(0xFF).toInt() + lengthAndSizeOfLength.shr(8).toInt() + } + 0xDF -> 0 + else -> structTid.and(0xF) + } + + val bytecode = BytecodeBuffer() + val cp = ConstantPool() + SymbolTableHelper.compileSymbolTable(source, position, length, bytecode, cp) + assertEqualBytecode(expectedBytecode, bytecode.toArray()) + assertArrayEquals(expectedConstantPool, cp.toArray()) + } +} diff --git a/src/test/java/com/amazon/ion/bytecode/bin10/TypeIdHelperTest.kt b/src/test/java/com/amazon/ion/bytecode/bin10/TypeIdHelperTest.kt index f212465a5..c7f8c8535 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin10/TypeIdHelperTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin10/TypeIdHelperTest.kt @@ -10,6 +10,115 @@ import 
org.junit.jupiter.params.provider.CsvSource class TypeIdHelperTest { + /** Checks [TypeIdHelper.isNull] against sample type ID bytes: the rows expect true for 0x8F, 0x9F, 0xAF and 0xBF (low nibble 0xF) and false for the sampled IDs with other low nibbles. */ @ParameterizedTest + @CsvSource( + "0x10, false", + "0x21, false", + "0x32, false", + "0x43, false", + "0x54, false", + "0x65, false", + "0x8F, true", + "0x9F, true", + "0xAF, true", + "0xBF, true", + ) + fun testIsNull(tid: Int, expected: Boolean) { + assertEquals(expected, TypeIdHelper.isNull(tid)) + } + + /** Checks [TypeIdHelper.isNonNullPositiveInt]: the rows expect true for the sampled IDs 0x20 through 0x2E, and false for 0x2F, for IDs with high nibble 0x3, and for every other sampled ID. */ @ParameterizedTest + @CsvSource( + "0x10, false", + "0x20, true", + "0x21, true", + "0x22, true", + "0x24, true", + "0x28, true", + "0x2E, true", + "0x2F, false", + "0x30, false", + "0x32, false", + "0x34, false", + "0x43, false", + "0x54, false", + "0x65, false", + "0x8F, false", + "0x9F, false", + "0xAF, false", + "0xBF, false", + ) + fun testIsNonNullPositiveInt(tid: Int, expected: Boolean) { + assertEquals(expected, TypeIdHelper.isNonNullPositiveInt(tid)) + } + + /** Checks [TypeIdHelper.isNonNullString]: the rows expect true for the sampled IDs 0x80 through 0x8E, and false for 0x8F and for every other sampled ID. */ @ParameterizedTest + @CsvSource( + "0x10, false", + "0x21, false", + "0x32, false", + "0x43, false", + "0x54, false", + "0x65, false", + "0x80, true", + "0x82, true", + "0x84, true", + "0x88, true", + "0x8E, true", + "0x8F, false", + "0x9F, false", + "0xAF, false", + "0xBF, false", + ) + fun testIsNonNullString(tid: Int, expected: Boolean) { + assertEquals(expected, TypeIdHelper.isNonNullString(tid)) + } + + /** Checks [TypeIdHelper.isNonNullStruct]: the rows expect true for the sampled IDs 0xD0 through 0xDE, and false for 0xDF and for every other sampled ID. */ @ParameterizedTest + @CsvSource( + "0x10, false", + "0x21, false", + "0x32, false", + "0x43, false", + "0x54, false", + "0x65, false", + "0x8F, false", + "0x9F, false", + "0xAF, false", + "0xBF, false", + "0xD0, true", + "0xD2, true", + "0xD3, true", + "0xDE, true", + "0xDF, false", + ) + fun testIsNonNullStruct(tid: Int, expected: Boolean) { + assertEquals(expected, TypeIdHelper.isNonNullStruct(tid)) + } + + @ParameterizedTest + @CsvSource( + "0x0E, true", + "0x10, false", + "0x21, false", + "0x2E, true", + "0x32, false", + "0x3E, true", + "0x43, false", + "0x4E, true", + "0x54, false", + "0x5E, true", + "0x65, false", + "0xAF, false", + "0xD0, false", + "0xDE, true", + "0xDF, false", + "0xEE, true", + ) + fun 
testIsVariableLength(tid: Int, expected: Boolean) { + assertEquals(expected, TypeIdHelper.isVariableLength(tid)) + } + @ParameterizedTest @CsvSource( // Typed values diff --git a/src/test/java/com/amazon/ion/bytecode/bin10/ValueHelpersTest.kt b/src/test/java/com/amazon/ion/bytecode/bin10/ValueHelpersTest.kt index 449714beb..88ee7be8f 100644 --- a/src/test/java/com/amazon/ion/bytecode/bin10/ValueHelpersTest.kt +++ b/src/test/java/com/amazon/ion/bytecode/bin10/ValueHelpersTest.kt @@ -14,6 +14,25 @@ import org.junit.jupiter.params.provider.CsvSource class ValueHelpersTest { + /** Checks readUInt on the fixture bytes 0x01 through 0x0B: each row gives a start index, a byte count, and the expected value, which is those bytes read most-significant-byte first as one unsigned integer; a zero-length read expects 0. */ @ParameterizedTest + @CsvSource( + "0, 0, 0", + "0, 1, 0x01", + "1, 1, 0x02", + "1, 2, 0x0203", + "1, 4, 0x02030405", + "2, 3, 0x030405", + "2, 8, 0x030405060708090A", + "4, 4, 0x05060708", + "4, 7, 0x05060708090A0B", + "8, 2, 0x090A", + ) + fun testReadUInt(startIndex: Int, length: Int, expected: Long) { + val bytes = byteArrayOf(0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB) + val result = readUInt(bytes, startIndex, length) + assertEquals(expected, result) + } + @ParameterizedTest @CsvSource( "0x20, 1",