Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
376 changes: 376 additions & 0 deletions src/main/java/com/amazon/ion/bytecode/bin10/SymbolTableHelper.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,376 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.bytecode.bin10

import com.amazon.ion.IonException
import com.amazon.ion.SystemSymbols
import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_ADD_SYMBOLS
import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_SET_SYMBOLS
import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_USE
import com.amazon.ion.bytecode.ir.Instructions.I_END_CONTAINER
import com.amazon.ion.bytecode.ir.Instructions.I_INT_I32
import com.amazon.ion.bytecode.ir.Instructions.I_NULL_NULL
import com.amazon.ion.bytecode.ir.Instructions.I_STRING_CP
import com.amazon.ion.bytecode.ir.Instructions.I_SYMBOL_CP
import com.amazon.ion.bytecode.ir.Instructions.packInstructionData
import com.amazon.ion.bytecode.ir.OperationKind
import com.amazon.ion.bytecode.util.AppendableConstantPoolView
import com.amazon.ion.bytecode.util.BytecodeBuffer
import com.amazon.ion.bytecode.util.unsignedToInt
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings

/**
* Helper to generate Bytecode instructions for Ion 1.0 style symbol tables.
*
* We cannot meaningfully read a partial symbol table, so it seems a reasonable requirement that the entire symbol
* table must be buffered before we generate bytecode for it. Therefore, this is re-usable for both continuable and
* non-continuable bytecode generators.
*
* The Bytecode uses Ion 1.1 style directives, which don't quite align with Ion 1.0 directives.
* So, we have two options:
* 1. We can output the bytecode that is functionally equivalent to "classic" symbol tables
* 2. We can add a "classic" symbol table directive to the bytecode
*
* For option 1, the generated bytecode is as follows:
* - if there's no LST append, and no imports, then we can generate a SET_SYMBOLS instruction
* - if there's LST append, and no imports, then we can generate an ADD_SYMBOLS instruction
* - if there are imports, there cannot also be LST append.
* - if there are imports, and it's not LST append, we can generate an empty SET_SYMBOLS, followed by a USE for all the
* imports, followed by ADD_SYMBOLS with the local symbols.
* - if there's LST append with no local symbols added, there is nothing to add, so the ADD_SYMBOLS directive is
* elided completely
* - if there are imports and no local symbols, we can generate an empty SET_SYMBOLS, followed by USE for all the imports.
* - if there are no imports, no LST append, and no local symbols, we can generate an empty SET_SYMBOLS.
*
* This means we need to buffer some data before emitting the bytecode, but that's okay.
* We can put the strings into the constant pool, and keep track of the min and max, since there will be nothing else
* that we would put in the constant pool while we're processing a symbol table.
*
* It's actually beneficial to put the symbol text strings in the constant pool now. They can be added to the symbol
* table from the constant pool comparatively cheaply, and we can decode the strings eagerly without having as much
* overhead from the control flow of calling `readTextReference()` over and over.
*
* Logic is roughly this:
*
* ```pseudocode
* let symbolsStartInclusive = 0
* let symbolsEndExclusive = 0
* let isAppend = false
* while hasMoreFields():
* let fieldName = readFieldName()
* switch(fieldName):
* "imports":
* let valueType = readValueType()
* switch(valueType):
* symbol:
* readAndValidate "$ion_symbol_table"
* isAppend = true
* list:
* bytecode.add2(SET_SYMBOLS, END_CONTAINER)
* bytecode.add(USE)
* while hasMoreListElements():
* compileImport()
* bytecode.add(END_CONTAINER)
* isAppend = true
* "symbols":
* symbolsStartInclusive = sizeOf(constantPool)
* for value in list:
* if value is symbol:
* constantPool.add(readText())
* else:
* constantPool.add(null)
* symbolsEndExclusive = sizeOf(constantPool)
*
* if (isAppend):
* bytecode.add(ADD_SYMBOLS)
* else:
* bytecode.add(SET_SYMBOLS)
* for i in symbolsStartInclusive .. symbolsEndExclusive:
* bytecode.add(SYMBOL_CP(i))
* bytecode.add(END_CONTAINER)
* ```
*/
internal object SymbolTableHelper {

// The length-reading helpers used in this object return a value and its encoded size packed into a single Long:
// the low byte holds the number of bytes the encoding occupied, and the remaining upper bits hold the value.

// Mask that extracts the low byte (the encoded size) from a packed value-and-length.
private const val ONE_BYTE_MASK = 0xFF
// Shift that drops the low byte of a packed value-and-length, leaving only the value.
private const val ONE_BYTE_SHIFT = 8

/**
 * Compiles an Ion 1.0 local symbol table struct into symbol table directive bytecode.
 *
 * The fields of the struct are visited once:
 * - an `imports` field whose value is a list is compiled immediately by [readImportsList];
 * - an `imports` field whose value is the symbol `$ion_symbol_table` marks the table as an LST append;
 * - for a `symbols` field whose value is a list, one constant pool entry is added per list element, and the
 *   range of constant pool indices that was used is remembered.
 *
 * Any other field, and any other value type for the two fields above, is ignored.
 *
 * Once all fields have been visited, the local symbols are emitted as an ADD_SYMBOLS directive (when there was an
 * imports list or an LST append) or as a SET_SYMBOLS directive (otherwise). An ADD_SYMBOLS directive that would be
 * empty is not emitted at all.
 *
 * @param source buffer containing the symbol table struct
 * @param position index in [source] at which iteration over the struct's fields starts
 * @param structLength number of bytes of [source], starting at [position], that hold the struct's fields
 * @param dest buffer that receives the generated instructions
 * @param cp constant pool that receives the symbol text (and the names of any imports)
 * @throws IonException if the struct has more than one `imports` field or more than one `symbols` field
 */
@JvmStatic
@SuppressFBWarnings("SF_SWITCH_NO_DEFAULT")
fun compileSymbolTable(source: ByteArray, position: Int, structLength: Int, dest: BytecodeBuffer, cp: AppendableConstantPoolView) {
    var importsFieldSeen = false
    var symbolsFieldSeen = false
    var appendToCurrentSymbols = false

    // Half-open range [firstSymbolCpIndex, symbolCpIndexLimit) of constant pool slots that hold the local symbols.
    var firstSymbolCpIndex = 0
    var symbolCpIndexLimit = 0

    iterateStruct(source, position, structLength) { fieldSid, fieldTid, pos, length ->
        val valueKind = TypeIdHelper.operationKindForTypeId(fieldTid)
        when (fieldSid) {
            SystemSymbols.IMPORTS_SID -> {
                if (importsFieldSeen) throw IonException("Multiple imports fields found within a single local symbol table.")
                importsFieldSeen = true
                if (valueKind == OperationKind.SYMBOL) {
                    // Only `$ion_symbol_table` has a meaning here (LST append); any other symbol is ignored.
                    val importsSid = readUInt(source, pos, length).toInt()
                    if (importsSid == SystemSymbols.ION_SYMBOL_TABLE_SID) appendToCurrentSymbols = true
                } else if (valueKind == OperationKind.LIST) {
                    // The imports directives are written to dest right away; local symbols get appended after them.
                    readImportsList(source, pos, length, dest, cp)
                    appendToCurrentSymbols = true
                }
            }
            SystemSymbols.SYMBOLS_SID -> {
                if (symbolsFieldSeen) throw IonException("Multiple symbols fields found within a single local symbol table.")
                symbolsFieldSeen = true
                if (valueKind == OperationKind.LIST) {
                    firstSymbolCpIndex = cp.size
                    readSymbolsList(source, pos, length, cp)
                    symbolCpIndexLimit = cp.size
                }
            }
        }
    }

    // Appending zero symbols changes nothing, so the directive is left out entirely.
    if (appendToCurrentSymbols && firstSymbolCpIndex == symbolCpIndexLimit) return

    dest.add(if (appendToCurrentSymbols) I_DIRECTIVE_ADD_SYMBOLS else I_DIRECTIVE_SET_SYMBOLS)
    var cpIndex = firstSymbolCpIndex
    while (cpIndex < symbolCpIndexLimit) {
        dest.add(I_SYMBOL_CP.packInstructionData(cpIndex))
        cpIndex++
    }
    dest.add(I_END_CONTAINER)
}

/**
 * Compiles the list value of an `imports` field.
 *
 * An empty SET_SYMBOLS directive is always emitted first. It is followed by a USE directive containing the
 * instructions written by [readImportStruct] for each non-null struct in the list; every other list element is
 * ignored. If no import produced any instructions, the USE directive is removed again, so only the empty
 * SET_SYMBOLS directive remains in [dest].
 *
 * @param source buffer containing the imports list
 * @param listStart index in [source] at which iteration over the list's elements starts
 * @param listLength number of bytes of [source], starting at [listStart], that hold the list's elements
 * @param dest buffer that receives the generated instructions
 * @param cp constant pool that receives the names of the imported tables
 */
@JvmStatic
private fun readImportsList(
    source: ByteArray,
    listStart: Int,
    listLength: Int,
    dest: BytecodeBuffer,
    cp: AppendableConstantPoolView
) {
    // Clear default module symbols and start adding the imports in a USE directive
    dest.add3(I_DIRECTIVE_SET_SYMBOLS, I_END_CONTAINER, I_DIRECTIVE_USE)

    val sizeAfterUseOpcode = dest.size()

    iterateList(source, listStart, listLength) { elementTid, elementStart, elementLength ->
        if (TypeIdHelper.isNonNullStruct(elementTid)) {
            readImportStruct(source, elementStart, elementLength, dest, cp)
        }
    }

    val anyImportEmitted = dest.size() != sizeAfterUseOpcode
    if (anyImportEmitted) {
        // Close the USE directive
        dest.add(I_END_CONTAINER)
        return
    }
    // Nothing was imported: drop the single USE instruction but keep the empty SET_SYMBOLS directive.
    // NOTE(review): this relies on truncate() taking the new size of the buffer; confirm against BytecodeBuffer.
    dest.truncate(sizeAfterUseOpcode - 1)
}

/**
 * Compiles a single import struct, as described by
 * https://amazon-ion.github.io/ion-docs/docs/symbols.html#imports
 *
 * Only the `name`, `version` and `max_id` fields are examined. A field whose value does not have the expected type
 * (non-null string for `name`, non-null positive int for the other two) is treated as if it were absent.
 *
 * Nothing is emitted when the name is missing, empty, or `$ion`. Otherwise three values are written to [dest]:
 * a reference to the name in the constant pool, the version (any value below 1 is replaced by 1), and the max_id
 * (a null when it is absent).
 *
 * NOTE(review): `version` and `max_id` are narrowed with `toInt()`; whether values that do not fit in an Int are
 * rejected earlier depends on `readUInt`, which is not visible here. Confirm.
 *
 * @param source buffer containing the import struct
 * @param contentStart index in [source] at which iteration over the struct's fields starts
 * @param contentLength number of bytes of [source], starting at [contentStart], that hold the struct's fields
 * @param dest buffer that receives the generated instructions
 * @param cp constant pool that receives the name of the imported table
 * @throws IonException if any of `name`, `version` or `max_id` occurs more than once
 */
@JvmStatic
@SuppressFBWarnings("SF_SWITCH_NO_DEFAULT")
private fun readImportStruct(
    source: ByteArray,
    contentStart: Int,
    contentLength: Int,
    dest: BytecodeBuffer,
    cp: AppendableConstantPoolView
) {
    var nameFieldSeen = false
    var versionFieldSeen = false
    var maxIdFieldSeen = false

    // -1 stands for "no usable value" for both of the numeric fields.
    var importName: String? = null
    var importVersion: Int = -1
    var importMaxId: Int = -1

    iterateStruct(source, contentStart, contentLength) { fieldSid, fieldTid, pos, length ->
        when (fieldSid) {
            SystemSymbols.NAME_SID -> {
                if (nameFieldSeen) throw IonException("Multiple name fields found within a single import.")
                nameFieldSeen = true
                if (TypeIdHelper.isNonNullString(fieldTid)) {
                    importName = String(source, pos, length, Charsets.UTF_8)
                }
            }
            SystemSymbols.VERSION_SID -> {
                if (versionFieldSeen) throw IonException("Multiple version fields found within a single import.")
                versionFieldSeen = true
                if (TypeIdHelper.isNonNullPositiveInt(fieldTid)) {
                    importVersion = readUInt(source, pos, length).toInt()
                }
            }
            SystemSymbols.MAX_ID_SID -> {
                if (maxIdFieldSeen) throw IonException("Multiple max_id fields found within a single import.")
                maxIdFieldSeen = true
                if (TypeIdHelper.isNonNullPositiveInt(fieldTid)) {
                    importMaxId = readUInt(source, pos, length).toInt()
                }
            }
        }
    }

    // An import without a usable name, or an import of $ion, is ignored.
    val name = importName
    if (name == null || name.isEmpty() || name == "\$ion") return

    dest.add(I_STRING_CP.packInstructionData(cp.add(name)))
    // A missing or too-small version is treated as version 1.
    dest.add2(I_INT_I32, maxOf(importVersion, 1))
    if (importMaxId >= 0) {
        dest.add2(I_INT_I32, importMaxId)
    } else {
        dest.add(I_NULL_NULL)
    }
}

/**
 * Adds one constant pool entry for every element of a `symbols` list.
 *
 * An element that is a non-null string contributes its text, decoded as UTF-8. Any other element declares a symbol
 * with unknown text, which is represented by a `null` entry.
 *
 * @param source buffer containing the symbols list
 * @param position index in [source] at which iteration over the list's elements starts
 * @param listLength number of bytes of [source], starting at [position], that hold the list's elements
 * @param cp constant pool that receives the entries
 */
@JvmStatic
private fun readSymbolsList(source: ByteArray, position: Int, listLength: Int, cp: AppendableConstantPoolView) {
    iterateList(source, position, listLength) { elementTid, textStart, textLength ->
        val symbolText: String? = if (TypeIdHelper.isNonNullString(elementTid)) {
            String(source, textStart, textLength, Charsets.UTF_8)
        } else {
            null
        }
        cp.add(symbolText)
    }
}

// ==== General helpers for traversing through the symbol table struct ====

/**
 * Walks over the fields of a struct, calling [fieldHandler] for each one.
 *
 * Fields whose value is NOP padding are skipped without calling the handler. If a field value is wrapped in
 * annotations, the annotations are skipped and the handler receives the wrapped value instead, because annotations
 * are ignored inside symbol table structs and import structs.
 *
 * The handler is given the field name SID, the type ID of the value, the index in [source] of the first byte of
 * the value's content, and the length of that content. No filtering by type is done here, so the handler has to
 * check the type ID itself.
 *
 * @param source buffer containing the struct
 * @param start index in [source] of the first field
 * @param length number of bytes of [source], starting at [start], that hold the fields
 * @param fieldHandler callback invoked for every field that is not NOP padding
 */
@JvmStatic
@SuppressFBWarnings("SF_SWITCH_NO_DEFAULT")
private inline fun iterateStruct(
    source: ByteArray,
    start: Int,
    length: Int,
    fieldHandler: (fieldSid: Int, valueTid: Int, pos: Int, len: Int) -> Unit
) {
    val limit = start + length
    var cursor = start

    while (cursor < limit) {
        // Field name: value in the upper bits, number of bytes that were read in the low byte.
        val packedFieldSid = VarIntHelper.readVarUIntValueAndLength(source, cursor)
        val fieldSid = (packedFieldSid shr ONE_BYTE_SHIFT).toInt()
        cursor += (packedFieldSid.toInt() and ONE_BYTE_MASK)

        var valueTid = source[cursor].unsignedToInt()
        cursor++

        when (TypeIdHelper.operationKindForTypeId(valueTid)) {
            // NOP padding: step over its length prefix and its content, then move on to the next field
            OperationKind.UNSET -> {
                val packedPaddingLength = getLengthForTypeId(valueTid, source, cursor)
                cursor += (packedPaddingLength.toInt() and ONE_BYTE_MASK) + (packedPaddingLength shr ONE_BYTE_SHIFT).toInt()
                continue
            }
            // Annotation wrapper: step over the annotations, then continue with the type ID of the wrapped value
            OperationKind.ANNOTATIONS -> {
                cursor += skipAnnotations(valueTid, source, cursor)
                valueTid = source[cursor].unsignedToInt()
                cursor++
            }
        }

        val packedValueLength = getLengthForTypeId(valueTid, source, cursor)
        val valueStart = cursor + (packedValueLength.toInt() and ONE_BYTE_MASK)
        val valueLength = (packedValueLength shr ONE_BYTE_SHIFT).toInt()
        fieldHandler(fieldSid, valueTid, valueStart, valueLength)
        cursor = valueStart + valueLength
    }
}

/**
 * Walks over the elements of a list, calling [valueHandler] for each one.
 *
 * Elements that are NOP padding are skipped without calling the handler. For an annotated element only the
 * annotations are skipped; the wrapped value is then picked up by the next pass of the loop as if it were a plain
 * element, because annotations are ignored in symbols and imports lists.
 *
 * The handler is given the type ID of the element, the index in [source] of the first byte of the element's
 * content, and the length of that content. No filtering by type is done here, so the handler has to check the
 * type ID itself.
 *
 * @param source buffer containing the list
 * @param position index in [source] of the first element
 * @param length number of bytes of [source], starting at [position], that hold the elements
 * @param valueHandler callback invoked for every element that is not NOP padding
 */
@JvmStatic
private inline fun iterateList(
    source: ByteArray,
    position: Int,
    length: Int,
    valueHandler: (typeId: Int, position: Int, length: Int) -> Unit
) {
    val limit = position + length
    var cursor = position

    while (cursor < limit) {
        val elementTid = source[cursor].unsignedToInt()
        cursor++
        val elementKind = TypeIdHelper.operationKindForTypeId(elementTid)

        if (elementKind == OperationKind.ANNOTATIONS) {
            // We ignore annotations on anything inside a local symbol table. The cursor now points at the wrapped
            // value, which is read by the next pass of the loop.
            cursor += skipAnnotations(elementTid, source, cursor)
            continue
        }

        // NOP padding and real values are measured the same way; only real values are reported to the handler.
        val packedLength = getLengthForTypeId(elementTid, source, cursor)
        val contentStart = cursor + (packedLength.toInt() and ONE_BYTE_MASK)
        val contentLength = (packedLength shr ONE_BYTE_SHIFT).toInt()
        cursor = contentStart + contentLength

        if (elementKind != OperationKind.UNSET) {
            valueHandler(elementTid, contentStart, contentLength)
        }
    }
}

/**
 * Computes how many bytes separate [position] from the value wrapped by an annotation wrapper.
 *
 * The bytes that are counted are the wrapper's own length field (present only when [typeId] is a variable-length
 * type ID), the field holding the length of the annotations, and the annotations themselves. The wrapped value is
 * not counted, and the annotations are not inspected.
 *
 * @param typeId type ID of the annotation wrapper
 * @param source buffer containing the annotation wrapper
 * @param position index in [source] of the byte that follows the wrapper's type ID
 * @return the number of bytes between [position] and the type ID of the wrapped value
 */
@JvmStatic
private fun skipAnnotations(typeId: Int, source: ByteArray, position: Int): Int {
    // Only the size of the wrapper's length field matters here; the length value itself is not needed.
    val wrapperLengthSize = if (TypeIdHelper.isVariableLength(typeId)) {
        VarIntHelper.readVarUIntValueAndLength(source, position).toInt() and ONE_BYTE_MASK
    } else {
        0
    }
    val packedAnnotationsLength = VarIntHelper.readVarUIntValueAndLength(source, position + wrapperLengthSize)
    val annotationsLengthSize = packedAnnotationsLength.toInt() and ONE_BYTE_MASK
    val annotationsLength = (packedAnnotationsLength shr ONE_BYTE_SHIFT).toInt()
    return wrapperLengthSize + annotationsLengthSize + annotationsLength
}

/**
 * Determines the content length of the value that has the given type ID.
 *
 * The result is a packed value-and-size: the content length is in the upper bits, and the low byte holds the
 * number of bytes, starting at [position], that the length field occupies. When the length is fixed by the type ID
 * alone, nothing is read from [source] and the low byte is 0.
 *
 * NOTE(review): an earlier version of this comment said that -1 is returned when there is not enough data to read
 * the whole VarUInt length. Nothing in this function produces that value, so it would have to come from
 * `VarIntHelper.readVarUIntValueAndLength`. Confirm.
 *
 * @param typeId type ID of the value
 * @param source buffer containing the value
 * @param position index in [source] of the byte that follows the type ID
 * @throws IonException if the typeId is not a legal typeId in Ion 1.0
 */
@JvmStatic
private fun getLengthForTypeId(typeId: Int, source: ByteArray, position: Int): Long {
    val tableEntry = TypeIdHelper.TYPE_LENGTHS[typeId]
    // -1 marks a type ID that is followed by a VarUInt length
    if (tableEntry == -1) return VarIntHelper.readVarUIntValueAndLength(source, position)
    // -2 marks a type ID that is not legal
    if (tableEntry == -2) throw IonException("Invalid Type ID: $typeId")
    // Anything else is the length itself; the low byte stays 0 because no length bytes were read
    return tableEntry.toLong() shl ONE_BYTE_SHIFT
}
}
Loading
Loading