Skip to content

Commit 7b407eb

Browse files
austnwil and popematt authored
Add list, symbol/annotation SID, single-char symbol opcode handlers (#1140)
Implement handlers for the following:
- Length-prefixed short list - opcodes `0xB0-0xBF`
- Variable-length list - opcode `0xFA`
- Delimited list - opcode `0xF0`
- Tagless list (for primitive encodings) - opcode `0x5B`
- Annotation SID - opcode `0x58`
- Symbol SID - opcodes `0x50-0x57`
- Single-char symbol - opcode `0xA1`

---------

Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com>
1 parent 7de8dd3 commit 7b407eb

16 files changed

Lines changed: 1392 additions & 21 deletions

src/main/java/com/amazon/ion/bytecode/BytecodeEmitter.kt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,19 @@ internal object BytecodeEmitter {
8787
fun emitShortTimestampReference(destination: BytecodeBuffer, opcode: Int, dataPosition: Int) {
    // Two-word instruction: the opcode is packed into the instruction word, followed by the data position.
    val instruction = Instructions.I_SHORT_TIMESTAMP_REF.packInstructionData(opcode)
    destination.add2(instruction, dataPosition)
}
90+
91+
/**
 * Emits a complete list into [destination].
 *
 * A slot for the list-start instruction is reserved up front, [contentWriter] is invoked to emit the
 * children, an `I_END_CONTAINER` instruction is appended, and finally the reserved slot is filled in with
 * `I_LIST_START` carrying the length of the bytecode that follows it.
 *
 * @param destination Buffer that receives the list bytecode
 * @param contentWriter Callback that must write the bytecode of the list's children to `destination`
 */
@JvmStatic
inline fun emitList(destination: BytecodeBuffer, contentWriter: () -> Unit) {
    val startSlot = destination.reserve()
    contentWriter()
    destination.add(Instructions.I_END_CONTAINER)
    // The packed length covers everything after the start instruction (the end-container instruction
    // included) but not the start instruction itself.
    val bodyLength = destination.size() - (startSlot + 1)
    destination[startSlot] = Instructions.I_LIST_START.packInstructionData(bodyLength)
}
90105
}

src/main/java/com/amazon/ion/bytecode/bin11/ByteArrayBytecodeGenerator11.kt

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,21 @@ internal class ByteArrayBytecodeGenerator11(
3838
while (currentPosition < source.size && !isSystemValue(opcode)) {
3939
opcode = source[currentPosition++].unsignedToInt()
4040
val handler = OpcodeHandlerTable.handler(opcode)
41-
currentPosition += handler.convertOpcodeToBytecode(
42-
opcode,
43-
source,
44-
currentPosition,
45-
destination,
46-
constantPool,
47-
macroSrc,
48-
macroIndices,
49-
symTab
50-
)
41+
try {
42+
currentPosition += handler.convertOpcodeToBytecode(
43+
opcode,
44+
source,
45+
currentPosition,
46+
destination,
47+
constantPool,
48+
macroSrc,
49+
macroIndices,
50+
symTab
51+
)
52+
} catch (e: StackOverflowError) {
53+
// TODO: implement recursion limit instead of catching StackOverflowError
54+
throw IonException("Ion data nested too deeply", e)
55+
}
5156
}
5257

5358
if (currentPosition >= source.size) {
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package com.amazon.ion.bytecode.bin11.bytearray
4+
5+
import com.amazon.ion.bytecode.ir.Instructions
6+
import com.amazon.ion.bytecode.ir.Instructions.packInstructionData
7+
import com.amazon.ion.bytecode.util.AppendableConstantPoolView
8+
import com.amazon.ion.bytecode.util.BytecodeBuffer
9+
10+
/**
 * Opcode handler for `0x58`: an annotation identified by its symbol address (SID).
 *
 * The SID is read as a FlexUInt starting at `position` and emitted as a single `I_ANNOTATION_SID`
 * instruction. The returned value is the encoded length of that FlexUInt.
 */
internal object AnnotationSIDOpcodeHandler : OpcodeToBytecodeHandler {
    @OptIn(ExperimentalStdlibApi::class)
    override fun convertOpcodeToBytecode(
        opcode: Int,
        source: ByteArray,
        position: Int,
        destination: BytecodeBuffer,
        constantPool: AppendableConstantPoolView,
        macroSrc: IntArray,
        macroIndices: IntArray,
        symbolTable: Array<String?>
    ): Int {
        // One Long carries two Ints: the decoded value in the low 32 bits, the encoded length in the high 32 bits.
        val packed = PrimitiveDecoder.readFlexUIntValueAndLength(source, position)
        val symbolId = packed.toInt()
        destination.add(Instructions.I_ANNOTATION_SID.packInstructionData(symbolId))
        return (packed shr Int.SIZE_BITS).toInt()
    }
}

src/main/java/com/amazon/ion/bytecode/bin11/bytearray/IntOpcodeHandlers.kt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt24As
88
import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt32
99
import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedInt8AsShort
1010
import com.amazon.ion.bytecode.bin11.bytearray.PrimitiveDecoder.readFixedIntAsLong
11+
import com.amazon.ion.bytecode.ir.Instructions
12+
import com.amazon.ion.bytecode.ir.Instructions.packInstructionData
1113
import com.amazon.ion.bytecode.util.AppendableConstantPoolView
1214
import com.amazon.ion.bytecode.util.BytecodeBuffer
1315

@@ -143,3 +145,31 @@ internal object LongIntOpcodeHandler : OpcodeToBytecodeHandler {
143145
return fixedIntLength
144146
}
145147
}
148+
149+
/**
 * Tagless handler for opcode `0x60`: writes a variable-length integer (FlexInt) to the bytecode buffer.
 *
 * To keep things simple, only `I_INT_I32`, `I_INT_I64`, and `I_INT_CP` bytecode is ever produced;
 * `I_INT_I16` is not used even when the value would fit in it.
 *
 * The handler's result is the encoded length of the FlexInt.
 */
@OptIn(ExperimentalStdlibApi::class)
internal val TAGLESS_FLEX_INT = OpcodeToBytecodeHandler { _, src, pos, dest, cp, _, _, _ ->
    val encodedLength = PrimitiveDecoder.lengthOfFlexIntOrUIntAt(src, pos)
    // TODO(perf): See if there's any performance benefit to having a separate case for length=1|2 and using INT_I16 instruction
    when (encodedLength) {
        in 1..4 -> {
            // Only the low 32 bits (the value) of the packed result are needed here.
            val packed = PrimitiveDecoder.readFlexIntValueAndLength(src, pos)
            dest.add2(Instructions.I_INT_I32, packed.toInt())
        }
        in 5..9 -> BytecodeEmitter.emitInt64Value(dest, PrimitiveDecoder.readFlexIntAsLong(src, pos))
        else -> {
            // Anything longer goes into the constant pool as a BigInteger and is referenced by its index.
            val bigValue = PrimitiveDecoder.readFlexIntAsBigInteger(src, pos)
            val poolIndex = cp.size
            cp.add(bigValue)
            dest.add(Instructions.I_INT_CP.packInstructionData(poolIndex))
        }
    }
    encodedLength
}
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package com.amazon.ion.bytecode.bin11.bytearray
4+
5+
import com.amazon.ion.bytecode.BytecodeEmitter
6+
import com.amazon.ion.bytecode.bin11.OpCode
7+
import com.amazon.ion.bytecode.util.AppendableConstantPoolView
8+
import com.amazon.ion.bytecode.util.BytecodeBuffer
9+
import com.amazon.ion.bytecode.util.unsignedToInt
10+
11+
// TODO: much of the logic here is shared between lists and sexps. It might be worthwhile to do something like
12+
// "SequenceOpcodeHandlers" and pass the start instruction (Instructions.I_LIST_START vs .I_SEXP_START) to a helper
13+
// BytecodeEmitter.emitSequence() or similar so this logic is not duplicated in a set of `*SexpOpcodeHandler`s.
14+
15+
/**
 * Writes a length-prefixed list to the bytecode buffer. Handles opcodes `0xB0`-`0xBF`.
 *
 * The low nibble of the opcode is the length, in bytes, of the list body that starts at `position`.
 * Each child is dispatched through [OpcodeHandlerTable] and emitted between the list start and end
 * instructions written by [BytecodeEmitter.emitList].
 *
 * @return the number of source bytes consumed after the opcode byte, i.e. the declared list length
 * @throws com.amazon.ion.IonException if the child values do not end exactly at the declared end of the list
 */
internal object ShortLengthPrefixedListOpcodeHandler : OpcodeToBytecodeHandler {
    @OptIn(ExperimentalStdlibApi::class)
    override fun convertOpcodeToBytecode(
        opcode: Int,
        source: ByteArray,
        position: Int,
        destination: BytecodeBuffer,
        constantPool: AppendableConstantPoolView,
        macroSrc: IntArray,
        macroIndices: IntArray,
        symbolTable: Array<String?>
    ): Int {
        val length = opcode and 0xF
        val end = position + length
        var p = position
        BytecodeEmitter.emitList(destination) {
            while (p < end) {
                val childOpcode = source[p++].unsignedToInt()
                p += OpcodeHandlerTable.handler(childOpcode).convertOpcodeToBytecode(
                    childOpcode,
                    source,
                    p,
                    destination,
                    constantPool,
                    macroSrc,
                    macroIndices,
                    symbolTable,
                )
            }
        }
        // A child that runs past the declared boundary means the data is malformed. Without this check the
        // caller would resume in the middle of that child's bytes and silently mis-parse what follows.
        // (Fully qualified so that this block does not depend on an additional import.)
        if (p != end) {
            throw com.amazon.ion.IonException(
                "List declared a length of $length bytes but its child values consumed ${p - position} bytes"
            )
        }
        return length
    }
}
51+
52+
/**
 * Writes a length-prefixed list to the bytecode buffer. Handles opcode `0xFA`.
 *
 * The list body is preceded by a FlexUInt giving its length in bytes. Each child is dispatched through
 * [OpcodeHandlerTable] and emitted between the list start and end instructions written by
 * [BytecodeEmitter.emitList].
 *
 * @return the number of source bytes consumed after the opcode byte: the length prefix plus the list body
 * @throws com.amazon.ion.IonException if the declared length is invalid or the child values do not end
 *   exactly at the declared end of the list
 */
internal object LongLengthPrefixedListOpcodeHandler : OpcodeToBytecodeHandler {
    @OptIn(ExperimentalStdlibApi::class)
    override fun convertOpcodeToBytecode(
        opcode: Int,
        source: ByteArray,
        position: Int,
        destination: BytecodeBuffer,
        constantPool: AppendableConstantPoolView,
        macroSrc: IntArray,
        macroIndices: IntArray,
        symbolTable: Array<String?>
    ): Int {
        // One Long carries two Ints: the decoded value in the low 32 bits, the encoded length in the high 32 bits.
        val containerSizeUIntValueAndLength = PrimitiveDecoder.readFlexUIntValueAndLength(source, position)
        val containerLength = containerSizeUIntValueAndLength.toInt()
        val prefixLength = containerSizeUIntValueAndLength.shr(Int.SIZE_BITS).toInt()
        val bodyStart = position + prefixLength
        val end = bodyStart + containerLength
        var p = bodyStart
        BytecodeEmitter.emitList(destination) {
            while (p < end) {
                val childOpcode = source[p++].unsignedToInt()
                p += OpcodeHandlerTable.handler(childOpcode).convertOpcodeToBytecode(
                    childOpcode,
                    source,
                    p,
                    destination,
                    constantPool,
                    macroSrc,
                    macroIndices,
                    symbolTable,
                )
            }
        }
        // Rejects two kinds of malformed input that would otherwise go unnoticed:
        //  - a child that runs past the declared boundary (the caller would resume mid-value), and
        //  - a declared length whose low 32 bits read as a negative Int (the loop is skipped and the
        //    returned byte count could move the caller's cursor backwards).
        // (Fully qualified so that this block does not depend on an additional import.)
        if (p != end) {
            throw com.amazon.ion.IonException(
                "List declared a length of $containerLength bytes but its child values consumed ${p - bodyStart} bytes"
            )
        }
        return containerLength + prefixLength
    }
}
90+
91+
/**
 * Opcode handler for `0xF0`: a delimited list.
 *
 * Children are read one after another, each dispatched through [OpcodeHandlerTable], until the
 * `OpCode.DELIMITED_CONTAINER_END` opcode is encountered. The returned byte count includes that
 * terminating opcode.
 */
internal object DelimitedListOpcodeHandler : OpcodeToBytecodeHandler {
    @OptIn(ExperimentalStdlibApi::class)
    override fun convertOpcodeToBytecode(
        opcode: Int,
        source: ByteArray,
        position: Int,
        destination: BytecodeBuffer,
        constantPool: AppendableConstantPoolView,
        macroSrc: IntArray,
        macroIndices: IntArray,
        symbolTable: Array<String?>
    ): Int {
        var cursor = position
        BytecodeEmitter.emitList(destination) {
            var childOpcode = source[cursor++].unsignedToInt()
            while (childOpcode != OpCode.DELIMITED_CONTAINER_END) {
                val childHandler = OpcodeHandlerTable.handler(childOpcode)
                cursor += childHandler.convertOpcodeToBytecode(
                    childOpcode,
                    source,
                    cursor,
                    destination,
                    constantPool,
                    macroSrc,
                    macroIndices,
                    symbolTable,
                )
                // Fetch the opcode of the next child (or the end delimiter).
                childOpcode = source[cursor++].unsignedToInt()
            }
        }
        return cursor - position
    }
}
129+
130+
/**
 * Opcode handler for `0x5B`: a list whose elements are encoded without per-element opcodes.
 *
 * The byte at `position` describes the element encoding. It is either a macro address (possibly
 * continued by a FlexUInt) or the opcode shared by every element. A FlexUInt element count follows,
 * and then the elements themselves. Macro-shaped elements are not supported yet.
 *
 * The returned value is the number of source bytes consumed starting at `position`.
 */
internal object TaglessElementListOpcodeHandler : OpcodeToBytecodeHandler {
    @OptIn(ExperimentalStdlibApi::class)
    override fun convertOpcodeToBytecode(
        opcode: Int,
        source: ByteArray,
        position: Int,
        destination: BytecodeBuffer,
        constantPool: AppendableConstantPoolView,
        macroSrc: IntArray,
        macroIndices: IntArray,
        symbolTable: Array<String?>
    ): Int {
        var cursor = position
        val childOpcode = source[cursor++].unsignedToInt()

        // A non-negative result is the address of the macro that shapes the elements, in which case
        // childOpcode is not an element opcode. -1 means childOpcode is the opcode of every element.
        val macroAddress = when (childOpcode) {
            in 0x00..0x47 -> childOpcode
            in 0x48..0x4f -> {
                // The opcode supplies the low part of the address and a trailing FlexUInt the high part.
                val packed = PrimitiveDecoder.readFlexUIntValueAndLength(source, cursor)
                cursor += (packed shr Int.SIZE_BITS).toInt()
                (childOpcode - 0x48) + packed.toInt() * 8 + 72
            }
            0xf4 -> {
                // The whole address is carried by a trailing FlexUInt.
                val packed = PrimitiveDecoder.readFlexUIntValueAndLength(source, cursor)
                cursor += (packed shr Int.SIZE_BITS).toInt()
                packed.toInt()
            }
            else -> -1
        }

        val packedCount = PrimitiveDecoder.readFlexUIntValueAndLength(source, cursor)
        val childCount = packedCount.toInt()
        cursor += (packedCount shr Int.SIZE_BITS).toInt()

        if (macroAddress >= 0) {
            TODO("Macro evaluation not yet implemented")
        }

        // Every element uses the same tagless handler, so it is looked up once.
        val elementHandler = TaglessOpcodeHandlerTable.handler(childOpcode)
        BytecodeEmitter.emitList(destination) {
            repeat(childCount) {
                cursor += elementHandler.convertOpcodeToBytecode(
                    childOpcode,
                    source,
                    cursor,
                    destination,
                    constantPool,
                    macroSrc,
                    macroIndices,
                    symbolTable,
                )
            }
        }

        return cursor - position
    }
}

0 commit comments

Comments
 (0)