Skip to content

Commit b33e1e4

Browse files
authored
Implements Ion 1.1 managed writer (#1141)
1 parent 7b407eb commit b33e1e4

7 files changed

Lines changed: 2013 additions & 1 deletion

File tree

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package com.amazon.ion.impl
4+
5+
import com.amazon.ion.util._Private_FastAppendable
6+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
7+
import java.io.Closeable
8+
import java.io.Flushable
9+
10+
/**
 * A [_Private_FastAppendable] that buffers data to a [StringBuilder]. Only when
 * [flush] is called is the data written to the wrapped [Appendable].
 *
 * This is necessary for cases where an [IonManagedWriter_1_1] over Ion text needs to emit encoding directives that are
 * not known in advance. The [AppendableFastAppendable] class has no buffering, so system and user values would be
 * emitted in the wrong order.
 *
 * Once [IonManagedWriter_1_1] supports an auto-flush feature, then this class will have very little practical
 * difference from [AppendableFastAppendable] for the case where no system values are needed.
 *
 * All [Appendable] methods are delegated directly to the internal buffer, so the value returned by `append(...)`
 * is the buffer rather than this instance.
 *
 * TODO:
 * - Add proper tests
 *
 * @see BufferedOutputStreamFastAppendable
 * @see AppendableFastAppendable
 */
internal class BufferedAppendableFastAppendable(
    @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.")
    private val wrapped: Appendable,
    @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.")
    private val buffer: StringBuilder,
) : _Private_FastAppendable, Flushable, Closeable, Appendable by buffer {

    companion object {
        /** Creates an instance over [wrapped] that buffers into a new, empty [StringBuilder]. */
        @JvmStatic operator fun invoke(wrapped: Appendable): BufferedAppendableFastAppendable {
            return BufferedAppendableFastAppendable(wrapped, StringBuilder())
        }
    }

    // Everything is buffered as UTF-16 chars, so the ASCII and UTF-16 variants are handled the same way.
    override fun appendAscii(c: Char) { append(c) }
    override fun appendAscii(csq: CharSequence?) { append(csq) }
    override fun appendAscii(csq: CharSequence?, start: Int, end: Int) { append(csq, start, end) }
    override fun appendUtf16(c: Char) { append(c) }

    override fun appendUtf16Surrogate(leadSurrogate: Char, trailSurrogate: Char) {
        append(leadSurrogate)
        append(trailSurrogate)
    }

    /**
     * Flushes any buffered data to the wrapped [Appendable] and then closes it if it is [Closeable].
     *
     * The wrapped [Appendable] is closed even when [flush] throws, so that the underlying resource is not leaked.
     */
    override fun close() {
        try {
            flush()
        } finally {
            if (wrapped is Closeable) wrapped.close()
        }
    }

    /**
     * Writes the buffered data to the wrapped [Appendable], empties the buffer, and then flushes the wrapped
     * [Appendable] if it is [Flushable].
     */
    override fun flush() {
        wrapped.append(buffer)
        // The data now belongs to `wrapped`. Clear the buffer before flushing downstream so that a failure in
        // the downstream flush cannot cause the same data to be appended again by a later flush() or close().
        buffer.setLength(0)
        if (wrapped is Flushable) wrapped.flush()
    }
}
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package com.amazon.ion.impl
4+
5+
import com.amazon.ion.IonWriter
6+
import com.amazon.ion.SymbolTable
7+
import com.amazon.ion.bytecode.bin11.OpCode
8+
import com.amazon.ion.ion_1_1.IonRawWriter_1_1
9+
import com.amazon.ion.ion_1_1.MacroImpl
10+
import java.util.ArrayList
11+
import java.util.HashMap
12+
import java.util.LinkedHashMap
13+
14+
/**
 * Tracks the symbols and macros used by a managed writer, split into those that belong to the prior encoding
 * context and those that have been added since and still need to be written by [writeEncodingDirective].
 *
 * TODO: Testing that is distinct from [IonManagedWriter_1_1] tests.
 * TODO(perf): See if there is a meaningful effect on performance if we move all of this into [IonManagedWriter_1_1].
 */
internal class IonManagedWriterEncodingContext_1_1 {

    companion object {
        private const val NUMBER_OF_SYSTEM_SIDS = 9

        private val SYSTEM_SYMBOLS = mapOf(
            "\$ion" to 1,
            "\$ion_1_0" to 2,
            "\$ion_symbol_table" to 3,
            "name" to 4,
            "version" to 5,
            "imports" to 6,
            "symbols" to 7,
            "max_id" to 8,
            "\$ion_shared_symbol_table" to 9,
        )
    }

    // We take a slightly different approach here by handling the encoding context as a prior encoding context
    // plus a list of symbols added by the current encoding context.
    /** The symbol table for the prior encoding context */
    private val symbolTable: HashMap<String, Int> = HashMap<String, Int>().also { it.putAll(SYSTEM_SYMBOLS) }

    /** Symbols to be interned since the prior encoding context. */
    private val newSymbols: HashMap<String, Int> = LinkedHashMap() // Preserves insertion order.

    /** The macro table of the prior encoding context. Map value is the user-space address. */
    private val macroTable: HashMap<MacroImpl, Int> = LinkedHashMap()
    /** Macros to be added since the last encoding directive was flushed. Map value is the user-space address. */
    private val newMacros: HashMap<MacroImpl, Int> = LinkedHashMap()
    /** Macro names by user-space address, including new macros. */
    private val macroNames = ArrayList<String?>()
    /** Macro definitions by user-space address, including new macros. */
    private val macrosById = ArrayList<MacroImpl>()

    /**
     * Adds a new symbol to the table for this writer, or finds an existing definition of it. This writer does not
     * implement [IonWriter.getSymbolTable], so this method supplies some of that functionality.
     *
     * A newly added symbol is assigned the next SID after all symbols in the prior encoding context and all
     * symbols that are already pending.
     *
     * @return an SID for the given symbol text
     * @see SymbolTable.intern
     */
    fun intern(text: String): Int {
        // Check the current symbol table
        var sid = symbolTable[text]
        if (sid != null) return sid
        // Check the to-be-appended symbols
        sid = newSymbols[text]
        if (sid != null) return sid
        // Add to the to-be-appended symbols
        sid = symbolTable.size + newSymbols.size + 1
        newSymbols[text] = sid
        return sid
    }

    /**
     * Adds a named macro to the macro table
     *
     * Steps:
     * - If the name is not already in use...
     *     - And the macro is already in `newMacros`...
     *         1. Get the address of the macro in `newMacros`
     *         2. Add the name to `macroNames` for that address
     *         3. Return the address
     *     - Else...
     *         1. Add a new entry for the macro to `newMacros` and get a new address
     *         2. Add the name to `macroNames` for the new address
     *         3. Return the new address
     * - If the name is already in use...
     *     - And it is associated with the same macro...
     *         1. Return the address associated with the name
     *     - And it is associated with a different macro...
     *         - This is where the managed writer must take an opinion. (Or be configurable.)
     *             - It could mangle the name
     *             - It could remove the name from a macro in macroTable, but then it would have to immediately flush to
     *               make sure that any prior e-expressions are still valid. In addition, we would need to re-export all
     *               the other macros from `_` (the default module).
     *         - For now, we're just throwing an Exception.
     */
    private fun getOrAssignMacroAddressAndName(name: String, macro: MacroImpl): Int {
        // TODO: This is O(n), but could be O(1).
        var existingAddress = macroNames.indexOf(name)
        if (existingAddress < 0) {
            // Name is not already in use
            existingAddress = newMacros.getOrDefault(macro, -1)

            val address = if (existingAddress < 0) {
                // Macro is not in newMacros
                // Add to newMacros and get a macro address
                assignMacroAddress(macro)
            } else {
                // Macro already exists in newMacros.
                // NOTE(review): nothing here checks that the existing entry is unnamed, so if the macro was
                // previously added under a different name, that name is replaced below. Confirm this is intended.
                existingAddress
            }
            // Set the name of the macro
            macroNames[address] = name
            return address
        } else if (macrosById[existingAddress] == macro) {
            // Macro already in table, and already using the same name
            return existingAddress
        } else {
            // Name is already in use for a different macro.
            // This macro may or may not be in the table under a different name, but that's
            // not particularly relevant unless we want to try to fall back to a different name.
            TODO("Name shadowing is not supported yet. Call finish() before attempting to shadow an existing macro.")
        }
    }

    /**
     * Steps for adding an anonymous macro to the macro table
     *   1. Check macroTable, if found, return that address
     *   2. Check newMacros, if found, return that address
     *   3. Add to newMacros, return new address
     */
    private fun getOrAssignMacroAddress(macro: MacroImpl): Int {
        var address = macroTable.getOrDefault(macro, -1)
        if (address >= 0) return address
        address = newMacros.getOrDefault(macro, -1)
        if (address >= 0) return address

        return assignMacroAddress(macro)
    }

    /**
     * Returns the user-space address for [macro], adding it to the pending macros if necessary.
     *
     * When [name] is `null` the macro is treated as anonymous; otherwise the name is associated with the address.
     * Requesting a name that is already in use for a different macro is not supported yet and fails via [TODO].
     */
    fun getOrAssignMacroAddress(macro: MacroImpl, name: String?): Int {
        return if (name == null)
            getOrAssignMacroAddress(macro)
        else
            getOrAssignMacroAddressAndName(name, macro)
    }

    /** Returns the name recorded for the macro at user-space address [id], or `null` if it has no name. */
    fun getMacroNameForId(id: Int): String? = macroNames[id]

    /** Unconditionally adds a macro to the macro table data structures and returns the new address. */
    private fun assignMacroAddress(macro: MacroImpl): Int {
        val address = macrosById.size
        macrosById.add(macro)
        macroNames.add(null)
        newMacros[macro] = address
        return address
    }

    /**
     * Discards all symbols and macros, both written and pending, leaving only the system symbols.
     */
    fun reset() {
        symbolTable.clear()
        // Pending symbols carry SIDs computed against the old symbol table, so they must be discarded as well.
        newSymbols.clear()
        macroNames.clear()
        macrosById.clear()
        macroTable.clear()
        newMacros.clear()
        symbolTable.putAll(SYSTEM_SYMBOLS)
    }

    /**
     * Writes an encoding directive for the current encoding context, and updates internal state accordingly.
     * This always appends to the current encoding context. If there is nothing to append, calling this function
     * is a no-op.
     */
    fun writeEncodingDirective(systemData: IonRawWriter_1_1) {
        if (newSymbols.isEmpty() && newMacros.isEmpty()) return

        writeSymbolTableDirective(systemData)
        symbolTable.putAll(newSymbols)
        newSymbols.clear()
        // NOTE: Once we have emitted the symbol table update with set/add_symbols those symbols become available
        //       for use in set/add_macros (if relevant)

        writeMacroTableDirective(systemData)
        macroTable.putAll(newMacros)
        newMacros.clear()
    }

    /**
     * Updates the symbols in the encoding context by invoking
     * the `add_symbols` or `set_symbols` system macro.
     * If there are no new symbols, writes nothing.
     */
    private fun writeSymbolTableDirective(systemData: IonRawWriter_1_1) {
        val hasSymbolsToAdd = newSymbols.isNotEmpty()
        val hasSymbolsToRetain = symbolTable.size > NUMBER_OF_SYSTEM_SIDS
        if (!hasSymbolsToAdd) return
        // With nothing but system symbols in the prior context there is nothing to retain, so "set" is used.
        val directive = if (!hasSymbolsToRetain) OpCode.DIRECTIVE_SET_SYMBOLS else OpCode.DIRECTIVE_ADD_SYMBOLS

        // Add new symbols
        systemData.stepInDirective(directive)
        newSymbols.forEach { (text, _) -> systemData.writeString(text) }
        systemData.stepOut()
    }

    /**
     * Updates the macros in the encoding context by writing a `set_macros` or `add_macros` directive containing
     * one s-expression per new macro: the macro's name (or null if it is anonymous) followed by its definition.
     * If there are no new macros, writes nothing.
     */
    private fun writeMacroTableDirective(systemData: IonRawWriter_1_1) {
        val hasMacrosToAdd = newMacros.isNotEmpty()
        val hasMacrosToRetain = macroTable.isNotEmpty()
        if (!hasMacrosToAdd) return
        val directive = if (!hasMacrosToRetain) OpCode.DIRECTIVE_SET_MACROS else OpCode.DIRECTIVE_ADD_MACROS

        // Add new macros
        systemData.stepInDirective(directive)
        newMacros.forEach { (macro, id) ->
            val macroName = macroNames[id]
            systemData.stepInSExp(usingLengthPrefix = false)
            if (macroName == null) {
                systemData.writeNull()
            } else {
                systemData.writeSymbol(macroName)
            }
            macro.writeTo(systemData)
            systemData.stepOut()
        }
        systemData.stepOut()
    }
}

0 commit comments

Comments
 (0)