From b3a0639571a73b322456fb7b007d626717edd8e6 Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Wed, 5 Nov 2025 11:46:44 -0800 Subject: [PATCH 1/5] Implements Ion 1.1 managed writer --- .../impl/BufferedAppendableFastAppendable.kt | 54 ++ .../IonManagedWriterEncodingContext_1_1.kt | 221 +++++ .../ion/impl/IonManagedWriterOptions_1_1.kt | 150 +++ .../amazon/ion/impl/IonManagedWriter_1_1.kt | 504 ++++++++++ .../java/com/amazon/ion/ion_1_1/MacroImpl.kt | 2 +- .../ion/impl/IonManagedWriter_1_1_Test.kt | 911 ++++++++++++++++++ 6 files changed, 1841 insertions(+), 1 deletion(-) create mode 100644 src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt create mode 100644 src/main/java/com/amazon/ion/impl/IonManagedWriterEncodingContext_1_1.kt create mode 100644 src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt create mode 100644 src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt create mode 100644 src/test/java/com/amazon/ion/impl/IonManagedWriter_1_1_Test.kt diff --git a/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt b/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt new file mode 100644 index 000000000..cd93481cf --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.util._Private_FastAppendable +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings +import java.io.Closeable +import java.io.Flushable + +/** + * A [_Private_FastAppendable] that buffers data to a [StringBuilder]. Only when + * [flush] is called is the data written to the wrapped [Appendable]. + * + * This is necessary for cases where an [IonManagedWriter_1_1] over Ion text needs to emit encoding directives that are + * not known in advance. The [AppendableFastAppendable] class has no buffering, so system and user values would be + * emitted in the wrong order. + * + * Once [IonManagedWriter_1_1] supports an auto-flush feature, then this class will have very little practical + * difference from [AppendableFastAppendable] for the case where no system values are needed. + * + * TODO: + * - Add proper tests + * + * @see BufferedOutputStreamFastAppendable + * @see AppendableFastAppendable + */ +internal class BufferedAppendableFastAppendable( + @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.") + private val wrapped: Appendable, + @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.") + private val buffer: StringBuilder = StringBuilder() +) : _Private_FastAppendable, Flushable, Closeable, Appendable by buffer { + + override fun appendAscii(c: Char) { append(c) } + override fun appendAscii(csq: CharSequence?) { append(csq) } + override fun appendAscii(csq: CharSequence?, start: Int, end: Int) { append(csq, start, end) } + override fun appendUtf16(c: Char) { append(c) } + + override fun appendUtf16Surrogate(leadSurrogate: Char, trailSurrogate: Char) { + append(leadSurrogate) + append(trailSurrogate) + } + + override fun close() { + flush() + if (wrapped is Closeable) wrapped.close() + } + + override fun flush() { + wrapped.append(buffer) + if (wrapped is Flushable) wrapped.flush() + buffer.setLength(0) + } +} diff --git a/src/main/java/com/amazon/ion/impl/IonManagedWriterEncodingContext_1_1.kt b/src/main/java/com/amazon/ion/impl/IonManagedWriterEncodingContext_1_1.kt new file mode 100644 index 000000000..c5808a851 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/IonManagedWriterEncodingContext_1_1.kt @@ -0,0 +1,221 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.IonWriter +import com.amazon.ion.SymbolTable +import com.amazon.ion.bytecode.bin11.OpCode +import com.amazon.ion.ion_1_1.IonRawWriter_1_1 +import com.amazon.ion.ion_1_1.MacroImpl +import java.util.ArrayList +import java.util.HashMap +import java.util.LinkedHashMap + +/** + * TODO: Testing that is distinct from [IonManagedWriter_1_1] tests. + * TODO(perf): See if there is a meaningful effect on performance if we move all of this into [IonManagedWriter_1_1]. + */ +internal class IonManagedWriterEncodingContext_1_1 { + + companion object { + private const val NUMBER_OF_SYSTEM_SIDS = 9 + + private val SYSTEM_SYMBOLS = mapOf( + "\$ion" to 1, + "\$ion_1_0" to 2, + "\$ion_symbol_table" to 3, + "name" to 4, + "version" to 5, + "imports" to 6, + "symbols" to 7, + "max_id" to 8, + "\$ion_shared_symbol_table" to 9, + ) + } + + // We take a slightly different approach here by handling the encoding context as a prior encoding context + // plus a list of symbols added by the current encoding context. + /** The symbol table for the prior encoding context */ + private var symbolTable: HashMap = HashMap().also { it.putAll(SYSTEM_SYMBOLS) } + + /** Symbols to be interned since the prior encoding context. */ + private var newSymbols: HashMap = LinkedHashMap() // Preserves insertion order. + + /** The macro table of the prior encoding context. Map value is the user-space address. */ + private var macroTable: HashMap = LinkedHashMap() + /** Macros to be added since the last encoding directive was flushed. Map value is the user-space address. */ + private var newMacros: HashMap = LinkedHashMap() + /** Macro names by user-space address, including new macros. */ + private var macroNames = ArrayList() + /** Macro definitions by user-space address, including new macros. */ + private var macrosById = ArrayList() + + /** + * Adds a new symbol to the table for this writer, or finds an existing definition of it. This writer does not + * implement [IonWriter.getSymbolTable], so this method supplies some of that functionality. + * + * @return an SID for the given symbol text + * @see SymbolTable.intern + */ + fun intern(text: String): Int { + // Check the current symbol table + var sid = symbolTable[text] + if (sid != null) return sid + // Check the to-be-appended symbols + sid = newSymbols[text] + if (sid != null) return sid + // Add to the to-be-appended symbols + sid = symbolTable.size + newSymbols.size + 1 + newSymbols[text] = sid + return sid + } + + /** + * Adds a named macro to the macro table + * + * Steps: + * - If the name is not already in use... + * - And the macro is already in `newMacros`... + * 1. Get the address of the macro in `newMacros` + * 2. Add the name to `macroNames` for the that address + * 3. return the address + * - Else... + * 1. Add a new entry for the macro to `newMacros` and get a new address + * 2. Add the name to `macroNames` for the new address + * 3. Return the new address + * - If the name is already in use... + * - And it is associated with the same macro... + * 1. Return the address associated with the name + * - And it is associated with a different macro... + * - This is where the managed writer take an opinion. (Or be configurable.) + * - It could mangle the name + * - It could remove the name from a macro in macroTable, but then it would have to immediately flush to + * make sure that any prior e-expressions are still valid. In addition, we would need to re-export all + * the other macros from `_` (the default module). + * - For now, we're just throwing an Exception. + * + * Visible for testing. + */ + fun getOrAssignMacroAddressAndName(name: String, macro: MacroImpl): Int { + // TODO: This is O(n), but could be O(1). + var existingAddress = macroNames.indexOf(name) + if (existingAddress < 0) { + // Name is not already in use + existingAddress = newMacros.getOrDefault(macro, -1) + + val address = if (existingAddress < 0) { + // Macro is not in newMacros + // Add to newMacros and get a macro address + assignMacroAddress(macro) + } else { + // Macro already exists in newMacros, but doesn't have a name + existingAddress + } + // Set the name of the macro + macroNames[address] = name + return address + } else if (macrosById[existingAddress] == macro) { + // Macro already in table, and already using the same name + return existingAddress + } else { + // Name is already in use for a different macro. + // This macro may or may not be in the table under a different name, but that's + // not particularly relevant unless we want to try to fall back to a different name. + TODO("Name shadowing is not supported yet. Call finish() before attempting to shadow an existing macro.") + } + } + + /** + * Steps for adding an anonymous macro to the macro table + * 1. Check macroTable, if found, return that address + * 2. Check newMacros, if found, return that address + * 3. Add to newMacros, return new address + */ + fun getOrAssignMacroAddress(macro: MacroImpl): Int { + var address = macroTable.getOrDefault(macro, -1) + if (address >= 0) return address + address = newMacros.getOrDefault(macro, -1) + if (address >= 0) return address + + return assignMacroAddress(macro) + } + + fun getMacroNameForId(id: Int): String? = macroNames[id] + + /** Unconditionally adds a macro to the macro table data structures and returns the new address. */ + private fun assignMacroAddress(macro: MacroImpl): Int { + val address = macrosById.size + macrosById.add(macro) + macroNames.add(null) + newMacros[macro] = address + return address + } + + fun reset() { + symbolTable.clear() + macroNames.clear() + macrosById.clear() + macroTable.clear() + newMacros.clear() + symbolTable.putAll(SYSTEM_SYMBOLS) + } + + /** + * Writes an encoding directive for the current encoding context, and updates internal state accordingly. + * This always appends to the current encoding context. If there is nothing to append, calling this function + * is a no-op. + */ + fun writeEncodingDirective(systemData: IonRawWriter_1_1) { + if (newSymbols.isEmpty() && newMacros.isEmpty()) return + + writeSymbolTableDirective(systemData) + symbolTable.putAll(newSymbols) + newSymbols.clear() + // NOTE: Once we have emitted the symbol table update with set/add_symbols those symbols become available + // for use in set/add_macros (if relevant) + + writeMacroTableDirective(systemData) + macroTable.putAll(newMacros) + newMacros.clear() + } + + /** + * Updates the symbols in the encoding context by invoking + * the `add_symbols` or `set_symbols` system macro. + * If the symbol table would be empty, writes nothing, which is equivalent + * to an empty symbol table. + */ + private fun writeSymbolTableDirective(systemData: IonRawWriter_1_1) { + val hasSymbolsToAdd = newSymbols.isNotEmpty() + val hasSymbolsToRetain = symbolTable.size > NUMBER_OF_SYSTEM_SIDS + if (!hasSymbolsToAdd) return + val directive = if (!hasSymbolsToRetain) OpCode.DIRECTIVE_SET_SYMBOLS else OpCode.DIRECTIVE_ADD_SYMBOLS + + // Add new symbols + systemData.stepInDirective(directive) + newSymbols.forEach { (text, _) -> systemData.writeString(text) } + systemData.stepOut() + } + + private fun writeMacroTableDirective(systemData: IonRawWriter_1_1) { + val hasMacrosToAdd = newMacros.isNotEmpty() + val hasMacrosToRetain = macroTable.isNotEmpty() + if (!hasMacrosToAdd) return + val directive = if (!hasMacrosToRetain) OpCode.DIRECTIVE_SET_MACROS else OpCode.DIRECTIVE_ADD_MACROS + + // Add new macros + systemData.stepInDirective(directive) + newMacros.forEach { (macro, id) -> + val macroName = macroNames[id] + systemData.stepInSExp(usingLengthPrefix = false) + if (macroName == null) { + systemData.writeNull() + } else { + systemData.writeSymbol(macroName) + } + macro.writeTo(systemData) + systemData.stepOut() + } + systemData.stepOut() + } +} diff --git a/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt b/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt new file mode 100644 index 000000000..ad2f4a53b --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt @@ -0,0 +1,150 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings + +/** + * Options that are specific to Ion 1.1 and handled in the managed writer. + * These are (mostly) generalizable to both text and binary. + * + * This should be hidden behind the IonWriteBuilders abstraction. + */ +internal class ManagedWriterOptions_1_1( + /** + * Whether the symbols in the encoding directive should be interned or not. + * For binary, almost certainly want this to be true, and for text, it's + * more readable if it's false. + */ + val internEncodingDirectiveSymbols: Boolean, + val symbolInliningStrategy: SymbolInliningStrategy, + @get:SuppressFBWarnings("EI_EXPOSE_REP", justification = "LengthPrefixStrategy instances are not mutable.") + @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "LengthPrefixStrategy instances are not mutable.") + val lengthPrefixStrategy: LengthPrefixStrategy, + val eExpressionIdentifierStrategy: EExpressionIdentifierStrategy, +) : SymbolInliningStrategy by symbolInliningStrategy, LengthPrefixStrategy by lengthPrefixStrategy { + + /** + * Indicates whether e-expressions should be written using macro + * names or macro addresses (when a choice is available). + */ + enum class EExpressionIdentifierStrategy { + BY_NAME, + BY_ADDRESS, + } +} + +/** + * A strategy to determine whether a SymbolToken with known text should be encoded by Symbol ID (SID) or as inline text. + * Symbols with unknown text are always be written as a SID because the text is unknown. + * + * Some possible implementation ideas: + * + * - A simple implementation could elect to inline symbols that are less than `N` characters long. + * - A domain-specific implementation could choose to inline symbols with specific prefixes. E.g. annotations starting + * with `org.example` always get written inline. + * - A stateful implementation could keep track of how often a symbol is used, and elect to write the symbol inline + * until it has been used at least `N` times. + * - A streaming-oriented implementation could keep track of the symbols that are used, and inline any symbols not + * already in the symbol table. Once a top-level value is complete, some other component could inspect the list of + * new symbols and emit a Local Symbol Table append with those symbols so that they can be interned use in future + * top-level values. + * + * TODO: remove "sealed" when we are confident of the API. + */ +@SuppressFBWarnings("IC_SUPERCLASS_USES_SUBCLASS_DURING_INITIALIZATION") +sealed interface SymbolInliningStrategy { + /** + * Represents the different kinds of usage of a symbol token. + */ + enum class SymbolKind { + VALUE, + FIELD_NAME, + ANNOTATION, + } + + /** + * Indicates whether a particular symbol text should be written inline (as opposed to writing as a SID). + */ + fun shouldWriteInline(symbolKind: SymbolKind, text: String): Boolean + + private object NeverInline : SymbolInliningStrategy { + override fun shouldWriteInline(symbolKind: SymbolKind, text: String): Boolean = false + override fun toString(): String = "NEVER_INLINE" + } + + private object AlwaysInline : SymbolInliningStrategy { + override fun shouldWriteInline(symbolKind: SymbolKind, text: String): Boolean = true + override fun toString(): String = "ALWAYS_INLINE" + } + + companion object { + /** + * A [SymbolInliningStrategy] that causes all symbols to be written as a SID, + * interning the text in the Local Symbol Table if necessary. + * + * This is equivalent to the behavior of symbols in Ion 1.0 binary. + */ + @JvmField + val NEVER_INLINE: SymbolInliningStrategy = NeverInline + + /** + * A [SymbolInliningStrategy] that causes all symbols with known text to have their text written inline. + */ + @JvmField + val ALWAYS_INLINE: SymbolInliningStrategy = AlwaysInline + } +} + +/** + * TODO: Proper documentation. + * + * See [SymbolInliningStrategy] for a similar strategy interface. + * + * TODO: remove "sealed" when we are confident of the API. + */ +@SuppressFBWarnings("IC_SUPERCLASS_USES_SUBCLASS_DURING_INITIALIZATION") +sealed interface LengthPrefixStrategy { + /** + * Indicates whether a container should be written using a length prefix. + * + * TODO: See if we can add other context, such as annotations that are going to be added to this container, + * the field name (if this container is in a struct), or the delimited/prefixed status of the parent + * container. + * + * With more context, we could enable strategies like: + * - Write lists with annotation `X` as a delimited container. + */ + fun writeLengthPrefix(containerType: ContainerType, depth: Int): Boolean + + private data object NeverPrefixed : LengthPrefixStrategy { + override fun writeLengthPrefix(containerType: ContainerType, depth: Int): Boolean = false + } + private data object AlwaysPrefixed : LengthPrefixStrategy { + override fun writeLengthPrefix(containerType: ContainerType, depth: Int): Boolean = true + } + private data object ContainersPrefixed : LengthPrefixStrategy { + override fun writeLengthPrefix(containerType: ContainerType, depth: Int): Boolean = containerType != ContainerType.EEXP + } + + companion object { + @JvmField + val NEVER_PREFIXED: LengthPrefixStrategy = NeverPrefixed + @JvmField + val ALWAYS_PREFIXED: LengthPrefixStrategy = AlwaysPrefixed + /** Containers are prefixed. E-Exps are not. */ + @JvmField + val CONTAINERS_PREFIXED: LengthPrefixStrategy = ContainersPrefixed + } + + enum class ContainerType { + LIST, + STRUCT, + SEXP, + /** + * These are only containers at an encoding/syntax level. + * There isn't really a "delimited" option for macros, but there is a length-prefix option. + */ + EEXP, + } +} diff --git a/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt new file mode 100644 index 000000000..d0c739355 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt @@ -0,0 +1,504 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.IonCatalog +import com.amazon.ion.IonException +import com.amazon.ion.IonReader +import com.amazon.ion.IonType +import com.amazon.ion.IonValue +import com.amazon.ion.Macro +import com.amazon.ion.SymbolTable +import com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID +import com.amazon.ion.SymbolToken +import com.amazon.ion.SystemSymbols +import com.amazon.ion.Timestamp +import com.amazon.ion.UnknownSymbolException +import com.amazon.ion.bytecode.bin11.OpCode +import com.amazon.ion.impl.LengthPrefixStrategy.ContainerType +import com.amazon.ion.impl.SymbolInliningStrategy.SymbolKind +import com.amazon.ion.ion_1_1.IonRawWriter_1_1 +import com.amazon.ion.ion_1_1.IonWriter_1_1 +import com.amazon.ion.ion_1_1.MacroImpl +import com.amazon.ion.ion_1_1.TaglessScalarType +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings +import java.math.BigDecimal +import java.math.BigInteger +import java.util.Date + +/** + * A managed writer for Ion 1.1 that is generic over whether the raw encoding is text or binary. + * + * TODO: + * - Handling of shared symbol tables + * - Proper handling of user-supplied symbol tables + * - Auto-flush (for binary and text) + * - Check that arguments match the signatures of macros/templates (in all cases). + * - Check that values in TE lists/sexps are the right encoding (in all cases). + * - If a macro has a fixed size because it only has tagless parameters, always write it without a length prefix + * - Determine if it's faster to read template definitions that use prefixed vs delimited containers, and make this + * always do the right thing. + * + * See also [ManagedWriterOptions_1_1], [SymbolInliningStrategy], and [LengthPrefixStrategy]. + */ +internal class IonManagedWriter_1_1( + @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.") + private val userData: IonRawWriter_1_1, + @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.") + private val systemData: IonRawWriter_1_1, + @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "Not mutable") + private val options: ManagedWriterOptions_1_1, + private val onClose: () -> Unit, +) : _Private_IonWriter, IonWriter_1_1 { + + companion object { + private val ION_VERSION_MARKER_REGEX = Regex("^\\\$ion_\\d+_\\d+$") + } + + // Since this is Ion 1.1, we must always start with the IVM. + private var needsIVM: Boolean = true + + private val encodingContextManager = IonManagedWriterEncodingContext_1_1() + + private var annotationsTextBuffer = arrayOfNulls(8) + private var annotationsIdBuffer = IntArray(8) + private var numAnnotations = 0 + private var fieldNameText: String? = null + private var fieldNameId = -1 + + private var containers = _Private_RecyclingStack(16) { ContainerInfo() } + init { + // Push a container to be at the top-level so we don't need to do null checks. + containers.push { it.reset() } + } + + // Stores info about the types of the child elements. + private class ContainerInfo( + @JvmField var signature: Array = EMPTY_ARRAY, + @JvmField var type: Int = -1, + /** + // If i>=-1, type is an opcode + // If i==-2, there is no type + // If i==-3, type is a macro id + */ + @JvmField var i: Int = 0, + ) { + + companion object { + @JvmStatic + private val EMPTY_ARRAY = emptyArray() + } + + val macroId: Int get() = -1 - type + + fun reset(signature: Array) { + this.signature = signature + i = 0 + prepareNextType() + } + + fun resetWithMacroShape(macroId: Int) { + this.signature = EMPTY_ARRAY + this.type = -1 - macroId + i = -3 + } + + fun reset(opcode: Int = 0) { + this.signature = EMPTY_ARRAY + this.type = opcode + i = if (opcode == 0) -2 else -1 + } + + fun prepareNextType() { + if (i >= 0) { + if (i < signature.size) { + type = signature[i++].opcode + } else { + type = OpCode.DELIMITED_CONTAINER_END + } + } + } + } + + override fun getCatalog(): IonCatalog { + TODO("Not part of the public API.") + } + + /** No facets supported */ + override fun asFacet(facetType: Class?): T? = null + + override fun getSymbolTable(): SymbolTable { + TODO("Why do we need to expose this to users in the first place?") + } + + override fun setFieldName(name: String) { + fieldNameText = name + fieldNameId = -1 + } + + override fun setFieldNameSymbol(name: SymbolToken) { + fieldNameId = name.sid + fieldNameText = name.text + } + + /** + * Ensures that there is enough space in the annotation buffers for [n] annotations. + * If more space is needed, it over-allocates by 8 to ensure that we're not continually allocating when annotations + * are being added one by one. + */ + private fun ensureAnnotationSpace(n: Int) { + if (annotationsIdBuffer.size < n || annotationsTextBuffer.size < n) { + val oldIds = annotationsIdBuffer + annotationsIdBuffer = IntArray(n + 8) + oldIds.copyInto(annotationsIdBuffer) + val oldText = annotationsTextBuffer + annotationsTextBuffer = arrayOfNulls(n + 8) + oldText.copyInto(annotationsTextBuffer) + } + } + + private fun _private_clearAnnotations() { + numAnnotations = 0 + // erase the first entries to ensure old values don't leak into `_private_hasFirstAnnotation()` + annotationsIdBuffer[0] = -1 + annotationsTextBuffer[0] = null + } + + private fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean { + if (numAnnotations == 0) return false + if (sid >= 0 && annotationsIdBuffer[0] == sid) { + return true + } + if (text != null && annotationsTextBuffer[0] == text) { + return true + } + return false + } + + override fun addTypeAnnotation(annotation: String) { + val index = numAnnotations++ + ensureAnnotationSpace(numAnnotations) + annotationsIdBuffer[index] = UNKNOWN_SYMBOL_ID + annotationsTextBuffer[index] = annotation + } + + override fun setTypeAnnotations(annotations: Array?) { + _private_clearAnnotations() + numAnnotations = annotations?.size ?: 0 + ensureAnnotationSpace(numAnnotations) + annotations?.forEachIndexed { index, text -> + annotationsIdBuffer[index] = UNKNOWN_SYMBOL_ID + annotationsTextBuffer[index] = text + } + } + + override fun setTypeAnnotationSymbols(annotations: Array?) { + _private_clearAnnotations() + numAnnotations = annotations?.size ?: 0 + ensureAnnotationSpace(numAnnotations) + annotations?.forEachIndexed { index, token -> + annotationsIdBuffer[index] = token.sid + annotationsTextBuffer[index] = token.text + } + } + + override fun stepIn(containerType: IonType?) { + val newDepth = depth + 1 + when (containerType) { + IonType.LIST -> prepareFieldNameAndAnnotations { userData.stepInList(options.writeLengthPrefix(ContainerType.LIST, newDepth)) } + IonType.SEXP -> prepareFieldNameAndAnnotations { userData.stepInSExp(options.writeLengthPrefix(ContainerType.SEXP, newDepth)) } + IonType.STRUCT -> { + if (depth == 0 && _private_hasFirstAnnotation(SystemSymbols.ION_SYMBOL_TABLE_SID, SystemSymbols.ION_SYMBOL_TABLE)) { + // TODO: Should we throw here, or turn this into a no-op? + throw IonException("User-defined symbol tables not permitted by the Ion 1.1 managed writer.") + } + prepareFieldNameAndAnnotations { + userData.stepInStruct(options.writeLengthPrefix(ContainerType.STRUCT, newDepth)) + } + } + else -> throw IllegalArgumentException("Not a container type: $containerType") + } + containers.push { it.reset() } + } + + override fun stepInTaglessElementList(name: String?, macro: Macro) = prepareFieldNameAndAnnotations { + require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } + val macroId = if (name == null) encodingContextManager.getOrAssignMacroAddress(macro) else encodingContextManager.getOrAssignMacroAddressAndName(name, macro) + userData.stepInTaglessElementList(macroId, name, lengthPrefixed = options.writeLengthPrefix(ContainerType.EEXP, macroId)) + containers.push { it.resetWithMacroShape(macroId) } + } + + override fun stepInTaglessElementList(scalar: TaglessScalarType) = prepareFieldNameAndAnnotations { + userData.stepInTaglessElementList(scalar.getOpcode()) + containers.push { it.reset(scalar.getOpcode()) } + } + + override fun stepInTaglessElementSExp(name: String?, macro: Macro) = prepareFieldNameAndAnnotations { + require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } + val macroId = if (name == null) encodingContextManager.getOrAssignMacroAddress(macro) else encodingContextManager.getOrAssignMacroAddressAndName(name, macro) + userData.stepInTaglessElementSExp(macroId, name, lengthPrefixed = options.writeLengthPrefix(ContainerType.EEXP, macroId)) + containers.push { it.resetWithMacroShape(macroId) } + } + override fun stepInTaglessElementSExp(scalar: TaglessScalarType) = prepareFieldNameAndAnnotations { + userData.stepInTaglessElementSExp(scalar.getOpcode()) + containers.push { it.reset(scalar.getOpcode()) } + } + + override fun stepOut() { + // TODO: Make sure you can't use this function to step out of a macro? + containers.pop() + userData.stepOut() + } + + override fun isInStruct(): Boolean = userData.isInStruct() + + private inline fun T?.writeMaybeNull(type: IonType, writeNotNull: (T) -> Unit) { + if (this == null) { + writeNull(type) + } else { + writeNotNull(this) + } + } + + private inline fun prepareFieldNameAndAnnotations(valueWriterExpression: () -> Unit) { + if (isInStruct()) { + if (!isFieldNameSet) throw IonException("Values in a struct must have a field name.") + handleSymbolToken(fieldNameId, fieldNameText, SymbolKind.FIELD_NAME, userData) + fieldNameId = -1 + fieldNameText = null + } + + val numAnnotations = this.numAnnotations + if (numAnnotations > 0) { + for (i in 0 until numAnnotations) { + val annotationText = annotationsTextBuffer[i] + val sid = annotationsIdBuffer[i] + if (sid >= 0) { + handleSymbolToken(UNKNOWN_SYMBOL_ID, annotationText, SymbolKind.ANNOTATION, userData) + } else { + handleSymbolToken(sid, annotationText, SymbolKind.ANNOTATION, userData) + } + } + this.numAnnotations = 0 + } + valueWriterExpression() + } + + override fun writeSymbol(content: String?) = prepareFieldNameAndAnnotations { + val container = containers.peek() + if (container.type != 0) { + content ?: throw IonException("Cannot write null.symbol for tagless symbol") + if (container.type != OpCode.TE_SYMBOL_FS) { + val expectedType = TaglessScalarType.getTaglessScalarTypeForOpcode(container.type) ?: "e-expression" + throw IllegalArgumentException("Cannot write a symbol when a tagless $expectedType is expected.") + } + if (options.shouldWriteInline(SymbolKind.VALUE, content)) { + userData.writeTaglessSymbol(container.type, content) + } else { + userData.writeTaglessSymbol(container.type, encodingContextManager.intern(content)) + } + container.prepareNextType() + } else if (content == null) { + userData.writeNull(IonType.SYMBOL) + } else { + if (content == SystemSymbols.ION_1_0 && depth == 0) throw IonException("Can't write a top-level symbol that is the same as the IVM.") + handleSymbolToken(UNKNOWN_SYMBOL_ID, content, SymbolKind.VALUE, userData) + } + } + + override fun writeSymbolToken(content: SymbolToken?) = prepareFieldNameAndAnnotations { + val container = containers.peek() + if (content?.text != null) return writeSymbol(content.text) + + if (container.type != 0) { + content ?: throw IonException("Cannot write null.symbol for tagless symbol") + if (container.type != OpCode.TE_SYMBOL_FS) { + val expectedType = TaglessScalarType.getTaglessScalarTypeForOpcode(container.type) ?: "e-expression" + throw IllegalArgumentException("Cannot write a symbol when a tagless $expectedType is expected.") + } + // content.text is already known to be null + userData.writeTaglessSymbol(container.type, 0) + container.prepareNextType() + } else if (content == null) { + userData.writeNull(IonType.SYMBOL) + } else { + // TODO: Check to see if the SID refers to a user symbol with text that looks like an IVM + handleSymbolToken(content.sid, content.text, SymbolKind.VALUE, userData) + } + } + + private fun IonRawWriter_1_1.write(kind: SymbolKind, sid: Int) = when (kind) { + SymbolKind.VALUE -> writeSymbol(sid) + SymbolKind.FIELD_NAME -> writeFieldName(sid) + SymbolKind.ANNOTATION -> writeAnnotations(sid) + } + + private fun IonRawWriter_1_1.write(kind: SymbolKind, text: String) = when (kind) { + SymbolKind.VALUE -> writeSymbol(text) + SymbolKind.FIELD_NAME -> writeFieldName(text) + SymbolKind.ANNOTATION -> writeAnnotations(text) + } + + /** Helper function that determines whether to write a symbol token as a SID or inline symbol */ + private fun handleSymbolToken(sid: Int, text: String?, kind: SymbolKind, rawWriter: IonRawWriter_1_1, preserveEncoding: Boolean = false) { + if (text == null) { + // No text. Decide whether to write $0 or some other SID + if (sid == UNKNOWN_SYMBOL_ID) { + // No (known) SID either. + throw UnknownSymbolException("Cannot write a symbol token with unknown text and unknown SID.") + } else { + rawWriter.write(kind, sid) + } + } else if (preserveEncoding && sid < 0) { + rawWriter.write(kind, text) + } else if (options.shouldWriteInline(kind, text)) { + rawWriter.write(kind, text) + } else { + rawWriter.write(kind, encodingContextManager.intern(text)) + } + } + + override fun writeNull() = prepareFieldNameAndAnnotations { userData.writeNull() } + override fun writeNull(type: IonType?) = prepareFieldNameAndAnnotations { userData.writeNull(type ?: IonType.NULL) } + override fun writeBool(value: Boolean) = prepareFieldNameAndAnnotations { userData.writeBool(value) } + override fun writeInt(value: Long) = prepareFieldNameAndAnnotations { + val container = containers.peek() + if (container.type != 0) { + userData.writeTaglessInt(container.type, value) + container.prepareNextType() + } else { + userData.writeInt(value) + } + } + + // TODO: Tagless int support for BigIntegers + override fun writeInt(value: BigInteger?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.INT, userData::writeInt) } + + override fun writeFloat(value: Double) = prepareFieldNameAndAnnotations { + val container = containers.peek() + if (container.type != 0) { + userData.writeTaglessFloat(container.type, value) + container.prepareNextType() + } else { + userData.writeFloat(value) + } + } + override fun writeDecimal(value: BigDecimal?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.DECIMAL, userData::writeDecimal) } + override fun writeTimestamp(value: Timestamp?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.TIMESTAMP, userData::writeTimestamp) } + override fun writeString(value: String?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.STRING, userData::writeString) } + + override fun writeClob(value: ByteArray?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.CLOB, userData::writeClob) } + override fun writeClob(value: ByteArray?, start: Int, len: Int) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.CLOB) { userData.writeClob(it, start, len) } } + + override fun writeBlob(value: ByteArray?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.BLOB, userData::writeBlob) } + override fun writeBlob(value: ByteArray?, start: Int, len: Int) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.BLOB) { userData.writeBlob(it, start, len) } } + + override fun isFieldNameSet(): Boolean { + return fieldNameId >= 0 || fieldNameText != null + } + + override fun getDepth(): Int { + return userData.depth() + } + + override fun writeIonVersionMarker() { + if (depth == 0) { + // Make sure we write out any symbol tables and buffered values before the IVM + finish() + } else { + writeSymbol("\$ion_1_1") + } + } + + @Deprecated("Use IonValue.writeTo(IonWriter) instead.") + override fun writeValue(value: IonValue) = value.writeTo(this) + + @Deprecated("Use writeTimestamp instead.") + override fun writeTimestampUTC(value: Date?) { + TODO() + } + + override fun isStreamCopyOptimized(): Boolean = false + + override fun writeValues(reader: IonReader) { + TODO() + } + + override fun writeValue(reader: IonReader) { + TODO() + } + + // Stream termination + + override fun close() { + flush() + systemData.close() + userData.close() + onClose() + } + + override fun flush() { + if (depth != 0) throw IllegalStateException("Cannot call flush() while stepped in any value.") + if (needsIVM) { + systemData.writeIVM() + needsIVM = false + } + encodingContextManager.writeEncodingDirective(systemData) + systemData.flush() + userData.flush() + } + + override fun finish() { + if (depth != 0) throw IllegalStateException("Cannot call finish() while stepped in any value.") + flush() + encodingContextManager.reset() + needsIVM = true + } + + override fun startMacro(macro: Macro) { + require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } + val address = encodingContextManager.getOrAssignMacroAddress(macro) + startMacro(encodingContextManager.getMacroNameForId(address), address, macro) + } + + override fun startMacro(name: String, macro: Macro) { + require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } + val address = encodingContextManager.getOrAssignMacroAddressAndName(name, macro) + startMacro(name, address, macro) + } + + private fun startMacro(name: String?, address: Int, definition: MacroImpl) = prepareFieldNameAndAnnotations { + val container = containers.peek() + val prescribedMacroType = container.macroId + if (prescribedMacroType < 0) { + val useNames = + options.eExpressionIdentifierStrategy == ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_NAME + if (useNames && name != null) { + userData.stepInEExp(name) + } else { + val includeLengthPrefix = if (definition.signature.isEmpty()) false else options.writeLengthPrefix(ContainerType.EEXP, depth + 1) + userData.stepInEExp(address, includeLengthPrefix) + } + } else { + userData.stepInTaglessEExp() + container.prepareNextType() + } + containers.push { it.reset(definition.signature) } + } + + override fun endMacro() { + // TODO: See if there are unwritten parameters, and attempt to write `absent arg` for all of them. + userData.stepOut() + containers.pop() + } + + override fun absentArgument() { + val container = containers.peek() + if (container.type != 0) throw IonException("The argument corresponding to a tagless placeholder cannot be absent.") + userData.writeAbsentArgument() + container.prepareNextType() + } + + /** Visible for testing */ + internal fun getOrAssignMacroAddress(macro: MacroImpl): Int = encodingContextManager.getOrAssignMacroAddress(macro) +} diff --git a/src/main/java/com/amazon/ion/ion_1_1/MacroImpl.kt b/src/main/java/com/amazon/ion/ion_1_1/MacroImpl.kt index 882248967..78669ce5d 100644 --- a/src/main/java/com/amazon/ion/ion_1_1/MacroImpl.kt +++ b/src/main/java/com/amazon/ion/ion_1_1/MacroImpl.kt @@ -33,7 +33,7 @@ internal class MacroImpl private constructor(private val body: Array = mutableListOf().also { sig -> body.forEach { addPlaceholdersToSignature(it, sig) } } + val signature: Array = mutableListOf().also { sig -> body.forEach { addPlaceholdersToSignature(it, sig) } }.toTypedArray() @get:[SuppressFBWarnings("EI_EXPOSE_REP", justification = "internal use only")] val bodyExpressions: List = Arrays.asList(*body) diff --git a/src/test/java/com/amazon/ion/impl/IonManagedWriter_1_1_Test.kt b/src/test/java/com/amazon/ion/impl/IonManagedWriter_1_1_Test.kt new file mode 100644 index 000000000..a32f64788 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/IonManagedWriter_1_1_Test.kt @@ -0,0 +1,911 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.Decimal +import com.amazon.ion.IonException +import com.amazon.ion.IonType +import com.amazon.ion.IonWriter +import com.amazon.ion.SystemSymbols +import com.amazon.ion.TextToBinaryUtils.cleanCommentedHexBytes +import com.amazon.ion.TextToBinaryUtils.hexStringToByteArray +import com.amazon.ion.Timestamp +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1 +import com.amazon.ion.ion_1_1.IonWriter_1_1 +import com.amazon.ion.ion_1_1.MacroBuilder +import com.amazon.ion.ion_1_1.MacroImpl +import com.amazon.ion.ion_1_1.TaglessScalarType +import com.amazon.ion.system.IonTextWriterBuilder +import com.amazon.ion.system.IonTextWriterBuilder.NewLineType +import org.junit.jupiter.api.Assertions.assertArrayEquals +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.Arguments.arguments +import org.junit.jupiter.params.provider.MethodSource +import java.io.ByteArrayOutputStream +import java.math.BigInteger + +/** + * TODO: Ensure that tests cover all of [IonManagedWriter_1_1]. + */ +internal class IonManagedWriter_1_1_Test { + + internal fun IonWriter.writeObject(obj: WriteAsIon) { + if (this is IonWriter_1_1) { + obj.writeToMacroAware(this) + } else { + obj.writeTo(this) + } + } + + @OptIn(ExperimentalStdlibApi::class) + fun ByteArray.toPrettyHexString(bytesPerWord: Int = 4, wordsPerLine: Int = 8): String { + return map { it.toHexString(HexFormat.UpperCase) } + .windowed(bytesPerWord, bytesPerWord, partialWindows = true) + .windowed(wordsPerLine, wordsPerLine, partialWindows = true) + .joinToString("\n") { it.joinToString(" ") { it.joinToString(" ") } } + } + + companion object { + // Some symbols that are annoying to use with Kotlin's string substitution. + val ion_1_1 = "\$ion_1_1" + val ion = "\$ion" + + // Some symbol tokens so that we don't have to keep declaring them + private val fooMacro = MacroBuilder.newBuilder().stringValue("foo").build() as MacroImpl + private val barMacro = MacroBuilder.newBuilder().stringValue("bar").build() as MacroImpl + + // Helper function that writes to a writer and returns the text Ion + private fun write( + topLevelValuesOnNewLines: Boolean = true, + closeWriter: Boolean = true, + pretty: Boolean = false, + symbolInliningStrategy: SymbolInliningStrategy = SymbolInliningStrategy.ALWAYS_INLINE, + block: IonManagedWriter_1_1.() -> Unit + ): String { + val sb = StringBuilder() + val appendable = { + _Private_IonTextAppender.forAppendable(BufferedAppendableFastAppendable(sb)) + } + + val textWriterBuilder = IonTextWriterBuilder.standard().apply { + if (topLevelValuesOnNewLines) withWriteTopLevelValuesOnNewLines(true) + if (pretty) withPrettyPrinting() + withNewLineType(NewLineType.LF) + } as _Private_IonTextWriterBuilder + + val writer = IonManagedWriter_1_1( + userData = IonRawTextWriter_1_1(textWriterBuilder, appendable()), + systemData = IonRawTextWriter_1_1(textWriterBuilder, appendable()), + options = ManagedWriterOptions_1_1( + internEncodingDirectiveSymbols = false, + symbolInliningStrategy = symbolInliningStrategy, + lengthPrefixStrategy = LengthPrefixStrategy.NEVER_PREFIXED, + eExpressionIdentifierStrategy = ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_NAME, + ), + onClose = {} + ) + + writer.apply(block) + if (closeWriter) writer.close() + return sb.toString().trim() + } + + // Helper function that writes to a writer and returns the binary Ion + private fun writeBinary( + closeWriter: Boolean = true, + symbolInliningStrategy: SymbolInliningStrategy = SymbolInliningStrategy.ALWAYS_INLINE, + lengthPrefixStrategy: LengthPrefixStrategy = LengthPrefixStrategy.CONTAINERS_PREFIXED, + block: IonManagedWriter_1_1.() -> Unit + ): ByteArray { + val output = ByteArrayOutputStream() + + val writer = IonManagedWriter_1_1( + userData = IonRawBinaryWriter_1_1.from(output, 1024, 1), + systemData = IonRawBinaryWriter_1_1.from(output, 1024, 1), + options = ManagedWriterOptions_1_1( + internEncodingDirectiveSymbols = false, + symbolInliningStrategy = symbolInliningStrategy, + lengthPrefixStrategy = lengthPrefixStrategy, + eExpressionIdentifierStrategy = ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_NAME, + ), + onClose = output::close, + ) + + writer.apply(block) + if (closeWriter) writer.close() + return output.toByteArray() + } + } + + @Test + fun `attempting to manually write a symbol table throws an exception`() { + write(closeWriter = false) { + addTypeAnnotation(SystemSymbols.ION_SYMBOL_TABLE) + assertThrows { stepIn(IonType.STRUCT) } + } + } + + @Test + fun `attempting to step into a scalar type throws an exception`() { + write { + assertThrows { stepIn(IonType.NULL) } + } + } + + @Test + fun `write various data model values`() { + // TODO: Split this up and make sure edge cases, particularly around + // field names an annotations, are handled correctly. + val expected = """ + $ion_1_1 + null + null.list + foo::true + [1,10] + bar::{a:"Hello world!",b:d::e::+inf,f:(baz -0. {{AQID}} {{"abc"}}),g:2025-11-04T12:34:56.000Z} + """.trimIndent() + + val actual = write { + writeNull() + writeNull(IonType.LIST) + addTypeAnnotation("foo") + writeBool(true) + stepIn(IonType.LIST) + writeInt(1) + writeInt(BigInteger.TEN) + stepOut() + addTypeAnnotation("bar") + stepIn(IonType.STRUCT) + setFieldName("a") + writeString("Hello world!") + setFieldName("b") + setTypeAnnotations(arrayOf("d", "e")) + writeFloat(Double.POSITIVE_INFINITY) + setFieldName("f") + stepIn(IonType.SEXP) + writeSymbol("baz") + writeDecimal(Decimal.NEGATIVE_ZERO) + writeBlob(byteArrayOf(1, 2, 3)) + writeClob(byteArrayOf(0x61, 0x62, 0x63)) + stepOut() + setFieldName("g") + writeTimestamp(Timestamp.valueOf("2025-11-04T12:34:56.000Z")) + stepOut() + } + + assertEquals(expected, actual) + } + + @Test + fun `write an IVM`() { + assertEquals( + """ + $ion_1_1 + $ion_1_1 + """.trimIndent(), + write { writeIonVersionMarker() } + ) + } + + @Test + fun `write an IVM in a container should write a symbol`() { + assertEquals( + """ + $ion_1_1 + [$ion_1_1] + """.trimIndent(), + write { + stepIn(IonType.LIST) + writeIonVersionMarker() + stepOut() + } + ) + } + + @Test + fun `write an encoding directive with a non-empty macro table`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (null "foo")) + (:0) + """.trimIndent() + + val actual = write { + startMacro(fooMacro) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `write an e-expression by name`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (a "foo")) + (:a) + """.trimIndent() + + val actual = write { + startMacro("a", fooMacro) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `write an e-expression by address`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (null "foo")) + (:0) + """.trimIndent() + + val actual = write { + startMacro(fooMacro) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `write an e-expression with tagless parameter in text`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (null [(:? {#int8})])) + (:0 1) + """.trimIndent() + + val macro = MacroBuilder.newBuilder() + .listValue { it.taglessPlaceholder(TaglessScalarType.INT_8) } + .build() + + val actual = write { + startMacro(macro) + writeInt(1) + endMacro() + } + assertEquals(expected, actual) + } + + @Test + fun `write an e-expression with tagless parameter in binary`() { + val expected = """ + E0 01 01 EA + E3 | set_macros + F1 | delim sexp start + 8E | null + F0 EB 61 EF | [(:? {#int8})] + EF + EF + 00 01 + """.cleanCommentedHexBytes().hexStringToByteArray() + + val macro = MacroBuilder.newBuilder() + .listValue { it.taglessPlaceholder(TaglessScalarType.INT_8) } + .build() + + val actual = writeBinary { + startMacro(macro) + writeInt(1) + endMacro() + } + + println("Expected: ${expected.toPrettyHexString()}") + println("Actual: ${actual.toPrettyHexString()}") + + assertArrayEquals(expected, actual) + } + + @Test + fun `write a tagless-element list with scalar type`() { + val expectedText = """ + $ion_1_1 + [{#int8} 1,2] + """.trimIndent() + + val expectedBinary = """ + E0 01 01 EA + 5B 61 05 01 02 + """.cleanCommentedHexBytes().hexStringToByteArray() + + val writeFn: IonManagedWriter_1_1.() -> Unit = { + stepInTaglessElementList(TaglessScalarType.INT_8) + writeInt(1) + writeInt(2) + stepOut() + } + val actualText = write(block = writeFn) + assertEquals(expectedText, actualText) + + val actualBinary = writeBinary(block = writeFn) + assertEquals(expectedBinary.toPrettyHexString(), actualBinary.toPrettyHexString()) + } + + @Test + fun `write a tagless-element list with macro shape`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (null [(:? {#int8})])) + [{:0} (1),(2),(3)] + """.trimIndent() + + val macro = MacroBuilder.newBuilder() + .listValue { it.taglessPlaceholder(TaglessScalarType.INT_8) } + .build() + + val actual = write { + stepInTaglessElementList(null, macro) + startMacro(macro) + writeInt(1) + endMacro() + startMacro(macro) + writeInt(2) + endMacro() + startMacro(macro) + writeInt(3) + endMacro() + stepOut() + } + assertEquals(expected, actual) + } + + @Test + fun `write an encoding directive with a non-empty symbol table`() { + val expected = """ + $ion_1_1 + (:$ion set_symbols "foo") + $10 + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeSymbol("foo") + } + + assertEquals(expected, actual) + } + + @Test + fun `calling flush() causes the next encoding directive to append to a macro table`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (null "foo")) + (:0) + (:$ion add_macros (null "bar")) + (:0) + (:1) + """.trimIndent() + + val actual = write { + startMacro(fooMacro) + endMacro() + flush() + startMacro(fooMacro) + endMacro() + startMacro(barMacro) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `calling flush() causes the next encoding directive to append to the symbol table`() { + val expected = """ + $ion_1_1 + (:$ion set_symbols "foo") + $10 + (:$ion add_symbols "bar") + $11 + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeSymbol("foo") + flush() + writeSymbol("bar") + } + + assertEquals(expected, actual) + } + + @Test + fun `calling finish() causes the next encoding directive to NOT append to a macro table`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (null "foo")) + (:0) + $ion_1_1 + (:$ion set_macros (null "bar")) + (:0) + """.trimIndent() + + val actual = write { + startMacro(fooMacro) + endMacro() + finish() + startMacro(barMacro) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `calling finish() causes the next encoding directive to NOT append to the symbol table`() { + val expected = """ + $ion_1_1 + (:$ion set_symbols "foo") + $10 + $ion_1_1 + (:$ion set_symbols "bar") + $10 + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeSymbol("foo") + finish() + writeSymbol("bar") + } + + assertEquals(expected, actual) + } + + @Test + fun `adding to the macro table should preserve existing symbols`() { + val expected = """ + $ion_1_1 + (:$ion set_symbols "foo") + $10 + (:$ion set_macros (null "bar")) + (:0) + $10 + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeSymbol("foo") + flush() + startMacro(barMacro) + endMacro() + writeSymbol("foo") + } + + assertEquals(expected, actual) + } + + @Test + fun `adding to the symbol table should preserve existing macros`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (null "foo")) + (:0) + (:$ion set_symbols "foo") + $10 + (:0) + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + val theMacro = fooMacro + startMacro(theMacro) + endMacro() + flush() + writeSymbol("foo") + startMacro(theMacro) + endMacro() + } + + assertEquals(expected, actual) + } + + /** Holds a static factory method with the test cases for [testWritingMacroDefinitions]. */ + object TestWritingMacroDefinitions { + const val THE_METHOD = "com.amazon.ion.impl.IonManagedWriter_1_1_Test\$TestWritingMacroDefinitions#cases" + + @JvmStatic + fun cases(): List { + fun case( + name: String, + body: MacroBuilder.TemplateBody.() -> MacroBuilder.FinalState = { nullValue() }, + expectedBody: String = "null" + ) = arguments(name, MacroBuilder.newBuilder().body().build(), expectedBody) + + return listOf( + case( + "null", + body = { nullValue() }, + expectedBody = "null" + ), + // Annotations on `null` are representative for all types that don't have special annotation logic + case( + "annotated null", + body = { annotated("foo").nullValue() }, + expectedBody = "foo::null" + ), + case( + "null annotated with $0", + body = { annotated(null).nullValue() }, + expectedBody = "$0::null" + ), + case( + "bool", + body = { boolValue(true) }, + expectedBody = "true" + ), + case( + "int", + body = { intValue(1) }, + expectedBody = "1" + ), + case( + "(big) int", + body = { intValue(BigInteger.ONE) }, + expectedBody = "1" + ), + case( + "float", + body = { floatValue(Double.POSITIVE_INFINITY) }, + expectedBody = "+inf" + ), + case( + "decimal", + body = { decimalValue(Decimal.valueOf(1.1)) }, + expectedBody = "1.1" + ), + case( + "timestamp", + body = { timestampValue(Timestamp.valueOf("2024T")) }, + expectedBody = "2024T" + ), + case( + "symbol", + body = { symbolValue("foo") }, + expectedBody = "foo" + ), + case( + "unknown symbol", + body = { symbolValue(null) }, + expectedBody = "$0" + ), + case( + "annotated symbol", + body = { + annotated("foo").symbolValue("bar") + }, + expectedBody = "foo::bar" + ), + case( + "symbol annotated with $0", + body = { + annotated(null).symbolValue("bar") + }, + expectedBody = "$0::bar" + ), + case( + "string", + body = { stringValue("abc") }, + expectedBody = "\"abc\"" + ), + case( + "blob", + body = { blobValue(byteArrayOf()) }, + expectedBody = "{{}}" + ), + case( + "clob", + body = { clobValue(byteArrayOf()) }, + expectedBody = "{{\"\"}}" + ), + case( + "list", + body = { listValue { l -> l.intValue(1) } }, + expectedBody = "[1]" + ), + case( + "sexp", + body = { sexpValue { intValue(1) } }, + expectedBody = "(1)" + ), + case( + "empty sexp", + body = { sexpValue { } }, + expectedBody = "()" + ), + case( + "annotated sexp", + body = { annotated("foo").sexpValue { intValue(1) } }, + expectedBody = "foo::(1)" + ), + case( + "sexp with $0 annotation", + body = { annotated(null).sexpValue { intValue(1) } }, + expectedBody = "$0::(1)" + ), + case( + "struct", + body = { structValue { it.fieldName("foo").intValue(1) } }, + expectedBody = "{foo:1}" + ), + case( + "struct with $0 field name", + body = { structValue { it.fieldName(null).intValue(1) } }, + expectedBody = "{$0:1}" + ), + case( + "variable", + body = { listValue { l -> l.placeholder() } }, + expectedBody = "[(:?)]" + ), + + case( + "variable with default value", + body = { + listValue { l -> + l.placeholderWithDefault { + d -> + d.structValue { s -> + s.fieldName("foo").intValue(1) + } + } + } + }, + expectedBody = "[(:? {foo:1})]" + ), + case( + "multiple variables", + body = { listValue { l -> l.placeholder().placeholder().placeholder() } }, + expectedBody = "[(:?),(:?),(:?)]" + ), + case( + "nested expressions in body", + body = { + listValue { + sexpValue { intValue(1) } + structValue { s -> s.fieldName("foo").intValue(2) } + } + }, + expectedBody = "[(1),{foo:2}]" + ), + ) + } + } + + @MethodSource(TestWritingMacroDefinitions.THE_METHOD) + @ParameterizedTest(name = "a macro definition with {0}") + fun testWritingMacroDefinitions(description: String, macro: MacroImpl, expectedSignatureAndBody: String) { + val expected = """ + $ion_1_1 + (:$ion set_macros (null "foo") (null "bar") (null $expectedSignatureAndBody)) + """.trimIndent() + + val actual = write { + getOrAssignMacroAddress(fooMacro) + getOrAssignMacroAddress(barMacro) + getOrAssignMacroAddress(macro) + } + + assertEquals(expected, actual) + } + + @Test + fun `when pretty printing, directive expressions should have the clause name on the first line`() { + // ...and look reasonably pleasant. + // However, this should be held loosely. + val expected = """ + $ion_1_1 + (:$ion set_symbols + "foo" + "bar" + "baz" + ) + (:$ion set_macros + ( + null + "foo" + ) + ) + ${'$'}10 + ${'$'}11 + ${'$'}12 + (:0) + (:$ion add_symbols + "a" + "b" + "c" + ) + (:$ion add_macros + ( + null + "bar" + ) + ) + ${'$'}13 + ${'$'}14 + ${'$'}15 + (:1) + """.trimIndent() + + val fooMacro = fooMacro + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE, pretty = true) { + writeSymbol("foo") + writeSymbol("bar") + writeSymbol("baz") + startMacro(fooMacro) + endMacro() + flush() + writeSymbol("a") + writeSymbol("b") + writeSymbol("c") + startMacro(barMacro) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `writeObject() should write something with a macro representation`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (Point2D {x:(:?),y:(:?)})) + (:Point2D 2 4) + """.trimIndent() + + val actual = write { + writeObject(Point2D(2, 4)) + } + + assertEquals(expected, actual) + } + + @Test + fun `writeObject() should write something without a macro representation`() { + val expected = """ + $ion_1_1 + Red + Yellow + Green + Blue + """.trimIndent() + + val actual = write { + Colors.entries.forEach { + color -> + writeObject(color) + } + } + + assertEquals(expected, actual) + } + + @Test + fun `writeObject() should write something with nested macro representation`() { + val expected = """ + $ion_1_1 + (:$ion set_macros (Polygon {vertices:(:?),fill:(:?)}) (Point2D {x:(:?),y:(:?)})) + (:Polygon [(:Point2D 0 0),(:Point2D 0 1),(:Point2D 1 1),(:Point2D 1 0)] Blue) + """.trimIndent() + + val data = Polygon( + listOf( + Point2D(0, 0), + Point2D(0, 1), + Point2D(1, 1), + Point2D(1, 0), + ), + Colors.Blue, + ) + + val actual = write { + writeObject(data) + } + + assertEquals(expected, actual) + } + + interface WriteAsIon { + fun writeTo(writer: IonWriter) + fun writeToMacroAware(writer: IonWriter_1_1) + + fun IonWriter.writeObject(obj: WriteAsIon) { + if (this is IonWriter_1_1) { + obj.writeToMacroAware(this) + } else { + obj.writeTo(this) + } + } + } + + private data class Polygon(val vertices: List, val fill: Colors?) : WriteAsIon { + init { require(vertices.size >= 3) { "A polygon must have at least 3 edges and 3 vertices" } } + + companion object { + // Using the qualified class name would be verbose, but may be safer for general + // use so that there is almost no risk of having a name conflict with another macro. + private val MACRO_NAME = Polygon::class.simpleName!!.replace(".", "_") + private val MACRO = MacroBuilder.newBuilder() + .structValue { + it.fieldName("vertices") + .placeholder() + .fieldName("fill") + .placeholder() + } + .build() + } + + override fun writeTo(writer: IonWriter) { + with(writer) { + stepIn(IonType.STRUCT) + setFieldName("vertices") + stepIn(IonType.LIST) + vertices.forEach { writeObject(it) } + stepOut() + if (fill != null) { + setFieldName("fill") + writeObject(fill) + } + stepOut() + } + } + + override fun writeToMacroAware(writer: IonWriter_1_1) { + + with(writer) { + startMacro(MACRO_NAME, MACRO) + stepIn(IonType.LIST) + vertices.forEach { writeObject(it) } + stepOut() + fill?.let { writeObject(it) } ?: absentArgument() + endMacro() + } + } + } + + private data class Point2D(val x: Long, val y: Long) : IonManagedWriter_1_1_Test.WriteAsIon { + companion object { + // This is a very long macro name, but by using the qualified class name, + // there is almost no risk of having a name conflict with another macro. + private val MACRO_NAME = Point2D::class.simpleName!!.replace(".", "_") + private val MACRO = MacroBuilder.newBuilder().structValue { + it.fieldName("x").placeholder().fieldName("y").placeholder() + }.build() + } + + override fun writeToMacroAware(writer: IonWriter_1_1) { + with(writer) { + startMacro(MACRO_NAME, MACRO) + writeInt(x) + writeInt(y) + endMacro() + } + } + + override fun writeTo(writer: IonWriter) { + with(writer) { + stepIn(IonType.STRUCT) + setFieldName("x") + writeInt(x) + setFieldName("y") + writeInt(y) + stepOut() + } + } + } + + private enum class Colors : WriteAsIon { + Red, + Yellow, + Green, + Blue, + ; + override fun writeTo(writer: IonWriter) { + writer.writeSymbol(this.name) + } + override fun writeToMacroAware(writer: IonWriter_1_1) = writeTo(writer) + } +} From a2569b378b9b745d073545f62a1d0c32597526f2 Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Wed, 5 Nov 2025 15:05:39 -0800 Subject: [PATCH 2/5] Fix tests of minified jar --- .../amazon/ion/impl/BufferedAppendableFastAppendable.kt | 9 ++++++++- .../java/com/amazon/ion/impl/IonManagedWriter_1_1.kt | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt b/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt index cd93481cf..98d788b8b 100644 --- a/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt +++ b/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl +import com.amazon.ion.bytecode.EncodingContextManager.Companion.SYSTEM_SYMBOLS import com.amazon.ion.util._Private_FastAppendable import edu.umd.cs.findbugs.annotations.SuppressFBWarnings import java.io.Closeable @@ -28,9 +29,15 @@ internal class BufferedAppendableFastAppendable( @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.") private val wrapped: Appendable, @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "We're intentionally storing a reference to a mutable object because we need to write to it.") - private val buffer: StringBuilder = StringBuilder() + private val buffer: StringBuilder, ) : _Private_FastAppendable, Flushable, Closeable, Appendable by buffer { + companion object { + @JvmStatic operator fun invoke(wrapped: Appendable): BufferedAppendableFastAppendable { + return BufferedAppendableFastAppendable(wrapped, StringBuilder()) + } + } + override fun appendAscii(c: Char) { append(c) } override fun appendAscii(csq: CharSequence?) { append(csq) } override fun appendAscii(csq: CharSequence?, start: Int, end: Int) { append(csq, start, end) } diff --git a/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt index d0c739355..8b3d20870 100644 --- a/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt @@ -48,7 +48,7 @@ internal class IonManagedWriter_1_1( private val systemData: IonRawWriter_1_1, @SuppressFBWarnings("EI_EXPOSE_REP2", justification = "Not mutable") private val options: ManagedWriterOptions_1_1, - private val onClose: () -> Unit, + private val onClose: Runnable, ) : _Private_IonWriter, IonWriter_1_1 { companion object { @@ -434,7 +434,7 @@ internal class IonManagedWriter_1_1( flush() systemData.close() userData.close() - onClose() + onClose.run() } override fun flush() { From de4f0f54c72b84ff449b313ceaddde38fc2dea7b Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Wed, 5 Nov 2025 15:09:38 -0800 Subject: [PATCH 3/5] Fix ktlint violations --- .../java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt b/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt index 98d788b8b..2f93cfaef 100644 --- a/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt +++ b/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl -import com.amazon.ion.bytecode.EncodingContextManager.Companion.SYSTEM_SYMBOLS import com.amazon.ion.util._Private_FastAppendable import edu.umd.cs.findbugs.annotations.SuppressFBWarnings import java.io.Closeable From 75c703ca8e2f214845baa729272dfd06e9b36720 Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Thu, 6 Nov 2025 12:07:24 -0800 Subject: [PATCH 4/5] Apply suggestions from PR feedback --- .../IonManagedWriterEncodingContext_1_1.kt | 13 +- .../ion/impl/IonManagedWriterOptions_1_1.kt | 2 + .../amazon/ion/impl/IonManagedWriter_1_1.kt | 344 ++++++++++++------ .../amazon/ion/ion_1_1/IonRawWriter_1_1.kt | 4 + .../ion/impl/IonManagedWriter_1_1_Test.kt | 147 +++++--- 5 files changed, 341 insertions(+), 169 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/IonManagedWriterEncodingContext_1_1.kt b/src/main/java/com/amazon/ion/impl/IonManagedWriterEncodingContext_1_1.kt index c5808a851..c22368152 100644 --- a/src/main/java/com/amazon/ion/impl/IonManagedWriterEncodingContext_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonManagedWriterEncodingContext_1_1.kt @@ -93,10 +93,8 @@ internal class IonManagedWriterEncodingContext_1_1 { * make sure that any prior e-expressions are still valid. In addition, we would need to re-export all * the other macros from `_` (the default module). * - For now, we're just throwing an Exception. - * - * Visible for testing. */ - fun getOrAssignMacroAddressAndName(name: String, macro: MacroImpl): Int { + private fun getOrAssignMacroAddressAndName(name: String, macro: MacroImpl): Int { // TODO: This is O(n), but could be O(1). var existingAddress = macroNames.indexOf(name) if (existingAddress < 0) { @@ -131,7 +129,7 @@ internal class IonManagedWriterEncodingContext_1_1 { * 2. Check newMacros, if found, return that address * 3. Add to newMacros, return new address */ - fun getOrAssignMacroAddress(macro: MacroImpl): Int { + private fun getOrAssignMacroAddress(macro: MacroImpl): Int { var address = macroTable.getOrDefault(macro, -1) if (address >= 0) return address address = newMacros.getOrDefault(macro, -1) @@ -140,6 +138,13 @@ internal class IonManagedWriterEncodingContext_1_1 { return assignMacroAddress(macro) } + fun getOrAssignMacroAddress(macro: MacroImpl, name: String?): Int { + return if (name == null) + getOrAssignMacroAddress(macro) + else + getOrAssignMacroAddressAndName(name, macro) + } + fun getMacroNameForId(id: Int): String? = macroNames[id] /** Unconditionally adds a macro to the macro table data structures and returns the new address. */ diff --git a/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt b/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt index ad2f4a53b..0bc4336d2 100644 --- a/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt @@ -32,6 +32,8 @@ internal class ManagedWriterOptions_1_1( BY_NAME, BY_ADDRESS, } + + val useMacroNames = eExpressionIdentifierStrategy == EExpressionIdentifierStrategy.BY_NAME } /** diff --git a/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt index 8b3d20870..b3e1dfa29 100644 --- a/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt @@ -72,51 +72,76 @@ internal class IonManagedWriter_1_1( containers.push { it.reset() } } - // Stores info about the types of the child elements. + /** Stores info about the types of the child elements. */ private class ContainerInfo( @JvmField var signature: Array = EMPTY_ARRAY, - @JvmField var type: Int = -1, - /** - // If i>=-1, type is an opcode - // If i==-2, there is no type - // If i==-3, type is a macro id - */ - @JvmField var i: Int = 0, + /** 0 for tagged value, >0 for tagless primitive, <0 for macroId */ + @JvmField var childType: Int = TAGGED_VALUE, + /** If i < 0, this container does not iterate over multiple type (i.e. TE List/Sexp) */ + @JvmField var i: Int = HOMOGENEOUS_TYPE, ) { companion object { @JvmStatic private val EMPTY_ARRAY = emptyArray() + const val TAGGED_VALUE = 0 + private const val HOMOGENEOUS_TYPE = -1 } - val macroId: Int get() = -1 - type + fun childIsTaglessMacroShaped() = childType < 0 + fun childIsTaglessScalar() = childType > 0 + fun childIsTaggedValue() = childType == 0 + + fun isMacroInvocation() = i != HOMOGENEOUS_TYPE - fun reset(signature: Array) { + val childMacroId: Int get() = -1 - childType + + fun resetForMacroInvocation(signature: Array) { this.signature = signature i = 0 - prepareNextType() + prepareNextChildType() + } + + fun resetForMacroTESequence(macroId: Int) { + this.signature = EMPTY_ARRAY + this.childType = -1 - macroId + i = HOMOGENEOUS_TYPE } - fun resetWithMacroShape(macroId: Int) { + fun resetForScalarTESequence(opcode: Int) { this.signature = EMPTY_ARRAY - this.type = -1 - macroId - i = -3 + this.childType = opcode + i = HOMOGENEOUS_TYPE } - fun reset(opcode: Int = 0) { + fun reset() { this.signature = EMPTY_ARRAY - this.type = opcode - i = if (opcode == 0) -2 else -1 + this.childType = TAGGED_VALUE + i = HOMOGENEOUS_TYPE // They're all tagged values } - fun prepareNextType() { - if (i >= 0) { + /** + * Prepares this ContainerInfo to provide information about the type of the next child value. + * Returns false if the end of the macro signature has been reached. Otherwise, returns true. + */ + fun prepareNextChildType(): Boolean { + if (i != HOMOGENEOUS_TYPE) { if (i < signature.size) { - type = signature[i++].opcode + childType = signature[i++].opcode } else { - type = OpCode.DELIMITED_CONTAINER_END + childType = OpCode.DELIMITED_CONTAINER_END + return false } } + return true + } + + fun expectedChildTypeName(): String { + return if (childIsTaggedValue()) { + "tagged value" + } else { + TaglessScalarType.getTaglessScalarTypeForOpcode(childType)?.textEncodingName ?: "e-expression" + } } } @@ -164,15 +189,9 @@ internal class IonManagedWriter_1_1( annotationsTextBuffer[0] = null } - private fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean { + private fun isIonSymbolTableAnnotationPresent(): Boolean { if (numAnnotations == 0) return false - if (sid >= 0 && annotationsIdBuffer[0] == sid) { - return true - } - if (text != null && annotationsTextBuffer[0] == text) { - return true - } - return false + return annotationsIdBuffer[0] == SystemSymbols.ION_SYMBOL_TABLE_SID || annotationsTextBuffer[0] == SystemSymbols.ION_SYMBOL_TABLE } override fun addTypeAnnotation(annotation: String) { @@ -205,43 +224,52 @@ internal class IonManagedWriter_1_1( override fun stepIn(containerType: IonType?) { val newDepth = depth + 1 when (containerType) { - IonType.LIST -> prepareFieldNameAndAnnotations { userData.stepInList(options.writeLengthPrefix(ContainerType.LIST, newDepth)) } - IonType.SEXP -> prepareFieldNameAndAnnotations { userData.stepInSExp(options.writeLengthPrefix(ContainerType.SEXP, newDepth)) } + IonType.LIST -> { + writeTaggedValueFieldNameAndAnnotations() + userData.stepInList(options.writeLengthPrefix(ContainerType.LIST, newDepth)) + } + IonType.SEXP -> { + writeTaggedValueFieldNameAndAnnotations() + userData.stepInSExp(options.writeLengthPrefix(ContainerType.SEXP, newDepth)) + } IonType.STRUCT -> { - if (depth == 0 && _private_hasFirstAnnotation(SystemSymbols.ION_SYMBOL_TABLE_SID, SystemSymbols.ION_SYMBOL_TABLE)) { - // TODO: Should we throw here, or turn this into a no-op? - throw IonException("User-defined symbol tables not permitted by the Ion 1.1 managed writer.") - } - prepareFieldNameAndAnnotations { - userData.stepInStruct(options.writeLengthPrefix(ContainerType.STRUCT, newDepth)) + if (depth == 0 && isIonSymbolTableAnnotationPresent()) { + throw IonException("Ion 1.0 symbol tables not permitted in the Ion 1.1 managed writer.") } + writeTaggedValueFieldNameAndAnnotations() + userData.stepInStruct(options.writeLengthPrefix(ContainerType.STRUCT, newDepth)) } else -> throw IllegalArgumentException("Not a container type: $containerType") } containers.push { it.reset() } } - override fun stepInTaglessElementList(name: String?, macro: Macro) = prepareFieldNameAndAnnotations { + override fun stepInTaglessElementList(name: String?, macro: Macro) { require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } - val macroId = if (name == null) encodingContextManager.getOrAssignMacroAddress(macro) else encodingContextManager.getOrAssignMacroAddressAndName(name, macro) + writeTaggedValueFieldNameAndAnnotations() + val macroId = encodingContextManager.getOrAssignMacroAddress(macro, name.takeIf { options.useMacroNames }) userData.stepInTaglessElementList(macroId, name, lengthPrefixed = options.writeLengthPrefix(ContainerType.EEXP, macroId)) - containers.push { it.resetWithMacroShape(macroId) } + containers.push { it.resetForMacroTESequence(macroId) } } - override fun stepInTaglessElementList(scalar: TaglessScalarType) = prepareFieldNameAndAnnotations { + override fun stepInTaglessElementList(scalar: TaglessScalarType) { + writeTaggedValueFieldNameAndAnnotations() userData.stepInTaglessElementList(scalar.getOpcode()) - containers.push { it.reset(scalar.getOpcode()) } + containers.push { it.resetForScalarTESequence(scalar.getOpcode()) } } - override fun stepInTaglessElementSExp(name: String?, macro: Macro) = prepareFieldNameAndAnnotations { + override fun stepInTaglessElementSExp(name: String?, macro: Macro) { require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } - val macroId = if (name == null) encodingContextManager.getOrAssignMacroAddress(macro) else encodingContextManager.getOrAssignMacroAddressAndName(name, macro) + writeTaggedValueFieldNameAndAnnotations() + val macroId = encodingContextManager.getOrAssignMacroAddress(macro, name.takeIf { options.useMacroNames }) userData.stepInTaglessElementSExp(macroId, name, lengthPrefixed = options.writeLengthPrefix(ContainerType.EEXP, macroId)) - containers.push { it.resetWithMacroShape(macroId) } + containers.push { it.resetForMacroTESequence(macroId) } } - override fun stepInTaglessElementSExp(scalar: TaglessScalarType) = prepareFieldNameAndAnnotations { + + override fun stepInTaglessElementSExp(scalar: TaglessScalarType) { + writeTaggedValueFieldNameAndAnnotations() userData.stepInTaglessElementSExp(scalar.getOpcode()) - containers.push { it.reset(scalar.getOpcode()) } + containers.push { it.resetForScalarTESequence(scalar.getOpcode()) } } override fun stepOut() { @@ -260,7 +288,14 @@ internal class IonManagedWriter_1_1( } } - private inline fun prepareFieldNameAndAnnotations(valueWriterExpression: () -> Unit) { + private fun writeTaggedValueFieldNameAndAnnotations() { + val currentContainer = containers.peek() + if (!currentContainer.childIsTaggedValue()) { + // TODO: Improve this message to have the expected type and the actual type. + throw IonException("Tagless value expected, but not provided.") + } + currentContainer.prepareNextChildType() + if (isInStruct()) { if (!isFieldNameSet) throw IonException("Values in a struct must have a field name.") handleSymbolToken(fieldNameId, fieldNameText, SymbolKind.FIELD_NAME, userData) @@ -281,49 +316,56 @@ internal class IonManagedWriter_1_1( } this.numAnnotations = 0 } - valueWriterExpression() } - override fun writeSymbol(content: String?) = prepareFieldNameAndAnnotations { + override fun writeSymbol(content: String?) { val container = containers.peek() - if (container.type != 0) { + if (container.childIsTaggedValue()) { + if (content == null) { + writeTaggedValueFieldNameAndAnnotations() + userData.writeNull(IonType.SYMBOL) + } else { + if (numAnnotations > 0 && depth == 0 && ION_VERSION_MARKER_REGEX.matches(content)) { + throw IonException("Can't write a top-level symbol that is the same as an IVM.") + } + writeTaggedValueFieldNameAndAnnotations() + handleSymbolToken(UNKNOWN_SYMBOL_ID, content, SymbolKind.VALUE, userData) + } + } else { content ?: throw IonException("Cannot write null.symbol for tagless symbol") - if (container.type != OpCode.TE_SYMBOL_FS) { - val expectedType = TaglessScalarType.getTaglessScalarTypeForOpcode(container.type) ?: "e-expression" + if (container.childType != OpCode.TE_SYMBOL_FS) { + val expectedType = container.expectedChildTypeName() throw IllegalArgumentException("Cannot write a symbol when a tagless $expectedType is expected.") } if (options.shouldWriteInline(SymbolKind.VALUE, content)) { - userData.writeTaglessSymbol(container.type, content) + userData.writeTaglessSymbol(container.childType, content) } else { - userData.writeTaglessSymbol(container.type, encodingContextManager.intern(content)) + userData.writeTaglessSymbol(container.childType, encodingContextManager.intern(content)) } - container.prepareNextType() - } else if (content == null) { - userData.writeNull(IonType.SYMBOL) - } else { - if (content == SystemSymbols.ION_1_0 && depth == 0) throw IonException("Can't write a top-level symbol that is the same as the IVM.") - handleSymbolToken(UNKNOWN_SYMBOL_ID, content, SymbolKind.VALUE, userData) + container.prepareNextChildType() } } - override fun writeSymbolToken(content: SymbolToken?) = prepareFieldNameAndAnnotations { + override fun writeSymbolToken(content: SymbolToken?) { val container = containers.peek() if (content?.text != null) return writeSymbol(content.text) - - if (container.type != 0) { - content ?: throw IonException("Cannot write null.symbol for tagless symbol") - if (container.type != OpCode.TE_SYMBOL_FS) { - val expectedType = TaglessScalarType.getTaglessScalarTypeForOpcode(container.type) ?: "e-expression" + if (container.childIsTaggedValue()) { + writeTaggedValueFieldNameAndAnnotations() + if (content == null) { + userData.writeNull(IonType.SYMBOL) + } else { + // TODO: Check to see if the SID refers to a user symbol with text that looks like an IVM + handleSymbolToken(content.sid, content.text, SymbolKind.VALUE, userData) + } + } else { + if (container.childType != OpCode.TE_SYMBOL_FS) { + val expectedType = container.expectedChildTypeName() throw IllegalArgumentException("Cannot write a symbol when a tagless $expectedType is expected.") } + content ?: throw IonException("Cannot write null.symbol for tagless symbol") // content.text is already known to be null - userData.writeTaglessSymbol(container.type, 0) - container.prepareNextType() - } else if (content == null) { - userData.writeNull(IonType.SYMBOL) - } else { - // TODO: Check to see if the SID refers to a user symbol with text that looks like an IVM - handleSymbolToken(content.sid, content.text, SymbolKind.VALUE, userData) + userData.writeTaglessSymbol(container.childType, 0) + container.prepareNextChildType() } } @@ -358,40 +400,99 @@ internal class IonManagedWriter_1_1( } } - override fun writeNull() = prepareFieldNameAndAnnotations { userData.writeNull() } - override fun writeNull(type: IonType?) = prepareFieldNameAndAnnotations { userData.writeNull(type ?: IonType.NULL) } - override fun writeBool(value: Boolean) = prepareFieldNameAndAnnotations { userData.writeBool(value) } - override fun writeInt(value: Long) = prepareFieldNameAndAnnotations { + override fun writeNull() = writeNull(IonType.NULL) + + override fun writeNull(type: IonType?) { + if (type == IonType.STRUCT && depth == 0 && isIonSymbolTableAnnotationPresent()) { + throw IonException("Ion 1.0 symbol tables not permitted in the Ion 1.1 managed writer.") + } + writeTaggedValueFieldNameAndAnnotations() + userData.writeNull(type ?: IonType.NULL) + } + + override fun writeBool(value: Boolean) { + writeTaggedValueFieldNameAndAnnotations() + userData.writeBool(value) + } + + override fun writeInt(value: Long) { val container = containers.peek() - if (container.type != 0) { - userData.writeTaglessInt(container.type, value) - container.prepareNextType() - } else { + if (container.childIsTaggedValue()) { + writeTaggedValueFieldNameAndAnnotations() userData.writeInt(value) + } else { + userData.writeTaglessInt(container.childType, value) + container.prepareNextChildType() } } - // TODO: Tagless int support for BigIntegers - override fun writeInt(value: BigInteger?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.INT, userData::writeInt) } - - override fun writeFloat(value: Double) = prepareFieldNameAndAnnotations { + override fun writeInt(value: BigInteger?) { val container = containers.peek() - if (container.type != 0) { - userData.writeTaglessFloat(container.type, value) - container.prepareNextType() + if (container.childIsTaggedValue()) { + writeTaggedValueFieldNameAndAnnotations() + value.writeMaybeNull(IonType.INT, userData::writeInt) } else { + value ?: throw IonException("Cannot write null integer when tagless ${container.expectedChildTypeName()} is expected") + userData.writeTaglessInt(container.childType, value) + container.prepareNextChildType() + } + } + + override fun writeFloat(value: Double) { + val container = containers.peek() + if (container.childIsTaggedValue()) { + writeTaggedValueFieldNameAndAnnotations() userData.writeFloat(value) + } else { + userData.writeTaglessFloat(container.childType, value) + container.prepareNextChildType() + } + } + + override fun writeDecimal(value: BigDecimal?) { + val container = containers.peek() + if (container.childIsTaggedValue()) { + writeTaggedValueFieldNameAndAnnotations() + value.writeMaybeNull(IonType.DECIMAL, userData::writeDecimal) + } else { + TODO("Tagless decimals") + } + } + + override fun writeTimestamp(value: Timestamp?) { + val container = containers.peek() + if (container.childIsTaggedValue()) { + writeTaggedValueFieldNameAndAnnotations() + value.writeMaybeNull(IonType.TIMESTAMP, userData::writeTimestamp) + } else { + TODO("Tagless timestamps") } } - override fun writeDecimal(value: BigDecimal?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.DECIMAL, userData::writeDecimal) } - override fun writeTimestamp(value: Timestamp?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.TIMESTAMP, userData::writeTimestamp) } - override fun writeString(value: String?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.STRING, userData::writeString) } - override fun writeClob(value: ByteArray?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.CLOB, userData::writeClob) } - override fun writeClob(value: ByteArray?, start: Int, len: Int) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.CLOB) { userData.writeClob(it, start, len) } } + override fun writeString(value: String?) { + writeTaggedValueFieldNameAndAnnotations() + value.writeMaybeNull(IonType.STRING, userData::writeString) + } + + override fun writeClob(value: ByteArray?) { + writeTaggedValueFieldNameAndAnnotations() + value.writeMaybeNull(IonType.CLOB, userData::writeClob) + } - override fun writeBlob(value: ByteArray?) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.BLOB, userData::writeBlob) } - override fun writeBlob(value: ByteArray?, start: Int, len: Int) = prepareFieldNameAndAnnotations { value.writeMaybeNull(IonType.BLOB) { userData.writeBlob(it, start, len) } } + override fun writeClob(value: ByteArray?, start: Int, len: Int) { + writeTaggedValueFieldNameAndAnnotations() + value.writeMaybeNull(IonType.CLOB) { userData.writeClob(it, start, len) } + } + + override fun writeBlob(value: ByteArray?) { + writeTaggedValueFieldNameAndAnnotations() + value.writeMaybeNull(IonType.BLOB, userData::writeBlob) + } + + override fun writeBlob(value: ByteArray?, start: Int, len: Int) { + writeTaggedValueFieldNameAndAnnotations() + value.writeMaybeNull(IonType.BLOB) { userData.writeBlob(it, start, len) } + } override fun isFieldNameSet(): Boolean { return fieldNameId >= 0 || fieldNameText != null @@ -406,7 +507,7 @@ internal class IonManagedWriter_1_1( // Make sure we write out any symbol tables and buffered values before the IVM finish() } else { - writeSymbol("\$ion_1_1") + throw IonException("A version marker may only be written at the top level of the data stream.") } } @@ -457,48 +558,65 @@ internal class IonManagedWriter_1_1( override fun startMacro(macro: Macro) { require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } - val address = encodingContextManager.getOrAssignMacroAddress(macro) + val address = encodingContextManager.getOrAssignMacroAddress(macro, null) startMacro(encodingContextManager.getMacroNameForId(address), address, macro) } override fun startMacro(name: String, macro: Macro) { require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } - val address = encodingContextManager.getOrAssignMacroAddressAndName(name, macro) + val address = encodingContextManager.getOrAssignMacroAddress(macro, name.takeIf { options.useMacroNames }) startMacro(name, address, macro) } - private fun startMacro(name: String?, address: Int, definition: MacroImpl) = prepareFieldNameAndAnnotations { + private fun startMacro(name: String?, address: Int, definition: MacroImpl) { val container = containers.peek() - val prescribedMacroType = container.macroId - if (prescribedMacroType < 0) { - val useNames = - options.eExpressionIdentifierStrategy == ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_NAME - if (useNames && name != null) { + if (container.childIsTaggedValue()) { + writeTaggedValueFieldNameAndAnnotations() + if (options.useMacroNames && name != null) { userData.stepInEExp(name) } else { - val includeLengthPrefix = if (definition.signature.isEmpty()) false else options.writeLengthPrefix(ContainerType.EEXP, depth + 1) + val includeLengthPrefix = if (definition.signature.isEmpty()) { + false + } else { + options.writeLengthPrefix(ContainerType.EEXP, depth + 1) + } userData.stepInEExp(address, includeLengthPrefix) } - } else { + } else if (container.childIsTaglessMacroShaped()) { + // TODO: Look up names, etc. to improve the message, if the macro is incorrect. + if (address != container.childMacroId) throw IonException("Incorrect tagless macro.") + container.prepareNextChildType() // So it's ready when we step out of the tagless EExp. userData.stepInTaglessEExp() - container.prepareNextType() + } else { + throw IonException("Cannot write a macro invocation when tagless ${container.expectedChildTypeName()} is expected") } - containers.push { it.reset(definition.signature) } + containers.push { it.resetForMacroInvocation(definition.signature) } } override fun endMacro() { - // TODO: See if there are unwritten parameters, and attempt to write `absent arg` for all of them. + val currentContainer = containers.peek() + if (currentContainer.isMacroInvocation()) { + val numArgsProvided = currentContainer.i + // If there are any missing parameters, attempt to write absent argument for them. + while (currentContainer.prepareNextChildType()) { + if (currentContainer.childIsTaggedValue()) { + absentArgument() + } else { + throw IonException("Expected ${currentContainer.signature.size} arguments, but only $numArgsProvided were given.") + } + } + } userData.stepOut() containers.pop() } override fun absentArgument() { val container = containers.peek() - if (container.type != 0) throw IonException("The argument corresponding to a tagless placeholder cannot be absent.") + if (container.childType != ContainerInfo.TAGGED_VALUE) throw IonException("The argument corresponding to a tagless placeholder cannot be absent.") userData.writeAbsentArgument() - container.prepareNextType() + container.prepareNextChildType() } /** Visible for testing */ - internal fun getOrAssignMacroAddress(macro: MacroImpl): Int = encodingContextManager.getOrAssignMacroAddress(macro) + internal fun getOrAssignMacroAddress(macro: MacroImpl): Int = encodingContextManager.getOrAssignMacroAddress(macro, null) } diff --git a/src/main/java/com/amazon/ion/ion_1_1/IonRawWriter_1_1.kt b/src/main/java/com/amazon/ion/ion_1_1/IonRawWriter_1_1.kt index 2cfbee6be..76c1456ba 100644 --- a/src/main/java/com/amazon/ion/ion_1_1/IonRawWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/ion_1_1/IonRawWriter_1_1.kt @@ -21,6 +21,10 @@ import java.util.function.Consumer * exactly one field name. * * Most users should interact with an [IonWriter] or [IonWriter_1_1] instead of this interface. + * + * TODO: Consider updating this interface to use pairs of `start___()`/`end___()` methods for lists, sexps, structs, macros. + * + * TODO: There are unnecessary annotation methods in this interface. Remove them. */ interface IonRawWriter_1_1 { diff --git a/src/test/java/com/amazon/ion/impl/IonManagedWriter_1_1_Test.kt b/src/test/java/com/amazon/ion/impl/IonManagedWriter_1_1_Test.kt index a32f64788..7e9fe17f2 100644 --- a/src/test/java/com/amazon/ion/impl/IonManagedWriter_1_1_Test.kt +++ b/src/test/java/com/amazon/ion/impl/IonManagedWriter_1_1_Test.kt @@ -15,6 +15,7 @@ import com.amazon.ion.ion_1_1.IonWriter_1_1 import com.amazon.ion.ion_1_1.MacroBuilder import com.amazon.ion.ion_1_1.MacroImpl import com.amazon.ion.ion_1_1.TaglessScalarType +import com.amazon.ion.system.IonBinaryWriterBuilder import com.amazon.ion.system.IonTextWriterBuilder import com.amazon.ion.system.IonTextWriterBuilder.NewLineType import org.junit.jupiter.api.Assertions.assertArrayEquals @@ -30,6 +31,16 @@ import java.math.BigInteger /** * TODO: Ensure that tests cover all of [IonManagedWriter_1_1]. + * + * Missing test coverage includes, but not limited to: + * - type validation for tagless macro arguments + * - type validation for TE sequences with scalars + * - type validation for TE sequences with macros + * - Field names and annotations when they are/aren't allowed + * - Absent argument cases, including filling in missing arguments + * - Clobs, tagless floats, TE SExps, IVMs + * - Ion 1.0 symbol tables (including `$ion_symbol_table::null.struct`) + * - Symbols, with and without text, with and without SIDs */ internal class IonManagedWriter_1_1_Test { @@ -97,28 +108,34 @@ internal class IonManagedWriter_1_1_Test { // Helper function that writes to a writer and returns the binary Ion private fun writeBinary( closeWriter: Boolean = true, - symbolInliningStrategy: SymbolInliningStrategy = SymbolInliningStrategy.ALWAYS_INLINE, + symbolInliningStrategy: SymbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE, lengthPrefixStrategy: LengthPrefixStrategy = LengthPrefixStrategy.CONTAINERS_PREFIXED, block: IonManagedWriter_1_1.() -> Unit ): ByteArray { val output = ByteArrayOutputStream() - val writer = IonManagedWriter_1_1( - userData = IonRawBinaryWriter_1_1.from(output, 1024, 1), - systemData = IonRawBinaryWriter_1_1.from(output, 1024, 1), - options = ManagedWriterOptions_1_1( - internEncodingDirectiveSymbols = false, - symbolInliningStrategy = symbolInliningStrategy, - lengthPrefixStrategy = lengthPrefixStrategy, - eExpressionIdentifierStrategy = ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_NAME, - ), - onClose = output::close, - ) + val writer = newIon11BinaryWriter(output, symbolInliningStrategy, lengthPrefixStrategy) writer.apply(block) if (closeWriter) writer.close() return output.toByteArray() } + + private fun newIon11BinaryWriter( + output: ByteArrayOutputStream, + symbolInliningStrategy: SymbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE, + lengthPrefixStrategy: LengthPrefixStrategy = LengthPrefixStrategy.CONTAINERS_PREFIXED, + ) = IonManagedWriter_1_1( + userData = IonRawBinaryWriter_1_1.from(output, 1024, 1), + systemData = IonRawBinaryWriter_1_1.from(output, 1024, 1), + options = ManagedWriterOptions_1_1( + internEncodingDirectiveSymbols = false, + symbolInliningStrategy = symbolInliningStrategy, + lengthPrefixStrategy = lengthPrefixStrategy, + eExpressionIdentifierStrategy = ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_ADDRESS, + ), + onClose = output::close, + ) } @Test @@ -192,18 +209,11 @@ internal class IonManagedWriter_1_1_Test { } @Test - fun `write an IVM in a container should write a symbol`() { - assertEquals( - """ - $ion_1_1 - [$ion_1_1] - """.trimIndent(), - write { - stepIn(IonType.LIST) - writeIonVersionMarker() - stepOut() - } - ) + fun `write an IVM in a container should throw an exception`() { + write(closeWriter = false) { + stepIn(IonType.LIST) + assertThrows { writeIonVersionMarker() } + } } @Test @@ -297,9 +307,6 @@ internal class IonManagedWriter_1_1_Test { endMacro() } - println("Expected: ${expected.toPrettyHexString()}") - println("Actual: ${actual.toPrettyHexString()}") - assertArrayEquals(expected, actual) } @@ -752,7 +759,7 @@ internal class IonManagedWriter_1_1_Test { fun `writeObject() should write something with a macro representation`() { val expected = """ $ion_1_1 - (:$ion set_macros (Point2D {x:(:?),y:(:?)})) + (:$ion set_macros (Point2D {x:(:? {#int}),y:(:? {#int})})) (:Point2D 2 4) """.trimIndent() @@ -783,12 +790,13 @@ internal class IonManagedWriter_1_1_Test { assertEquals(expected, actual) } + @OptIn(ExperimentalStdlibApi::class) @Test fun `writeObject() should write something with nested macro representation`() { val expected = """ $ion_1_1 - (:$ion set_macros (Polygon {vertices:(:?),fill:(:?)}) (Point2D {x:(:?),y:(:?)})) - (:Polygon [(:Point2D 0 0),(:Point2D 0 1),(:Point2D 1 1),(:Point2D 1 0)] Blue) + (:$ion set_macros (Polygon {vertices:(:?),fill:(:? {#symbol})}) (Point2D {x:(:? {#int}),y:(:? {#int})})) + (:Polygon [{:Point2D} (0 0),(0 1),(1 1),(1 0)] Blue) """.trimIndent() val data = Polygon( @@ -802,26 +810,62 @@ internal class IonManagedWriter_1_1_Test { ) val actual = write { - writeObject(data) + data.writeToMacroAware(this) } assertEquals(expected, actual) } + @Test + fun `ion 1,1 demo`() { + // Writes some data in a few different ways and prints out comparisons. + + val data = Polygon( + listOf( + Point2D(0, 3), + Point2D(0, 8), + Point2D(4, 13), + Point2D(8, 8), + Point2D(8, 3), + Point2D(4, 0), + ), + Colors.Blue, + ) + + println("\nIon 1.0 Text") + println(StringBuilder().also { IonTextWriterBuilder.standard().build(it).use(data::writeTo) }) + + println("\nIon 1.1 Text w/ macros") + println(write { data.writeToMacroAware(this) }) + + println("\nIon 1.0 Binary") + calculateSystemAndUserSizes(IonBinaryWriterBuilder.standard()::build, data::writeTo) + println("\nIon 1.1 Binary w/o macros") + calculateSystemAndUserSizes(::newIon11BinaryWriter, data::writeTo) + println("\nIon 1.1 Binary w/ macros") + calculateSystemAndUserSizes(::newIon11BinaryWriter, data::writeToMacroAware) + } + + private fun calculateSystemAndUserSizes(newWriter: (ByteArrayOutputStream) -> T, writeValue: (T) -> Unit) { + val oneValueBytes = ByteArrayOutputStream().also { newWriter(it).use(writeValue) }.toByteArray() + val twoValueBytes = ByteArrayOutputStream().also { newWriter(it).use { w -> repeat(2) { writeValue(w) } } }.toByteArray() + + val valueSize = twoValueBytes.size - oneValueBytes.size + val ivmSize = 4 + val systemValueSize = oneValueBytes.size - ivmSize - valueSize + + println("IVM (4 bytes) : ${oneValueBytes.copyOfRange(0, ivmSize).toPrettyHexString(wordsPerLine = 32)}") + println("SYSTEM ($systemValueSize bytes) : ${oneValueBytes.copyOfRange(ivmSize, ivmSize + systemValueSize).toPrettyHexString(wordsPerLine = 32)}") + println("USER ($valueSize bytes) : ${oneValueBytes.copyOfRange(ivmSize + systemValueSize, oneValueBytes.size).toPrettyHexString(wordsPerLine = 32)}") + println("TOTAL: ${oneValueBytes.size} bytes") + } + interface WriteAsIon { fun writeTo(writer: IonWriter) fun writeToMacroAware(writer: IonWriter_1_1) - - fun IonWriter.writeObject(obj: WriteAsIon) { - if (this is IonWriter_1_1) { - obj.writeToMacroAware(this) - } else { - obj.writeTo(this) - } - } } - private data class Polygon(val vertices: List, val fill: Colors?) : WriteAsIon { + private data class Polygon(val vertices: List, val fill: Colors) : WriteAsIon { init { require(vertices.size >= 3) { "A polygon must have at least 3 edges and 3 vertices" } } companion object { @@ -833,7 +877,7 @@ internal class IonManagedWriter_1_1_Test { it.fieldName("vertices") .placeholder() .fieldName("fill") - .placeholder() + .taglessPlaceholder(TaglessScalarType.SYMBOL) } .build() } @@ -843,12 +887,10 @@ internal class IonManagedWriter_1_1_Test { stepIn(IonType.STRUCT) setFieldName("vertices") stepIn(IonType.LIST) - vertices.forEach { writeObject(it) } + vertices.forEach { it.writeTo(writer) } stepOut() - if (fill != null) { - setFieldName("fill") - writeObject(fill) - } + setFieldName("fill") + fill.writeTo(this) stepOut() } } @@ -857,10 +899,10 @@ internal class IonManagedWriter_1_1_Test { with(writer) { startMacro(MACRO_NAME, MACRO) - stepIn(IonType.LIST) - vertices.forEach { writeObject(it) } + stepInTaglessElementList(Point2D.MACRO_NAME, Point2D.MACRO) + vertices.forEach { it.writeToMacroAware(this) } stepOut() - fill?.let { writeObject(it) } ?: absentArgument() + fill.writeToMacroAware(this) endMacro() } } @@ -870,9 +912,10 @@ internal class IonManagedWriter_1_1_Test { companion object { // This is a very long macro name, but by using the qualified class name, // there is almost no risk of having a name conflict with another macro. - private val MACRO_NAME = Point2D::class.simpleName!!.replace(".", "_") - private val MACRO = MacroBuilder.newBuilder().structValue { - it.fieldName("x").placeholder().fieldName("y").placeholder() + val MACRO_NAME = Point2D::class.simpleName!!.replace(".", "_") + val MACRO = MacroBuilder.newBuilder().structValue { + it.fieldName("x").taglessPlaceholder(TaglessScalarType.INT) + .fieldName("y").taglessPlaceholder(TaglessScalarType.INT) }.build() } From ca262271c2a0c11a9a1fe5a0a0e84307772849ad Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Thu, 6 Nov 2025 19:49:00 -0800 Subject: [PATCH 5/5] Add suggested changes --- .../ion/impl/IonManagedWriterOptions_1_1.kt | 8 +-- .../amazon/ion/impl/IonManagedWriter_1_1.kt | 52 ++++++++----------- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt b/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt index 0bc4336d2..f4b3283b3 100644 --- a/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonManagedWriterOptions_1_1.kt @@ -117,16 +117,16 @@ sealed interface LengthPrefixStrategy { * With more context, we could enable strategies like: * - Write lists with annotation `X` as a delimited container. */ - fun writeLengthPrefix(containerType: ContainerType, depth: Int): Boolean + fun shouldWriteLengthPrefix(containerType: ContainerType, depth: Int): Boolean private data object NeverPrefixed : LengthPrefixStrategy { - override fun writeLengthPrefix(containerType: ContainerType, depth: Int): Boolean = false + override fun shouldWriteLengthPrefix(containerType: ContainerType, depth: Int): Boolean = false } private data object AlwaysPrefixed : LengthPrefixStrategy { - override fun writeLengthPrefix(containerType: ContainerType, depth: Int): Boolean = true + override fun shouldWriteLengthPrefix(containerType: ContainerType, depth: Int): Boolean = true } private data object ContainersPrefixed : LengthPrefixStrategy { - override fun writeLengthPrefix(containerType: ContainerType, depth: Int): Boolean = containerType != ContainerType.EEXP + override fun shouldWriteLengthPrefix(containerType: ContainerType, depth: Int): Boolean = containerType != ContainerType.EEXP } companion object { diff --git a/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt index b3e1dfa29..db515279f 100644 --- a/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/IonManagedWriter_1_1.kt @@ -31,9 +31,9 @@ import java.util.Date * * TODO: * - Handling of shared symbol tables - * - Proper handling of user-supplied symbol tables + * - Proper handling of user-supplied symbol tables -- if we allow it at all. * - Auto-flush (for binary and text) - * - Check that arguments match the signatures of macros/templates (in all cases). + * - Check that arguments match the signatures of macros/templates, including the type of tagless values (in all cases). * - Check that values in TE lists/sexps are the right encoding (in all cases). * - If a macro has a fixed size because it only has tagless parameters, always write it without a length prefix * - Determine if it's faster to read template definitions that use prefixed vs delimited containers, and make this @@ -182,9 +182,9 @@ internal class IonManagedWriter_1_1( } } - private fun _private_clearAnnotations() { + private fun clearAnnotations() { numAnnotations = 0 - // erase the first entries to ensure old values don't leak into `_private_hasFirstAnnotation()` + // erase the first entries to ensure old values don't leak into isIonSymbolTableAnnotationPresent() annotationsIdBuffer[0] = -1 annotationsTextBuffer[0] = null } @@ -202,7 +202,7 @@ internal class IonManagedWriter_1_1( } override fun setTypeAnnotations(annotations: Array?) { - _private_clearAnnotations() + clearAnnotations() numAnnotations = annotations?.size ?: 0 ensureAnnotationSpace(numAnnotations) annotations?.forEachIndexed { index, text -> @@ -212,7 +212,7 @@ internal class IonManagedWriter_1_1( } override fun setTypeAnnotationSymbols(annotations: Array?) { - _private_clearAnnotations() + clearAnnotations() numAnnotations = annotations?.size ?: 0 ensureAnnotationSpace(numAnnotations) annotations?.forEachIndexed { index, token -> @@ -226,18 +226,18 @@ internal class IonManagedWriter_1_1( when (containerType) { IonType.LIST -> { writeTaggedValueFieldNameAndAnnotations() - userData.stepInList(options.writeLengthPrefix(ContainerType.LIST, newDepth)) + userData.stepInList(options.shouldWriteLengthPrefix(ContainerType.LIST, newDepth)) } IonType.SEXP -> { writeTaggedValueFieldNameAndAnnotations() - userData.stepInSExp(options.writeLengthPrefix(ContainerType.SEXP, newDepth)) + userData.stepInSExp(options.shouldWriteLengthPrefix(ContainerType.SEXP, newDepth)) } IonType.STRUCT -> { if (depth == 0 && isIonSymbolTableAnnotationPresent()) { throw IonException("Ion 1.0 symbol tables not permitted in the Ion 1.1 managed writer.") } writeTaggedValueFieldNameAndAnnotations() - userData.stepInStruct(options.writeLengthPrefix(ContainerType.STRUCT, newDepth)) + userData.stepInStruct(options.shouldWriteLengthPrefix(ContainerType.STRUCT, newDepth)) } else -> throw IllegalArgumentException("Not a container type: $containerType") } @@ -248,7 +248,7 @@ internal class IonManagedWriter_1_1( require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } writeTaggedValueFieldNameAndAnnotations() val macroId = encodingContextManager.getOrAssignMacroAddress(macro, name.takeIf { options.useMacroNames }) - userData.stepInTaglessElementList(macroId, name, lengthPrefixed = options.writeLengthPrefix(ContainerType.EEXP, macroId)) + userData.stepInTaglessElementList(macroId, name, lengthPrefixed = options.shouldWriteLengthPrefix(ContainerType.EEXP, macroId)) containers.push { it.resetForMacroTESequence(macroId) } } @@ -262,7 +262,7 @@ internal class IonManagedWriter_1_1( require(macro is MacroImpl) { "Provided macro is not an Ion 1.1 macro." } writeTaggedValueFieldNameAndAnnotations() val macroId = encodingContextManager.getOrAssignMacroAddress(macro, name.takeIf { options.useMacroNames }) - userData.stepInTaglessElementSExp(macroId, name, lengthPrefixed = options.writeLengthPrefix(ContainerType.EEXP, macroId)) + userData.stepInTaglessElementSExp(macroId, name, lengthPrefixed = options.shouldWriteLengthPrefix(ContainerType.EEXP, macroId)) containers.push { it.resetForMacroTESequence(macroId) } } @@ -308,11 +308,7 @@ internal class IonManagedWriter_1_1( for (i in 0 until numAnnotations) { val annotationText = annotationsTextBuffer[i] val sid = annotationsIdBuffer[i] - if (sid >= 0) { - handleSymbolToken(UNKNOWN_SYMBOL_ID, annotationText, SymbolKind.ANNOTATION, userData) - } else { - handleSymbolToken(sid, annotationText, SymbolKind.ANNOTATION, userData) - } + handleSymbolToken(sid, annotationText, SymbolKind.ANNOTATION, userData) } this.numAnnotations = 0 } @@ -321,15 +317,12 @@ internal class IonManagedWriter_1_1( override fun writeSymbol(content: String?) { val container = containers.peek() if (container.childIsTaggedValue()) { - if (content == null) { - writeTaggedValueFieldNameAndAnnotations() - userData.writeNull(IonType.SYMBOL) - } else { - if (numAnnotations > 0 && depth == 0 && ION_VERSION_MARKER_REGEX.matches(content)) { + writeTaggedValueFieldNameAndAnnotations() + content.writeMaybeNull(IonType.SYMBOL) { + if (numAnnotations > 0 && depth == 0 && ION_VERSION_MARKER_REGEX.matches(it)) { throw IonException("Can't write a top-level symbol that is the same as an IVM.") } - writeTaggedValueFieldNameAndAnnotations() - handleSymbolToken(UNKNOWN_SYMBOL_ID, content, SymbolKind.VALUE, userData) + handleSymbolToken(UNKNOWN_SYMBOL_ID, it, SymbolKind.VALUE, userData) } } else { content ?: throw IonException("Cannot write null.symbol for tagless symbol") @@ -351,11 +344,12 @@ internal class IonManagedWriter_1_1( if (content?.text != null) return writeSymbol(content.text) if (container.childIsTaggedValue()) { writeTaggedValueFieldNameAndAnnotations() - if (content == null) { - userData.writeNull(IonType.SYMBOL) - } else { + content.writeMaybeNull(IonType.SYMBOL) { // TODO: Check to see if the SID refers to a user symbol with text that looks like an IVM - handleSymbolToken(content.sid, content.text, SymbolKind.VALUE, userData) + if (it.sid == SystemSymbols.ION_SYMBOL_TABLE_SID) { + throw IonException("Can't write a top-level symbol that is the same as an IVM.") + } + handleSymbolToken(it.sid, it.text, SymbolKind.VALUE, userData) } } else { if (container.childType != OpCode.TE_SYMBOL_FS) { @@ -384,11 +378,11 @@ internal class IonManagedWriter_1_1( /** Helper function that determines whether to write a symbol token as a SID or inline symbol */ private fun handleSymbolToken(sid: Int, text: String?, kind: SymbolKind, rawWriter: IonRawWriter_1_1, preserveEncoding: Boolean = false) { if (text == null) { - // No text. Decide whether to write $0 or some other SID if (sid == UNKNOWN_SYMBOL_ID) { // No (known) SID either. throw UnknownSymbolException("Cannot write a symbol token with unknown text and unknown SID.") } else { + // TODO: Should we always write $0 here? rawWriter.write(kind, sid) } } else if (preserveEncoding && sid < 0) { @@ -578,7 +572,7 @@ internal class IonManagedWriter_1_1( val includeLengthPrefix = if (definition.signature.isEmpty()) { false } else { - options.writeLengthPrefix(ContainerType.EEXP, depth + 1) + options.shouldWriteLengthPrefix(ContainerType.EEXP, depth + 1) } userData.stepInEExp(address, includeLengthPrefix) }