diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java index 0a3650ca133b..19a2cd62eb75 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java @@ -53,6 +53,24 @@ * * @param <T> the type of values being encoded and decoded */ +/** + * Example usage: + * + *
<pre>{@code
+ * Coder<String> coder = StringUtf8Coder.of();
+ *
+ * // Encoding a single standalone value (typically uses OUTER context)
+ * coder.encode("hello", outStream);
+ *
+ * // Encoding multiple values (NESTED context scenario)
+ * for (String value : values) {
+ *     coder.encode(value, outStream);
+ * }
+ * }</pre>
+ * + *

<p>When multiple values are encoded into the same stream, coders must ensure that each value can + * be correctly decoded. This is typically done by encoding length or delimiter information. + */ public abstract class Coder<T> implements Serializable { /** * The context in which encoding or decoding is being done. @@ -64,21 +82,31 @@ public abstract class Coder<T> implements Serializable { @Deprecated public static class Context { /** - * The outer context: the value being encoded or decoded takes up the remainder of the - * record/stream contents. + * The outer context indicates that the value being encoded or decoded occupies the entire + * remaining stream. + * + *

<p>In this context, the coder does not need to include length or boundary information, since + * the value extends to the end of the stream. + * + *

<p>Example: Encoding a single standalone value. */ public static final Context OUTER = new Context(true); - /** - * The nested context: the value being encoded or decoded is (potentially) a part of a larger - * record/stream contents, and may have other parts encoded or decoded after it. + * The nested context indicates that the value being encoded or decoded is part of a larger + * structure or stream containing multiple values. + * + *

<p>In this context, the coder must include enough information (such as length or delimiters) + * to allow correct decoding of individual elements. + * + *

<p>Example: Encoding elements inside a collection or record. */ public static final Context NESTED = new Context(false); /** - * Whether the encoded or decoded value fills the remainder of the output or input (resp.) - * record/stream contents. If so, then the size of the decoded value can be determined from the - * remaining size of the record/stream contents, and so explicit lengths aren't required. + * Indicates whether the encoded/decoded value consumes the entire remaining stream. + * + *

<p>If true, no additional length information is required. If false, the coder must encode + * boundaries to allow correct decoding. */ public final boolean isWholeStream; @@ -117,8 +145,11 @@ public String toString() { * know how many bytes to read when decoding. A common approach is to prefix the encoding with the * element's encoded length. * - * @throws IOException if writing to the {@code OutputStream} fails for some reason - * @throws CoderException if the value could not be encoded for some reason + *

<p>The behavior of encoding depends on the {@link Context} in which it is used. When using + * {@link Context#OUTER}, the encoded value may consume the entire remaining stream, so no + * additional length information is required. In contrast, when using {@link Context#NESTED}, the + * encoded value is part of a larger structure, and the coder must include sufficient boundary + * information (such as length prefixes) to allow correct decoding of individual elements. */ public abstract void encode(T value, OutputStream outStream) throws CoderException, IOException; @@ -136,10 +167,16 @@ public void encode(T value, OutputStream outStream, Context context) } /** - * Decodes a value of type {@code T} from the given input stream in the given context. Returns the - * decoded value. Multiple elements can be encoded next to each other on the input stream, each - * coder should encode information to know how many bytes to read when decoding. A common approach - * is to prefix the encoding with the element's encoded length. + * Decodes a value of type {@code T} from the given input stream and returns the decoded value. + * + *

<p>When multiple elements are encoded in the same stream, the coder must be able to determine + * how many bytes to read for each element. This is typically achieved by encoding length or + * delimiter information during encoding. + * + *

<p>The behavior of decoding depends on the {@link Context} in which it is used. When decoding + * in {@link Context#OUTER}, the value is expected to consume the entire remaining stream. In + * {@link Context#NESTED}, the value is part of a larger structure, so the coder must rely on + * encoded boundaries (such as length prefixes) to correctly extract individual elements. * * @throws IOException if reading from the {@code InputStream} fails for some reason * @throws CoderException if the value could not be decoded for some reason