From 60c8a23b43d480cf29834dbe782384b35ec893ea Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Mon, 30 Mar 2026 14:26:02 +0530 Subject: [PATCH 1/2] Improve the Javadoc for encode/decode methods in Coder for better clarity on context behaviour --- .../org/apache/beam/sdk/coders/Coder.java | 72 ++++++++++++++----- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java index 0a3650ca133b..d001f8a93b48 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java @@ -53,6 +53,24 @@ * * @param the type of values being encoded and decoded */ +/** + *

<p>Example usage: + *

{@code
+ * Coder coder = StringUtf8Coder.of();
+ *
+ * // Encoding a single standalone value (typically uses OUTER context)
+ * coder.encode("hello", outStream);
+ *
+ * // Encoding multiple values (NESTED context scenario)
+ * for (String value : values) {
+ *     coder.encode(value, outStream);
+ * }
+ * }
+ * + *

When multiple values are encoded into the same stream, coders must ensure + * that each value can be correctly decoded. This is typically done by encoding + * length or delimiter information. + */ public abstract class Coder implements Serializable { /** * The context in which encoding or decoding is being done. @@ -64,22 +82,32 @@ public abstract class Coder implements Serializable { @Deprecated public static class Context { /** - * The outer context: the value being encoded or decoded takes up the remainder of the - * record/stream contents. - */ + * The outer context indicates that the value being encoded or decoded + * occupies the entire remaining stream. + * + *

In this context, the coder does not need to include length or boundary + * information, since the value extends to the end of the stream. + * + *

Example: Encoding a single standalone value. + */ public static final Context OUTER = new Context(true); - /** - * The nested context: the value being encoded or decoded is (potentially) a part of a larger - * record/stream contents, and may have other parts encoded or decoded after it. - */ + * The nested context indicates that the value being encoded or decoded + * is part of a larger structure or stream containing multiple values. + * + *

In this context, the coder must include enough information (such as + * length or delimiters) to allow correct decoding of individual elements. + * + *

Example: Encoding elements inside a collection or record. + */ public static final Context NESTED = new Context(false); /** - * Whether the encoded or decoded value fills the remainder of the output or input (resp.) - * record/stream contents. If so, then the size of the decoded value can be determined from the - * remaining size of the record/stream contents, and so explicit lengths aren't required. - */ + * Indicates whether the encoded/decoded value consumes the entire remaining stream. + * + *

If true, no additional length information is required. + * If false, the coder must encode boundaries to allow correct decoding. + */ public final boolean isWholeStream; public Context(boolean isWholeStream) { @@ -116,9 +144,11 @@ public String toString() { * be encoded next to each other on the output stream, each coder should encode information to * know how many bytes to read when decoding. A common approach is to prefix the encoding with the * element's encoded length. - * - * @throws IOException if writing to the {@code OutputStream} fails for some reason - * @throws CoderException if the value could not be encoded for some reason + *

The behavior of encoding depends on the {@link Context} in which it is used. + * When using {@link Context#OUTER}, the encoded value may consume the entire remaining stream, + * so no additional length information is required. In contrast, when using {@link Context#NESTED}, + * the encoded value is part of a larger structure, and the coder must include sufficient + * boundary information (such as length prefixes) to allow correct decoding of individual elements. */ public abstract void encode(T value, OutputStream outStream) throws CoderException, IOException; @@ -136,10 +166,16 @@ public void encode(T value, OutputStream outStream, Context context) } /** - * Decodes a value of type {@code T} from the given input stream in the given context. Returns the - * decoded value. Multiple elements can be encoded next to each other on the input stream, each - * coder should encode information to know how many bytes to read when decoding. A common approach - * is to prefix the encoding with the element's encoded length. + * Decodes a value of type {@code T} from the given input stream and returns the decoded value. + * + *

When multiple elements are encoded in the same stream, the coder must be able to determine + * how many bytes to read for each element. This is typically achieved by encoding length or + * delimiter information during encoding. + * + *

The behavior of decoding depends on the {@link Context} in which it is used. + * When decoding in {@link Context#OUTER}, the value is expected to consume the entire remaining + * stream. In {@link Context#NESTED}, the value is part of a larger structure, so the coder must + * rely on encoded boundaries (such as length prefixes) to correctly extract individual elements. * * @throws IOException if reading from the {@code InputStream} fails for some reason * @throws CoderException if the value could not be decoded for some reason From 760905140cba0c9fc9a445fed8232473f1c2b98e Mon Sep 17 00:00:00 2001 From: SubramanyaV Date: Mon, 30 Mar 2026 18:14:27 +0530 Subject: [PATCH 2/2] Apply spotless formatting --- .../org/apache/beam/sdk/coders/Coder.java | 69 ++++++++++--------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java index d001f8a93b48..19a2cd62eb75 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java @@ -54,7 +54,8 @@ * @param the type of values being encoded and decoded */ /** - *

<p>Example usage: + * Example usage: + * *

{@code
  * Coder coder = StringUtf8Coder.of();
  *
@@ -67,9 +68,8 @@
  * }
  * }
* - *

When multiple values are encoded into the same stream, coders must ensure - * that each value can be correctly decoded. This is typically done by encoding - * length or delimiter information. + *

When multiple values are encoded into the same stream, coders must ensure that each value can + * be correctly decoded. This is typically done by encoding length or delimiter information. */ public abstract class Coder implements Serializable { /** @@ -82,32 +82,32 @@ public abstract class Coder implements Serializable { @Deprecated public static class Context { /** - * The outer context indicates that the value being encoded or decoded - * occupies the entire remaining stream. - * - *

In this context, the coder does not need to include length or boundary - * information, since the value extends to the end of the stream. - * - *

Example: Encoding a single standalone value. - */ + * The outer context indicates that the value being encoded or decoded occupies the entire + * remaining stream. + * + *

In this context, the coder does not need to include length or boundary information, since + * the value extends to the end of the stream. + * + *

Example: Encoding a single standalone value. + */ public static final Context OUTER = new Context(true); /** - * The nested context indicates that the value being encoded or decoded - * is part of a larger structure or stream containing multiple values. - * - *

In this context, the coder must include enough information (such as - * length or delimiters) to allow correct decoding of individual elements. - * - *

Example: Encoding elements inside a collection or record. - */ + * The nested context indicates that the value being encoded or decoded is part of a larger + * structure or stream containing multiple values. + * + *

In this context, the coder must include enough information (such as length or delimiters) + * to allow correct decoding of individual elements. + * + *

Example: Encoding elements inside a collection or record. + */ public static final Context NESTED = new Context(false); /** - * Indicates whether the encoded/decoded value consumes the entire remaining stream. - * - *

If true, no additional length information is required. - * If false, the coder must encode boundaries to allow correct decoding. - */ + * Indicates whether the encoded/decoded value consumes the entire remaining stream. + * + *

If true, no additional length information is required. If false, the coder must encode + * boundaries to allow correct decoding. + */ public final boolean isWholeStream; public Context(boolean isWholeStream) { @@ -144,11 +144,12 @@ public String toString() { * be encoded next to each other on the output stream, each coder should encode information to * know how many bytes to read when decoding. A common approach is to prefix the encoding with the * element's encoded length. - *

The behavior of encoding depends on the {@link Context} in which it is used. - * When using {@link Context#OUTER}, the encoded value may consume the entire remaining stream, - * so no additional length information is required. In contrast, when using {@link Context#NESTED}, - * the encoded value is part of a larger structure, and the coder must include sufficient - * boundary information (such as length prefixes) to allow correct decoding of individual elements. + * + *

The behavior of encoding depends on the {@link Context} in which it is used. When using + * {@link Context#OUTER}, the encoded value may consume the entire remaining stream, so no + * additional length information is required. In contrast, when using {@link Context#NESTED}, the + * encoded value is part of a larger structure, and the coder must include sufficient boundary + * information (such as length prefixes) to allow correct decoding of individual elements. */ public abstract void encode(T value, OutputStream outStream) throws CoderException, IOException; @@ -172,10 +173,10 @@ public void encode(T value, OutputStream outStream, Context context) * how many bytes to read for each element. This is typically achieved by encoding length or * delimiter information during encoding. * - *

The behavior of decoding depends on the {@link Context} in which it is used. - * When decoding in {@link Context#OUTER}, the value is expected to consume the entire remaining - * stream. In {@link Context#NESTED}, the value is part of a larger structure, so the coder must - * rely on encoded boundaries (such as length prefixes) to correctly extract individual elements. + *

The behavior of decoding depends on the {@link Context} in which it is used. When decoding + * in {@link Context#OUTER}, the value is expected to consume the entire remaining stream. In + * {@link Context#NESTED}, the value is part of a larger structure, so the coder must rely on + * encoded boundaries (such as length prefixes) to correctly extract individual elements. * * @throws IOException if reading from the {@code InputStream} fails for some reason * @throws CoderException if the value could not be decoded for some reason