Skip to content

Commit d8e82d4

Browse files
committed
Add docs and tests for FormUrlEncodedSink
1 parent 52ab394 commit d8e82d4

4 files changed

Lines changed: 312 additions & 9 deletions

File tree

aws/client/aws-client-awsquery/src/main/java/software/amazon/smithy/java/aws/client/awsquery/AwsQueryClientProtocol.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public final class AwsQueryClientProtocol extends HttpClientProtocol {
3838

3939
private static final String CONTENT_TYPE = "application/x-www-form-urlencoded";
4040
private static final List<String> CONTENT_TYPE_LIST = List.of(CONTENT_TYPE);
41+
public static final HttpHeaders CONTENT_TYPE_HEADERS = HttpHeaders.of(Map.of("Content-Type", CONTENT_TYPE_LIST));
4142

4243
private final ShapeId service;
4344
private final String version;
@@ -80,7 +81,7 @@ public <I extends SerializableStruct, O extends SerializableStruct> HttpRequest
8081
return HttpRequest.builder()
8182
.method("POST")
8283
.uri(endpoint)
83-
.headers(HttpHeaders.of(Map.of("Content-Type", CONTENT_TYPE_LIST)))
84+
.headers(CONTENT_TYPE_HEADERS)
8485
.body(DataStream.ofByteBuffer(body, CONTENT_TYPE))
8586
.build();
8687
}

aws/client/aws-client-awsquery/src/main/java/software/amazon/smithy/java/aws/client/awsquery/FormUrlEncodedSink.java

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
import java.nio.ByteBuffer;
99
import java.util.Arrays;
1010

11+
/**
12+
* A byte buffer sink for building URL-encoded form data using RFC 3986 percent-encoding.
13+
*
14+
* <p>RFC 3986 unreserved characters (A-Z, a-z, 0-9, '-', '.', '_', '~') pass through unencoded,
15+
* while every other character is percent-encoded as its UTF-8 bytes. This differs from the
16+
* application/x-www-form-urlencoded spec, which encodes space as '+'; the AWS Query protocol expects
17+
* RFC 3986 encoding instead.
18+
*/
1119
final class FormUrlEncodedSink {
1220
private static final byte[] HEX = {
1321
'0',
@@ -72,15 +80,13 @@ void writeUrlEncoded(String s) {
7280
} else if (c < 0x800) {
7381
writePercentEncoded(0xC0 | (c >> 6));
7482
writePercentEncoded(0x80 | (c & 0x3F));
75-
} else if (Character.isHighSurrogate(c) && i + 1 < len) {
83+
} else if (Character.isHighSurrogate(c) && i + 1 < len && Character.isLowSurrogate(s.charAt(i + 1))) {
7684
char low = s.charAt(++i);
77-
if (Character.isLowSurrogate(low)) {
78-
int cp = Character.toCodePoint(c, low);
79-
writePercentEncoded(0xF0 | (cp >> 18));
80-
writePercentEncoded(0x80 | ((cp >> 12) & 0x3F));
81-
writePercentEncoded(0x80 | ((cp >> 6) & 0x3F));
82-
writePercentEncoded(0x80 | (cp & 0x3F));
83-
}
85+
int cp = Character.toCodePoint(c, low);
86+
writePercentEncoded(0xF0 | (cp >> 18));
87+
writePercentEncoded(0x80 | ((cp >> 12) & 0x3F));
88+
writePercentEncoded(0x80 | ((cp >> 6) & 0x3F));
89+
writePercentEncoded(0x80 | (cp & 0x3F));
8490
} else {
8591
writePercentEncoded(0xE0 | (c >> 12));
8692
writePercentEncoded(0x80 | ((c >> 6) & 0x3F));
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package software.amazon.smithy.java.aws.client.awsquery;
7+
8+
import org.junit.jupiter.api.Test;
9+
import org.junit.jupiter.params.ParameterizedTest;
10+
import org.junit.jupiter.params.provider.Arguments;
11+
import org.junit.jupiter.params.provider.MethodSource;
12+
import org.junit.jupiter.params.provider.ValueSource;
13+
14+
import java.nio.ByteBuffer;
15+
import java.nio.charset.StandardCharsets;
16+
import java.util.stream.Stream;
17+
18+
import static org.hamcrest.MatcherAssert.assertThat;
19+
import static org.hamcrest.Matchers.equalTo;
20+
import static software.amazon.smithy.java.io.ByteBufferUtils.getUTF8String;
21+
22+
class FormUrlEncodedSinkTest {
23+
24+
@ParameterizedTest
25+
@ValueSource(strings = {
26+
"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
27+
"abcdefghijklmnopqrstuvwxyz",
28+
"0123456789",
29+
"-._~",
30+
"Hello",
31+
"test123",
32+
"a-b.c_d~e"
33+
})
34+
void unreservedCharactersPassThrough(String input) {
35+
var sink = new FormUrlEncodedSink();
36+
sink.writeUrlEncoded(input);
37+
assertThat(getUTF8String(sink.finish()), equalTo(input));
38+
}
39+
40+
@ParameterizedTest
41+
@MethodSource("reservedCharactersProvider")
42+
void reservedCharactersArePercentEncoded(String input, String expected) {
43+
var sink = new FormUrlEncodedSink();
44+
sink.writeUrlEncoded(input);
45+
assertThat(getUTF8String(sink.finish()), equalTo(expected));
46+
}
47+
48+
static Stream<Arguments> reservedCharactersProvider() {
49+
return Stream.of(
50+
Arguments.of(" ", "%20"),
51+
Arguments.of("!", "%21"),
52+
Arguments.of("#", "%23"),
53+
Arguments.of("$", "%24"),
54+
Arguments.of("%", "%25"),
55+
Arguments.of("&", "%26"),
56+
Arguments.of("'", "%27"),
57+
Arguments.of("(", "%28"),
58+
Arguments.of(")", "%29"),
59+
Arguments.of("*", "%2A"),
60+
Arguments.of("+", "%2B"),
61+
Arguments.of(",", "%2C"),
62+
Arguments.of("/", "%2F"),
63+
Arguments.of(":", "%3A"),
64+
Arguments.of(";", "%3B"),
65+
Arguments.of("=", "%3D"),
66+
Arguments.of("?", "%3F"),
67+
Arguments.of("@", "%40"),
68+
Arguments.of("[", "%5B"),
69+
Arguments.of("]", "%5D"),
70+
Arguments.of("hello world", "hello%20world"),
71+
Arguments.of("a=b&c=d", "a%3Db%26c%3Dd"),
72+
Arguments.of("foo/bar", "foo%2Fbar")
73+
);
74+
}
75+
76+
@ParameterizedTest
77+
@MethodSource("utf8TwoByteProvider")
78+
void twoByteUtf8CharactersAreEncoded(String input, String expected) {
79+
var sink = new FormUrlEncodedSink();
80+
sink.writeUrlEncoded(input);
81+
assertThat(getUTF8String(sink.finish()), equalTo(expected));
82+
}
83+
84+
static Stream<Arguments> utf8TwoByteProvider() {
85+
return Stream.of(
86+
Arguments.of("é", "%C3%A9"),
87+
Arguments.of("ñ", "%C3%B1"),
88+
Arguments.of("ü", "%C3%BC"),
89+
Arguments.of("café", "caf%C3%A9"),
90+
Arguments.of("©", "%C2%A9")
91+
);
92+
}
93+
94+
@ParameterizedTest
95+
@MethodSource("utf8ThreeByteProvider")
96+
void threeByteUtf8CharactersAreEncoded(String input, String expected) {
97+
var sink = new FormUrlEncodedSink();
98+
sink.writeUrlEncoded(input);
99+
assertThat(getUTF8String(sink.finish()), equalTo(expected));
100+
}
101+
102+
static Stream<Arguments> utf8ThreeByteProvider() {
103+
return Stream.of(
104+
Arguments.of("€", "%E2%82%AC"),
105+
Arguments.of("中", "%E4%B8%AD"),
106+
Arguments.of("日本", "%E6%97%A5%E6%9C%AC"),
107+
Arguments.of("☃", "%E2%98%83")
108+
);
109+
}
110+
111+
@ParameterizedTest
112+
@MethodSource("utf8FourByteProvider")
113+
void fourByteUtf8SurrogatePairsAreEncoded(String input, String expected) {
114+
var sink = new FormUrlEncodedSink();
115+
sink.writeUrlEncoded(input);
116+
assertThat(getUTF8String(sink.finish()), equalTo(expected));
117+
}
118+
119+
static Stream<Arguments> utf8FourByteProvider() {
120+
return Stream.of(
121+
Arguments.of("🎉", "%F0%9F%8E%89"),
122+
Arguments.of("😀", "%F0%9F%98%80"),
123+
Arguments.of("𝄞", "%F0%9D%84%9E"),
124+
Arguments.of("hello🎉world", "hello%F0%9F%8E%89world")
125+
);
126+
}
127+
128+
@Test
129+
void writeUrlEncodedWithEmptyString() {
130+
var sink = new FormUrlEncodedSink();
131+
sink.writeUrlEncoded("");
132+
assertThat(getUTF8String(sink.finish()), equalTo(""));
133+
}
134+
135+
@Test
136+
void writeUrlEncodedWithMixedContent() {
137+
var sink = new FormUrlEncodedSink();
138+
sink.writeUrlEncoded("Hello World! café 日本 🎉");
139+
assertThat(getUTF8String(sink.finish()),
140+
equalTo("Hello%20World%21%20caf%C3%A9%20%E6%97%A5%E6%9C%AC%20%F0%9F%8E%89"));
141+
}
142+
143+
@ParameterizedTest
144+
@ValueSource(ints = {0, 1, 9, 10, 99, 100, 999, 1000, 12345, 999999, Integer.MAX_VALUE})
145+
void writeIntPositiveValues(int value) {
146+
var sink = new FormUrlEncodedSink();
147+
sink.writeInt(value);
148+
assertThat(getUTF8String(sink.finish()), equalTo(Integer.toString(value)));
149+
}
150+
151+
@ParameterizedTest
152+
@ValueSource(ints = {-1, -10, -999, Integer.MIN_VALUE})
153+
void writeIntNegativeValues(int value) {
154+
var sink = new FormUrlEncodedSink();
155+
sink.writeInt(value);
156+
assertThat(getUTF8String(sink.finish()), equalTo(Integer.toString(value)));
157+
}
158+
159+
@Test
160+
void writeAsciiSimpleString() {
161+
var sink = new FormUrlEncodedSink();
162+
sink.writeAscii("Action=GetUser");
163+
assertThat(getUTF8String(sink.finish()), equalTo("Action=GetUser"));
164+
}
165+
166+
@Test
167+
void writeAsciiEmptyString() {
168+
var sink = new FormUrlEncodedSink();
169+
sink.writeAscii("");
170+
assertThat(getUTF8String(sink.finish()), equalTo(""));
171+
}
172+
173+
@Test
174+
void writeByteSingleByte() {
175+
var sink = new FormUrlEncodedSink();
176+
sink.writeByte('&');
177+
assertThat(getUTF8String(sink.finish()), equalTo("&"));
178+
}
179+
180+
@Test
181+
void writeByteMultipleBytes() {
182+
var sink = new FormUrlEncodedSink();
183+
sink.writeByte('a');
184+
sink.writeByte('=');
185+
sink.writeByte('b');
186+
assertThat(getUTF8String(sink.finish()), equalTo("a=b"));
187+
}
188+
189+
@Test
190+
void writeBytesFromArray() {
191+
var sink = new FormUrlEncodedSink();
192+
byte[] data = "Hello".getBytes(StandardCharsets.UTF_8);
193+
sink.writeBytes(data, 0, data.length);
194+
assertThat(getUTF8String(sink.finish()), equalTo("Hello"));
195+
}
196+
197+
@Test
198+
void writeBytesWithOffset() {
199+
var sink = new FormUrlEncodedSink();
200+
byte[] data = "xxHelloxx".getBytes(StandardCharsets.UTF_8);
201+
sink.writeBytes(data, 2, 5);
202+
assertThat(getUTF8String(sink.finish()), equalTo("Hello"));
203+
}
204+
205+
@Test
206+
void combineMultipleWriteOperations() {
207+
var sink = new FormUrlEncodedSink();
208+
sink.writeAscii("Action=Test");
209+
sink.writeByte('&');
210+
sink.writeAscii("Index=");
211+
sink.writeInt(42);
212+
sink.writeByte('&');
213+
sink.writeAscii("Name=");
214+
sink.writeUrlEncoded("hello world");
215+
assertThat(getUTF8String(sink.finish()), equalTo("Action=Test&Index=42&Name=hello%20world"));
216+
}
217+
218+
@Test
219+
void bufferGrowsBeyondInitialCapacity() {
220+
var sink = new FormUrlEncodedSink(8);
221+
sink.writeAscii("This is a much longer string that exceeds the initial capacity");
222+
assertThat(getUTF8String(sink.finish()),
223+
equalTo("This is a much longer string that exceeds the initial capacity"));
224+
}
225+
226+
@Test
227+
void bufferGrowsWithUrlEncodedContent() {
228+
var sink = new FormUrlEncodedSink(10);
229+
sink.writeUrlEncoded("Special chars: !@#$%^&*()");
230+
assertThat(getUTF8String(sink.finish()),
231+
equalTo("Special%20chars%3A%20%21%40%23%24%25%5E%26%2A%28%29"));
232+
}
233+
234+
@Test
235+
void finishReturnsByteBufferWithCorrectPosition() {
236+
var sink = new FormUrlEncodedSink();
237+
sink.writeAscii("test");
238+
ByteBuffer result = sink.finish();
239+
assertThat(result.position(), equalTo(0));
240+
assertThat(result.remaining(), equalTo(4));
241+
}
242+
243+
@Test
244+
void hexEncodingUsesUppercase() {
245+
var sink = new FormUrlEncodedSink();
246+
sink.writeUrlEncoded("ÿ");
247+
String result = getUTF8String(sink.finish());
248+
assertThat(result, equalTo("%C3%BF"));
249+
assertThat(result.contains("a") || result.contains("b") || result.contains("c")
250+
|| result.contains("d") || result.contains("e") || result.contains("f"), equalTo(false));
251+
}
252+
253+
@Test
254+
void unpairedHighSurrogateIsEncodedAsSingleCharacter() {
255+
var sink = new FormUrlEncodedSink();
256+
// High surrogate \uD83C without a following low surrogate
257+
sink.writeUrlEncoded("a\uD83Cb");
258+
String result = getUTF8String(sink.finish());
259+
// High surrogate encoded as 3-byte sequence, then 'b' passes through
260+
assertThat(result, equalTo("a%ED%A0%BCb"));
261+
}
262+
263+
@Test
264+
void highSurrogateFollowedByNonSurrogateEncodesEachSeparately() {
265+
var sink = new FormUrlEncodedSink();
266+
// High surrogate \uD83C followed by regular char 'X' (not a low surrogate)
267+
sink.writeUrlEncoded("\uD83CX");
268+
String result = getUTF8String(sink.finish());
269+
// High surrogate encoded as 3-byte, then X passes through
270+
assertThat(result, equalTo("%ED%A0%BCX"));
271+
}
272+
273+
@Test
274+
void highSurrogateAtEndOfStringIsEncoded() {
275+
var sink = new FormUrlEncodedSink();
276+
// High surrogate at end with no following character
277+
sink.writeUrlEncoded("test\uD83C");
278+
String result = getUTF8String(sink.finish());
279+
assertThat(result, equalTo("test%ED%A0%BC"));
280+
}
281+
282+
@Test
283+
void lowSurrogateAloneIsEncoded() {
284+
var sink = new FormUrlEncodedSink();
285+
// Lone low surrogate (no preceding high surrogate)
286+
sink.writeUrlEncoded("a\uDE89b");
287+
String result = getUTF8String(sink.finish());
288+
assertThat(result, equalTo("a%ED%BA%89b"));
289+
}
290+
}

io/src/main/java/software/amazon/smithy/java/io/ByteBufferUtils.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import java.io.InputStream;
99
import java.nio.ByteBuffer;
10+
import java.nio.charset.StandardCharsets;
1011
import java.util.Base64;
1112

1213
public final class ByteBufferUtils {
@@ -24,6 +25,11 @@ public static String base64Encode(ByteBuffer buffer) {
2425
return Base64.getEncoder().encodeToString(bytes);
2526
}
2627

28+
public static String getUTF8String(ByteBuffer buffer) {
29+
var bytes = getBytes(buffer);
30+
return new String(bytes, StandardCharsets.UTF_8);
31+
}
32+
2733
public static byte[] getBytes(ByteBuffer buffer) {
2834
if (isExact(buffer)) {
2935
return buffer.array();

0 commit comments

Comments
 (0)