Skip to content

Commit f43c631

Browse files
Add java.net.URLDecoder
1 parent 38a1e86 commit f43c631

File tree

1 file changed

+207
-0
lines changed

1 file changed

+207
-0
lines changed
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
/*
2+
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation. Oracle designates this
8+
* particular file as subject to the "Classpath" exception as provided
9+
* by Oracle in the LICENSE file that accompanied this code.
10+
*
11+
* This code is distributed in the hope that it will be useful, but WITHOUT
12+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14+
* version 2 for more details (a copy is included in the LICENSE file that
15+
* accompanied this code).
16+
*
17+
* You should have received a copy of the GNU General Public License version
18+
* 2 along with this work; if not, write to the Free Software Foundation,
19+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20+
*
21+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22+
* or visit www.oracle.com if you need additional information or have any
23+
* questions.
24+
*/
25+
26+
package java.net;
27+
28+
import java.io.*;
29+
30+
/**
31+
* Utility class for HTML form decoding. This class contains static methods
32+
* for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
33+
* MIME format.
34+
* <p>
35+
* The conversion process is the reverse of that used by the URLEncoder class. It is assumed
36+
* that all characters in the encoded string are one of the following:
37+
* &quot;{@code a}&quot; through &quot;{@code z}&quot;,
38+
* &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
39+
* &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
40+
* &quot;{@code -}&quot;, &quot;{@code _}&quot;,
41+
* &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
42+
* character &quot;{@code %}&quot; is allowed but is interpreted
43+
* as the start of a special escaped sequence.
44+
* <p>
45+
* The following rules are applied in the conversion:
46+
*
47+
* <ul>
48+
* <li>The alphanumeric characters &quot;{@code a}&quot; through
49+
* &quot;{@code z}&quot;, &quot;{@code A}&quot; through
50+
* &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
51+
* through &quot;{@code 9}&quot; remain the same.
52+
* <li>The special characters &quot;{@code .}&quot;,
53+
* &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
54+
* &quot;{@code _}&quot; remain the same.
55+
* <li>The plus sign &quot;{@code +}&quot; is converted into a
56+
* space character &quot; &nbsp; &quot; .
57+
* <li>A sequence of the form "<i>{@code %xy}</i>" will be
58+
* treated as representing a byte where <i>xy</i> is the two-digit
59+
* hexadecimal representation of the 8 bits. Then, all substrings
60+
* that contain one or more of these byte sequences consecutively
61+
* will be replaced by the character(s) whose encoding would result
62+
* in those consecutive bytes.
63+
* The encoding scheme used to decode these characters may be specified,
64+
* or if unspecified, the default encoding of the platform will be used.
65+
* </ul>
66+
* <p>
67+
* There are two possible ways in which this decoder could deal with
68+
* illegal strings. It could either leave illegal characters alone or
69+
* it could throw an {@link java.lang.IllegalArgumentException}.
70+
* Which approach the decoder takes is left to the
71+
* implementation.
72+
*
73+
* @author Mark Chamness
74+
* @author Michael McCloskey
75+
* @since 1.2
76+
*/
77+
78+
public class URLDecoder {
79+
80+
// The platform default encoding
81+
static String dfltEncName = URLEncoder.dfltEncName;
82+
83+
/**
84+
* Decodes a {@code x-www-form-urlencoded} string.
85+
* The platform's default encoding is used to determine what characters
86+
* are represented by any consecutive sequences of the form
87+
* "<i>{@code %xy}</i>".
88+
* @param s the {@code String} to decode
89+
* @deprecated The resulting string may vary depending on the platform's
90+
* default encoding. Instead, use the decode(String,String) method
91+
* to specify the encoding.
92+
* @return the newly decoded {@code String}
93+
*/
94+
@Deprecated
95+
public static String decode(String s) {
96+
97+
String str = null;
98+
99+
try {
100+
str = decode(s, dfltEncName);
101+
} catch (UnsupportedEncodingException e) {
102+
// The system should always have the platform default
103+
}
104+
105+
return str;
106+
}
107+
108+
/**
109+
* Decodes a {@code application/x-www-form-urlencoded} string using a specific
110+
* encoding scheme.
111+
* The supplied encoding is used to determine
112+
* what characters are represented by any consecutive sequences of the
113+
* form "<i>{@code %xy}</i>".
114+
* <p>
115+
* <em><strong>Note:</strong> The <a href=
116+
* "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
117+
* World Wide Web Consortium Recommendation</a> states that
118+
* UTF-8 should be used. Not doing so may introduce
119+
* incompatibilities.</em>
120+
*
121+
* @param s the {@code String} to decode
122+
* @param enc The name of a supported
123+
* <a href="../lang/package-summary.html#charenc">character
124+
* encoding</a>.
125+
* @return the newly decoded {@code String}
126+
* @exception UnsupportedEncodingException
127+
* If character encoding needs to be consulted, but
128+
* named character encoding is not supported
129+
* @see URLEncoder#encode(java.lang.String, java.lang.String)
130+
* @since 1.4
131+
*/
132+
public static String decode(String s, String enc)
133+
throws UnsupportedEncodingException{
134+
135+
boolean needToChange = false;
136+
int numChars = s.length();
137+
StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars);
138+
int i = 0;
139+
140+
if (enc.length() == 0) {
141+
throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
142+
}
143+
144+
char c;
145+
byte[] bytes = null;
146+
while (i < numChars) {
147+
c = s.charAt(i);
148+
switch (c) {
149+
case '+':
150+
sb.append(' ');
151+
i++;
152+
needToChange = true;
153+
break;
154+
case '%':
155+
/*
156+
* Starting with this instance of %, process all
157+
* consecutive substrings of the form %xy. Each
158+
* substring %xy will yield a byte. Convert all
159+
* consecutive bytes obtained this way to whatever
160+
* character(s) they represent in the provided
161+
* encoding.
162+
*/
163+
164+
try {
165+
166+
// (numChars-i)/3 is an upper bound for the number
167+
// of remaining bytes
168+
if (bytes == null)
169+
bytes = new byte[(numChars-i)/3];
170+
int pos = 0;
171+
172+
while ( ((i+2) < numChars) &&
173+
(c=='%')) {
174+
int v = Integer.parseInt(s.substring(i+1,i+3),16);
175+
if (v < 0)
176+
throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
177+
bytes[pos++] = (byte) v;
178+
i+= 3;
179+
if (i < numChars)
180+
c = s.charAt(i);
181+
}
182+
183+
// A trailing, incomplete byte encoding such as
184+
// "%x" will cause an exception to be thrown
185+
186+
if ((i < numChars) && (c=='%'))
187+
throw new IllegalArgumentException(
188+
"URLDecoder: Incomplete trailing escape (%) pattern");
189+
190+
sb.append(new String(bytes, 0, pos, enc));
191+
} catch (NumberFormatException e) {
192+
throw new IllegalArgumentException(
193+
"URLDecoder: Illegal hex characters in escape (%) pattern - "
194+
+ e.getMessage());
195+
}
196+
needToChange = true;
197+
break;
198+
default:
199+
sb.append(c);
200+
i++;
201+
break;
202+
}
203+
}
204+
205+
return (needToChange? sb.toString() : s);
206+
}
207+
}

0 commit comments

Comments
 (0)