@@ -18,124 +18,122 @@ Author: Daniel Kroening, kroening@kroening.com
1818#include " invariant.h"
1919
#ifdef _WIN32
// clang-format off
// The order of the includes below is significant and must not be changed by
// automatic include sorting: <windows.h> has to sit between pragma_push.def
// and pragma_pop.def so that the warnings disabled here are still disabled
// while <windows.h> is being processed.
#  include <util/pragma_push.def>
#  ifdef _MSC_VER
#    pragma warning(disable : 4668)
// using #if/#elif on undefined macro
#    pragma warning(disable : 5039)
// pointer or reference to potentially throwing function passed to extern C
#  endif
#  include <windows.h>
#  include <util/pragma_pop.def>
// clang-format on
#endif
3131
/// Converts a null-terminated wide-character string to a narrow string.
/// On Windows the conversion is done by WideCharToMultiByte with CP_UTF8.
/// On all other platforms this is a dummy conversion that casts every wide
/// character to a single char.
/// \param s: null-terminated wide-character string; it is dereferenced
///   unconditionally
/// \return the converted narrow string
std::string narrow(const wchar_t *s)
{
#ifdef _WIN32

  const int slength = static_cast<int>(wcslen(s));
  // first call, without an output buffer, yields the length of the result
  const int rlength =
    WideCharToMultiByte(CP_UTF8, 0, s, slength, nullptr, 0, nullptr, nullptr);
  std::string r(rlength, 0);
  // second call writes the converted characters into the sized buffer
  WideCharToMultiByte(
    CP_UTF8, 0, s, slength, &r[0], rlength, nullptr, nullptr);
  return r;

#else
  // dummy conversion
  std::string r;
  r.reserve(wcslen(s));
  for(; *s != 0; ++s)
    r += static_cast<char>(*s);

  return r;
#endif
}
5656
/// Converts a null-terminated narrow string to a wide-character string.
/// On Windows the conversion is done by MultiByteToWideChar with CP_UTF8.
/// On all other platforms this is a dummy conversion that converts every
/// char to one wchar_t.
/// \param s: null-terminated narrow string; it is dereferenced
///   unconditionally
/// \return the converted wide-character string
std::wstring widen(const char *s)
{
#ifdef _WIN32

  const int slength = static_cast<int>(strlen(s));
  // first call, without an output buffer, yields the length of the result
  const int rlength =
    MultiByteToWideChar(CP_UTF8, 0, s, slength, nullptr, 0);
  std::wstring r(rlength, 0);
  // second call writes the converted characters into the sized buffer
  MultiByteToWideChar(CP_UTF8, 0, s, slength, &r[0], rlength);
  return r;

#else
  // dummy conversion
  std::wstring r;
  r.reserve(strlen(s));
  for(; *s != 0; ++s)
    r += static_cast<wchar_t>(*s);

  return r;
#endif
}
8180
/// Converts a wide-character string to a narrow string.
/// On Windows the conversion is done by WideCharToMultiByte with CP_UTF8.
/// On all other platforms this is a dummy conversion that converts the
/// string element by element.
/// \param s: wide-character string to convert
/// \return the converted narrow string
std::string narrow(const std::wstring &s)
{
#ifdef _WIN32

  const int slength = static_cast<int>(s.size());
  // first call, without an output buffer, yields the length of the result
  const int rlength = WideCharToMultiByte(
    CP_UTF8, 0, s.data(), slength, nullptr, 0, nullptr, nullptr);
  std::string r(rlength, 0);
  // second call writes the converted characters into the sized buffer
  WideCharToMultiByte(
    CP_UTF8, 0, s.data(), slength, &r[0], rlength, nullptr, nullptr);
  return r;

#else
  // dummy conversion
  return std::string(s.begin(), s.end());
#endif
}
9897
/// Converts a narrow string to a wide-character string.
/// On Windows the conversion is done by MultiByteToWideChar with CP_UTF8.
/// On all other platforms this is a dummy conversion that converts the
/// string element by element.
/// \param s: narrow string to convert
/// \return the converted wide-character string
std::wstring widen(const std::string &s)
{
#ifdef _WIN32

  const int slength = static_cast<int>(s.size());
  // first call, without an output buffer, yields the length of the result
  const int rlength =
    MultiByteToWideChar(CP_UTF8, 0, s.data(), slength, nullptr, 0);
  std::wstring r(rlength, 0);
  // second call writes the converted characters into the sized buffer
  MultiByteToWideChar(CP_UTF8, 0, s.data(), slength, &r[0], rlength);
  return r;

#else
  // dummy conversion
  return std::wstring(s.begin(), s.end());
#endif
}
115113
/// Appends a unicode character to a utf8-encoded string
/// The character is written as one byte if it is at most 0x7f, two bytes if
/// it is at most 0x7ff, three bytes if it is at most 0xffff, and four bytes
/// otherwise. No check is made that \p c is a valid unicode code point.
/// \param c: character to append
/// \param result: string to append to
static void utf8_append_code(unsigned int c, std::string &result)
{
  // every byte after the first carries six payload bits tagged with 10xxxxxx
  const auto continuation = [](unsigned int bits) {
    return static_cast<char>((bits & 0x3f) | 0x80);
  };

  if(c <= 0x7f)
  {
    result += static_cast<char>(c);
  }
  else if(c <= 0x7ff)
  {
    result += static_cast<char>((c >> 6) | 0xc0);
    result += continuation(c);
  }
  else if(c <= 0xffff)
  {
    result += static_cast<char>((c >> 12) | 0xe0);
    result += continuation(c >> 6);
    result += continuation(c);
  }
  else
  {
    result += static_cast<char>((c >> 18) | 0xf0);
    result += continuation(c >> 12);
    result += continuation(c >> 6);
    result += continuation(c);
  }
}
141139
@@ -156,13 +154,13 @@ utf32_native_endian_to_utf8(const std::basic_string<unsigned int> &s)
156154
157155std::vector<std::string> narrow_argv (int argc, const wchar_t **argv_wide)
158156{
159- if (argv_wide== nullptr )
157+ if (argv_wide == nullptr )
160158 return std::vector<std::string>();
161159
162160 std::vector<std::string> argv_narrow;
163161 argv_narrow.reserve (argc);
164162
165- for (int i= 0 ; i!= argc; ++i)
163+ for (int i = 0 ; i != argc; ++i)
166164 argv_narrow.push_back (narrow (argv_wide[i]));
167165
168166 return argv_narrow;
@@ -173,7 +171,7 @@ static void utf16_append_code(unsigned int code, std::wstring &result)
173171 // we do not treat 0xD800 to 0xDFFF, although
174172 // they are not valid unicode symbols
175173
176- if (code< 0xFFFF )
174+ if (code < 0xFFFF )
177175 {
178176 // code is encoded as one UTF16 character
179177 result += static_cast <wchar_t >(code);
@@ -185,76 +183,75 @@ static void utf16_append_code(unsigned int code, std::wstring &result)
185183 // but let's not check it programmatically
186184
187185 // encode the code in UTF16
188- code= code- 0x10000 ;
186+ code = code - 0x10000 ;
189187 const uint16_t i1 = static_cast <uint16_t >(((code >> 10 ) & 0x3ff ) | 0xD800 );
190188 result += static_cast <wchar_t >(i1);
191189 const uint16_t i2 = static_cast <uint16_t >((code & 0x3ff ) | 0xDC00 );
192190 result += static_cast <wchar_t >(i2);
193191 }
194192}
195193
196-
197194// / Convert UTF8-encoded string to UTF-16 with architecture-native endianness.
198195// / \par parameters: String in UTF-8 format
199196// / \return String in UTF-16 format. The encoding follows the endianness of the
200197// / architecture iff swap_bytes is true.
201198std::wstring utf8_to_utf16_native_endian (const std::string &in)
202199{
203- std::wstring result;
204- result.reserve (in.size ());
205- std::string::size_type i=0 ;
206- while (i<in.size ())
200+ std::wstring result;
201+ result.reserve (in.size ());
202+ std::string::size_type i = 0 ;
203+ while (i < in.size ())
204+ {
205+ unsigned char c = in[i++];
206+ unsigned int code = 0 ;
207+ // the ifs that follow find out how many UTF8 characters (1-4) store the
208+ // next unicode character. This is determined by the few most
209+ // significant bits.
210+ if (c <= 0x7F )
207211 {
208- unsigned char c=in[i++];
209- unsigned int code=0 ;
210- // the ifs that follow find out how many UTF8 characters (1-4) store the
211- // next unicode character. This is determined by the few most
212- // significant bits.
213- if (c<=0x7F )
214- {
215- // if it's one character, then code is exactly the value
216- code=c;
217- }
218- else if (c<=0xDF && i<in.size ())
219- { // in other cases, we need to read the right number of chars and decode
220- // note: if we wanted to make sure that we capture incorrect strings,
221- // we should check that whatever follows first character starts with
222- // bits 10.
223- code = (c & 0x1Fu ) << 6 ;
224- c=in[i++];
225- code += c & 0x3Fu ;
226- }
227- else if (c<=0xEF && i+1 <in.size ())
228- {
229- code = (c & 0xFu ) << 12 ;
230- c=in[i++];
231- code += (c & 0x3Fu ) << 6 ;
232- c=in[i++];
233- code += c & 0x3Fu ;
234- }
235- else if (c<=0xF7 && i+2 <in.size ())
236- {
237- code = (c & 0x7u ) << 18 ;
238- c=in[i++];
239- code += (c & 0x3Fu ) << 12 ;
240- c=in[i++];
241- code += (c & 0x3Fu ) << 6 ;
242- c=in[i++];
243- code += c & 0x3Fu ;
244- }
245- else
246- {
247- // The string is not a valid UTF8 string! Either it has some characters
248- // missing from a multi-character unicode symbol, or it has a char with
249- // too high value.
250- // For now, let's replace the character with a space
251- code=32 ;
252- }
253-
254- utf16_append_code (code, result);
212+ // if it's one character, then code is exactly the value
213+ code = c;
214+ }
215+ else if (c <= 0xDF && i < in.size ())
216+ { // in other cases, we need to read the right number of chars and decode
217+ // note: if we wanted to make sure that we capture incorrect strings,
218+ // we should check that whatever follows first character starts with
219+ // bits 10.
220+ code = (c & 0x1Fu ) << 6 ;
221+ c = in[i++];
222+ code += c & 0x3Fu ;
223+ }
224+ else if (c <= 0xEF && i + 1 < in.size ())
225+ {
226+ code = (c & 0xFu ) << 12 ;
227+ c = in[i++];
228+ code += (c & 0x3Fu ) << 6 ;
229+ c = in[i++];
230+ code += c & 0x3Fu ;
231+ }
232+ else if (c <= 0xF7 && i + 2 < in.size ())
233+ {
234+ code = (c & 0x7u ) << 18 ;
235+ c = in[i++];
236+ code += (c & 0x3Fu ) << 12 ;
237+ c = in[i++];
238+ code += (c & 0x3Fu ) << 6 ;
239+ c = in[i++];
240+ code += c & 0x3Fu ;
241+ }
242+ else
243+ {
244+ // The string is not a valid UTF8 string! Either it has some characters
245+ // missing from a multi-character unicode symbol, or it has a char with
246+ // too high value.
247+ // For now, let's replace the character with a space
248+ code = 32 ;
255249 }
256250
257- return result;
251+ utf16_append_code (code, result);
252+ }
253+
254+ return result;
258255}
259256
260257// / Escapes non-printable characters, whitespace except for spaces, double
0 commit comments