Skip to content

Commit f1f853c

Browse files
committed
clang-format unicode.h and unicode.cpp
This is in preparation for the next commit, which would otherwise have a lot of whitespace changes.
1 parent e4c80a5 commit f1f853c

File tree

2 files changed

+103
-109
lines changed

2 files changed

+103
-109
lines changed

src/util/unicode.cpp

Lines changed: 101 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -18,124 +18,122 @@ Author: Daniel Kroening, kroening@kroening.com
1818
#include "invariant.h"
1919

2020
#ifdef _WIN32
21-
#include <util/pragma_push.def>
22-
#ifdef _MSC_VER
23-
#pragma warning(disable:4668)
24-
// using #if/#elif on undefined macro
25-
#pragma warning(disable : 5039)
21+
# include <util/pragma_push.def>
22+
# ifdef _MSC_VER
23+
# pragma warning(disable : 4668)
24+
// using #if/#elif on undefined macro
25+
# pragma warning(disable : 5039)
2626
// pointer or reference to potentially throwing function passed to extern C
27-
#endif
28-
#include <windows.h>
29-
#include <util/pragma_pop.def>
27+
# endif
28+
# include <util/pragma_pop.def>
29+
# include <windows.h>
3030
#endif
3131

3232
std::string narrow(const wchar_t *s)
3333
{
34-
#ifdef _WIN32
34+
#ifdef _WIN32
3535

36-
int slength=static_cast<int>(wcslen(s));
37-
int rlength=
36+
int slength = static_cast<int>(wcslen(s));
37+
int rlength =
3838
WideCharToMultiByte(CP_UTF8, 0, s, slength, NULL, 0, NULL, NULL);
3939
std::string r(rlength, 0);
4040
WideCharToMultiByte(CP_UTF8, 0, s, slength, &r[0], rlength, NULL, NULL);
4141
return r;
4242

43-
#else
43+
#else
4444
// dummy conversion
4545
std::string r;
4646
r.reserve(wcslen(s));
47-
while(*s!=0)
47+
while(*s != 0)
4848
{
49-
r+=static_cast<char>(*s);
49+
r += static_cast<char>(*s);
5050
s++;
5151
}
5252

5353
return r;
54-
#endif
54+
#endif
5555
}
5656

5757
std::wstring widen(const char *s)
5858
{
59-
#ifdef _WIN32
59+
#ifdef _WIN32
6060

61-
int slength=static_cast<int>(strlen(s));
62-
int rlength=
63-
MultiByteToWideChar(CP_UTF8, 0, s, slength, NULL, 0);
61+
int slength = static_cast<int>(strlen(s));
62+
int rlength = MultiByteToWideChar(CP_UTF8, 0, s, slength, NULL, 0);
6463
std::wstring r(rlength, 0);
6564
MultiByteToWideChar(CP_UTF8, 0, s, slength, &r[0], rlength);
6665
return r;
6766

68-
#else
67+
#else
6968
// dummy conversion
7069
std::wstring r;
7170
r.reserve(strlen(s));
72-
while(*s!=0)
71+
while(*s != 0)
7372
{
74-
r+=wchar_t(*s);
73+
r += wchar_t(*s);
7574
s++;
7675
}
7776

7877
return r;
79-
#endif
78+
#endif
8079
}
8180

8281
std::string narrow(const std::wstring &s)
8382
{
84-
#ifdef _WIN32
83+
#ifdef _WIN32
8584

86-
int slength=static_cast<int>(s.size());
87-
int rlength=
85+
int slength = static_cast<int>(s.size());
86+
int rlength =
8887
WideCharToMultiByte(CP_UTF8, 0, &s[0], slength, NULL, 0, NULL, NULL);
8988
std::string r(rlength, 0);
9089
WideCharToMultiByte(CP_UTF8, 0, &s[0], slength, &r[0], rlength, NULL, NULL);
9190
return r;
9291

93-
#else
92+
#else
9493
// dummy conversion
9594
return std::string(s.begin(), s.end());
96-
#endif
95+
#endif
9796
}
9897

9998
std::wstring widen(const std::string &s)
10099
{
101-
#ifdef _WIN32
100+
#ifdef _WIN32
102101

103-
int slength=static_cast<int>(s.size());
104-
int rlength=
105-
MultiByteToWideChar(CP_UTF8, 0, &s[0], slength, NULL, 0);
102+
int slength = static_cast<int>(s.size());
103+
int rlength = MultiByteToWideChar(CP_UTF8, 0, &s[0], slength, NULL, 0);
106104
std::wstring r(rlength, 0);
107105
MultiByteToWideChar(CP_UTF8, 0, &s[0], slength, &r[0], rlength);
108106
return r;
109107

110-
#else
108+
#else
111109
// dummy conversion
112110
return std::wstring(s.begin(), s.end());
113-
#endif
111+
#endif
114112
}
115113

116114
/// Appends a unicode character to a utf8-encoded string
117115
/// \par parameters: character to append, string to append to
118116
static void utf8_append_code(unsigned int c, std::string &result)
119117
{
120-
if(c<=0x7f)
121-
result+=static_cast<char>(c);
122-
else if(c<=0x7ff)
118+
if(c <= 0x7f)
119+
result += static_cast<char>(c);
120+
else if(c <= 0x7ff)
123121
{
124-
result+=static_cast<char>((c >> 6) | 0xc0);
125-
result+=static_cast<char>((c &0x3f) | 0x80);
122+
result += static_cast<char>((c >> 6) | 0xc0);
123+
result += static_cast<char>((c & 0x3f) | 0x80);
126124
}
127-
else if(c<=0xffff)
125+
else if(c <= 0xffff)
128126
{
129-
result+=static_cast<char>((c >> 12) | 0xe0);
130-
result+=static_cast<char>(((c >> 6) &0x3f) | 0x80);
131-
result+=static_cast<char>((c &0x3f) | 0x80);
127+
result += static_cast<char>((c >> 12) | 0xe0);
128+
result += static_cast<char>(((c >> 6) & 0x3f) | 0x80);
129+
result += static_cast<char>((c & 0x3f) | 0x80);
132130
}
133131
else
134132
{
135-
result+=static_cast<char>((c >> 18) | 0xf0);
136-
result+=static_cast<char>(((c >> 12) &0x3f)| 0x80);
137-
result+=static_cast<char>(((c >> 6) &0x3f) | 0x80);
138-
result+=static_cast<char>((c &0x3f) | 0x80);
133+
result += static_cast<char>((c >> 18) | 0xf0);
134+
result += static_cast<char>(((c >> 12) & 0x3f) | 0x80);
135+
result += static_cast<char>(((c >> 6) & 0x3f) | 0x80);
136+
result += static_cast<char>((c & 0x3f) | 0x80);
139137
}
140138
}
141139

@@ -156,13 +154,13 @@ utf32_native_endian_to_utf8(const std::basic_string<unsigned int> &s)
156154

157155
std::vector<std::string> narrow_argv(int argc, const wchar_t **argv_wide)
158156
{
159-
if(argv_wide==nullptr)
157+
if(argv_wide == nullptr)
160158
return std::vector<std::string>();
161159

162160
std::vector<std::string> argv_narrow;
163161
argv_narrow.reserve(argc);
164162

165-
for(int i=0; i!=argc; ++i)
163+
for(int i = 0; i != argc; ++i)
166164
argv_narrow.push_back(narrow(argv_wide[i]));
167165

168166
return argv_narrow;
@@ -173,7 +171,7 @@ static void utf16_append_code(unsigned int code, std::wstring &result)
173171
// we do not handle the surrogate range 0xD800 to 0xDFFF specially, although
174172
// they are not valid unicode symbols
175173

176-
if(code<0xFFFF)
174+
if(code < 0xFFFF)
177175
{
178176
// code is encoded as one UTF16 character
179177
result += static_cast<wchar_t>(code);
@@ -185,76 +183,75 @@ static void utf16_append_code(unsigned int code, std::wstring &result)
185183
// but let's not check it programmatically
186184

187185
// encode the code in UTF16
188-
code=code-0x10000;
186+
code = code - 0x10000;
189187
const uint16_t i1 = static_cast<uint16_t>(((code >> 10) & 0x3ff) | 0xD800);
190188
result += static_cast<wchar_t>(i1);
191189
const uint16_t i2 = static_cast<uint16_t>((code & 0x3ff) | 0xDC00);
192190
result += static_cast<wchar_t>(i2);
193191
}
194192
}
195193

196-
197194
/// Convert UTF8-encoded string to UTF-16 with architecture-native endianness.
198195
/// \par parameters: String in UTF-8 format
199196
/// \return String in UTF-16 format. The encoding follows the endianness of the
200197
/// architecture; no byte swapping is performed.
201198
std::wstring utf8_to_utf16_native_endian(const std::string &in)
202199
{
203-
std::wstring result;
204-
result.reserve(in.size());
205-
std::string::size_type i=0;
206-
while(i<in.size())
200+
std::wstring result;
201+
result.reserve(in.size());
202+
std::string::size_type i = 0;
203+
while(i < in.size())
204+
{
205+
unsigned char c = in[i++];
206+
unsigned int code = 0;
207+
// the ifs that follow find out how many UTF8 characters (1-4) store the
208+
// next unicode character. This is determined by the few most
209+
// significant bits.
210+
if(c <= 0x7F)
207211
{
208-
unsigned char c=in[i++];
209-
unsigned int code=0;
210-
// the ifs that follow find out how many UTF8 characters (1-4) store the
211-
// next unicode character. This is determined by the few most
212-
// significant bits.
213-
if(c<=0x7F)
214-
{
215-
// if it's one character, then code is exactly the value
216-
code=c;
217-
}
218-
else if(c<=0xDF && i<in.size())
219-
{ // in other cases, we need to read the right number of chars and decode
220-
// note: if we wanted to make sure that we capture incorrect strings,
221-
// we should check that whatever follows first character starts with
222-
// bits 10.
223-
code = (c & 0x1Fu) << 6;
224-
c=in[i++];
225-
code += c & 0x3Fu;
226-
}
227-
else if(c<=0xEF && i+1<in.size())
228-
{
229-
code = (c & 0xFu) << 12;
230-
c=in[i++];
231-
code += (c & 0x3Fu) << 6;
232-
c=in[i++];
233-
code += c & 0x3Fu;
234-
}
235-
else if(c<=0xF7 && i+2<in.size())
236-
{
237-
code = (c & 0x7u) << 18;
238-
c=in[i++];
239-
code += (c & 0x3Fu) << 12;
240-
c=in[i++];
241-
code += (c & 0x3Fu) << 6;
242-
c=in[i++];
243-
code += c & 0x3Fu;
244-
}
245-
else
246-
{
247-
// The string is not a valid UTF8 string! Either it has some characters
248-
// missing from a multi-character unicode symbol, or it has a char with
249-
// too high value.
250-
// For now, let's replace the character with a space
251-
code=32;
252-
}
253-
254-
utf16_append_code(code, result);
212+
// if it's one character, then code is exactly the value
213+
code = c;
214+
}
215+
else if(c <= 0xDF && i < in.size())
216+
{ // in other cases, we need to read the right number of chars and decode
217+
// note: if we wanted to make sure that we capture incorrect strings,
218+
// we should check that whatever follows first character starts with
219+
// bits 10.
220+
code = (c & 0x1Fu) << 6;
221+
c = in[i++];
222+
code += c & 0x3Fu;
223+
}
224+
else if(c <= 0xEF && i + 1 < in.size())
225+
{
226+
code = (c & 0xFu) << 12;
227+
c = in[i++];
228+
code += (c & 0x3Fu) << 6;
229+
c = in[i++];
230+
code += c & 0x3Fu;
231+
}
232+
else if(c <= 0xF7 && i + 2 < in.size())
233+
{
234+
code = (c & 0x7u) << 18;
235+
c = in[i++];
236+
code += (c & 0x3Fu) << 12;
237+
c = in[i++];
238+
code += (c & 0x3Fu) << 6;
239+
c = in[i++];
240+
code += c & 0x3Fu;
241+
}
242+
else
243+
{
244+
// The string is not a valid UTF8 string! Either it has some characters
245+
// missing from a multi-character unicode symbol, or it has a char with
246+
// too high value.
247+
// For now, let's replace the character with a space
248+
code = 32;
255249
}
256250

257-
return result;
251+
utf16_append_code(code, result);
252+
}
253+
254+
return result;
258255
}
259256

260257
/// Escapes non-printable characters, whitespace except for spaces, double

src/util/unicode.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ Author: Daniel Kroening, kroening@kroening.com
66
77
\*******************************************************************/
88

9-
109
#ifndef CPROVER_UTIL_UNICODE_H
1110
#define CPROVER_UTIL_UNICODE_H
1211

@@ -57,10 +56,8 @@ std::vector<const char *> to_c_str_array(It b, It e)
5756
{
5857
// Assumes that walking the range will be faster than repeated allocation
5958
std::vector<const char *> ret(std::distance(b, e) + 1, nullptr);
60-
std::transform(b, e, std::begin(ret), [] (const std::string & s)
61-
{
62-
return s.c_str();
63-
});
59+
std::transform(
60+
b, e, std::begin(ret), [](const std::string &s) { return s.c_str(); });
6461
return ret;
6562
}
6663

0 commit comments

Comments
 (0)