Skip to content

Commit 6a6a6bd

Browse files
committed
Add more robust type name to type identifier conversion
In order to be certain that we don't emit C harnesses containing invalid identifiers. The unit test includes both a function to generate the string of all printable characters and the same string as a string literal, in order to show that the string does include all printable characters and to show what these characters are.
1 parent 83e2756 commit 6a6a6bd

File tree

2 files changed

+80
-18
lines changed

2 files changed

+80
-18
lines changed

src/ansi-c/type2name.cpp

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ Author: Daniel Kroening, kroening@cs.cmu.edu
1919
#include <util/std_types.h>
2020
#include <util/symbol_table.h>
2121

22+
#include <regex>
23+
2224
typedef std::unordered_map<irep_idt, std::pair<size_t, bool>> symbol_numbert;
2325

2426
static std::string type2name(
@@ -279,26 +281,47 @@ std::string type2name(const typet &type, const namespacet &ns)
279281
/// This replaces some invalid characters that can appear in type2name output.
280282
std::string type_name2type_identifier(const std::string &name)
281283
{
282-
std::string result{};
283-
for(char c : name)
284-
{
285-
switch(c)
284+
const auto replace_special_characters = [](const std::string &name) {
285+
std::string result{};
286+
for(char c : name)
286287
{
287-
case '*':
288-
result += "_ptr_";
289-
break;
290-
case '{':
291-
result += "_start_sub_";
292-
break;
293-
case '}':
294-
result += "_end_sub_";
295-
break;
296-
default:
297-
result += c;
298-
break;
288+
switch(c)
289+
{
290+
case '*':
291+
result += "_ptr_";
292+
break;
293+
case '{':
294+
result += "_start_sub_";
295+
break;
296+
case '}':
297+
result += "_end_sub_";
298+
break;
299+
default:
300+
result += c;
301+
break;
302+
}
299303
}
300-
}
301-
return result;
304+
return result;
305+
};
306+
const auto replace_invalid_characters_with_underscore =
307+
[](const std::string &identifier) {
308+
static const std::regex non_alpha_numeric{"[^A-Za-z0-9]"};
309+
return std::regex_replace(identifier, non_alpha_numeric, "_");
310+
};
311+
const auto remove_duplicate_underscores = [](const std::string &identifier) {
312+
static const std::regex duplicate_underscore{"_+"};
313+
return std::regex_replace(identifier, duplicate_underscore, "_");
314+
};
315+
const auto strip_leading_non_letters = [](const std::string &identifier) {
316+
static const std::regex identifier_regex{"[A-Za-z][A-Za-z0-9_]*"};
317+
std::smatch match_results;
318+
bool found = std::regex_search(identifier, match_results, identifier_regex);
319+
POSTCONDITION(found);
320+
return match_results.str(0);
321+
};
322+
return strip_leading_non_letters(
323+
remove_duplicate_underscores(replace_invalid_characters_with_underscore(
324+
replace_special_characters(name))));
302325
}
303326

304327
std::string type2identifier(const typet &type, const namespacet &ns)

unit/ansi-c/type2name.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ Author: Thomas Spriggs
88

99
#include <testing-utils/use_catch.h>
1010

11+
#include <numeric>
12+
1113
extern std::string type_name2type_identifier(const std::string &name);
1214

1315
TEST_CASE(
@@ -24,3 +26,40 @@ TEST_CASE(
2426
CHECK(type_name2type_identifier("char*") == "char_ptr_");
2527
CHECK(type_name2type_identifier("foo{bar}") == "foo_start_sub_bar_end_sub_");
2628
}
29+
30+
/**
 * Builds the string of character codes 32 (space) up to and including 126
 * (tilde) in ascending order. Code 127 (DEL) is not printable and is therefore
 * left out.
 * @return A string containing all 7-bit ascii printable characters.
 */
static std::string all_printable_characters()
{
  const char first_printable = 32;
  const char last_printable = 126;
  // Both bounds are inclusive, hence the `+ 1` when computing the length.
  std::string printable_characters(last_printable - first_printable + 1, ' ');
  std::iota(
    printable_characters.begin(), printable_characters.end(), first_printable);
  return printable_characters;
}
41+
42+
// Converts the string of all printable characters and compares the result
// against the expected identifier.
TEST_CASE(
  "type_name2type_identifier invalid characters",
  "[core][ansi-c][type_name2type_identifier]")
{
  const std::string input = all_printable_characters();
  // The generated input is spelled out as a literal so that the reader can see
  // exactly which characters are being converted.
  const std::string expected_input =
    R"( !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`)"
    R"(abcdefghijklmnopqrstuvwxyz{|}~)";
  CHECK(input == expected_input);

  const std::string expected_identifier =
    "ptr_0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz_"
    "start_sub_end_sub_";
  CHECK(type_name2type_identifier(input) == expected_identifier);
}
56+
57+
// Digits in front of the first letter are expected to be dropped, whereas
// digits further into the name are expected to be kept.
TEST_CASE(
  "type_name2type_identifier leading digits",
  "[core][ansi-c][type_name2type_identifier]")
{
  const std::string input = "0123456789_banana_0123456789_split_0123456789";
  const std::string expected_identifier = "banana_0123456789_split_0123456789";
  CHECK(type_name2type_identifier(input) == expected_identifier);
}

0 commit comments

Comments
 (0)