diff --git a/regression/cbmc/xml-nonprintable-chars/main.c b/regression/cbmc/xml-nonprintable-chars/main.c new file mode 100644 index 00000000000..2f25ac558c9 --- /dev/null +++ b/regression/cbmc/xml-nonprintable-chars/main.c @@ -0,0 +1,8 @@ +#include + +int main(void) +{ + const unsigned char *str = (unsigned char *)"\x00"; + assert(*str == '\x01'); + return 0; +} diff --git a/regression/cbmc/xml-nonprintable-chars/test.desc b/regression/cbmc/xml-nonprintable-chars/test.desc new file mode 100644 index 00000000000..65b43da4ea5 --- /dev/null +++ b/regression/cbmc/xml-nonprintable-chars/test.desc @@ -0,0 +1,13 @@ +CORE +main.c +--trace --xml-ui +^EXIT=10$ +^SIGNAL=0$ +VERIFICATION FAILED +-- +XML does not support escaping non-printable character +-- +Test that cbmc with --trace --xml-ui handles non-printable characters +gracefully without crashing when they appear in counterexample traces. +This test verifies that the fix for character 0x01 allows XML generation +to complete successfully by using numeric character references. diff --git a/src/util/xml.cpp b/src/util/xml.cpp index 803037b68a1..6781fd38ee8 100644 --- a/src/util/xml.cpp +++ b/src/util/xml.cpp @@ -107,11 +107,16 @@ void xmlt::escape(const std::string &s, std::ostream &out) break; default: - DATA_INVARIANT( - static_cast(ch) >= 32u, - "XML does not support escaping non-printable character " + - std::to_string((unsigned char)ch)); - out << ch; + // Handle non-printable characters by escaping them as numeric character + // references instead of crashing + if(static_cast(ch) < 32u) + { + out << "&#" << std::to_string((unsigned char)ch) << ';'; + } + else + { + out << ch; + } } } } @@ -148,11 +153,16 @@ void xmlt::escape_attribute(const std::string &s, std::ostream &out) break; default: - DATA_INVARIANT( - static_cast(ch) >= 32u, - "XML does not support escaping non-printable character " + - std::to_string((unsigned char)ch)); - out << ch; + // Handle non-printable characters by escaping them as numeric character + // references instead of crashing + if(static_cast(ch) < 32u) + { + out << "&#" << std::to_string((unsigned char)ch) << ';'; + } + else + { + out << ch; + } } } } diff --git a/unit/util/xml.cpp b/unit/util/xml.cpp index d12eaa21391..48712aa9736 100644 --- a/unit/util/xml.cpp +++ b/unit/util/xml.cpp @@ -6,9 +6,12 @@ Author: Thomas Kiley \*******************************************************************/ -#include #include +#include + +#include + TEST_CASE("xml_equal", "[core][util][xml]") { SECTION("Empty xml") @@ -97,3 +100,48 @@ TEST_CASE("xml_equal", "[core][util][xml]") } } } + +TEST_CASE("xml_escape_nonprintable", "[core][util][xml]") +{ + SECTION("Escaping non-printable characters in attributes") + { + xmlt node{"test"}; + node.set_attribute("value", std::string("\x01\x02\x1F", 3)); + + std::ostringstream out; + node.output(out); + + std::string result = out.str(); + // Should contain numeric character references for non-printable chars + REQUIRE(result.find("") != std::string::npos); + REQUIRE(result.find("") != std::string::npos); + REQUIRE(result.find("") != std::string::npos); + } + + SECTION("Escaping non-printable characters in data") + { + xmlt node{"test"}; + node.data = std::string("\x01\x02\x1F", 3); + + std::ostringstream out; + node.output(out); + + std::string result = out.str(); + // Should contain numeric character references for non-printable chars + REQUIRE(result.find("") != std::string::npos); + REQUIRE(result.find("") != std::string::npos); + REQUIRE(result.find("") != std::string::npos); + } + + SECTION("Standard printable characters unchanged") + { + xmlt node{"test"}; + node.set_attribute("value", "Hello World!"); + + std::ostringstream out; + node.output(out); + + std::string result = out.str(); + REQUIRE(result.find("Hello World!") != std::string::npos); + } +}