Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 9 additions & 18 deletions src/main/cpp/charsetdecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,6 @@ class TrivialCharsetDecoder : public CharsetDecoder
TrivialCharsetDecoder& operator=(const TrivialCharsetDecoder&);
};


#if LOG4CXX_LOGCHAR_IS_UTF8
typedef TrivialCharsetDecoder UTF8CharsetDecoder;
#else
/**
* Converts from UTF-8 to std::wstring
*
Expand Down Expand Up @@ -333,7 +329,6 @@ class UTF8CharsetDecoder : public CharsetDecoder
UTF8CharsetDecoder(const UTF8CharsetDecoder&);
UTF8CharsetDecoder& operator=(const UTF8CharsetDecoder&);
};
#endif

/**
* Converts from ISO-8859-1 to LogString.
Expand Down Expand Up @@ -504,7 +499,11 @@ CharsetDecoder::~CharsetDecoder()
CharsetDecoder* CharsetDecoder::createDefaultDecoder()
{
#if LOG4CXX_CHARSET_UTF8
#if LOG4CXX_LOGCHAR_IS_UTF8
return new TrivialCharsetDecoder();
#else
return new UTF8CharsetDecoder();
#endif
#elif LOG4CXX_CHARSET_ISO88591 || defined(_WIN32_WCE)
return new ISOLatinCharsetDecoder();
#elif LOG4CXX_CHARSET_USASCII
Expand Down Expand Up @@ -535,19 +534,7 @@ CharsetDecoderPtr CharsetDecoder::getDefaultDecoder()

/**
 * Supplies a decoder for UTF-8 encoded input.
 *
 * NOTE(review): this span comes from a diff rendering that carries no +/- markers.
 * Judging by the hunk header (19 lines before, 7 lines after), the cached
 * `static WideLife` decoder together with its null check appears to be the
 * removed implementation, and the final `std::make_shared` return appears to be
 * its replacement - confirm against the merged file before relying on either path.
 */
CharsetDecoderPtr CharsetDecoder::getUTF8Decoder()
{
static WideLife<CharsetDecoderPtr> decoder(new UTF8CharsetDecoder());

//
// if invoked after static variable destruction
// (if logging is called in the destructor of a static object)
// then create a new decoder.
//
if (decoder.value() == 0)
{
return std::make_shared<UTF8CharsetDecoder>();
}

return decoder;
return std::make_shared<UTF8CharsetDecoder>();
}

CharsetDecoderPtr CharsetDecoder::getISOLatinDecoder()
Expand All @@ -562,7 +549,11 @@ CharsetDecoderPtr CharsetDecoder::getDecoder(const LogString& charset)
StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF8"), LOG4CXX_STR("utf8")) ||
StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("CP65001"), LOG4CXX_STR("cp65001")))
{
#if LOG4CXX_LOGCHAR_IS_UTF8
return std::make_shared<TrivialCharsetDecoder>();
#else
return std::make_shared<UTF8CharsetDecoder>();
#endif
}
else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("C"), LOG4CXX_STR("c")) ||
charset == LOG4CXX_STR("646") ||
Expand Down
2 changes: 1 addition & 1 deletion src/main/cpp/domconfigurator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct DOMConfigurator::DOMConfiguratorPrivate
bool appenderAdded{ false };
AppenderMap appenders;
Pool p;
CharsetDecoderPtr utf8Decoder{ CharsetDecoder::getUTF8Decoder() };
CharsetDecoderPtr utf8Decoder{ CharsetDecoder::getDecoder(LOG4CXX_STR("UTF-8")) };
apr_xml_doc* doc{ nullptr };

public: // ...structor
Expand Down
39 changes: 23 additions & 16 deletions src/main/cpp/inputstreamreader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <log4cxx/helpers/exception.h>
#include <log4cxx/helpers/pool.h>
#include <log4cxx/helpers/bytebuffer.h>
#include <log4cxx/helpers/stringhelper.h>

using namespace LOG4CXX_NS;
using namespace LOG4CXX_NS::helpers;
Expand Down Expand Up @@ -74,30 +75,36 @@ LogString InputStreamReader::read(Pool& p)
const size_t BUFSIZE = 4096;
ByteBuffer buf(p.pstralloc(BUFSIZE), BUFSIZE);
LogString output;
log4cxx_status_t stat{ 0 };

// read whole file
while (m_priv->in->read(buf) >= 0)
{
buf.flip();
log4cxx_status_t stat = m_priv->dec->decode(buf, output);

if (stat != 0)
{
throw IOException(LOG4CXX_STR("decode"), stat);
}

if (buf.remaining() > 0)
auto lastAvailableCount = buf.remaining();
stat = m_priv->dec->decode(buf, output);
if (buf.remaining() == lastAvailableCount)
{
if (buf.remaining() == BUFSIZE)
{
throw IOException(LOG4CXX_STR("Decoder made no progress"));
}
buf.carry();
if (stat == 0)
stat = -1;
break;
}
else
buf.carry();
}
if (stat != 0 && 0 < buf.remaining())
{
auto toHexDigit = [](int ch) -> int
{
buf.clear();
}
return (10 <= ch ? (0x61 - 10) : 0x30) + ch;
};
LogString msg(LOG4CXX_STR("Unable to decode character 0x"));
auto ch = static_cast<unsigned int>(*buf.current());
msg.push_back(toHexDigit((ch & 0xF0) >> 4));
msg.push_back(toHexDigit((ch & 0xF)));
msg += LOG4CXX_STR(" at offset ");
Pool p;
StringHelper::toString(output.size(), p, msg);
throw RuntimeException(msg);
}

return output;
Expand Down
67 changes: 66 additions & 1 deletion src/test/cpp/filetestcase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include <log4cxx/helpers/inputstreamreader.h>
#include <log4cxx/helpers/fileinputstream.h>
#include <log4cxx/helpers/loglog.h>
#include <log4cxx/helpers/bytebuffer.h>
#include <log4cxx/helpers/transcoder.h>

#if LOG4CXX_CFSTRING_API
#include <CoreFoundation/CFString.h>
Expand Down Expand Up @@ -58,6 +60,8 @@ LOGUNIT_CLASS(FileTestCase)
LOGUNIT_TEST(copyConstructor);
LOGUNIT_TEST(assignment);
LOGUNIT_TEST(deleteBackslashedFileName);
LOGUNIT_TEST(testSplitMultibyteUtf8);
LOGUNIT_TEST(testInvalidUtf8);
LOGUNIT_TEST_SUITE_END();

#ifdef _DEBUG
Expand Down Expand Up @@ -102,6 +106,8 @@ LOGUNIT_CLASS(FileTestCase)
}
catch (IOException& ex)
{
LOG4CXX_DECODE_CHAR(msg, ex.what());
LogLog::debug(msg);
}
}

Expand Down Expand Up @@ -206,7 +212,66 @@ LOGUNIT_CLASS(FileTestCase)
Pool pool;
/*bool deleted = */file.deleteFile(pool);
}
};

/**
 * An InputStream test double that serves bytes from a caller supplied
 * character array instead of a file.
 *
 * The array is handed to a ByteBuffer through a const_cast; this class only
 * reads from it. Presumably the ByteBuffer refers to the array rather than
 * copying it, so the caller should keep the array alive while the stream is
 * in use - TODO confirm against ByteBuffer.
 */
class MockInputStream : public InputStream
{
	ByteBuffer m_data;
public:
	/**
	 * @param data the bytes this stream will supply.
	 * @param charCount the number of bytes available at \c data.
	 */
	MockInputStream(const char* data, size_t charCount)
		: m_data(const_cast<char*>(data), charCount)
	{}

	/**
	 * Copies pending bytes into \c dst until either the source is
	 * exhausted or \c dst.put() reports failure (presumably because
	 * \c dst is full).
	 *
	 * @param dst the buffer receiving the bytes.
	 * @return the number of bytes copied, or -1 when no bytes remain.
	 */
	int read(ByteBuffer& dst) override
	{
		const auto availableBytes = m_data.remaining();
		if (availableBytes < 1)
			return -1;
		// An unsigned counter avoids a signed/unsigned comparison with availableBytes
		size_t count = 0;
		for (auto p = m_data.current(); count < availableBytes && dst.put(*p); ++p)
			++count;
		m_data.increment_position(count);
		return static_cast<int>(count);
	}

	/** Nothing to release. */
	void close() override {}
};

/**
* Tests behavior when a multibyte UTF-8 sequence occurs on a read boundary
*/
void testSplitMultibyteUtf8()
{
Pool p;
// InputStreamReader uses a buffer of size 4096
std::string input( 4094, 'A' );
// räksmörgås.josefsson.org
input.append("\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147");
InputStreamReader reader(std::make_shared<MockInputStream>(input.c_str(), input.size()), CharsetDecoder::getUTF8Decoder());
auto contentLS = reader.read(p);
LOG4CXX_ENCODE_CHAR(content, contentLS);
LOGUNIT_ASSERT_EQUAL(input, content);
}

/**
* Tests behavior given an incomplete multibyte UTF-8 sequence in the input
*/
void testInvalidUtf8()
{
Pool p;
// 0xC2 is a generic start byte for a 2-byte sequence in UTF-8.
char input[] = { 'A', (char)0xC2, 'B', 'C', 0 };
InputStreamReader reader(std::make_shared<MockInputStream>(input, 4), CharsetDecoder::getUTF8Decoder());
try
{
reader.read(p);
LOGUNIT_ASSERT(false);
}
catch (const Exception& ex)
{
LOG4CXX_DECODE_CHAR(msg, ex.what());
LogLog::debug(msg);
}
}
};

LOGUNIT_TEST_SUITE_REGISTRATION(FileTestCase);
Loading