From 53ae900ae36e2fe48e26b7474d62ed96ce910306 Mon Sep 17 00:00:00 2001 From: huntxu Date: Thu, 23 Aug 2012 00:48:03 +0800 Subject: [PATCH 1/2] liblwqq/unicode.c: reduce memory reallocation times in ucs4toutf8 Make function ucs4toutf8() reallocate the memory at most twice, which makes it more efficient. Signed-off-by: huntxu --- src/liblwqq/unicode.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/liblwqq/unicode.c b/src/liblwqq/unicode.c index d01c8e4..2cf32da 100644 --- a/src/liblwqq/unicode.c +++ b/src/liblwqq/unicode.c @@ -116,7 +116,16 @@ char *ucs4toutf8(const char *from) return NULL; } - char *out = NULL; + /* Assuming *from at least contains a single byte of '\0', in that case + * we return the same. + * As strlen("\uXXXX") = 6 >= strlen(utf8("\uXXXX")), the converted + * string would never be longer than the original one, thus + * (strlen(from) + 1) could be a proper size to be initially allocated. */ + char *out = s_realloc(NULL, strlen(from) + 1); + if (!out) { + /* allocation failed */ + return NULL; + } int outlen = 0; const char *c; @@ -124,19 +133,17 @@ char *ucs4toutf8(const char *from) char *s; if (*c == '\\' && *(c + 1) == 'u') { s = do_ucs4toutf8(c); - out = s_realloc(out, outlen + strlen(s) + 1); snprintf(out + outlen, strlen(s) + 1, "%s", s); - outlen = strlen(out); + outlen += strlen(s); s_free(s); c += 5; } else { - out = s_realloc(out, outlen + 2); - out[outlen] = *c; - out[outlen + 1] = '\0'; - outlen++; - continue; + out[outlen++] = *c; } } + /* always end a string even if it's empty */ + out[outlen++] = '\0'; + out = s_realloc(out, outlen); return out; } From 95fa791dea94cf87f6edc86abbb675b9de918a5e Mon Sep 17 00:00:00 2001 From: huntxu Date: Thu, 23 Aug 2012 01:11:07 +0800 Subject: [PATCH 2/2] liblwqq/unicode.c: unescape escaped JSON_STRING In function ucs4toutf8(), unescape the escaped newline ("\n") and the escaped backslash ("\\") in the escaped JSON_STRING that is the input message.
Signed-off-by: huntxu --- src/liblwqq/unicode.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/liblwqq/unicode.c b/src/liblwqq/unicode.c index 2cf32da..a1cba7c 100644 --- a/src/liblwqq/unicode.c +++ b/src/liblwqq/unicode.c @@ -131,12 +131,28 @@ char *ucs4toutf8(const char *from) for (c = from; *c != '\0'; ++c) { char *s; - if (*c == '\\' && *(c + 1) == 'u') { - s = do_ucs4toutf8(c); - snprintf(out + outlen, strlen(s) + 1, "%s", s); - outlen += strlen(s); - s_free(s); - c += 5; + if (*c == '\\') { + switch (*(c + 1)) { + case 'u': + s = do_ucs4toutf8(c); + snprintf(out + outlen, strlen(s) + 1, "%s", s); + outlen += strlen(s); + s_free(s); + c += 5; + break; + case 'n': + /* treat "\\n \0" as only "\0" */ + out[outlen++] = (*(c+3) == '\0') ? '\0' : '\n'; + c += 1; + break; + case '\\': + c += 1; + /* fall through */ + default: + /* XXX: unknown escape, keep the content */ + out[outlen++] = '\\'; + break; + } } else { out[outlen++] = *c; }