From 53ae900ae36e2fe48e26b7474d62ed96ce910306 Mon Sep 17 00:00:00 2001
From: huntxu <mhuntxu@gmail.com>
Date: Thu, 23 Aug 2012 00:48:03 +0800
Subject: [PATCH 1/2] liblwqq/unicode.c: reduce memory reallocation times in
 ucs4toutf8

Make ucs4toutf8() reallocate memory at most twice (one initial allocation
and one final resize) instead of once per input character, which is more
efficient.

Signed-off-by: huntxu <mhuntxu@gmail.com>
---
 src/liblwqq/unicode.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/liblwqq/unicode.c b/src/liblwqq/unicode.c
index d01c8e4..2cf32da 100644
--- a/src/liblwqq/unicode.c
+++ b/src/liblwqq/unicode.c
@@ -116,7 +116,16 @@ char *ucs4toutf8(const char *from)
         return NULL;
     }
     
-    char *out = NULL;
+    /* Assuming *from at least contains a single byte of '\0', in that case
+     * we return the same.
+     * As strlen("\uXXXX") = 6 >= strlen(utf8("\uXXXX")), the converted
+     * string would never be longer than the original one, thus
+     * (strlen(from) + 1) could be a proper size to be initially allocated. */
+    char *out = s_realloc(NULL, strlen(from) + 1);
+    if (!out) {
+        /* allocation failed */
+        return NULL;
+    }
     int outlen = 0;
     const char *c;
     
@@ -124,19 +133,17 @@ char *ucs4toutf8(const char *from)
         char *s;
         if (*c == '\\' && *(c + 1) == 'u') {
             s = do_ucs4toutf8(c);
-            out = s_realloc(out, outlen + strlen(s) + 1);
             snprintf(out + outlen, strlen(s) + 1, "%s", s);
-            outlen = strlen(out);
+            outlen += strlen(s);
             s_free(s);
             c += 5;
         } else {
-            out = s_realloc(out, outlen + 2);
-            out[outlen] = *c;
-            out[outlen + 1] = '\0';
-            outlen++;
-            continue;
+            out[outlen++] = *c;
         }
     }
 
+    /* always end a string even if it's empty */
+    out[outlen++] = '\0';
+    out = s_realloc(out, outlen);
     return out;
 }

From 95fa791dea94cf87f6edc86abbb675b9de918a5e Mon Sep 17 00:00:00 2001
From: huntxu <mhuntxu@gmail.com>
Date: Thu, 23 Aug 2012 01:11:07 +0800
Subject: [PATCH 2/2] liblwqq/unicode.c: unescape escaped JSON_STRING

In ucs4toutf8(), unescape the "\n" (newline) and "\\" (backslash) escape
sequences in the escaped JSON string that is the input message.

Signed-off-by: huntxu <mhuntxu@gmail.com>
---
 src/liblwqq/unicode.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/liblwqq/unicode.c b/src/liblwqq/unicode.c
index 2cf32da..a1cba7c 100644
--- a/src/liblwqq/unicode.c
+++ b/src/liblwqq/unicode.c
@@ -131,12 +131,28 @@ char *ucs4toutf8(const char *from)
     
     for (c = from; *c != '\0'; ++c) {
         char *s;
-        if (*c == '\\' && *(c + 1) == 'u') {
-            s = do_ucs4toutf8(c);
-            snprintf(out + outlen, strlen(s) + 1, "%s", s);
-            outlen += strlen(s);
-            s_free(s);
-            c += 5;
+        if (*c == '\\') {
+            switch (*(c + 1)) {
+            case 'u':
+                s = do_ucs4toutf8(c);
+                snprintf(out + outlen, strlen(s) + 1, "%s", s);
+                outlen += strlen(s);
+                s_free(s);
+                c += 5;
+                break;
+            case 'n':
+                /* treat "\\n \0" as only "\0" */
+                out[outlen++] = (*(c+3) == '\0') ? '\0' : '\n';
+                c += 1;
+                break;
+            case '\\':
+                c += 1;
+                /* fall through */
+            default:
+                /* XXX: unknown escape, keep the content */
+                out[outlen++] = '\\';
+                break;
+            }
         } else {
             out[outlen++] = *c;
         }