From 9c2a2c4e5b5aeb161dffaefbcb6a953bc401b2d6 Mon Sep 17 00:00:00 2001 From: Ben Fisher Date: Wed, 1 Mar 2017 14:10:21 -0800 Subject: [PATCH] Splitting string that contains consecutive delimiters Expected behavior: split("a SPLITSPLIT b", "SPLIT") ---> ["a ", "", " b"] Observed behavior: split("a SPLITSPLIT b", "SPLIT") ---> ["a ", "SPLIT b"] Within the loop in bsplitstrcb, i is sent too far forward upon finding a match. Here is a proposed fix including tests, open to suggestions. A few more details in the GH pull request. --- bstest.c | 16 ++++++++++++++++ bstrlib.c | 6 +++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/bstest.c b/bstest.c index 773768d..4ef40ba 100644 --- a/bstest.c +++ b/bstest.c @@ -1680,6 +1680,12 @@ static int test21 (void) { struct tagbstring is = bsStatic ("is"); struct tagbstring ng = bsStatic ("ng"); struct tagbstring commas = bsStatic (",,,,"); +struct tagbstring delim = bsStatic ("aa"); +struct tagbstring beginWithDelim = bsStatic ("aaabcdaa1"); +struct tagbstring endWithDelim = bsStatic ("1aaabcdaa"); +struct tagbstring conseqDelim = bsStatic ("1aaaa1"); +struct tagbstring oneCharLeft = bsStatic ("aaaaaaa"); +struct tagbstring allDelim = bsStatic ("aaaaaa"); int ret = 0; printf ("TEST: struct bstrList * bsplit (const_bstring str, unsigned char splitChar);\n"); @@ -1707,6 +1713,16 @@ int ret = 0; ret += test21_1 (&longBstring, &is, 3); ret += test21_1 (&longBstring, &ng, 5); + /* corner cases */ + ret += test21_1 (&shortBstring, &emptyBstring, shortBstring.slen); + ret += test21_1 (&emptyBstring, &delim, 1); + ret += test21_1 (&delim, &delim, 2); + ret += test21_1 (&beginWithDelim, &delim, 3); + ret += test21_1 (&endWithDelim, &delim, 3); + ret += test21_1 (&conseqDelim, &delim, 3); + ret += test21_1 (&oneCharLeft, &delim, 4); + ret += test21_1 (&allDelim, &delim, 4); + if (0 == ret) { struct bstrList * l; unsigned char c; diff --git a/bstrlib.c b/bstrlib.c index 82fbc05..2f1f275 100644 --- a/bstrlib.c +++ b/bstrlib.c @@ -2768,13 +2768,17 @@ int i, p, ret; if (splitStr->slen == 1) return bsplitcb (str, splitStr->data[0], pos, cb, parm); - for (i=p=pos; i <= str->slen - splitStr->slen; i++) { + i = p = pos; + while (i <= str->slen - splitStr->slen) { if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) { if ((ret = cb (parm, p, i - p)) < 0) return ret; i += splitStr->slen; p = i; } + else { + i++; + } } if ((ret = cb (parm, p, str->slen - p)) < 0) return ret; return BSTR_OK;