diff --git a/NEWS.md b/NEWS.md index 197b16e2c..ea2a8a8ee 100644 --- a/NEWS.md +++ b/NEWS.md @@ -28,6 +28,8 @@ 4. `sum()` by group is correct with missing entries and GForce activated ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report and @manmita for the fix. The issue was caused by a faulty early `break` that spilled between groups, and resulted in silently incorrect results! +5. `fread(text=)` could segfault when reading text input ending with a `\x1a` (ASCII SUB) character after a long line, [#7407](https://github.com/Rdatatable/data.table/issues/7407). This is fixed by adding a check for end of input. Thanks to @aitap for the report and @manmita for the fix. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 58f9b021f..1e2b985a4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22031,3 +22031,7 @@ if (test_bit64) local({ merged$gforce_mean, merged$true_mean ) }) + +# 7407 Test for fread() handling \x1A (ASCII SUB) at end of input +txt = paste0("foo\n", strrep("a", 4096 * 100), "\x1A") +test(2359.1, nchar(fread(txt)$foo), 409600L) diff --git a/src/fread.c b/src/fread.c index 077341aa0..5c2107f4f 100644 --- a/src/fread.c +++ b/src/fread.c @@ -349,9 +349,9 @@ static inline bool end_of_field(const char *ch) // We use eol() because that looks at eol_one_r inside it w.r.t. \r // \0 (maybe more than one) before eof are part of field and do not end it; eol() returns false for \0 but the ch==eof will return true for the \0 at eof. // Comment characters terminate a field immediately and take precedence over separators. 
- return *ch == sep || ((uint8_t)*ch <= 13 && (ch == eof || eol(&ch))) || (commentChar && *ch == commentChar); if (*ch == sep) return true; - if ((uint8_t)*ch <= 13 && (ch == eof || eol(&ch))) return true; + if (ch == eof) return true; + if ((uint8_t)*ch <= 13 && eol(&ch)) return true; if (!commentChar) return false; return *ch == commentChar; }