From b87930a52c9645f8eed6eb8fc83d4e42ea1f1c6b Mon Sep 17 00:00:00 2001 From: manmita Date: Fri, 9 Jan 2026 01:27:14 +0530 Subject: [PATCH 1/7] merge from master --- NEWS.md | 1 + inst/tests/tests.Rraw | 7 +++++++ src/fread.c | 6 +----- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 256c7450ac..d0f143e164 100644 --- a/NEWS.md +++ b/NEWS.md @@ -35,6 +35,7 @@ 3. `fread("file://...")` works for file URIs with spaces, [#7550](https://github.com/Rdatatable/data.table/issues/7550). Thanks @aitap for the report and @MichaelChirico for the PR. 4. `sum()` by group is correct with missing entries and GForce activated ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report and @manmita for the fix. The issue was caused by a faulty early `break` that spilled between groups, and resulted in silently incorrect results! +5. `fread(text=)` could segfault when reading text input ending with a `\x1a` (ASCII SUB) character after a long line, [#7407](https://github.com/Rdatatable/data.table/issues/7407)which is solved by adding check for `\x1A` at `end_of_field`. Thanks @aitap for the report and @manmita for the fix. 
## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aba5720a60..2a748138da 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22030,3 +22030,10 @@ if (test_bit64) local({ merged$gforce_mean, merged$true_mean ) }) + +# 7407 Test for fread() handling \x1A (ASCII SUB) at end of input +fread_sub_test_txt = paste0("foo\n", strrep("a", 4096 * 100), "\x1A") +test(2359.1, { + dt <- fread(text = fread_sub_test_txt) + nrow(dt) == 1 && names(dt) == "foo" && dt$foo[1] == paste0(strrep("a", 4096 * 100)) +}, TRUE) diff --git a/src/fread.c b/src/fread.c index 077341aa0d..be8a2a6d78 100644 --- a/src/fread.c +++ b/src/fread.c @@ -349,11 +349,7 @@ static inline bool end_of_field(const char *ch) // We use eol() because that looks at eol_one_r inside it w.r.t. \r // \0 (maybe more than one) before eof are part of field and do not end it; eol() returns false for \0 but the ch==eof will return true for the \0 at eof. // Comment characters terminate a field immediately and take precedence over separators. 
- return *ch == sep || ((uint8_t)*ch <= 13 && (ch == eof || eol(&ch))) || (commentChar && *ch == commentChar); - if (*ch == sep) return true; - if ((uint8_t)*ch <= 13 && (ch == eof || eol(&ch))) return true; - if (!commentChar) return false; - return *ch == commentChar; + return *ch == sep || *ch == '\x1A'|| ((uint8_t)*ch <= 13 && (ch == eof || eol(&ch))) || (commentChar && *ch == commentChar); } static inline const char *end_NA_string(const char *start) From c694a9274588dc800060fe1d93ac54d8d94d9baf Mon Sep 17 00:00:00 2001 From: manmita Date: Fri, 9 Jan 2026 01:15:51 +0530 Subject: [PATCH 2/7] fix(7407): added eof instead of x1A --- src/fread.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index be8a2a6d78..32a99ecbb0 100644 --- a/src/fread.c +++ b/src/fread.c @@ -349,7 +349,11 @@ static inline bool end_of_field(const char *ch) // We use eol() because that looks at eol_one_r inside it w.r.t. \r // \0 (maybe more than one) before eof are part of field and do not end it; eol() returns false for \0 but the ch==eof will return true for the \0 at eof. // Comment characters terminate a field immediately and take precedence over separators. 
- return *ch == sep || *ch == '\x1A'|| ((uint8_t)*ch <= 13 && (ch == eof || eol(&ch))) || (commentChar && *ch == commentChar); + if (*ch == sep) return true; + if (ch == eof) return true; // Check eof first to avoid reading past #7407 + if ((uint8_t)*ch <= 13 && eol(&ch)) return true; + if (!commentChar) return false; + return *ch == commentChar; } static inline const char *end_NA_string(const char *start) From 1e6f207e8ad0fd407755c1a9e8a3901ffe5f016c Mon Sep 17 00:00:00 2001 From: manmita Date: Fri, 9 Jan 2026 01:24:42 +0530 Subject: [PATCH 3/7] fix(7407): refined the test and news --- inst/tests/tests.Rraw | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2a748138da..d943b43071 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22032,8 +22032,8 @@ if (test_bit64) local({ }) # 7407 Test for fread() handling \x1A (ASCII SUB) at end of input -fread_sub_test_txt = paste0("foo\n", strrep("a", 4096 * 100), "\x1A") -test(2359.1, { - dt <- fread(text = fread_sub_test_txt) - nrow(dt) == 1 && names(dt) == "foo" && dt$foo[1] == paste0(strrep("a", 4096 * 100)) -}, TRUE) +txt = paste0("foo\n", strrep("a", 4096 * 100), "\x1A") +test(2359.1, + nchar(fread(txt)$foo[1]) == 4096 * 100, + TRUE +) From fb991867832d32f1cf36931faacb7e3a7f19e1b0 Mon Sep 17 00:00:00 2001 From: Manmita Das <34617961+manmita@users.noreply.github.com> Date: Fri, 9 Jan 2026 01:36:58 +0530 Subject: [PATCH 4/7] Update NEWS.md for version v1.18.0 Updated NEWS.md with fixes and enhancements for fread and sum functions. --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index d0f143e164..f7485bcfa5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -35,6 +35,7 @@ 3. `fread("file://...")` works for file URIs with spaces, [#7550](https://github.com/Rdatatable/data.table/issues/7550). Thanks @aitap for the report and @MichaelChirico for the PR. 4. 
`sum()` by group is correct with missing entries and GForce activated ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report and @manmita for the fix. The issue was caused by a faulty early `break` that spilled between groups, and resulted in silently incorrect results! + 5. `fread(text=)` could segfault when reading text input ending with a `\x1a` (ASCII SUB) character after a long line, [#7407](https://github.com/Rdatatable/data.table/issues/7407)which is solved by adding check for `\x1A` at `end_of_field`. Thanks @aitap for the report and @manmita for the fix. ## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025 From b402590dce50556fc22f04cbbd7a3a99a34ff664 Mon Sep 17 00:00:00 2001 From: Manmita Das <34617961+manmita@users.noreply.github.com> Date: Fri, 9 Jan 2026 01:38:27 +0530 Subject: [PATCH 5/7] Update NEWS.md for fread segfault issue Fixed formatting and clarified the segfault issue for fread. --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f7485bcfa5..6963c4f78a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -36,7 +36,7 @@ 4. `sum()` by group is correct with missing entries and GForce activated ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report and @manmita for the fix. The issue was caused by a faulty early `break` that spilled between groups, and resulted in silently incorrect results! -5. `fread(text=)` could segfault when reading text input ending with a `\x1a` (ASCII SUB) character after a long line, [#7407](https://github.com/Rdatatable/data.table/issues/7407)which is solved by adding check for `\x1A` at `end_of_field`. Thanks @aitap for the report and @manmita for the fix. +5. 
`fread(text=)` could segfault when reading text input ending with a `\x1a` (ASCII SUB) character after a long line, [#7407](https://github.com/Rdatatable/data.table/issues/7407). This is now fixed by adding an explicit check for the end of input. Thanks @aitap for the report and @manmita for the fix. ## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025 From 868e5be403c803137bdbdcb3866e2b18738dbda2 Mon Sep 17 00:00:00 2001 From: Manmita Das <34617961+manmita@users.noreply.github.com> Date: Fri, 9 Jan 2026 02:02:00 +0530 Subject: [PATCH 6/7] fix(7407): Remove comment at eof check in fread.c --- src/fread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index 32a99ecbb0..5c2107f4f1 100644 --- a/src/fread.c +++ b/src/fread.c @@ -350,7 +350,7 @@ static inline bool end_of_field(const char *ch) // \0 (maybe more than one) before eof are part of field and do not end it; eol() returns false for \0 but the ch==eof will return true for the \0 at eof. // Comment characters terminate a field immediately and take precedence over separators. 
if (*ch == sep) return true; - if (ch == eof) return true; // Check eof first to avoid reading past #7407 + if (ch == eof) return true; if ((uint8_t)*ch <= 13 && eol(&ch)) return true; if (!commentChar) return false; return *ch == commentChar; From c3fa11df18b9fb3f6ee176212a37843116b9e51d Mon Sep 17 00:00:00 2001 From: Manmita Das <34617961+manmita@users.noreply.github.com> Date: Fri, 9 Jan 2026 23:42:43 +0530 Subject: [PATCH 7/7] fix(7407): Changed the test to single line --- inst/tests/tests.Rraw | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9950c2575c..1e2b985a49 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22034,7 +22034,4 @@ if (test_bit64) local({ # 7407 Test for fread() handling \x1A (ASCII SUB) at end of input txt = paste0("foo\n", strrep("a", 4096 * 100), "\x1A") -test(2359.1, - nchar(fread(txt)$foo[1]) == 4096 * 100, - TRUE -) +test(2359.1, nchar(fread(txt)$foo), 409600L)