From c4fd110dd4d3ad60b2f5a9edfa190a833c835231 Mon Sep 17 00:00:00 2001 From: Greg Faletto Date: Tue, 19 May 2026 22:44:26 -0700 Subject: [PATCH 1/2] fix: validate quantile_by_key in arx_forecaster and flatline_forecaster (#229) arx_forecaster() and flatline_forecaster() now error at the forecaster boundary when quantile_by_key contains columns that are not in key_colnames(epi_data), instead of silently dropping them downstream. For flatline_forecaster() in particular the layer-level warning was invisible because forecast(wf) is wrapped in suppressWarnings(), so users got no signal at all on invalid input. Also adds a warning when arx_forecaster() is called with a quantile- output trainer (quantile_reg() or rand_forest with engine "grf_quantiles") plus a non-empty quantile_by_key. The argument is silently dropped in that code branch; the warning surfaces the no-op without breaking working code. The 2023 headline failure (flatline crashing on a valid key) no longer reproduces - residuals.flatline carries geo_value through and the call works cleanly. cdc_baseline_forecaster uses a different layer with the same parallel-structure vulnerability; deferred to a follow-up issue. Version bumped 0.2.5 -> 0.2.7 (skipping 0.2.6 for the open PR #480 collision). Co-Authored-By: Claude Opus 4.7 (1M context) --- DESCRIPTION | 2 +- NEWS.md | 5 +++++ R/arx_forecaster.R | 25 +++++++++++++++++++++++ R/flatline_forecaster.R | 14 +++++++++++++ tests/testthat/test-arx_forecaster.R | 21 +++++++++++++++++++ tests/testthat/test-flatline_forecaster.R | 10 +++++++++ 6 files changed, 76 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 07e86da4..95e42583 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: epipredict Title: Basic epidemiology forecasting methods -Version: 0.2.5 +Version: 0.2.7 Authors@R: c( person("Daniel J.", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre")), person("Ryan", "Tibshirani", , "ryantibs@cmu.edu", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 38adef1b..00f74538 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,11 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicate PR's. +# epipredict 0.2.7 + +- `arx_forecaster()` and `flatline_forecaster()` now error early when `quantile_by_key` contains columns that are not keys of the input `epi_df`, rather than silently dropping the invalid keys (#229). +- `arx_forecaster()` now warns when `quantile_by_key` is supplied with a quantile-output trainer (`quantile_reg()`, `rand_forest()` with engine `"grf_quantiles"`), where the argument would otherwise be silently ignored (#229). + # epipredict 0.2.5 - Fix `arx_forecaster()` and `arx_fcast_epi_workflow()` so that the error raised when `forecast_date + ahead != target_date` reports the actual validation message rather than a cryptic `cli` template-evaluation error (#473). diff --git a/R/arx_forecaster.R b/R/arx_forecaster.R index 4252b2b0..bc479143 100644 --- a/R/arx_forecaster.R +++ b/R/arx_forecaster.R @@ -127,6 +127,20 @@ arx_fcast_epi_workflow <- function( if (!(is.null(trainer) || is_regression(trainer))) { cli_abort("`trainer` must be a {.pkg parsnip} model of mode 'regression'.") } + if (length(args_list$quantile_by_key) > 0L) { + valid_keys <- key_colnames(epi_data) + missing_keys <- setdiff(args_list$quantile_by_key, valid_keys) + if (length(missing_keys) > 0L) { + cli_abort( + c( + "Some {.arg quantile_by_key} columns are not key columns of the input {.cls epi_df}.", + "!" = "Missing: {.val {missing_keys}}.", + i = "Available keys: {.val {valid_keys}}." + ), + class = "epipredict__arx_forecaster__quantile_by_key_invalid" + ) + } + } # forecast_date is above all what they set; # if they don't and they're not adjusting latency, it defaults to the max time_value # if they're adjusting, it defaults to the as_of @@ -194,6 +208,17 @@ arx_fcast_epi_workflow <- function( f <- frosting() %>% layer_predict() # %>% layer_naomit() is_quantile_reg <- inherits(trainer, "quantile_reg") | (inherits(trainer, "rand_forest") & trainer$engine == "grf_quantiles") + if (is_quantile_reg && length(args_list$quantile_by_key) > 0L) { + cli_warn( + paste0( + "{.arg quantile_by_key} (set to {.val {args_list$quantile_by_key}}) ", + "has no effect when the trainer produces quantile distributions ", + "directly (e.g., {.fn quantile_reg}, {.fn rand_forest} with engine ", + "{.val grf_quantiles}). The argument is being ignored." + ), + class = "epipredict__arx_forecaster__quantile_by_key_ignored" + ) + } if (is_quantile_reg) { # add all quantile_level to the forecaster and update postprocessor if (inherits(trainer, "quantile_reg")) { diff --git a/R/flatline_forecaster.R b/R/flatline_forecaster.R index 617d703e..2f6d4d2e 100644 --- a/R/flatline_forecaster.R +++ b/R/flatline_forecaster.R @@ -59,6 +59,20 @@ flatline_forecaster <- function( if (!inherits(args_list, c("flat_fcast", "alist"))) { cli_abort("`args_list` was not created using `flatline_args_list()`.") } + if (length(args_list$quantile_by_key) > 0L) { + valid_keys <- key_colnames(epi_data) + missing_keys <- setdiff(args_list$quantile_by_key, valid_keys) + if (length(missing_keys) > 0L) { + cli_abort( + c( + "Some {.arg quantile_by_key} columns are not key columns of the input {.cls epi_df}.", + "!" = "Missing: {.val {missing_keys}}.", + i = "Available keys: {.val {valid_keys}}." + ), + class = "epipredict__flatline_forecaster__quantile_by_key_invalid" + ) + } + } keys <- key_colnames(epi_data) ek <- kill_time_value(keys) outcome <- rlang::sym(outcome) diff --git a/tests/testthat/test-arx_forecaster.R b/tests/testthat/test-arx_forecaster.R index be087297..3a14109f 100644 --- a/tests/testthat/test-arx_forecaster.R +++ b/tests/testthat/test-arx_forecaster.R @@ -44,6 +44,27 @@ test_that("warns if there's not enough data to predict", { ) }) +test_that("arx_forecaster errors on invalid quantile_by_key columns (issue #229)", { + jhu <- epidatasets::covid_case_death_rates + expect_error( + arx_forecaster(jhu, "death_rate", c("death_rate"), + args_list = arx_args_list(quantile_by_key = "nonexistent_column") + ), + class = "epipredict__arx_forecaster__quantile_by_key_invalid" + ) +}) + +test_that("arx_forecaster warns when quantile_by_key is used with quantile_reg trainer (issue #229)", { + jhu <- epidatasets::covid_case_death_rates + expect_warning( + arx_forecaster(jhu, "death_rate", c("death_rate"), + trainer = quantile_reg(), + args_list = arx_args_list(quantile_by_key = "geo_value") + ), + class = "epipredict__arx_forecaster__quantile_by_key_ignored" + ) +}) + test_that("arx_forecaster errors with documented class when forecast_date + ahead != target_date (issue #473)", { df <- tibble( geo_value = "ri", diff --git a/tests/testthat/test-flatline_forecaster.R b/tests/testthat/test-flatline_forecaster.R index f6d2de24..d6e73147 100644 --- a/tests/testthat/test-flatline_forecaster.R +++ b/tests/testthat/test-flatline_forecaster.R @@ -20,3 +20,13 @@ test_that("flatline_forecaster returns one prediction per geo with trailing NAs counts <- res$predictions %>% dplyr::count(geo_value, target_date) expect_true(all(counts$n == 1L)) }) + +test_that("flatline_forecaster errors on invalid quantile_by_key columns (issue #229)", { + jhu <- epidatasets::covid_case_death_rates + expect_error( + flatline_forecaster(jhu, "death_rate", + flatline_args_list(quantile_by_key = "nonexistent_column") + ), + class = "epipredict__flatline_forecaster__quantile_by_key_invalid" + ) +}) From 470fefc351f3569351d0bd98fb7613b28def9054 Mon Sep 17 00:00:00 2001 From: Greg Faletto Date: Tue, 19 May 2026 22:46:49 -0700 Subject: [PATCH 2/2] chore: switch version bump from 0.2.7 to 0.2.6 Switches the version bump for #229 from 0.2.7 to 0.2.6 so the maintainer can choose the merge order against the still-open PR #480 (doc regen, also targeting 0.2.6). Whichever PR merges first claims 0.2.6; the other rebases to 0.2.7 at merge time. Co-Authored-By: Claude Opus 4.7 (1M context) --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 95e42583..2166122a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: epipredict Title: Basic epidemiology forecasting methods -Version: 0.2.7 +Version: 0.2.6 Authors@R: c( person("Daniel J.", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre")), person("Ryan", "Tibshirani", , "ryantibs@cmu.edu", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 00f74538..cf4c2994 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicate PR's. -# epipredict 0.2.7 +# epipredict 0.2.6 - `arx_forecaster()` and `flatline_forecaster()` now error early when `quantile_by_key` contains columns that are not keys of the input `epi_df`, rather than silently dropping the invalid keys (#229). - `arx_forecaster()` now warns when `quantile_by_key` is supplied with a quantile-output trainer (`quantile_reg()`, `rand_forest()` with engine `"grf_quantiles"`), where the argument would otherwise be silently ignored (#229).