diff --git a/DESCRIPTION b/DESCRIPTION index ec393c6e..ac7aa435 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: epipredict Title: Basic epidemiology forecasting methods -Version: 0.2.3 +Version: 0.2.4 Authors@R: c( person("Daniel J.", "McDonald", , "daniel@stat.ubc.ca", role = c("aut", "cre")), person("Ryan", "Tibshirani", , "ryantibs@cmu.edu", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 973f7cd1..d6959a85 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,10 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicate PR's. +# epipredict 0.2.4 + +- Fix `flatline_forecaster()` to return one prediction per geographic key when the input `epi_df` has trailing rows with `NA`s in the outcome (#454). Previously, the forecast was duplicated once per trailing-NA day. + # epipredict 0.2.3 - Fix `print.canned_epipred()` so the latency-adjustment information actually displays for canned forecasters that include `step_adjust_latency` in their recipe (#447). diff --git a/R/get_test_data.R b/R/get_test_data.R index 17430824..e5cc9516 100644 --- a/R/get_test_data.R +++ b/R/get_test_data.R @@ -66,13 +66,13 @@ get_test_data <- function(recipe, x) { # If we skip NA completion, we remove undesirably early time values # Happens globally, over all groups - x <- filter(x, max_time_value - time_value <= keep) + x <- filter(x, max_time_value - time_value <= keep, time_value <= max_time_value) # If all(lags > 0), then we get rid of recent data if (min_lags > 0 && min_lags < Inf) { x <- filter(x, max_time_value - time_value >= min_lags) } - filter(x, max_time_value - time_value <= keep) %>% + filter(x, max_time_value - time_value <= keep, time_value <= max_time_value) %>% epiprocess::ungroup() } diff --git a/tests/testthat/test-flatline_forecaster.R b/tests/testthat/test-flatline_forecaster.R new file mode 100644 index 00000000..f6d2de24 --- /dev/null +++ b/tests/testthat/test-flatline_forecaster.R @@ -0,0 +1,22 @@ +test_that("flatline_forecaster returns one prediction per geo with trailing NAs (issue #454)", { + jhu <- epidatasets::covid_case_death_rates %>% + dplyr::filter(time_value >= as.Date("2021-11-01")) + geos <- unique(jhu$geo_value) + one_day <- tibble::tibble( + geo_value = geos, + time_value = as.Date("2022-01-01"), + case_rate = NA_real_, + death_rate = 0 + ) + two_day <- one_day %>% dplyr::mutate(time_value = as.Date("2022-01-02")) + jhu_nad <- jhu %>% + tibble::as_tibble() %>% + dplyr::bind_rows(one_day, two_day) %>% + epiprocess::as_epi_df() + + res <- flatline_forecaster(jhu_nad, "case_rate") + + expect_equal(nrow(res$predictions), length(geos)) + counts <- res$predictions %>% dplyr::count(geo_value, target_date) + expect_true(all(counts$n == 1L)) +})