From 1e15e3646f7a825ab7dce4c9145630e69b6ab0a8 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Mon, 24 Feb 2025 12:18:58 +0000 Subject: [PATCH 1/9] Fix for no detections in one or more strata --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ NEWS.md | 1 + R/varNhat.R | 18 ++++++++++++++---- tests/testthat/test_dht2.R | 37 +++++++++++++++++++++++++++++++++++++ 5 files changed, 55 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 73145e5..98aeef6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling Horvitz-Thompson-like estimator) if survey area information is provided. See Miller et al. (2019) for more information on methods and for example analyses. -Version: 2.0.0.9001 +Version: 2.0.0.9002 URL: https://github.com/DistanceDevelopment/Distance/ BugReports: https://github.com/DistanceDevelopment/Distance/issues Language: en-GB diff --git a/NAMESPACE b/NAMESPACE index 3f6e9ca..c3e8894 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,8 +48,10 @@ importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,mutate_if) +importFrom(dplyr,pull) importFrom(dplyr,reframe) importFrom(dplyr,select) +importFrom(dplyr,summarize) importFrom(dplyr,summarize_all) importFrom(dplyr,ungroup) importFrom(dplyr,vars) diff --git a/NEWS.md b/NEWS.md index fcd15ce..5540091 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ * Fixes issue with print dht2 when multipliers are a data.frame (Issue #179) * Fixes bug when including a uniform with no adjustment terms in the summarize_ds_models function (Issue #180) +* dht2 can now deal with strata where there are no observations (Issue #194) # Distance 2.0.0 diff --git a/R/varNhat.R b/R/varNhat.R index 9d2b45a..780ad15 100644 --- a/R/varNhat.R +++ b/R/varNhat.R @@ -1,6 +1,6 @@ # Calculate the variance contribution of the detection function # to abundance estimates -#' @importFrom dplyr reframe +#' @importFrom dplyr reframe group_by across all_of summarize pull varNhat <- function(data, model){ # format the data @@ -21,7 +21,13 @@ varNhat <- function(data, model){ distinct() %>% reframe(Covered_area = sum(.data$Covered_area), Area = .data$Area) %>% - distinct() + distinct() %>% + + # Add column giving number of obs per stratum + mutate(n_obs = data %>% + dplyr::group_by(across(all_of(strat_vars))) %>% + dplyr::summarize(n_obs = sum(!is.na(object))) %>% + dplyr::pull(n_obs)) # join the per-stratum data onto the frame data$Covered_area <- NULL @@ -29,7 +35,7 @@ varNhat <- function(data, model){ data <- left_join(data, grp_dat, by=strat_vars) # function to calculate Nhat - dhtd <- function(par, data, model){ + dhtd <- function(par, data, model, grp_dat){ # set par model$par <- par @@ -42,6 +48,10 @@ varNhat <- function(data, model){ reframe(N = (.data$Area/.data$Covered_area) * sum(.data$Nc, na.rm=TRUE)) %>% distinct() + + # Include an NA for strata with no obs and then convert to 0. + res <- left_join(grp_dat, res, by = colnames(grp_dat)[1]) + res$N[is.na(res$N)] <- 0 res$N } @@ -54,7 +64,7 @@ varNhat <- function(data, model){ # calculate variance dm <- DeltaMethod(model$par, dhtd, vcov, sqrt(.Machine$double.eps), - model=model, data=data) + model=model, data=data, grp_dat=grp_dat) attr(dm, "vardat_str") <- vardat_str # fiddle with variance data.frame diff --git a/tests/testthat/test_dht2.R b/tests/testthat/test_dht2.R index bf478b1..d258d45 100644 --- a/tests/testthat/test_dht2.R +++ b/tests/testthat/test_dht2.R @@ -260,3 +260,40 @@ test_that("can accept data.frame and scalar sampling fractions", { minke_dht2 }) + + +test_that("Deal with strata with no detections", { + + # Add a third Region with two samples but no observations. + # Adding two samples instead of just one to prevent a warning unstable ER var + new.row1 <- minke[1, ] %>% + mutate( + Region.Label = "East", + Area = 10000, + Sample.Label = max(minke$Sample.Label) + 1, + Effort = 10, + distance = NA, + object = NA + ) + + new.row2 <- minke[1, ] %>% + mutate( + Region.Label = "East", + Area = 10000, + Sample.Label = max(minke$Sample.Label) + 2, + Effort = 10, + distance = NA, + object = NA + ) + + minke <- rbind(minke, new.row1, new.row2) + # Fit a detection function + detfn <- ds(data=minke, truncation=1.5, key="hr", adjustment=NULL) + + result <- dht2(ddf = detfn, + flatfile = minke, + strat_formula = ~ Region.Label, + stratification = "geographical") + + expect_true(is(result, "dht_result")) +}) From bc110600b70ca24bbf76395babdba0d7381a4eb3 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Wed, 19 Mar 2025 16:34:39 +0000 Subject: [PATCH 2/9] Activate missing warnings immediately --- R/ER_var_f.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ER_var_f.R b/R/ER_var_f.R index 6185296..3bfb975 100644 --- a/R/ER_var_f.R +++ b/R/ER_var_f.R @@ -25,9 +25,9 @@ ER_var_f <- function(erdat, innes, binomial_var=FALSE){ # sort the data if we use O2/O3 estimators if(any(erdat$er_est %in% c("O2", "O3"))){ - warning("Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful") + warning("Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful", immediate. = TRUE, call. = FALSE) if(!is.numeric(erdat$Sample.Label)){ - warning("Additionally, Sample.Label is not numeric, this may cause additional issues") + warning("Additionally, Sample.Label is not numeric, this may cause additional issues", immediate. = TRUE, call. = FALSE) } erdat <- erdat %>% mutate(.originalorder = 1:nrow(erdat)) %>% From b757d43cd350298dd214be521c2e5c57a9713a13 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Wed, 19 Mar 2025 16:36:13 +0000 Subject: [PATCH 3/9] Runs for O2 ER var now Reference issue #174 --- R/ER_var_f.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/ER_var_f.R b/R/ER_var_f.R index 3bfb975..a295dce 100644 --- a/R/ER_var_f.R +++ b/R/ER_var_f.R @@ -29,8 +29,10 @@ ER_var_f <- function(erdat, innes, binomial_var=FALSE){ if(!is.numeric(erdat$Sample.Label)){ warning("Additionally, Sample.Label is not numeric, this may cause additional issues", immediate. = TRUE, call. = FALSE) } + # Add in a column indexing the original order + erdat$.orig.order <- 1:nrow(erdat) erdat <- erdat %>% - mutate(.originalorder = 1:nrow(erdat)) %>% + # Sort by sample label arrange(.data$Sample.Label) } From a7227e22c070c7c9039752b94354ae07f2565300 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Tue, 13 May 2025 17:09:57 +0100 Subject: [PATCH 4/9] make no obs fix CRAN compliant reference #194 --- NAMESPACE | 1 + R/dht2.R | 2 +- R/varNhat.R | 14 +++++++++----- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index af3bf36..b7b342d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,6 +63,7 @@ importFrom(mrds,ddf) importFrom(mrds,dht) importFrom(rlang,.data) importFrom(stats,AIC) +importFrom(stats,aggregate) importFrom(stats,as.formula) importFrom(stats,logLik) importFrom(stats,median) diff --git a/R/dht2.R b/R/dht2.R index 3442fea..6e52bca 100644 --- a/R/dht2.R +++ b/R/dht2.R @@ -1093,7 +1093,7 @@ if(mult){ # warn if we only had one transect in one or more strata if(any(res$k == 1)){ - warning("One or more strata have only one transect, cannot calculate empirical encounter rate variance") + warning("One or more strata have only one transect, cannot calculate empirical encounter rate variance", call. = FALSE) } # fix area == covered area for compatibility with mrds::dht diff --git a/R/varNhat.R b/R/varNhat.R index 780ad15..fb4a48f 100644 --- a/R/varNhat.R +++ b/R/varNhat.R @@ -1,6 +1,7 @@ # Calculate the variance contribution of the detection function # to abundance estimates #' @importFrom dplyr reframe group_by across all_of summarize pull +#' @importFrom stats aggregate varNhat <- function(data, model){ # format the data @@ -21,13 +22,16 @@ varNhat <- function(data, model){ distinct() %>% reframe(Covered_area = sum(.data$Covered_area), Area = .data$Area) %>% - distinct() %>% + distinct() # Add column giving number of obs per stratum - mutate(n_obs = data %>% - dplyr::group_by(across(all_of(strat_vars))) %>% - dplyr::summarize(n_obs = sum(!is.na(object))) %>% - dplyr::pull(n_obs)) + n_obs <- aggregate(!is.na(data$object), + by = data[strat_vars], + FUN = sum) + names(n_obs)[length(n_obs)] <- "n_obs" + + # Merge into grp_dat + grp_dat <- left_join(grp_dat, n_obs, by = strat_vars) # join the per-stratum data onto the frame data$Covered_area <- NULL From 10f06756bfec6143ac6a9c07baa556685abe6256 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Wed, 11 Jun 2025 15:21:10 +0100 Subject: [PATCH 5/9] Removes NaNs from results table when no obs in stratum Reference #194 --- R/dht2.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/dht2.R b/R/dht2.R index 6e52bca..db94678 100644 --- a/R/dht2.R +++ b/R/dht2.R @@ -649,7 +649,10 @@ dht2 <- function(ddf, observations=NULL, transects=NULL, geo_strat=NULL, var(.data$size, na.rm=TRUE)/ sum(!is.na(.data$size)), 0), - group_mean = mean(.data$size, na.rm=TRUE)) %>% + group_mean = if_else(.data$n_observations>0, + mean(.data$size, na.rm=TRUE), + 1)) %>% + #group_mean = mean(.data$size, na.rm=TRUE)) %>% # report n as n_observations mutate(n = .data$n_observations) # if we didn't have any areas, then set to 1 and estimate density From e08ae070d27311d6037cc1f374833e83b8dfbda7 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Wed, 11 Jun 2025 15:59:39 +0100 Subject: [PATCH 6/9] Fix NaNs in variance contributions table Reference #194 --- R/dht2.R | 7 ++++--- R/variance_contributions.R | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/dht2.R b/R/dht2.R index db94678..c88c92f 100644 --- a/R/dht2.R +++ b/R/dht2.R @@ -652,7 +652,6 @@ dht2 <- function(ddf, observations=NULL, transects=NULL, geo_strat=NULL, group_mean = if_else(.data$n_observations>0, mean(.data$size, na.rm=TRUE), 1)) %>% - #group_mean = mean(.data$size, na.rm=TRUE)) %>% # report n as n_observations mutate(n = .data$n_observations) # if we didn't have any areas, then set to 1 and estimate density @@ -748,11 +747,13 @@ if(mult){ mutate(ER = .data$n/.data$Effort) } res <- res %>% - mutate(ER_CV = sqrt(.data$ER_var)/.data$ER, + mutate(ER_CV = if_else(.data$ER == 0, 0, + sqrt(.data$ER_var)/.data$ER), ER_df = compute_df(.data$k, type=.data$er_est)) %>% # calculate stratum abundance estimate mutate(Abundance = (.data$Area/.data$Covered_area) * .data$Nc) %>% - mutate(df_CV = sqrt(.data$df_var)/.data$Abundance) %>% + mutate(df_CV = if_else(.data$Abundance == 0, 0, + sqrt(.data$df_var)/.data$Abundance)) %>% mutate(group_CV = if_else(.data$group_var==0, 0, sqrt(.data$group_var)/.data$group_mean)) diff --git a/R/variance_contributions.R b/R/variance_contributions.R index 7dfcaac..c9deba1 100644 --- a/R/variance_contributions.R +++ b/R/variance_contributions.R @@ -18,8 +18,9 @@ variance_contributions <- function(res){ # get the total CV_cont$Total <- sqrt(rowSums(CV_cont^2)) - # make that into percentages - CV_cont <- (CV_cont^2/CV_cont[["Total"]]^2)*100 + # make that into percentages (for non zero rows) + index <- which(CV_cont[["Total"]]!=0) + CV_cont[index,] <- (CV_cont[index,]^2/CV_cont[index,"Total"]^2)*100 CV_cont[["Total"]] <- NULL # zero ER contributions if only one sample From 7cd81c83139b07d216c3965327872aa9112c97c1 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Tue, 17 Jun 2025 14:17:21 +0100 Subject: [PATCH 7/9] Additional numeric tests for strata with no observations in dht2 Assuming geographic stratification reference #194 --- tests/testthat/test_dht2.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/testthat/test_dht2.R b/tests/testthat/test_dht2.R index d258d45..12b9757 100644 --- a/tests/testthat/test_dht2.R +++ b/tests/testthat/test_dht2.R @@ -296,4 +296,27 @@ test_that("Deal with strata with no detections", { stratification = "geographical") expect_true(is(result, "dht_result")) + + # Check values in output + new.row3 <- minke[1, ] %>% + mutate( + Region.Label = "East", + Area = 10000, + Sample.Label = max(minke$Sample.Label) + 3, + Effort = 10, + distance = NA, + object = NA + ) + + minke2 <- rbind(minke, new.row1, new.row2, new.row3) + detfn <- ds(data=minke2, truncation=1.5, key="hr", adjustment=NULL, optimizer = "R") + dht2.results <- dht2(ddf = detfn, + flatfile = minke2, + strat_formula = ~ Region.Label, + stratification = "geographical") + + expect_equal(dht2.results$Abundance[1], 0) + expect_equal(dht2.results$Abundance_se[4], 4834.4118, , tolerance = 0.0001) + expect_equal(dht2.results$df[4],15.25496, tolerance = 0.00001) + }) From 2036ca5560eb73bd1c6916e6643ebe4aefad5760 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Tue, 17 Jun 2025 14:20:00 +0100 Subject: [PATCH 8/9] bump package version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 816743e..1f8fb1b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling Horvitz-Thompson-like estimator) if survey area information is provided. See Miller et al. (2019) for more information on methods and for example analyses. -Version: 2.0.0.9011 +Version: 2.0.0.9012 URL: https://github.com/DistanceDevelopment/Distance/ BugReports: https://github.com/DistanceDevelopment/Distance/issues Language: en-GB From 51403f76f66b104e59121eba4435db70628d27d4 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Wed, 18 Jun 2025 13:10:52 +0100 Subject: [PATCH 9/9] Modify tests to avoid warnings --- tests/testthat/test_systematic_var2.R | 5 +++-- tests/testthat/test_variance.R | 12 +++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_systematic_var2.R b/tests/testthat/test_systematic_var2.R index ef76147..78a13e7 100644 --- a/tests/testthat/test_systematic_var2.R +++ b/tests/testthat/test_systematic_var2.R @@ -56,8 +56,9 @@ cu <- 1/(cu[3]/(cu[1]*cu[2])) test_that("no strat",{ - df <- ds(Systematic_variance_2, convert_units=cu) - + # No adjustments to avoid model selection which generates a monotonicity warning. (The HN model with no adjustments is selected based on minimum AIC any way.) + df <- ds(Systematic_variance_2, nadj = 0, + convert_units=cu, truncation = 10) # fiddle with region labels obs.table$Region.Label <- NULL diff --git a/tests/testthat/test_variance.R b/tests/testthat/test_variance.R index fa91c5c..bc895cd 100644 --- a/tests/testthat/test_variance.R +++ b/tests/testthat/test_variance.R @@ -10,8 +10,10 @@ test_that("variance 2",{ convert.units <- Systematic_variance_2_units cu <- convert.units[, 3] cu <- 1/(cu[3]/(cu[1]*cu[2])) - # first fit a model - sysvar_df <- ds(Systematic_variance_2, adjustment="cos", convert_units=cu) + # first fit a model - to avoid warnings use no adjustments (HN model was selected any way based on minimum AIC) + sysvar_df <- ds(Systematic_variance_2, + nadj = 0, + convert_units=cu) systematic_var2 <- Systematic_variance_2 unflat <- unflatten(systematic_var2) @@ -30,9 +32,9 @@ test_that("variance 2",{ unflat$obs.table$Sample.Label <- as.numeric(unflat$obs.table$Sample.Label) unflat$data$Sample.Label <- as.numeric(unflat$data$Sample.Label) - # fit the detection function - sysvar_df <- ds(unflat$data, adjustment="cos", convert_units=cu) - + # fit the detection function (again avoid adjustment terms to supress warning - no adjustments is selected by AIC any way) + sysvar_df <- ds(unflat$data, nadj = 0, convert_units=cu) + ## no stratification #Nhat_nostrat_eff <- dht2(sysvar_df, transects=unflat$sample.table, # geo_strat=unflat$region.table, observations=unflat$obs.table,