diff --git a/DESCRIPTION b/DESCRIPTION index 816743e..1f8fb1b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling Horvitz-Thompson-like estimator) if survey area information is provided. See Miller et al. (2019) for more information on methods and for example analyses. -Version: 2.0.0.9011 +Version: 2.0.0.9012 URL: https://github.com/DistanceDevelopment/Distance/ BugReports: https://github.com/DistanceDevelopment/Distance/issues Language: en-GB diff --git a/NAMESPACE b/NAMESPACE index 6ecccc9..b7b342d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,8 +49,10 @@ importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,mutate_if) +importFrom(dplyr,pull) importFrom(dplyr,reframe) importFrom(dplyr,select) +importFrom(dplyr,summarize) importFrom(dplyr,summarize_all) importFrom(dplyr,ungroup) importFrom(dplyr,vars) @@ -61,6 +63,7 @@ importFrom(mrds,ddf) importFrom(mrds,dht) importFrom(rlang,.data) importFrom(stats,AIC) +importFrom(stats,aggregate) importFrom(stats,as.formula) importFrom(stats,logLik) importFrom(stats,median) diff --git a/NEWS.md b/NEWS.md index e615e02..2436bf5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ * Fixes issue with print dht2 when multipliers are a data.frame (Issue #179) * Fixes bug when including a uniform with no adjustment terms in the summarize_ds_models function (Issue #180) +* dht2 can now deal with strata where there are no observations (Issue #194) * Users can alternatively pass a list of models to summarize_ds_models rather than passing them individually. (Issue #149) * Truncation distances greater than the largest cutpoint value for binned data are no longer permitted as these cause fitting issues. (Issue #175) * print.dht_result now displays estimates for groups as well as individuals by default when group size is present. (Issue #178) diff --git a/R/ER_var_f.R b/R/ER_var_f.R index 6185296..a295dce 100644 --- a/R/ER_var_f.R +++ b/R/ER_var_f.R @@ -25,12 +25,14 @@ ER_var_f <- function(erdat, innes, binomial_var=FALSE){ # sort the data if we use O2/O3 estimators if(any(erdat$er_est %in% c("O2", "O3"))){ - warning("Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful") + warning("Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful", immediate. = TRUE, call. = FALSE) if(!is.numeric(erdat$Sample.Label)){ - warning("Additionally, Sample.Label is not numeric, this may cause additional issues") + warning("Additionally, Sample.Label is not numeric, this may cause additional issues", immediate. = TRUE, call. = FALSE) } + # Add in a column indexing the original order + erdat$.orig.order <- 1:nrow(erdat) erdat <- erdat %>% - mutate(.originalorder = 1:nrow(erdat)) %>% + # Sort by sample label arrange(.data$Sample.Label) } diff --git a/R/dht2.R b/R/dht2.R index 3442fea..c88c92f 100644 --- a/R/dht2.R +++ b/R/dht2.R @@ -649,7 +649,9 @@ dht2 <- function(ddf, observations=NULL, transects=NULL, geo_strat=NULL, var(.data$size, na.rm=TRUE)/ sum(!is.na(.data$size)), 0), - group_mean = mean(.data$size, na.rm=TRUE)) %>% + group_mean = if_else(.data$n_observations>0, + mean(.data$size, na.rm=TRUE), + 1)) %>% # report n as n_observations mutate(n = .data$n_observations) # if we didn't have any areas, then set to 1 and estimate density @@ -745,11 +747,13 @@ if(mult){ mutate(ER = .data$n/.data$Effort) } res <- res %>% - mutate(ER_CV = sqrt(.data$ER_var)/.data$ER, + mutate(ER_CV = if_else(.data$ER == 0, 0, + sqrt(.data$ER_var)/.data$ER), ER_df = compute_df(.data$k, type=.data$er_est)) %>% # calculate stratum abundance estimate mutate(Abundance = (.data$Area/.data$Covered_area) * .data$Nc) %>% - mutate(df_CV = sqrt(.data$df_var)/.data$Abundance) %>% + mutate(df_CV = if_else(.data$Abundance == 0, 0, + sqrt(.data$df_var)/.data$Abundance)) %>% mutate(group_CV = if_else(.data$group_var==0, 0, sqrt(.data$group_var)/.data$group_mean)) @@ -1093,7 +1097,7 @@ if(mult){ # warn if we only had one transect in one or more strata if(any(res$k == 1)){ - warning("One or more strata have only one transect, cannot calculate empirical encounter rate variance") + warning("One or more strata have only one transect, cannot calculate empirical encounter rate variance", call. = FALSE) } # fix area == covered area for compatibility with mrds::dht diff --git a/R/varNhat.R b/R/varNhat.R index 9d2b45a..fb4a48f 100644 --- a/R/varNhat.R +++ b/R/varNhat.R @@ -1,6 +1,7 @@ # Calculate the variance contribution of the detection function # to abundance estimates -#' @importFrom dplyr reframe +#' @importFrom dplyr reframe group_by across all_of summarize pull +#' @importFrom stats aggregate varNhat <- function(data, model){ # format the data @@ -22,6 +23,15 @@ varNhat <- function(data, model){ reframe(Covered_area = sum(.data$Covered_area), Area = .data$Area) %>% distinct() + + # Add column giving number of obs per stratum + n_obs <- aggregate(!is.na(data$object), + by = data[strat_vars], + FUN = sum) + names(n_obs)[length(n_obs)] <- "n_obs" + + # Merge into grp_dat + grp_dat <- left_join(grp_dat, n_obs, by = strat_vars) # join the per-stratum data onto the frame data$Covered_area <- NULL @@ -29,7 +39,7 @@ varNhat <- function(data, model){ data <- left_join(data, grp_dat, by=strat_vars) # function to calculate Nhat - dhtd <- function(par, data, model){ + dhtd <- function(par, data, model, grp_dat){ # set par model$par <- par @@ -42,6 +52,10 @@ varNhat <- function(data, model){ reframe(N = (.data$Area/.data$Covered_area) * sum(.data$Nc, na.rm=TRUE)) %>% distinct() + + # Include an NA for strata with no obs and then convert to 0. + res <- left_join(grp_dat, res, by = colnames(grp_dat)[1]) + res$N[is.na(res$N)] <- 0 res$N } @@ -54,7 +68,7 @@ varNhat <- function(data, model){ # calculate variance dm <- DeltaMethod(model$par, dhtd, vcov, sqrt(.Machine$double.eps), - model=model, data=data) + model=model, data=data, grp_dat=grp_dat) attr(dm, "vardat_str") <- vardat_str # fiddle with variance data.frame diff --git a/R/variance_contributions.R b/R/variance_contributions.R index 7dfcaac..c9deba1 100644 --- a/R/variance_contributions.R +++ b/R/variance_contributions.R @@ -18,8 +18,9 @@ variance_contributions <- function(res){ # get the total CV_cont$Total <- sqrt(rowSums(CV_cont^2)) - # make that into percentages - CV_cont <- (CV_cont^2/CV_cont[["Total"]]^2)*100 + # make that into percentages (for non zero rows) + index <- which(CV_cont[["Total"]]!=0) + CV_cont[index,] <- (CV_cont[index,]^2/CV_cont[index,"Total"]^2)*100 CV_cont[["Total"]] <- NULL # zero ER contributions if only one sample diff --git a/tests/testthat/test_dht2.R b/tests/testthat/test_dht2.R index bf478b1..12b9757 100644 --- a/tests/testthat/test_dht2.R +++ b/tests/testthat/test_dht2.R @@ -260,3 +260,63 @@ test_that("can accept data.frame and scalar sampling fractions", { minke_dht2 }) + + +test_that("Deal with strata with no detections", { + + # Add a third Region with two samples but no observations. + # Adding two samples instead of just one to prevent a warning unstable ER var + new.row1 <- minke[1, ] %>% + mutate( + Region.Label = "East", + Area = 10000, + Sample.Label = max(minke$Sample.Label) + 1, + Effort = 10, + distance = NA, + object = NA + ) + + new.row2 <- minke[1, ] %>% + mutate( + Region.Label = "East", + Area = 10000, + Sample.Label = max(minke$Sample.Label) + 2, + Effort = 10, + distance = NA, + object = NA + ) + + minke <- rbind(minke, new.row1, new.row2) + # Fit a detection function + detfn <- ds(data=minke, truncation=1.5, key="hr", adjustment=NULL) + + result <- dht2(ddf = detfn, + flatfile = minke, + strat_formula = ~ Region.Label, + stratification = "geographical") + + expect_true(is(result, "dht_result")) + + # Check values in output + new.row3 <- minke[1, ] %>% + mutate( + Region.Label = "East", + Area = 10000, + Sample.Label = max(minke$Sample.Label) + 3, + Effort = 10, + distance = NA, + object = NA + ) + + minke2 <- rbind(minke, new.row1, new.row2, new.row3) + detfn <- ds(data=minke2, truncation=1.5, key="hr", adjustment=NULL, optimizer = "R") + dht2.results <- dht2(ddf = detfn, + flatfile = minke2, + strat_formula = ~ Region.Label, + stratification = "geographical") + + expect_equal(dht2.results$Abundance[1], 0) + expect_equal(dht2.results$Abundance_se[4], 4834.4118, , tolerance = 0.0001) + expect_equal(dht2.results$df[4],15.25496, tolerance = 0.00001) + +}) diff --git a/tests/testthat/test_systematic_var2.R b/tests/testthat/test_systematic_var2.R index ef76147..78a13e7 100644 --- a/tests/testthat/test_systematic_var2.R +++ b/tests/testthat/test_systematic_var2.R @@ -56,8 +56,9 @@ cu <- 1/(cu[3]/(cu[1]*cu[2])) test_that("no strat",{ - df <- ds(Systematic_variance_2, convert_units=cu) - + # No adjustments to avoid model selection which generates a monotonicity warning. (The HN model with no adjustments is selected based on minimum AIC any way.) + df <- ds(Systematic_variance_2, nadj = 0, + convert_units=cu, truncation = 10) # fiddle with region labels obs.table$Region.Label <- NULL diff --git a/tests/testthat/test_variance.R b/tests/testthat/test_variance.R index fa91c5c..bc895cd 100644 --- a/tests/testthat/test_variance.R +++ b/tests/testthat/test_variance.R @@ -10,8 +10,10 @@ test_that("variance 2",{ convert.units <- Systematic_variance_2_units cu <- convert.units[, 3] cu <- 1/(cu[3]/(cu[1]*cu[2])) - # first fit a model - sysvar_df <- ds(Systematic_variance_2, adjustment="cos", convert_units=cu) + # first fit a model - to avoid warnings use no adjustments (HN model was selected any way based on minimum AIC) + sysvar_df <- ds(Systematic_variance_2, + nadj = 0, + convert_units=cu) systematic_var2 <- Systematic_variance_2 unflat <- unflatten(systematic_var2) @@ -30,9 +32,9 @@ test_that("variance 2",{ unflat$obs.table$Sample.Label <- as.numeric(unflat$obs.table$Sample.Label) unflat$data$Sample.Label <- as.numeric(unflat$data$Sample.Label) - # fit the detection function - sysvar_df <- ds(unflat$data, adjustment="cos", convert_units=cu) - + # fit the detection function (again avoid adjustment terms to supress warning - no adjustments is selected by AIC any way) + sysvar_df <- ds(unflat$data, nadj = 0, convert_units=cu) + ## no stratification #Nhat_nostrat_eff <- dht2(sysvar_df, transects=unflat$sample.table, # geo_strat=unflat$region.table, observations=unflat$obs.table,