From 1e15e3646f7a825ab7dce4c9145630e69b6ab0a8 Mon Sep 17 00:00:00 2001
From: Laura Marshall <L_H_Marshall@yahoo.co.uk>
Date: Mon, 24 Feb 2025 12:18:58 +0000
Subject: [PATCH 1/9] Fix for no detections in one or more strata

---
 DESCRIPTION                |  2 +-
 NAMESPACE                  |  2 ++
 NEWS.md                    |  1 +
 R/varNhat.R                | 18 ++++++++++++++----
 tests/testthat/test_dht2.R | 37 +++++++++++++++++++++++++++++++++++++
 5 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 73145e5..98aeef6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling
     Horvitz-Thompson-like estimator) if survey area information is provided. See
     Miller et al. (2019) <doi:10.18637/jss.v089.i01> for more information on
     methods and <https://examples.distancesampling.org/> for example analyses.
-Version: 2.0.0.9001
+Version: 2.0.0.9002
 URL: https://github.com/DistanceDevelopment/Distance/
 BugReports: https://github.com/DistanceDevelopment/Distance/issues
 Language: en-GB
diff --git a/NAMESPACE b/NAMESPACE
index 3f6e9ca..c3e8894 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -48,8 +48,10 @@ importFrom(dplyr,inner_join)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,mutate_if)
+importFrom(dplyr,pull)
 importFrom(dplyr,reframe)
 importFrom(dplyr,select)
+importFrom(dplyr,summarize)
 importFrom(dplyr,summarize_all)
 importFrom(dplyr,ungroup)
 importFrom(dplyr,vars)
diff --git a/NEWS.md b/NEWS.md
index fcd15ce..5540091 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,7 @@
 
 * Fixes issue with print dht2 when multipliers are a data.frame (Issue #179)
 * Fixes bug when including a uniform with no adjustment terms in the summarize_ds_models function (Issue #180)
+* dht2 can now deal with strata where there are no observations (Issue #194)
 
 # Distance 2.0.0
 
diff --git a/R/varNhat.R b/R/varNhat.R
index 9d2b45a..780ad15 100644
--- a/R/varNhat.R
+++ b/R/varNhat.R
@@ -1,6 +1,6 @@
 # Calculate the variance contribution of the detection function
 #  to abundance estimates
-#' @importFrom dplyr reframe
+#' @importFrom dplyr reframe group_by across all_of summarize pull
 varNhat <- function(data, model){
 
   # format the data
@@ -21,7 +21,13 @@ varNhat <- function(data, model){
     distinct() %>%
     reframe(Covered_area = sum(.data$Covered_area),
               Area         = .data$Area) %>%
-    distinct()
+    distinct() %>%
+  
+  # Add column giving number of obs per stratum
+  mutate(n_obs = data %>%
+           dplyr::group_by(across(all_of(strat_vars))) %>%
+           dplyr::summarize(n_obs = sum(!is.na(object))) %>%
+           dplyr::pull(n_obs))
 
   # join the per-stratum data onto the frame
   data$Covered_area <- NULL
@@ -29,7 +35,7 @@ varNhat <- function(data, model){
   data <- left_join(data, grp_dat, by=strat_vars)
 
   # function to calculate Nhat
-  dhtd <- function(par, data, model){
+  dhtd <- function(par, data, model, grp_dat){
     # set par
     model$par <- par
 
@@ -42,6 +48,10 @@ varNhat <- function(data, model){
       reframe(N = (.data$Area/.data$Covered_area) *
                      sum(.data$Nc, na.rm=TRUE)) %>%
       distinct()
+    
+    # Include an NA for strata with no obs and then convert to 0.
+    res <- left_join(grp_dat, res, by = colnames(grp_dat)[1])
+    res$N[is.na(res$N)] <- 0
 
     res$N
   }
@@ -54,7 +64,7 @@ varNhat <- function(data, model){
 
   # calculate variance
   dm <- DeltaMethod(model$par, dhtd, vcov, sqrt(.Machine$double.eps),
-                    model=model, data=data)
+                    model=model, data=data, grp_dat=grp_dat)
   attr(dm, "vardat_str") <- vardat_str
 
   # fiddle with variance data.frame
diff --git a/tests/testthat/test_dht2.R b/tests/testthat/test_dht2.R
index bf478b1..d258d45 100644
--- a/tests/testthat/test_dht2.R
+++ b/tests/testthat/test_dht2.R
@@ -260,3 +260,40 @@ test_that("can accept data.frame and scalar sampling fractions", {
   minke_dht2
   
 })
+
+
+test_that("Deal with strata with no detections", {
+  
+  # Add a third Region with two samples but no observations.
+  # Adding two samples instead of just one to prevent a warning unstable ER var
+  new.row1 <- minke[1, ] %>%
+    mutate(
+      Region.Label = "East",
+      Area = 10000,
+      Sample.Label = max(minke$Sample.Label) + 1,
+      Effort = 10,
+      distance = NA,
+      object = NA
+    )
+  
+  new.row2 <- minke[1, ] %>%
+    mutate(
+      Region.Label = "East",
+      Area = 10000,
+      Sample.Label = max(minke$Sample.Label) + 2,
+      Effort = 10,
+      distance = NA,
+      object = NA
+    )
+  
+  minke <- rbind(minke, new.row1, new.row2)
+  # Fit a detection function
+  detfn <- ds(data=minke, truncation=1.5, key="hr", adjustment=NULL)
+  
+  result <- dht2(ddf = detfn,
+                 flatfile = minke,
+                 strat_formula =  ~ Region.Label,
+                 stratification = "geographical")
+  
+  expect_true(is(result, "dht_result"))
+})

From bc110600b70ca24bbf76395babdba0d7381a4eb3 Mon Sep 17 00:00:00 2001
From: Laura Marshall <L_H_Marshall@yahoo.co.uk>
Date: Wed, 19 Mar 2025 16:34:39 +0000
Subject: [PATCH 2/9] Activate missing warnings immediately

---
 R/ER_var_f.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/ER_var_f.R b/R/ER_var_f.R
index 6185296..3bfb975 100644
--- a/R/ER_var_f.R
+++ b/R/ER_var_f.R
@@ -25,9 +25,9 @@ ER_var_f <- function(erdat, innes, binomial_var=FALSE){
 
     # sort the data if we use O2/O3 estimators
     if(any(erdat$er_est %in% c("O2", "O3"))){
-      warning("Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful")
+      warning("Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful", immediate. = TRUE, call. = FALSE)
       if(!is.numeric(erdat$Sample.Label)){
-        warning("Additionally, Sample.Label is not numeric, this may cause additional issues")
+        warning("Additionally, Sample.Label is not numeric, this may cause additional issues", immediate. = TRUE, call. = FALSE)
       }
       erdat <- erdat %>%
         mutate(.originalorder = 1:nrow(erdat)) %>%

From b757d43cd350298dd214be521c2e5c57a9713a13 Mon Sep 17 00:00:00 2001
From: Laura Marshall <L_H_Marshall@yahoo.co.uk>
Date: Wed, 19 Mar 2025 16:36:13 +0000
Subject: [PATCH 3/9] Runs for O2 ER var now

Reference issue #174
---
 R/ER_var_f.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/ER_var_f.R b/R/ER_var_f.R
index 3bfb975..a295dce 100644
--- a/R/ER_var_f.R
+++ b/R/ER_var_f.R
@@ -29,8 +29,10 @@ ER_var_f <- function(erdat, innes, binomial_var=FALSE){
       if(!is.numeric(erdat$Sample.Label)){
         warning("Additionally, Sample.Label is not numeric, this may cause additional issues", immediate. = TRUE, call. = FALSE)
       }
+      # Add in a column indexing the original order
+      erdat$.orig.order <- 1:nrow(erdat)
       erdat <- erdat %>%
-        mutate(.originalorder = 1:nrow(erdat)) %>%
+        # Sort by sample label
         arrange(.data$Sample.Label)
     }
 

From a7227e22c070c7c9039752b94354ae07f2565300 Mon Sep 17 00:00:00 2001
From: Laura Marshall <L_H_Marshall@yahoo.co.uk>
Date: Tue, 13 May 2025 17:09:57 +0100
Subject: [PATCH 4/9] make no obs fix CRAN compliant

reference #194
---
 NAMESPACE   |  1 +
 R/dht2.R    |  2 +-
 R/varNhat.R | 14 +++++++++-----
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index af3bf36..b7b342d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -63,6 +63,7 @@ importFrom(mrds,ddf)
 importFrom(mrds,dht)
 importFrom(rlang,.data)
 importFrom(stats,AIC)
+importFrom(stats,aggregate)
 importFrom(stats,as.formula)
 importFrom(stats,logLik)
 importFrom(stats,median)
diff --git a/R/dht2.R b/R/dht2.R
index 3442fea..6e52bca 100644
--- a/R/dht2.R
+++ b/R/dht2.R
@@ -1093,7 +1093,7 @@ if(mult){
 
   # warn if we only had one transect in one or more strata
   if(any(res$k == 1)){
-    warning("One or more strata have only one transect, cannot calculate empirical encounter rate variance")
+    warning("One or more strata have only one transect, cannot calculate empirical encounter rate variance", call. = FALSE)
   }
 
   # fix area == covered area for compatibility with mrds::dht
diff --git a/R/varNhat.R b/R/varNhat.R
index 780ad15..fb4a48f 100644
--- a/R/varNhat.R
+++ b/R/varNhat.R
@@ -1,6 +1,7 @@
 # Calculate the variance contribution of the detection function
 #  to abundance estimates
 #' @importFrom dplyr reframe group_by across all_of summarize pull
+#' @importFrom stats aggregate
 varNhat <- function(data, model){
 
   # format the data
@@ -21,13 +22,16 @@ varNhat <- function(data, model){
     distinct() %>%
     reframe(Covered_area = sum(.data$Covered_area),
               Area         = .data$Area) %>%
-    distinct() %>%
+    distinct()
   
   # Add column giving number of obs per stratum
-  mutate(n_obs = data %>%
-           dplyr::group_by(across(all_of(strat_vars))) %>%
-           dplyr::summarize(n_obs = sum(!is.na(object))) %>%
-           dplyr::pull(n_obs))
+  n_obs <- aggregate(!is.na(data$object), 
+                     by = data[strat_vars], 
+                     FUN = sum)
+  names(n_obs)[length(n_obs)] <- "n_obs"
+  
+  # Merge into grp_dat
+  grp_dat <- left_join(grp_dat, n_obs, by = strat_vars)
 
   # join the per-stratum data onto the frame
   data$Covered_area <- NULL

From 10f06756bfec6143ac6a9c07baa556685abe6256 Mon Sep 17 00:00:00 2001
From: Laura Marshall <L_H_Marshall@yahoo.co.uk>
Date: Wed, 11 Jun 2025 15:21:10 +0100
Subject: [PATCH 5/9] Removes NaNs from results table when no obs in stratum

Reference #194
---
 R/dht2.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/dht2.R b/R/dht2.R
index 6e52bca..db94678 100644
--- a/R/dht2.R
+++ b/R/dht2.R
@@ -649,7 +649,10 @@ dht2 <- function(ddf, observations=NULL, transects=NULL, geo_strat=NULL,
                                       var(.data$size, na.rm=TRUE)/
                                        sum(!is.na(.data$size)),
                                       0),
-             group_mean     = mean(.data$size, na.rm=TRUE)) %>%
+             group_mean     = if_else(.data$n_observations>0,
+                                      mean(.data$size, na.rm=TRUE),
+                                      1)) %>%
+             #group_mean     = mean(.data$size, na.rm=TRUE)) %>%
       # report n as n_observations
       mutate(n = .data$n_observations)
   # if we didn't have any areas, then set to 1 and estimate density

From e08ae070d27311d6037cc1f374833e83b8dfbda7 Mon Sep 17 00:00:00 2001
From: Laura Marshall <L_H_Marshall@yahoo.co.uk>
Date: Wed, 11 Jun 2025 15:59:39 +0100
Subject: [PATCH 6/9] Fix NaNs in variance contributions table

Reference #194
---
 R/dht2.R                   | 7 ++++---
 R/variance_contributions.R | 5 +++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/R/dht2.R b/R/dht2.R
index db94678..c88c92f 100644
--- a/R/dht2.R
+++ b/R/dht2.R
@@ -652,7 +652,6 @@ dht2 <- function(ddf, observations=NULL, transects=NULL, geo_strat=NULL,
              group_mean     = if_else(.data$n_observations>0,
                                       mean(.data$size, na.rm=TRUE),
                                       1)) %>%
-             #group_mean     = mean(.data$size, na.rm=TRUE)) %>%
       # report n as n_observations
       mutate(n = .data$n_observations)
   # if we didn't have any areas, then set to 1 and estimate density
@@ -748,11 +747,13 @@ if(mult){
       mutate(ER = .data$n/.data$Effort)
   }
   res <- res %>%
-    mutate(ER_CV = sqrt(.data$ER_var)/.data$ER,
+    mutate(ER_CV = if_else(.data$ER == 0, 0, 
+                           sqrt(.data$ER_var)/.data$ER),
            ER_df = compute_df(.data$k, type=.data$er_est)) %>%
     # calculate stratum abundance estimate
     mutate(Abundance = (.data$Area/.data$Covered_area) * .data$Nc) %>%
-    mutate(df_CV = sqrt(.data$df_var)/.data$Abundance) %>%
+    mutate(df_CV = if_else(.data$Abundance == 0, 0,
+                           sqrt(.data$df_var)/.data$Abundance)) %>%
     mutate(group_CV = if_else(.data$group_var==0, 0,
                               sqrt(.data$group_var)/.data$group_mean))
 
diff --git a/R/variance_contributions.R b/R/variance_contributions.R
index 7dfcaac..c9deba1 100644
--- a/R/variance_contributions.R
+++ b/R/variance_contributions.R
@@ -18,8 +18,9 @@ variance_contributions <- function(res){
   # get the total
   CV_cont$Total <- sqrt(rowSums(CV_cont^2))
 
-  # make that into percentages
-  CV_cont <- (CV_cont^2/CV_cont[["Total"]]^2)*100
+  # make that into percentages (for non zero rows)
+  index <- which(CV_cont[["Total"]]!=0)
+  CV_cont[index,] <- (CV_cont[index,]^2/CV_cont[index,"Total"]^2)*100
   CV_cont[["Total"]] <- NULL
 
   # zero ER contributions if only one sample

From 7cd81c83139b07d216c3965327872aa9112c97c1 Mon Sep 17 00:00:00 2001
From: Laura Marshall <L_H_Marshall@yahoo.co.uk>
Date: Tue, 17 Jun 2025 14:17:21 +0100
Subject: [PATCH 7/9] Additional numeric tests for strata with no observations
 in dht2

Assuming geographic stratification reference #194
---
 tests/testthat/test_dht2.R | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/testthat/test_dht2.R b/tests/testthat/test_dht2.R
index d258d45..12b9757 100644
--- a/tests/testthat/test_dht2.R
+++ b/tests/testthat/test_dht2.R
@@ -296,4 +296,27 @@ test_that("Deal with strata with no detections", {
                  stratification = "geographical")
   
   expect_true(is(result, "dht_result"))
+  
+  # Check values in output
+  new.row3 <- minke[1, ] %>%
+    mutate(
+      Region.Label = "East",
+      Area = 10000,
+      Sample.Label = max(minke$Sample.Label) + 3,
+      Effort = 10,
+      distance = NA,
+      object = NA
+    )
+  
+  minke2 <- rbind(minke, new.row1, new.row2, new.row3)
+  detfn <- ds(data=minke2, truncation=1.5, key="hr", adjustment=NULL, optimizer = "R")
+  dht2.results <- dht2(ddf = detfn,
+                       flatfile = minke2,
+                       strat_formula =  ~ Region.Label,
+                       stratification = "geographical")
+  
+  expect_equal(dht2.results$Abundance[1], 0)
+  expect_equal(dht2.results$Abundance_se[4], 4834.4118, , tolerance = 0.0001)
+  expect_equal(dht2.results$df[4],15.25496, tolerance = 0.00001)
+  
 })

From 2036ca5560eb73bd1c6916e6643ebe4aefad5760 Mon Sep 17 00:00:00 2001
From: Laura Marshall <L_H_Marshall@yahoo.co.uk>
Date: Tue, 17 Jun 2025 14:20:00 +0100
Subject: [PATCH 8/9] bump package version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 816743e..1f8fb1b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling
     Horvitz-Thompson-like estimator) if survey area information is provided. See
     Miller et al. (2019) <doi:10.18637/jss.v089.i01> for more information on
     methods and <https://distancesampling.org/resources/vignettes.html> for example analyses.
-Version: 2.0.0.9011
+Version: 2.0.0.9012
 URL: https://github.com/DistanceDevelopment/Distance/
 BugReports: https://github.com/DistanceDevelopment/Distance/issues
 Language: en-GB

From 51403f76f66b104e59121eba4435db70628d27d4 Mon Sep 17 00:00:00 2001
From: Laura Marshall <L_H_Marshall@yahoo.co.uk>
Date: Wed, 18 Jun 2025 13:10:52 +0100
Subject: [PATCH 9/9] Modify tests to avoid warnings

---
 tests/testthat/test_systematic_var2.R |  5 +++--
 tests/testthat/test_variance.R        | 12 +++++++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tests/testthat/test_systematic_var2.R b/tests/testthat/test_systematic_var2.R
index ef76147..78a13e7 100644
--- a/tests/testthat/test_systematic_var2.R
+++ b/tests/testthat/test_systematic_var2.R
@@ -56,8 +56,9 @@ cu <- 1/(cu[3]/(cu[1]*cu[2]))
 
 test_that("no strat",{
 
-  df <- ds(Systematic_variance_2, convert_units=cu)
-
+  # No adjustments to avoid model selection which generates a monotonicity warning. (The HN model with no adjustments is selected based on minimum AIC any way.)
+  df <- ds(Systematic_variance_2, nadj = 0, 
+           convert_units=cu, truncation = 10)
 
   # fiddle with region labels
   obs.table$Region.Label <- NULL
diff --git a/tests/testthat/test_variance.R b/tests/testthat/test_variance.R
index fa91c5c..bc895cd 100644
--- a/tests/testthat/test_variance.R
+++ b/tests/testthat/test_variance.R
@@ -10,8 +10,10 @@ test_that("variance 2",{
   convert.units <- Systematic_variance_2_units
   cu <- convert.units[, 3]
   cu <- 1/(cu[3]/(cu[1]*cu[2]))
-  # first fit a model
-  sysvar_df <- ds(Systematic_variance_2, adjustment="cos", convert_units=cu)
+  # first fit a model - to avoid warnings use no adjustments (HN model was selected any way based on minimum AIC)
+  sysvar_df <- ds(Systematic_variance_2, 
+                  nadj = 0, 
+                  convert_units=cu)
 
   systematic_var2 <- Systematic_variance_2
   unflat <- unflatten(systematic_var2)
@@ -30,9 +32,9 @@ test_that("variance 2",{
   unflat$obs.table$Sample.Label <- as.numeric(unflat$obs.table$Sample.Label)
   unflat$data$Sample.Label <- as.numeric(unflat$data$Sample.Label)
 
-  # fit the detection function
-  sysvar_df <- ds(unflat$data, adjustment="cos", convert_units=cu)
-
+  # fit the detection function (again avoid adjustment terms to supress warning - no adjustments is selected by AIC any way)
+  sysvar_df <- ds(unflat$data, nadj = 0, convert_units=cu)
+  
   ## no stratification
   #Nhat_nostrat_eff <- dht2(sysvar_df, transects=unflat$sample.table,
   #                             geo_strat=unflat$region.table, observations=unflat$obs.table,