DistanceDevelopment · LHMarshall · Jul 1, 2025 · Jul 1, 2025 · Jul 1, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling
     Horvitz-Thompson-like estimator) if survey area information is provided. See
     Miller et al. (2019) <doi:10.18637/jss.v089.i01> for more information on
     methods and <https://distancesampling.org/resources/vignettes.html> for example analyses.
-Version: 2.0.0.9012
+Version: 2.0.0.9013
 URL: https://github.com/DistanceDevelopment/Distance/
 BugReports: https://github.com/DistanceDevelopment/Distance/issues
 Language: en-GB

diff --git a/NEWS.md b/NEWS.md
@@ -11,6 +11,7 @@
 Enhancements
 
 * Warnings and documentation clarification regarding ER variance estimation when there is only a single transect. (Issue #192 and mrds Issue #115)
+* Code is more robust to the different ways of defining a binned analysis and documentation has been clarified. (Issue #144)
 
 # Distance 2.0.0
 

diff --git a/R/checkdata.R b/R/checkdata.R
@@ -25,19 +25,27 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL,
     }
   }
 
-
-  # Make sure that the user has only specified either distance or distend / distbegin (need to do this check first as then Distance creates the distance column)
+  # Make sure if the user is using distbegin and distend they have supplied both.
+  if(any(!is.null(data$distbegin), !is.null(data$distend)) && !all(!is.null(data$distbegin), !is.null(data$distend))){
+    stop("You have provided either a 'distbegin' or 'distend' column in your dataset but not both. Please provide both or remove these and provide a distance column and use the cutpoint argument.", call. = FALSE)
+  }
+  # Make sure that the user has only specified either distance or distend / distbegin
   if(!is.null(data$distance) && !is.null(data$distbegin) && !is.null(data$distend)){
-    stop("You can only specify either a 'distance' column or 'distbegin' and 'distend' columns in your data.", call. = FALSE)
+    warning("You have supplied both a 'distance' column and 'distbegin' and 'distend' columns in your data, the distance column will be removed and not used in these analyses.", call. = FALSE, immediate. = TRUE)
+    data$distance <- NULL
   }
 
   # make sure that the data are in the right format first
   if(is.null(data$distance)){
     if(is.null(data$distend) & is.null(data$distbegin)){
       stop("Your data must (at least) have a column called 'distance' or 'distbegin' and 'distend'!", call. = FALSE)
     }else{
-      data$distance <- (data$distend + data$distbegin)/2
+      #data$distance <- (data$distend + data$distbegin)/2
     }
+    # distance column name to avoid creating a distance column
+    distance.col <- "distbegin"
+  }else{
+    distance.col <- "distance"
   }
 
   # make sure that we have a data.frame()
@@ -58,7 +66,7 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL,
     # check that the object IDs are unique
     # first need to remove the rows with NA distances used for padding
     # below
-    data_no_NA <- data[!is.na(data$distance), ]
+    data_no_NA <- data[!is.na(data[[distance.col]]), ]
     if(length(data_no_NA$object) != length(unique(data_no_NA$object))){
       stop("Not all object IDs are unique, check data.")
     }
@@ -130,7 +138,7 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL,
       rownames(data) <- 1:nrow(data)
 
       # remove the NA rows
-      data <- data[!is.na(data$distance),]
+      data <- data[!is.na(data[[distance.col]]),]
     }else if(all(tolower(c("Region.Label", "Sample.Label",
                            "Effort", "object")) %in%
                  tolower(colnames(data)))){

diff --git a/R/ds.R b/R/ds.R
@@ -58,11 +58,10 @@
 #' is the recommended approach for all standard binned analyses.
 #' Ensure that the first element is 0 (or the left truncation
 #' distance) and the last is the distance to the end of the furthest bin.
-#' (Default `NULL`, no binning.) If you have provided `distbegin` and `distend`
-#' columns in your data (note this should only be used when your cutpoints 
-#' are not constant across all your data, e.g. planes flying at differing 
-#' altitudes) then do not specify the cutpoints argument as this will cause
-#' the `distbegin` and `distend` columns in your data to be overwritten. 
+#' (Default `NULL`, no binning.) Provide `distbegin` and `distend` columns 
+#' in your data only when your cutpoints are not constant across all your 
+#' data, e.g. planes flying at differing altitudes then do not specify the
+#' cutpoints argument. 
 #' @param monotonicity should the detection function be constrained for
 #' monotonicity weakly (`"weak"`), strictly (`"strict"`) or not at all
 #' (`"none"` or `FALSE`). See Monotonicity, below. (Default `"strict"`). By
@@ -321,11 +320,11 @@
 #' AIC(ds.model.cos2)
 #' AIC(ds.model.cos23)
 #'}
-ds <- function(data, truncation=ifelse(is.null(cutpoints),
-                                     ifelse(is.null(data$distend),
-                                            max(data$distance),
-                                            max(data$distend)),
-                                     max(cutpoints)),
+ds <- function(data, truncation=ifelse(is.null(data$distend),
+                                       ifelse(is.null(cutpoints),
+                                              max(data$distance),
+                                              max(cutpoints)),
+                                       max(data$distend)),
              transect="line",
              formula=~1, key=c("hn", "hr", "unif"),
              adjustment=c("cos", "herm", "poly"),
@@ -383,6 +382,12 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints),
   sample_table <- data$sample.table
   obs_table    <- data$obs.table
   data         <- data$data
+
+  # remove distbegin and distend if they already exist
+  if(any(names(data)=="distend") && any(names(data)=="distbegin") && !is.null(cutpoints)){
+    warning("Data has distend and distbegin columns, cutpoints argument will be ignored.", immediate. = TRUE, call. = FALSE)
+    cutpoints <- NULL
+  }
 
   # setup left and right truncation (width)
   truncation <- get_truncation(truncation, cutpoints, data)
@@ -395,7 +400,6 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints),
       message("Columns \"distbegin\" and \"distend\" in data: performing a binned analysis...")
       binned <- TRUE
       breaks <- sort(unique(c(data$distend, data$distbegin)))
-      data$distance <- (data$distend + data$distbegin)/2
     }else{
       binned <- FALSE
       breaks <- NULL
@@ -409,13 +413,6 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints),
     }else if(cutpoints[1]!=0){
       stop("The first cutpoint must be 0 or the left truncation distance!")
     }
-
-    # remove distbegin and distend if they already exist
-    if(any(names(data)=="distend") & any(names(data)=="distbegin")){
-      message("data already has distend and distbegin columns, removing them and appling binning as specified by cutpoints.")
-      data$distend <- NULL
-      data$distbegin <- NULL
-    }
     # send off to create_bins to make the correct columns in data
     data <- create_bins(data, cutpoints)
     binned <- TRUE
@@ -784,7 +781,12 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints),
       message("No survey area information supplied, only estimating detection function.\n")
     }
   }
-
+
+  # Now add distance column after all the fitting if using distbegin/distend
+  if(!is.null(data$distbegin)){
+    model$data$distance <- (model$data$distend + model$data$distbegin)/2
+  }
+
   # construct return object
   ret.obj <- list(ddf  = model,
                   dht  = dht.res,

diff --git a/man/ds.Rd b/man/ds.Rd
diff --git a/tests/testthat/test_binned.R b/tests/testthat/test_binned.R
@@ -0,0 +1,67 @@
+par.tol <- 1e-5
+N.tol <- 1e-3
+lnl.tol <- 1e-4
+
+# Create test dataset
+bin.data <- data.frame(Region.Label = "Region",
+                       Area = 100,
+                       Sample.Label = c(rep("A",10), rep("B",10), rep("C",13)),
+                       Effort = 10,
+                       distance = c(rep(5,11), rep(15,10), rep(35,9), rep(75,3)))
+
+
+context("Binned Analyses")
+
+test_that("binned analysis arguments are interpreted correctly",{
+  skip_on_cran()
+
+  # Analysis using distance and cutpoints
+  bin.fit <- ds(bin.data,
+                truncation = 100,
+                formula = ~1,
+                key = "hr",
+                nadj = 0,
+                cutpoints = c(0,10,20,50,100))
+
+  #add distbegin and distend to the data
+  BEbin.data <- create_bins(bin.data, cutpoints = c(0,10,20,50,100))
+  BEbin.data$distance <- NULL
+
+  # Should produce the same analysis
+  bin.fit2 <- ds(BEbin.data,
+                 truncation = 100,
+                 formula = ~1,
+                 key = "hr",
+                 nadj = 0)
+
+  library(testthat)
+  expect_equal(bin.fit$ddf$par, bin.fit2$ddf$par)
+  expect_equal(bin.fit$ddf$meta.data, bin.fit2$ddf$meta.data)
+
+  # Test warning if user provides a distance column as well as distbegin and distend
+  both.bin.data <- create_bins(bin.data, cutpoints = c(0,10,20,50,100))
+  expect_warning(bin.fit3 <- ds(both.bin.data,
+                                truncation = 100,
+                                formula = ~1,
+                                key = "hr",
+                                nadj = 0),
+                 "You have supplied both a 'distance' column and 'distbegin' and 'distend' columns in your data, the distance column will be removed and not used in these analyses.")
+
+  expect_equal(bin.fit$ddf$meta.data, bin.fit3$ddf$meta.data)
+
+  # Check there is an error if both distbegin and distend are not supplied together
+  begin.bin.data <- create_bins(bin.data, cutpoints = c(0,10,20,50,100))
+  begin.bin.data$distend <- NULL
+  expect_error(ds(begin.bin.data, truncation = 100,
+                  formula = ~1, key = "hr", nadj = 0),
+               "You have provided either a 'distbegin' or 'distend' column in your dataset but not both. Please provide both or remove these and provide a distance column and use the cutpoint argument.")
+
+  # Check that is it not using cutpoints when distbegin and distend are in the data
+  expect_warning(bin.fit4 <- ds(BEbin.data, truncation = 100,
+                                formula = ~1, key = "hr", nadj = 0, 
+                                cutpoints = c(0,20,50,100)),
+                 "Data has distend and distbegin columns, cutpoints argument will be ignored.")
+
+  expect_equal(bin.fit$ddf$meta.data, bin.fit4$ddf$meta.data)
+
+})