diff --git a/DESCRIPTION b/DESCRIPTION index 1f8fb1b..c0d583e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling Horvitz-Thompson-like estimator) if survey area information is provided. See Miller et al. (2019) for more information on methods and for example analyses. -Version: 2.0.0.9012 +Version: 2.0.0.9013 URL: https://github.com/DistanceDevelopment/Distance/ BugReports: https://github.com/DistanceDevelopment/Distance/issues Language: en-GB diff --git a/NEWS.md b/NEWS.md index 2436bf5..29f9230 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,7 @@ Enhancements * Warnings and documentation clarification regarding ER variance estimation when there is only a single transect. (Issue #192 and mrds Issue #115) +* Code is more robust to the different ways of defining a binned analysis and documentation has been clarified. (Issue #144) # Distance 2.0.0 diff --git a/R/checkdata.R b/R/checkdata.R index e123225..1196c79 100644 --- a/R/checkdata.R +++ b/R/checkdata.R @@ -25,10 +25,14 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL, } } - - # Make sure that the user has only specified either distance or distend / distbegin (need to do this check first as then Distance creates the distance column) + # Make sure if the user is using distbegin and distend they have supplied both. + if(any(!is.null(data$distbegin), !is.null(data$distend)) && !all(!is.null(data$distbegin), !is.null(data$distend))){ + stop("You have provided either a 'distbegin' or 'distend' column in your dataset but not both. Please provide both or remove these and provide a distance column and use the cutpoint argument.", call. = FALSE) + } + # Make sure that the user has only specified either distance or distend / distbegin if(!is.null(data$distance) && !is.null(data$distbegin) && !is.null(data$distend)){ - stop("You can only specify either a 'distance' column or 'distbegin' and 'distend' columns in your data.", call. = FALSE) + warning("You have supplied both a 'distance' column and 'distbegin' and 'distend' columns in your data, the distance column will be removed and not used in these analyses.", call. = FALSE, immediate. = TRUE) + data$distance <- NULL } # make sure that the data are in the right format first @@ -36,8 +40,12 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL, if(is.null(data$distend) & is.null(data$distbegin)){ stop("Your data must (at least) have a column called 'distance' or 'distbegin' and 'distend'!", call. = FALSE) }else{ - data$distance <- (data$distend + data$distbegin)/2 + #data$distance <- (data$distend + data$distbegin)/2 } + # distance column name to avoid creating a distance column + distance.col <- "distbegin" + }else{ + distance.col <- "distance" } # make sure that we have a data.frame() @@ -58,7 +66,7 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL, # check that the object IDs are unique # first need to remove the rows with NA distances used for padding # below - data_no_NA <- data[!is.na(data$distance), ] + data_no_NA <- data[!is.na(data[[distance.col]]), ] if(length(data_no_NA$object) != length(unique(data_no_NA$object))){ stop("Not all object IDs are unique, check data.") } @@ -130,7 +138,7 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL, rownames(data) <- 1:nrow(data) # remove the NA rows - data <- data[!is.na(data$distance),] + data <- data[!is.na(data[[distance.col]]),] }else if(all(tolower(c("Region.Label", "Sample.Label", "Effort", "object")) %in% tolower(colnames(data)))){ diff --git a/R/ds.R b/R/ds.R index 3306ce4..346a8bc 100644 --- a/R/ds.R +++ b/R/ds.R @@ -58,11 +58,10 @@ #' is the recommended approach for all standard binned analyses. #' Ensure that the first element is 0 (or the left truncation #' distance) and the last is the distance to the end of the furthest bin. -#' (Default `NULL`, no binning.) If you have provided `distbegin` and `distend` -#' columns in your data (note this should only be used when your cutpoints -#' are not constant across all your data, e.g. planes flying at differing -#' altitudes) then do not specify the cutpoints argument as this will cause -#' the `distbegin` and `distend` columns in your data to be overwritten. +#' (Default `NULL`, no binning.) Provide `distbegin` and `distend` columns +#' in your data only when your cutpoints are not constant across all your +#' data, e.g. planes flying at differing altitudes then do not specify the +#' cutpoints argument. #' @param monotonicity should the detection function be constrained for #' monotonicity weakly (`"weak"`), strictly (`"strict"`) or not at all #' (`"none"` or `FALSE`). See Monotonicity, below. (Default `"strict"`). By @@ -321,11 +320,11 @@ #' AIC(ds.model.cos2) #' AIC(ds.model.cos23) #'} -ds <- function(data, truncation=ifelse(is.null(cutpoints), - ifelse(is.null(data$distend), - max(data$distance), - max(data$distend)), - max(cutpoints)), +ds <- function(data, truncation=ifelse(is.null(data$distend), + ifelse(is.null(cutpoints), + max(data$distance), + max(cutpoints)), + max(data$distend)), transect="line", formula=~1, key=c("hn", "hr", "unif"), adjustment=c("cos", "herm", "poly"), @@ -383,6 +382,12 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints), sample_table <- data$sample.table obs_table <- data$obs.table data <- data$data + + # remove distbegin and distend if they already exist + if(any(names(data)=="distend") && any(names(data)=="distbegin") && !is.null(cutpoints)){ + warning("Data has distend and distbegin columns, cutpoints argument will be ignored.", immediate. = TRUE, call. = FALSE) + cutpoints <- NULL + } # setup left and right truncation (width) truncation <- get_truncation(truncation, cutpoints, data) @@ -395,7 +400,6 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints), message("Columns \"distbegin\" and \"distend\" in data: performing a binned analysis...") binned <- TRUE breaks <- sort(unique(c(data$distend, data$distbegin))) - data$distance <- (data$distend + data$distbegin)/2 }else{ binned <- FALSE breaks <- NULL @@ -409,13 +413,6 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints), }else if(cutpoints[1]!=0){ stop("The first cutpoint must be 0 or the left truncation distance!") } - - # remove distbegin and distend if they already exist - if(any(names(data)=="distend") & any(names(data)=="distbegin")){ - message("data already has distend and distbegin columns, removing them and appling binning as specified by cutpoints.") - data$distend <- NULL - data$distbegin <- NULL - } # send off to create_bins to make the correct columns in data data <- create_bins(data, cutpoints) binned <- TRUE @@ -784,7 +781,12 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints), message("No survey area information supplied, only estimating detection function.\n") } } - + + # Now add distance column after all the fitting if using distbegin/distend + if(!is.null(data$distbegin)){ + model$data$distance <- (model$data$distend + model$data$distbegin)/2 + } + # construct return object ret.obj <- list(ddf = model, dht = dht.res, diff --git a/man/ds.Rd b/man/ds.Rd index 7481a77..8a29ccc 100644 --- a/man/ds.Rd +++ b/man/ds.Rd @@ -7,8 +7,8 @@ data} \usage{ ds( data, - truncation = ifelse(is.null(cutpoints), ifelse(is.null(data$distend), - max(data$distance), max(data$distend)), max(cutpoints)), + truncation = ifelse(is.null(data$distend), ifelse(is.null(cutpoints), + max(data$distance), max(cutpoints)), max(data$distend)), transect = "line", formula = ~1, key = c("hn", "hr", "unif"), @@ -107,11 +107,10 @@ the bins. Supplying a distance column in your data and specifying cutpoints is the recommended approach for all standard binned analyses. Ensure that the first element is 0 (or the left truncation distance) and the last is the distance to the end of the furthest bin. -(Default \code{NULL}, no binning.) If you have provided \code{distbegin} and \code{distend} -columns in your data (note this should only be used when your cutpoints -are not constant across all your data, e.g. planes flying at differing -altitudes) then do not specify the cutpoints argument as this will cause -the \code{distbegin} and \code{distend} columns in your data to be overwritten.} +(Default \code{NULL}, no binning.) Provide \code{distbegin} and \code{distend} columns +in your data only when your cutpoints are not constant across all your +data, e.g. planes flying at differing altitudes then do not specify the +cutpoints argument.} \item{dht_group}{should density abundance estimates consider all groups to be size 1 (abundance of groups) \code{dht_group=TRUE} or should the abundance of diff --git a/tests/testthat/test_binned.R b/tests/testthat/test_binned.R new file mode 100644 index 0000000..757378e --- /dev/null +++ b/tests/testthat/test_binned.R @@ -0,0 +1,67 @@ +par.tol <- 1e-5 +N.tol <- 1e-3 +lnl.tol <- 1e-4 + +# Create test dataset +bin.data <- data.frame(Region.Label = "Region", + Area = 100, + Sample.Label = c(rep("A",10), rep("B",10), rep("C",13)), + Effort = 10, + distance = c(rep(5,11), rep(15,10), rep(35,9), rep(75,3))) + + +context("Binned Analyses") + +test_that("binned analysis arguments are interpreted correctly",{ + skip_on_cran() + + # Analysis using distance and cutpoints + bin.fit <- ds(bin.data, + truncation = 100, + formula = ~1, + key = "hr", + nadj = 0, + cutpoints = c(0,10,20,50,100)) + + #add distbegin and distend to the data + BEbin.data <- create_bins(bin.data, cutpoints = c(0,10,20,50,100)) + BEbin.data$distance <- NULL + + # Should produce the same analysis + bin.fit2 <- ds(BEbin.data, + truncation = 100, + formula = ~1, + key = "hr", + nadj = 0) + + library(testthat) + expect_equal(bin.fit$ddf$par, bin.fit2$ddf$par) + expect_equal(bin.fit$ddf$meta.data, bin.fit2$ddf$meta.data) + + # Test warning if user provides a distance column as well as distbegin and distend + both.bin.data <- create_bins(bin.data, cutpoints = c(0,10,20,50,100)) + expect_warning(bin.fit3 <- ds(both.bin.data, + truncation = 100, + formula = ~1, + key = "hr", + nadj = 0), + "You have supplied both a 'distance' column and 'distbegin' and 'distend' columns in your data, the distance column will be removed and not used in these analyses.") + + expect_equal(bin.fit$ddf$meta.data, bin.fit3$ddf$meta.data) + + # Check there is an error if both distbegin and distend are not supplied together + begin.bin.data <- create_bins(bin.data, cutpoints = c(0,10,20,50,100)) + begin.bin.data$distend <- NULL + expect_error(ds(begin.bin.data, truncation = 100, + formula = ~1, key = "hr", nadj = 0), + "You have provided either a 'distbegin' or 'distend' column in your dataset but not both. Please provide both or remove these and provide a distance column and use the cutpoint argument.") + + # Check that is it not using cutpoints when distbegin and distend are in the data + expect_warning(bin.fit4 <- ds(BEbin.data, truncation = 100, + formula = ~1, key = "hr", nadj = 0, + cutpoints = c(0,20,50,100)), + "Data has distend and distbegin columns, cutpoints argument will be ignored.") + + expect_equal(bin.fit$ddf$meta.data, bin.fit4$ddf$meta.data) + +})