Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling
Horvitz-Thompson-like estimator) if survey area information is provided. See
Miller et al. (2019) <doi:10.18637/jss.v089.i01> for more information on
methods and <https://distancesampling.org/resources/vignettes.html> for example analyses.
Version: 2.0.0.9012
Version: 2.0.0.9013
URL: https://github.com/DistanceDevelopment/Distance/
BugReports: https://github.com/DistanceDevelopment/Distance/issues
Language: en-GB
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Enhancements

* Warnings and documentation clarification regarding ER variance estimation when there is only a single transect. (Issue #192 and mrds Issue #115)
* Code is more robust to the different ways of defining a binned analysis and documentation has been clarified. (Issue #144)

# Distance 2.0.0

Expand Down
20 changes: 14 additions & 6 deletions R/checkdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,27 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL,
}
}


# Make sure that the user has only specified either distance or distend / distbegin (need to do this check first as then Distance creates the distance column)
# Make sure if the user is using distbegin and distend they have supplied both.
if(any(!is.null(data$distbegin), !is.null(data$distend)) && !all(!is.null(data$distbegin), !is.null(data$distend))){
stop("You have provided either a 'distbegin' or 'distend' column in your dataset but not both. Please provide both or remove these and provide a distance column and use the cutpoint argument.", call. = FALSE)
}
# Make sure that the user has only specified either distance or distend / distbegin
if(!is.null(data$distance) && !is.null(data$distbegin) && !is.null(data$distend)){
stop("You can only specify either a 'distance' column or 'distbegin' and 'distend' columns in your data.", call. = FALSE)
warning("You have supplied both a 'distance' column and 'distbegin' and 'distend' columns in your data, the distance column will be removed and not used in these analyses.", call. = FALSE, immediate. = TRUE)
data$distance <- NULL
}

# make sure that the data are in the right format first
if(is.null(data$distance)){
if(is.null(data$distend) & is.null(data$distbegin)){
stop("Your data must (at least) have a column called 'distance' or 'distbegin' and 'distend'!", call. = FALSE)
}else{
data$distance <- (data$distend + data$distbegin)/2
#data$distance <- (data$distend + data$distbegin)/2
}
# distance column name to avoid creating a distance column
distance.col <- "distbegin"
}else{
distance.col <- "distance"
}

# make sure that we have a data.frame()
Expand All @@ -58,7 +66,7 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL,
# check that the object IDs are unique
# first need to remove the rows with NA distances used for padding
# below
data_no_NA <- data[!is.na(data$distance), ]
data_no_NA <- data[!is.na(data[[distance.col]]), ]
if(length(data_no_NA$object) != length(unique(data_no_NA$object))){
stop("Not all object IDs are unique, check data.")
}
Expand Down Expand Up @@ -130,7 +138,7 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL,
rownames(data) <- 1:nrow(data)

# remove the NA rows
data <- data[!is.na(data$distance),]
data <- data[!is.na(data[[distance.col]]),]
}else if(all(tolower(c("Region.Label", "Sample.Label",
"Effort", "object")) %in%
tolower(colnames(data)))){
Expand Down
40 changes: 21 additions & 19 deletions R/ds.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,10 @@
#' is the recommended approach for all standard binned analyses.
#' Ensure that the first element is 0 (or the left truncation
#' distance) and the last is the distance to the end of the furthest bin.
#' (Default `NULL`, no binning.) If you have provided `distbegin` and `distend`
#' columns in your data (note this should only be used when your cutpoints
#' are not constant across all your data, e.g. planes flying at differing
#' altitudes) then do not specify the cutpoints argument as this will cause
#' the `distbegin` and `distend` columns in your data to be overwritten.
#' (Default `NULL`, no binning.) Provide `distbegin` and `distend` columns
#' in your data only when your cutpoints are not constant across all your
#' data, e.g. planes flying at differing altitudes then do not specify the
#' cutpoints argument.
#' @param monotonicity should the detection function be constrained for
#' monotonicity weakly (`"weak"`), strictly (`"strict"`) or not at all
#' (`"none"` or `FALSE`). See Monotonicity, below. (Default `"strict"`). By
Expand Down Expand Up @@ -321,11 +320,11 @@
#' AIC(ds.model.cos2)
#' AIC(ds.model.cos23)
#'}
ds <- function(data, truncation=ifelse(is.null(cutpoints),
ifelse(is.null(data$distend),
max(data$distance),
max(data$distend)),
max(cutpoints)),
ds <- function(data, truncation=ifelse(is.null(data$distend),
ifelse(is.null(cutpoints),
max(data$distance),
max(cutpoints)),
max(data$distend)),
transect="line",
formula=~1, key=c("hn", "hr", "unif"),
adjustment=c("cos", "herm", "poly"),
Expand Down Expand Up @@ -383,6 +382,12 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints),
sample_table <- data$sample.table
obs_table <- data$obs.table
data <- data$data

# remove distbegin and distend if they already exist
if(any(names(data)=="distend") && any(names(data)=="distbegin") && !is.null(cutpoints)){
warning("Data has distend and distbegin columns, cutpoints argument will be ignored.", immediate. = TRUE, call. = FALSE)
cutpoints <- NULL
}

# setup left and right truncation (width)
truncation <- get_truncation(truncation, cutpoints, data)
Expand All @@ -395,7 +400,6 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints),
message("Columns \"distbegin\" and \"distend\" in data: performing a binned analysis...")
binned <- TRUE
breaks <- sort(unique(c(data$distend, data$distbegin)))
data$distance <- (data$distend + data$distbegin)/2
}else{
binned <- FALSE
breaks <- NULL
Expand All @@ -409,13 +413,6 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints),
}else if(cutpoints[1]!=0){
stop("The first cutpoint must be 0 or the left truncation distance!")
}

# remove distbegin and distend if they already exist
if(any(names(data)=="distend") & any(names(data)=="distbegin")){
message("data already has distend and distbegin columns, removing them and appling binning as specified by cutpoints.")
data$distend <- NULL
data$distbegin <- NULL
}
# send off to create_bins to make the correct columns in data
data <- create_bins(data, cutpoints)
binned <- TRUE
Expand Down Expand Up @@ -784,7 +781,12 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints),
message("No survey area information supplied, only estimating detection function.\n")
}
}


# Now add distance column after all the fitting if using distbegin/distend
if(!is.null(data$distbegin)){
model$data$distance <- (model$data$distend + model$data$distbegin)/2
}

# construct return object
ret.obj <- list(ddf = model,
dht = dht.res,
Expand Down
13 changes: 6 additions & 7 deletions man/ds.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

67 changes: 67 additions & 0 deletions tests/testthat/test_binned.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
par.tol <- 1e-5
N.tol <- 1e-3
lnl.tol <- 1e-4

# Create test dataset
bin.data <- data.frame(Region.Label = "Region",
Area = 100,
Sample.Label = c(rep("A",10), rep("B",10), rep("C",13)),
Effort = 10,
distance = c(rep(5,11), rep(15,10), rep(35,9), rep(75,3)))


context("Binned Analyses")

test_that("binned analysis arguments are interpreted correctly",{
skip_on_cran()

# Analysis using distance and cutpoints
bin.fit <- ds(bin.data,
truncation = 100,
formula = ~1,
key = "hr",
nadj = 0,
cutpoints = c(0,10,20,50,100))

#add distbegin and distend to the data
BEbin.data <- create_bins(bin.data, cutpoints = c(0,10,20,50,100))
BEbin.data$distance <- NULL

# Should produce the same analysis
bin.fit2 <- ds(BEbin.data,
truncation = 100,
formula = ~1,
key = "hr",
nadj = 0)

library(testthat)
expect_equal(bin.fit$ddf$par, bin.fit2$ddf$par)
expect_equal(bin.fit$ddf$meta.data, bin.fit2$ddf$meta.data)

# Test warning if user provides a distance column as well as distbegin and distend
both.bin.data <- create_bins(bin.data, cutpoints = c(0,10,20,50,100))
expect_warning(bin.fit3 <- ds(both.bin.data,
truncation = 100,
formula = ~1,
key = "hr",
nadj = 0),
"You have supplied both a 'distance' column and 'distbegin' and 'distend' columns in your data, the distance column will be removed and not used in these analyses.")

expect_equal(bin.fit$ddf$meta.data, bin.fit3$ddf$meta.data)

# Check there is an error if both distbegin and distend are not supplied together
begin.bin.data <- create_bins(bin.data, cutpoints = c(0,10,20,50,100))
begin.bin.data$distend <- NULL
expect_error(ds(begin.bin.data, truncation = 100,
formula = ~1, key = "hr", nadj = 0),
"You have provided either a 'distbegin' or 'distend' column in your dataset but not both. Please provide both or remove these and provide a distance column and use the cutpoint argument.")

# Check that is it not using cutpoints when distbegin and distend are in the data
expect_warning(bin.fit4 <- ds(BEbin.data, truncation = 100,
formula = ~1, key = "hr", nadj = 0,
cutpoints = c(0,20,50,100)),
"Data has distend and distbegin columns, cutpoints argument will be ignored.")

expect_equal(bin.fit$ddf$meta.data, bin.fit4$ddf$meta.data)

})
Loading