Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ Description: A simple way of fitting detection functions to distance sampling
Horvitz-Thompson-like estimator) if survey area information is provided. See
Miller et al. (2019) <doi:10.18637/jss.v089.i01> for more information on
methods and <https://distancesampling.org/resources/vignettes.html> for example analyses.
Version: 2.0.0.9010
Version: 2.0.0.9011
URL: https://github.com/DistanceDevelopment/Distance/
BugReports: https://github.com/DistanceDevelopment/Distance/issues
Language: en-GB
Depends:
R (>= 3.5.0),
R (>= 4.1.0),
mrds (>= 3.0.0)
Imports:
dplyr,
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Users can alternatively pass a list of models to summarize_ds_models rather than passing them individually. (Issue #149)
* Truncation distances greater than the largest cutpoint value for binned data are no longer permitted as these cause fitting issues. (Issue #175)
* print.dht_result now displays estimates for groups as well as individuals by default when group size is present. (Issue #178)
* Issues a warning when truncation is provided as a character but without the explicit % sign. (Issue #166)

Enhancements

Expand Down
6 changes: 3 additions & 3 deletions R/ds.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
#' file" and not supply `region_table`, `sample_table` and `obs_table`, see
#' "Data format", below and [`flatfile`][flatfile].
#' @param truncation either truncation distance (numeric, e.g. 5) or percentage
#' (as a string, e.g. "15%"). Can be supplied as a `list` with elements `left`
#' and `right` if left truncation is required (e.g. `list(left=1,right=20)` or
#' `list(left="1%",right="15%")` or even `list(left="1",right="15%")`). By
#' (as a string, e.g. "15%","15"). Can be supplied as a `list` with elements
#' `left` and `right` if left truncation is required (e.g. `list(left=1,right=20)`
#' or `list(left="1%",right="15%")` or even `list(left="1",right="15%")`). By
#' default for exact distances the maximum observed distance is used as the
#' right truncation. When the data is binned, the right truncation is the
#' largest bin end point. Default left truncation is set to zero.
Expand Down
114 changes: 58 additions & 56 deletions R/get_truncation.R
Original file line number Diff line number Diff line change
@@ -1,61 +1,63 @@
get_truncation <- function(truncation, cutpoints, data){

if(is.null(truncation)){
stop("Please supply truncation distance or percentage.")
}else if(any(unlist(lapply(truncation, is.character))) &
(!is.null(cutpoints) |
any(c("distbegin", "distend") %in% colnames(data))
)){
stop("Truncation cannot be supplied as a percentage with binned data")
}else{
# if we have left truncation too...
if(is.list(truncation)){
if((any(names(truncation)=="left") &
any(names(truncation)=="right")) &
length(truncation)==2){

# check for each of left and right that we have % or distance...
# left
if(is.double(truncation$left) & length(truncation$left)==1){
left <- truncation$left
}else if(is.character(truncation$left) & length(truncation$left)==1){
# % string to number
truncation$left <- as.numeric(sub("%","",truncation$left))
left <- quantile(data$distance, probs=truncation$left/100,
na.rm=TRUE)
}else{
stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".")
}
# right
if(is.double(truncation$right) & length(truncation$right)==1){
width <- truncation$right
}else if(is.character(truncation$right) & length(truncation$right)==1){
# % string to number
truncation$right <- as.numeric(sub("%", "", truncation$right))
width <- quantile(data$distance, probs=1-(truncation$right/100),
na.rm=TRUE)
}else{
stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".")
}
}else{
stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".")
}

# just right truncation
}else if(is.numeric(truncation) & length(truncation)==1){
width <- truncation
left <- NULL
}else if(is.character(truncation) & length(truncation)==1){
# % string to number
truncation <- as.numeric(sub("%","",truncation))
width <- quantile(data$distance, probs=1-(truncation/100), na.rm=TRUE)
left <- NULL
}else{
stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".")
}

# Check truncation is specified
if (is.null(truncation)) {
stop("Please supply truncation distance or percentage.", call. = FALSE)
}

# Check the structure of the truncation
if (!(length(truncation) == 1 || (is.list(truncation) &&
length(truncation) == 2 &&
all(c("left", "right") %in% names(truncation)) &&
all(sapply(truncation, function(el) length(el) == 1))))) {
stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.", call. = FALSE)
}

# Check if truncation is character (i.e., contains %) and if used with binned data
if (any(sapply(truncation, is.character)) &&
(!is.null(cutpoints) || any(c("distbegin", "distend") %in% colnames(data)))) {
stop("Truncation cannot be supplied as a percentage with binned data.", call. = FALSE)
}

# Final check that truncation is not greater than largest cutpoint
# Check if any truncation value is a character but doesn't contains the "%" symbol
if(any(sapply(truncation, function(x){is.character(x) && !grepl("%", x)}))){
warning("Truncation values supplied as characters will be interpreted as % truncation values.", call. = FALSE)
}

# Helper function to convert percentage strings to quantiles
percent_trunc <- function(value, side = c("left", "right")) {
value_num <- as.numeric(sub("%", "", value))
prob <- if (side == "left") value_num / 100 else 1 - (value_num / 100)
quantile(data$distance, probs = prob, na.rm = TRUE)
}

# Get the truncation values
if(is.list(truncation)){

# Left truncation
if(is.numeric(truncation$left)){
left <- truncation$left
} else if(is.character(truncation$left)){
left <- percent_trunc(truncation$left, "left")
} else stop("Left truncation must be a number or percentage string.")

# Right truncation
if(is.numeric(truncation$right)){
width <- truncation$right
} else if(is.character(truncation$right)){
width <- percent_trunc(truncation$right, "right")
} else stop("Right truncation must be a number or percentage string.")

#If it is not a list
} else if(is.numeric(truncation)) {
width <- truncation
left <- NULL
} else if (is.character(truncation)){
width <- percent_trunc(truncation, "right")
left <- NULL
} else stop("Right truncation must be a number or percentage string.")

# Now check that truncation is not greater than largest cutpoint if binned data
if(!is.null(cutpoints)){
if(width > cutpoints[length(cutpoints)]){
warning(paste("Truncation width is greater than the largest bin distance, re-setting truncation to be largest cutpoint value: ", cutpoints[length(cutpoints)], sep = ""), immediate. = TRUE, call. = FALSE)
Expand All @@ -65,4 +67,4 @@ get_truncation <- function(truncation, cutpoints, data){
}

list(left=left, width=width)
}
}
6 changes: 3 additions & 3 deletions man/ds.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 40 additions & 9 deletions tests/testthat/test_truncation.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,63 @@ test_that("Truncation errors are thrown correctly",{
# percentage truncation with bins
expect_error(ds(egdata, truncation="10%",
cutpoints=seq(1, max(egdata$distance), length.out=10)),
"Truncation cannot be supplied as a percentage with binned data")
"Truncation cannot be supplied as a percentage with binned data.")

# wrong format - vector not list for 2 values
expect_error(ds(egdata, truncation=c(left = 0, right = "10")),
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.")

# too many left truncations
expect_error(ds(egdata, truncation=list(left=c(0, 0), right=3.5)),
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".")
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.")

# too many right truncations
expect_error(ds(egdata, truncation=list(left=0, right=c(3.5, 3.5))),
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".")
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.")

# too many characterleft truncations
expect_error(ds(egdata, truncation=list(left=c("0", "0"), right=3.5)),
expect_error(ds(egdata, truncation=list(left=c("0%", "0%"), right=3.5)),
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".")
# too many character right truncations
expect_error(ds(egdata, truncation=list(left=0, right=c("3.5", "3.5"))),
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".")
expect_error(ds(egdata, truncation=list(left=0, right=c("3.5%", "3.5%"))),
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.")

# too many truncations
expect_error(ds(egdata, truncation=list(left=0, right="3.5", boop="3.5")),
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".")
expect_error(ds(egdata, truncation=list(left=0, right="3.5%", boop="3.5%")),
"Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.")

# left and cutpoints[1] don't match
expect_error(ds(egdata, truncation=list(left=0, right=max(egdata$distance)),
cutpoints=seq(1, max(egdata$distance), length.out=10)),
"The first cutpoint must be 0 or the left truncation distance!")

# message("data already has distend and distbegin columns, removing them and appling binning as specified by cutpoints.")
# message("data already has distend and distbegin columns, removing them and applying binning as specified by cutpoints.")

# percentage truncation with no percent signs
expect_warning(eg.1 <- ds(egdata, truncation=list(left = 0, right = "10")),
"Truncation values supplied as characters will be interpreted as % truncation values.")

eg.2 <- ds(egdata, truncation=list(left = 0, right = "10%"))

# Should be the same
expect_equal(eg.1$ddf$Nhat, eg.2$ddf$Nhat)
# Check its calculated the quantile
expect_equal(quantile(egdata$distance, probs = 0.9), eg.1$ddf$meta.data$width)
expect_equal(quantile(egdata$distance, probs = 0.9), eg.2$ddf$meta.data$width)

expect_warning(eg.3 <- ds(egdata, truncation=list(left = "10", right = "10%")),
"Truncation values supplied as characters will be interpreted as % truncation values.")
# Check left truncation as a percentage
expect_equal(quantile(egdata$distance, probs = 0.1), eg.3$ddf$meta.data$left)

expect_warning(ds(egdata, truncation="10"),
"Truncation values supplied as characters will be interpreted as % truncation values.")

# Check integer and double work the same
eg.4 <- ds(egdata, truncation = 3L)
eg.5 <- ds(egdata, truncation = 3)
expect_equal(eg.4$ddf$fitted, eg.5$ddf$fitted)
expect_equal(eg.4$ddf$Nhat, eg.5$ddf$Nhat)

})

Expand Down
Loading