diff --git a/DESCRIPTION b/DESCRIPTION index 53d6205..816743e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,12 +16,12 @@ Description: A simple way of fitting detection functions to distance sampling Horvitz-Thompson-like estimator) if survey area information is provided. See Miller et al. (2019) for more information on methods and for example analyses. -Version: 2.0.0.9010 +Version: 2.0.0.9011 URL: https://github.com/DistanceDevelopment/Distance/ BugReports: https://github.com/DistanceDevelopment/Distance/issues Language: en-GB Depends: - R (>= 3.5.0), + R (>= 4.1.0), mrds (>= 3.0.0) Imports: dplyr, diff --git a/NEWS.md b/NEWS.md index 0d8eef7..e615e02 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ * Users can alternatively pass a list of models to summarize_ds_models rather than passing them individually. (Issue #149) * Truncation distances greater than the largest cutpoint value for binned data are no longer permitted as these cause fitting issues. (Issue #175) * print.dht_result now displays estimates for groups as well as individuals by default when group size is present. (Issue #178) +* Issues a warning when truncation is provided as a character but without the explicit % sign. (Issue #166) Enhancements diff --git a/R/ds.R b/R/ds.R index 6a3f16e..3306ce4 100644 --- a/R/ds.R +++ b/R/ds.R @@ -13,9 +13,9 @@ #' file" and not supply `region_table`, `sample_table` and `obs_table`, see #' "Data format", below and [`flatfile`][flatfile]. #' @param truncation either truncation distance (numeric, e.g. 5) or percentage -#' (as a string, e.g. "15%"). Can be supplied as a `list` with elements `left` -#' and `right` if left truncation is required (e.g. `list(left=1,right=20)` or -#' `list(left="1%",right="15%")` or even `list(left="1",right="15%")`). By +#' (as a string, e.g. "15%","15"). Can be supplied as a `list` with elements +#' `left` and `right` if left truncation is required (e.g. `list(left=1,right=20)` +#' or `list(left="1%",right="15%")` or even `list(left="1",right="15%")`). By #' default for exact distances the maximum observed distance is used as the #' right truncation. When the data is binned, the right truncation is the #' largest bin end point. Default left truncation is set to zero. diff --git a/R/get_truncation.R b/R/get_truncation.R index 7662029..c35efd3 100644 --- a/R/get_truncation.R +++ b/R/get_truncation.R @@ -1,61 +1,63 @@ get_truncation <- function(truncation, cutpoints, data){ - - if(is.null(truncation)){ - stop("Please supply truncation distance or percentage.") - }else if(any(unlist(lapply(truncation, is.character))) & - (!is.null(cutpoints) | - any(c("distbegin", "distend") %in% colnames(data)) - )){ - stop("Truncation cannot be supplied as a percentage with binned data") - }else{ - # if we have left truncation too... - if(is.list(truncation)){ - if((any(names(truncation)=="left") & - any(names(truncation)=="right")) & - length(truncation)==2){ - - # check for each of left and right that we have % or distance... - # left - if(is.double(truncation$left) & length(truncation$left)==1){ - left <- truncation$left - }else if(is.character(truncation$left) & length(truncation$left)==1){ - # % string to number - truncation$left <- as.numeric(sub("%","",truncation$left)) - left <- quantile(data$distance, probs=truncation$left/100, - na.rm=TRUE) - }else{ - stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") - } - # right - if(is.double(truncation$right) & length(truncation$right)==1){ - width <- truncation$right - }else if(is.character(truncation$right) & length(truncation$right)==1){ - # % string to number - truncation$right <- as.numeric(sub("%", "", truncation$right)) - width <- quantile(data$distance, probs=1-(truncation$right/100), - na.rm=TRUE) - }else{ - stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") - } - }else{ - stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") - } - - # just right truncation - }else if(is.numeric(truncation) & length(truncation)==1){ - width <- truncation - left <- NULL - }else if(is.character(truncation) & length(truncation)==1){ - # % string to number - truncation <- as.numeric(sub("%","",truncation)) - width <- quantile(data$distance, probs=1-(truncation/100), na.rm=TRUE) - left <- NULL - }else{ - stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") - } + + # Check truncation is specified + if (is.null(truncation)) { + stop("Please supply truncation distance or percentage.", call. = FALSE) + } + + # Check the structure of the truncation + if (!(length(truncation) == 1 || (is.list(truncation) && + length(truncation) == 2 && + all(c("left", "right") %in% names(truncation)) && + all(sapply(truncation, function(el) length(el) == 1))))) { + stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.", call. = FALSE) + } + + # Check if truncation is character (i.e., contains %) and if used with binned data + if (any(sapply(truncation, is.character)) && + (!is.null(cutpoints) || any(c("distbegin", "distend") %in% colnames(data)))) { + stop("Truncation cannot be supplied as a percentage with binned data.", call. = FALSE) } - # Final check that truncation is not greater than largest cutpoint + # Check if any truncation value is a character but doesn't contains the "%" symbol + if(any(sapply(truncation, function(x){is.character(x) && !grepl("%", x)}))){ + warning("Truncation values supplied as characters will be interpreted as % truncation values.", call. = FALSE) + } + + # Helper function to convert percentage strings to quantiles + percent_trunc <- function(value, side = c("left", "right")) { + value_num <- as.numeric(sub("%", "", value)) + prob <- if (side == "left") value_num / 100 else 1 - (value_num / 100) + quantile(data$distance, probs = prob, na.rm = TRUE) + } + + # Get the truncation values + if(is.list(truncation)){ + + # Left truncation + if(is.numeric(truncation$left)){ + left <- truncation$left + } else if(is.character(truncation$left)){ + left <- percent_trunc(truncation$left, "left") + } else stop("Left truncation must be a number or percentage string.") + + # Right truncation + if(is.numeric(truncation$right)){ + width <- truncation$right + } else if(is.character(truncation$right)){ + width <- percent_trunc(truncation$right, "right") + } else stop("Right truncation must be a number or percentage string.") + + #If it is not a list + } else if(is.numeric(truncation)) { + width <- truncation + left <- NULL + } else if (is.character(truncation)){ + width <- percent_trunc(truncation, "right") + left <- NULL + } else stop("Right truncation must be a number or percentage string.") + + # Now check that truncation is not greater than largest cutpoint if binned data if(!is.null(cutpoints)){ if(width > cutpoints[length(cutpoints)]){ warning(paste("Truncation width is greater than the largest bin distance, re-setting truncation to be largest cutpoint value: ", cutpoints[length(cutpoints)], sep = ""), immediate. = TRUE, call. = FALSE) @@ -65,4 +67,4 @@ get_truncation <- function(truncation, cutpoints, data){ } list(left=left, width=width) -} +} \ No newline at end of file diff --git a/man/ds.Rd b/man/ds.Rd index f109313..7481a77 100644 --- a/man/ds.Rd +++ b/man/ds.Rd @@ -54,9 +54,9 @@ file" and not supply \code{region_table}, \code{sample_table} and \code{obs_tabl "Data format", below and \code{\link{flatfile}}.} \item{truncation}{either truncation distance (numeric, e.g. 5) or percentage -(as a string, e.g. "15\%"). Can be supplied as a \code{list} with elements \code{left} -and \code{right} if left truncation is required (e.g. \code{list(left=1,right=20)} or -\code{list(left="1\%",right="15\%")} or even \code{list(left="1",right="15\%")}). By +(as a string, e.g. "15\%","15"). Can be supplied as a \code{list} with elements +\code{left} and \code{right} if left truncation is required (e.g. \code{list(left=1,right=20)} +or \code{list(left="1\%",right="15\%")} or even \code{list(left="1",right="15\%")}). By default for exact distances the maximum observed distance is used as the right truncation. When the data is binned, the right truncation is the largest bin end point. Default left truncation is set to zero.} diff --git a/tests/testthat/test_truncation.R b/tests/testthat/test_truncation.R index 3e51977..0239919 100644 --- a/tests/testthat/test_truncation.R +++ b/tests/testthat/test_truncation.R @@ -20,32 +20,63 @@ test_that("Truncation errors are thrown correctly",{ # percentage truncation with bins expect_error(ds(egdata, truncation="10%", cutpoints=seq(1, max(egdata$distance), length.out=10)), - "Truncation cannot be supplied as a percentage with binned data") + "Truncation cannot be supplied as a percentage with binned data.") + + # wrong format - vector not list for 2 values + expect_error(ds(egdata, truncation=c(left = 0, right = "10")), + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") # too many left truncations expect_error(ds(egdata, truncation=list(left=c(0, 0), right=3.5)), - "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") + # too many right truncations expect_error(ds(egdata, truncation=list(left=0, right=c(3.5, 3.5))), - "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") # too many characterleft truncations - expect_error(ds(egdata, truncation=list(left=c("0", "0"), right=3.5)), + expect_error(ds(egdata, truncation=list(left=c("0%", "0%"), right=3.5)), "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") # too many character right truncations - expect_error(ds(egdata, truncation=list(left=0, right=c("3.5", "3.5"))), - "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") + expect_error(ds(egdata, truncation=list(left=0, right=c("3.5%", "3.5%"))), + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") # too many truncations - expect_error(ds(egdata, truncation=list(left=0, right="3.5", boop="3.5")), - "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") + expect_error(ds(egdata, truncation=list(left=0, right="3.5%", boop="3.5%")), + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") # left and cutpoints[1] don't match expect_error(ds(egdata, truncation=list(left=0, right=max(egdata$distance)), cutpoints=seq(1, max(egdata$distance), length.out=10)), "The first cutpoint must be 0 or the left truncation distance!") - # message("data already has distend and distbegin columns, removing them and appling binning as specified by cutpoints.") + # message("data already has distend and distbegin columns, removing them and applying binning as specified by cutpoints.") + + # percentage truncation with no percent signs + expect_warning(eg.1 <- ds(egdata, truncation=list(left = 0, right = "10")), + "Truncation values supplied as characters will be interpreted as % truncation values.") + + eg.2 <- ds(egdata, truncation=list(left = 0, right = "10%")) + + # Should be the same + expect_equal(eg.1$ddf$Nhat, eg.2$ddf$Nhat) + # Check its calculated the quantile + expect_equal(quantile(egdata$distance, probs = 0.9), eg.1$ddf$meta.data$width) + expect_equal(quantile(egdata$distance, probs = 0.9), eg.2$ddf$meta.data$width) + + expect_warning(eg.3 <- ds(egdata, truncation=list(left = "10", right = "10%")), + "Truncation values supplied as characters will be interpreted as % truncation values.") + # Check left truncation as a percentage + expect_equal(quantile(egdata$distance, probs = 0.1), eg.3$ddf$meta.data$left) + + expect_warning(ds(egdata, truncation="10"), + "Truncation values supplied as characters will be interpreted as % truncation values.") + + # Check integer and double work the same + eg.4 <- ds(egdata, truncation = 3L) + eg.5 <- ds(egdata, truncation = 3) + expect_equal(eg.4$ddf$fitted, eg.5$ddf$fitted) + expect_equal(eg.4$ddf$Nhat, eg.5$ddf$Nhat) })