From 6034fc694f8d7cbc87bc0b9d21341a221688e035 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Thu, 1 May 2025 14:31:32 +0100 Subject: [PATCH 1/8] CRAN requirement --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 53d6205..91bf559 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,7 @@ URL: https://github.com/DistanceDevelopment/Distance/ BugReports: https://github.com/DistanceDevelopment/Distance/issues Language: en-GB Depends: - R (>= 3.5.0), + R (>= 4.1.0), mrds (>= 3.0.0) Imports: dplyr, From 3acb79742edd6ed033198b1957239bc5d1013c70 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Thu, 1 May 2025 14:32:07 +0100 Subject: [PATCH 2/8] Update ds docs on truncation --- R/ds.R | 6 +++--- man/ds.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/ds.R b/R/ds.R index 6a3f16e..3306ce4 100644 --- a/R/ds.R +++ b/R/ds.R @@ -13,9 +13,9 @@ #' file" and not supply `region_table`, `sample_table` and `obs_table`, see #' "Data format", below and [`flatfile`][flatfile]. #' @param truncation either truncation distance (numeric, e.g. 5) or percentage -#' (as a string, e.g. "15%"). Can be supplied as a `list` with elements `left` -#' and `right` if left truncation is required (e.g. `list(left=1,right=20)` or -#' `list(left="1%",right="15%")` or even `list(left="1",right="15%")`). By +#' (as a string, e.g. "15%","15"). Can be supplied as a `list` with elements +#' `left` and `right` if left truncation is required (e.g. `list(left=1,right=20)` +#' or `list(left="1%",right="15%")` or even `list(left="1",right="15%")`). By #' default for exact distances the maximum observed distance is used as the #' right truncation. When the data is binned, the right truncation is the #' largest bin end point. Default left truncation is set to zero. diff --git a/man/ds.Rd b/man/ds.Rd index f109313..7481a77 100644 --- a/man/ds.Rd +++ b/man/ds.Rd @@ -54,9 +54,9 @@ file" and not supply \code{region_table}, \code{sample_table} and \code{obs_tabl "Data format", below and \code{\link{flatfile}}.} \item{truncation}{either truncation distance (numeric, e.g. 5) or percentage -(as a string, e.g. "15\%"). Can be supplied as a \code{list} with elements \code{left} -and \code{right} if left truncation is required (e.g. \code{list(left=1,right=20)} or -\code{list(left="1\%",right="15\%")} or even \code{list(left="1",right="15\%")}). By +(as a string, e.g. "15\%","15"). Can be supplied as a \code{list} with elements +\code{left} and \code{right} if left truncation is required (e.g. \code{list(left=1,right=20)} +or \code{list(left="1\%",right="15\%")} or even \code{list(left="1",right="15\%")}). By default for exact distances the maximum observed distance is used as the right truncation. When the data is binned, the right truncation is the largest bin end point. Default left truncation is set to zero.} From e3270394165351ab83ef0d278c828051b51a020e Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Tue, 13 May 2025 12:22:23 +0100 Subject: [PATCH 3/8] Re-write get_truncation function Add warning if truncation character without % sign --- R/get_truncation.R | 110 ++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 56 deletions(-) diff --git a/R/get_truncation.R b/R/get_truncation.R index 7662029..f2c28a7 100644 --- a/R/get_truncation.R +++ b/R/get_truncation.R @@ -1,61 +1,59 @@ get_truncation <- function(truncation, cutpoints, data){ - - if(is.null(truncation)){ - stop("Please supply truncation distance or percentage.") - }else if(any(unlist(lapply(truncation, is.character))) & - (!is.null(cutpoints) | - any(c("distbegin", "distend") %in% colnames(data)) - )){ - stop("Truncation cannot be supplied as a percentage with binned data") - }else{ - # if we have left truncation too... - if(is.list(truncation)){ - if((any(names(truncation)=="left") & - any(names(truncation)=="right")) & - length(truncation)==2){ - - # check for each of left and right that we have % or distance... - # left - if(is.double(truncation$left) & length(truncation$left)==1){ - left <- truncation$left - }else if(is.character(truncation$left) & length(truncation$left)==1){ - # % string to number - truncation$left <- as.numeric(sub("%","",truncation$left)) - left <- quantile(data$distance, probs=truncation$left/100, - na.rm=TRUE) - }else{ - stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") - } - # right - if(is.double(truncation$right) & length(truncation$right)==1){ - width <- truncation$right - }else if(is.character(truncation$right) & length(truncation$right)==1){ - # % string to number - truncation$right <- as.numeric(sub("%", "", truncation$right)) - width <- quantile(data$distance, probs=1-(truncation$right/100), - na.rm=TRUE) - }else{ - stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") - } - }else{ - stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") - } - - # just right truncation - }else if(is.numeric(truncation) & length(truncation)==1){ - width <- truncation - left <- NULL - }else if(is.character(truncation) & length(truncation)==1){ - # % string to number - truncation <- as.numeric(sub("%","",truncation)) - width <- quantile(data$distance, probs=1-(truncation/100), na.rm=TRUE) - left <- NULL - }else{ - stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") - } + + # Check truncation is specified + if (is.null(truncation)) { + stop("Please supply truncation distance or percentage.", call. = FALSE) + } + + # Check the structure of the truncation + if (!(length(truncation) == 1 || (is.list(truncation) && + length(truncation) == 2 && + all(c("left", "right") %in% names(truncation)) && + all(sapply(truncation, function(el) length(el) == 1))))) { + stop("Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.", call. = FALSE) + } + + # Check if truncation is character (i.e., contains %) and if used with binned data + if (any(sapply(truncation, is.character)) && + (!is.null(cutpoints) || any(c("distbegin", "distend") %in% colnames(data)))) { + stop("Truncation cannot be supplied as a percentage with binned data.", call. = FALSE) } - # Final check that truncation is not greater than largest cutpoint + + # Helper function to convert percentage strings to quantiles + percent_trunc <- function(value, side = c("left", "right")) { + value_num <- as.numeric(sub("%", "", value)) + prob <- if (side == "left") value_num / 100 else 1 - (value_num / 100) + quantile(data$distance, probs = prob, na.rm = TRUE) + } + + # Get the truncation values + if(is.list(truncation)){ + + # Left truncation + if(is.numeric(truncation$left)){ + left <- truncation$left + } else if(is.character(truncation$left)){ + left <- percent_trunc(truncation$left, "left") + } else stop("Left truncation must be a number or percentage string.") + + # Right truncation + if(is.numeric(truncation$right)){ + width <- truncation$right + } else if(is.character(truncation$right)){ + width <- percent_trunc(truncation$right, "right") + } else stop("Right truncation must be a number or percentage string.") + + #If it is not a list + } else if(is.numeric(truncation)) { + width <- truncation + left <- NULL + } else if (is.character(truncation)){ + width <- percent_trunc(truncation, "right") + left <- NULL + } else stop("Right truncation must be a number or percentage string.") + + # Now check that truncation is not greater than largest cutpoint if binned data if(!is.null(cutpoints)){ if(width > cutpoints[length(cutpoints)]){ warning(paste("Truncation width is greater than the largest bin distance, re-setting truncation to be largest cutpoint value: ", cutpoints[length(cutpoints)], sep = ""), immediate. = TRUE, call. = FALSE) @@ -65,4 +63,4 @@ get_truncation <- function(truncation, cutpoints, data){ } list(left=left, width=width) -} +} \ No newline at end of file From 5c8fa7ae1f47b4d747a9024fee65d5b6010fc71b Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Tue, 13 May 2025 12:22:32 +0100 Subject: [PATCH 4/8] Add warning if truncation character without % sign --- R/get_truncation.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/get_truncation.R b/R/get_truncation.R index f2c28a7..c35efd3 100644 --- a/R/get_truncation.R +++ b/R/get_truncation.R @@ -19,6 +19,10 @@ get_truncation <- function(truncation, cutpoints, data){ stop("Truncation cannot be supplied as a percentage with binned data.", call. = FALSE) } + # Check if any truncation value is a character but doesn't contains the "%" symbol + if(any(sapply(truncation, function(x){is.character(x) && !grepl("%", x)}))){ + warning("Truncation values supplied as characters will be interpreted as % truncation values.", call. = FALSE) + } # Helper function to convert percentage strings to quantiles percent_trunc <- function(value, side = c("left", "right")) { From 22934d049b8479f5362ee02c1a2990ef0e201f33 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Tue, 13 May 2025 12:22:55 +0100 Subject: [PATCH 5/8] Update and add tests on truncation values --- tests/testthat/test_truncation.R | 49 ++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test_truncation.R b/tests/testthat/test_truncation.R index 3e51977..8e5b746 100644 --- a/tests/testthat/test_truncation.R +++ b/tests/testthat/test_truncation.R @@ -20,32 +20,63 @@ test_that("Truncation errors are thrown correctly",{ # percentage truncation with bins expect_error(ds(egdata, truncation="10%", cutpoints=seq(1, max(egdata$distance), length.out=10)), - "Truncation cannot be supplied as a percentage with binned data") + "Truncation cannot be supplied as a percentage with binned data.") + + # wrong format - vector not list for 2 values + expect_error(ds(egdata, truncation=c(left = 0, right = "10")), + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") # too many left truncations expect_error(ds(egdata, truncation=list(left=c(0, 0), right=3.5)), - "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") + # too many right truncations expect_error(ds(egdata, truncation=list(left=0, right=c(3.5, 3.5))), - "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") # too many characterleft truncations - expect_error(ds(egdata, truncation=list(left=c("0", "0"), right=3.5)), + expect_error(ds(egdata, truncation=list(left=c("0%", "0%"), right=3.5)), "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") # too many character right truncations - expect_error(ds(egdata, truncation=list(left=0, right=c("3.5", "3.5"))), - "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") + expect_error(ds(egdata, truncation=list(left=0, right=c("3.5%", "3.5%"))), + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") # too many truncations - expect_error(ds(egdata, truncation=list(left=0, right="3.5", boop="3.5")), - "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\".") + expect_error(ds(egdata, truncation=list(left=0, right="3.5%", boop="3.5%")), + "Truncation must be supplied as a single number/string or a list with elements \"left\" and \"right\", each being a single value.") # left and cutpoints[1] don't match expect_error(ds(egdata, truncation=list(left=0, right=max(egdata$distance)), cutpoints=seq(1, max(egdata$distance), length.out=10)), "The first cutpoint must be 0 or the left truncation distance!") - # message("data already has distend and distbegin columns, removing them and appling binning as specified by cutpoints.") + # message("data already has distend and distbegin columns, removing them and applying binning as specified by cutpoints.") + + # percentage truncation with no percent signs + expect_warning(eg.1 <- ds(egdata, truncation=list(left = 0, right = "10")), + "Truncation values supplied as characters will be interpreted as % truncation values.") + + eg.2 <- ds(egdata, truncation=list(left = 0, right = "10%")) + + # Should be the same + expect_equal(eg.1$ddf, eg.2$ddf) + # Check its calculated the quantile + expect_equal(quantile(egdata$distance, probs = 0.9), eg.1$ddf$meta.data$width) + expect_equal(quantile(egdata$distance, probs = 0.9), eg.2$ddf$meta.data$width) + + expect_warning(eg.3 <- ds(egdata, truncation=list(left = "10", right = "10%")), + "Truncation values supplied as characters will be interpreted as % truncation values.") + # Check left truncation as a percentage + expect_equal(quantile(egdata$distance, probs = 0.1), eg.3$ddf$meta.data$left) + + expect_warning(ds(egdata, truncation="10"), + "Truncation values supplied as characters will be interpreted as % truncation values.") + + # Check integer and double work the same + eg.4 <- ds(egdata, truncation = 3L) + eg.5 <- ds(egdata, truncation = 3) + expect_equal(eg.4$ddf$fitted, eg.5$ddf$fitted) + expect_equal(eg.4$ddf$Nhat, eg.5$ddf$Nhat) }) From 8d75c0b60c6d278927efee826a94df09fea7b371 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Tue, 13 May 2025 12:41:37 +0100 Subject: [PATCH 6/8] Fix test --- tests/testthat/test_truncation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_truncation.R b/tests/testthat/test_truncation.R index 8e5b746..0239919 100644 --- a/tests/testthat/test_truncation.R +++ b/tests/testthat/test_truncation.R @@ -59,7 +59,7 @@ test_that("Truncation errors are thrown correctly",{ eg.2 <- ds(egdata, truncation=list(left = 0, right = "10%")) # Should be the same - expect_equal(eg.1$ddf, eg.2$ddf) + expect_equal(eg.1$ddf$Nhat, eg.2$ddf$Nhat) # Check its calculated the quantile expect_equal(quantile(egdata$distance, probs = 0.9), eg.1$ddf$meta.data$width) expect_equal(quantile(egdata$distance, probs = 0.9), eg.2$ddf$meta.data$width) From 48b70d316803fd117a572379c624053162f314aa Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Tue, 13 May 2025 12:41:41 +0100 Subject: [PATCH 7/8] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 0d8eef7..e615e02 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ * Users can alternatively pass a list of models to summarize_ds_models rather than passing them individually. (Issue #149) * Truncation distances greater than the largest cutpoint value for binned data are no longer permitted as these cause fitting issues. (Issue #175) * print.dht_result now displays estimates for groups as well as individuals by default when group size is present. (Issue #178) +* Issues a warning when truncation is provided as a character but without the explicit % sign. (Issue #166) Enhancements From 0ee1983d7b41db2d0ba1e784870e4721405358c7 Mon Sep 17 00:00:00 2001 From: Laura Marshall Date: Tue, 13 May 2025 12:41:50 +0100 Subject: [PATCH 8/8] Bump package version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 91bf559..816743e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling Horvitz-Thompson-like estimator) if survey area information is provided. See Miller et al. (2019) for more information on methods and for example analyses. -Version: 2.0.0.9010 +Version: 2.0.0.9011 URL: https://github.com/DistanceDevelopment/Distance/ BugReports: https://github.com/DistanceDevelopment/Distance/issues Language: en-GB