From 119badcfd19c1b327621b79d22ff6e4632b17b00 Mon Sep 17 00:00:00 2001
From: Laura Marshall
Date: Wed, 2 Jul 2025 00:21:26 +0100
Subject: [PATCH 1/7] Order the data when using dht2 and ER variance estimators
O1, S1 and S2
Reference Issue #174
---
DESCRIPTION | 2 +-
NEWS.md | 1 +
R/ER_var_f.R | 4 ++--
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index c0d583e..a010832 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling
Horvitz-Thompson-like estimator) if survey area information is provided. See
Miller et al. (2019) for more information on
methods and for example analyses.
-Version: 2.0.0.9013
+Version: 2.0.0.9014
URL: https://github.com/DistanceDevelopment/Distance/
BugReports: https://github.com/DistanceDevelopment/Distance/issues
Language: en-GB
diff --git a/NEWS.md b/NEWS.md
index 29f9230..597d381 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,6 +7,7 @@
* Truncation distances greater than the largest cutpoint value for binned data are no longer permitted as these cause fitting issues. (Issue #175)
* print.dht_result now displays estimates for groups as well as individuals by default when group size is present. (Issue #178)
* Issues a warning when truncation is provided as a character but without the explicit % sign. (Issue #166)
+* Fix sample ordering issue in dht2 for the S1, S2 and O1 encounter rate variance estimators. (Issue #174)
Enhancements
diff --git a/R/ER_var_f.R b/R/ER_var_f.R
index a295dce..f37faa6 100644
--- a/R/ER_var_f.R
+++ b/R/ER_var_f.R
@@ -24,8 +24,8 @@ ER_var_f <- function(erdat, innes, binomial_var=FALSE){
}else{
# sort the data if we use O2/O3 estimators
- if(any(erdat$er_est %in% c("O2", "O3"))){
- warning("Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful", immediate. = TRUE, call. = FALSE)
+ if(any(erdat$er_est %in% c("O1", "O2", "O3", "S1", "S2"))){
+ warning("Using one of O1, O2, O3, S1 or S2 encounter rate variance estimators, assuming that sorting on Sample.Label is meaningful", immediate. = TRUE, call. = FALSE)
if(!is.numeric(erdat$Sample.Label)){
warning("Additionally, Sample.Label is not numeric, this may cause additional issues", immediate. = TRUE, call. = FALSE)
}
From eeb5db70e87bbea5f11cdd8668a2c3d1b5ed9977 Mon Sep 17 00:00:00 2001
From: Laura Marshall
Date: Wed, 2 Jul 2025 00:29:08 +0100
Subject: [PATCH 2/7] Update Test
Reference #174
---
R/ER_var_f.R | 2 +-
tests/testthat/test_variance.R | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/R/ER_var_f.R b/R/ER_var_f.R
index f37faa6..1171f8e 100644
--- a/R/ER_var_f.R
+++ b/R/ER_var_f.R
@@ -25,7 +25,7 @@ ER_var_f <- function(erdat, innes, binomial_var=FALSE){
# sort the data if we use O2/O3 estimators
if(any(erdat$er_est %in% c("O1", "O2", "O3", "S1", "S2"))){
- warning("Using one of O1, O2, O3, S1 or S2 encounter rate variance estimators, assuming that sorting on Sample.Label is meaningful", immediate. = TRUE, call. = FALSE)
+ warning("Using one of O1, O2, O3, S1 or S2 encounter rate variance estimators, assuming that sorting on Sample.Label is meaningful.", immediate. = TRUE, call. = FALSE)
if(!is.numeric(erdat$Sample.Label)){
warning("Additionally, Sample.Label is not numeric, this may cause additional issues", immediate. = TRUE, call. = FALSE)
}
diff --git a/tests/testthat/test_variance.R b/tests/testthat/test_variance.R
index bc895cd..55cba1b 100644
--- a/tests/testthat/test_variance.R
+++ b/tests/testthat/test_variance.R
@@ -46,7 +46,7 @@ test_that("variance 2",{
observations=unflat$obs.table,
strat_formula=~1, convert_units=cu,
er_est="O2"),
- "Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful")
+ "Using one of O1, O2, O3, S1 or S2 encounter rate variance estimators, assuming that sorting on Sample.Label is meaningful.")
lr <- Nhat_O2[nrow(Nhat_O2), , drop=FALSE]
expect_equal(lr$Abundance, 1022, tol=1e-1)
From d400d1d4ba3b2a9ddc3d895ebfeedb7083eae97d Mon Sep 17 00:00:00 2001
From: Laura Marshall
Date: Sat, 5 Jul 2025 02:23:00 +0100
Subject: [PATCH 3/7] Bump package version
---
DESCRIPTION | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index a010832..24139ec 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling
Horvitz-Thompson-like estimator) if survey area information is provided. See
Miller et al. (2019) for more information on
methods and for example analyses.
-Version: 2.0.0.9014
+Version: 2.0.1
URL: https://github.com/DistanceDevelopment/Distance/
BugReports: https://github.com/DistanceDevelopment/Distance/issues
Language: en-GB
From 0e5be52edfceea936c779d5d2b7eec73096377a2 Mon Sep 17 00:00:00 2001
From: Laura Marshall
Date: Sat, 5 Jul 2025 02:33:39 +0100
Subject: [PATCH 4/7] Fix spelling errors
---
vignettes/web-only/cues/cuecounts-distill.Rmd | 2 +-
vignettes/web-only/points/pointtransects-distill.Rmd | 4 ++--
vignettes/web-only/strata/strata-distill.Rmd | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/vignettes/web-only/cues/cuecounts-distill.Rmd b/vignettes/web-only/cues/cuecounts-distill.Rmd
index f3f0008..ef1dd4f 100644
--- a/vignettes/web-only/cues/cuecounts-distill.Rmd
+++ b/vignettes/web-only/cues/cuecounts-distill.Rmd
@@ -144,7 +144,7 @@ Note the distinct lack of fit to the song data. This is because of many detecti
# Notes regarding the cue count estimates of Montrave winter wrens
-This vignette uses the function `dht2` because that function knows how to incorporate multipliers such as cue rates and propogate the uncertainty in cue rate into overall uncertainty in density and abundance. Because there is uncertainty coming not only from encounter rate variability and uncertainty in detection function parameters, but also from cue rate variability, the relative contribution of each source of uncertainty is tablated. This is the last table produced by printing the `wren.estimate` object. For the Montrave winter wren data, only 4% of the uncertainty in the density estimate is attributable to the detection function, 24% attributable to encounter rate variability and 71% attributable to between-individual variability in call rate.
+This vignette uses the function `dht2` because that function knows how to incorporate multipliers such as cue rates and propagate the uncertainty in cue rate into overall uncertainty in density and abundance. Because there is uncertainty coming not only from encounter rate variability and uncertainty in detection function parameters, but also from cue rate variability, the relative contribution of each source of uncertainty is tabulated. This is the last table produced by printing the `wren.estimate` object. For the Montrave winter wren data, only 4% of the uncertainty in the density estimate is attributable to the detection function, 24% attributable to encounter rate variability and 71% attributable to between-individual variability in call rate.
This insight suggests that if this survey was to be repeated, exerting more effort in measuring between-individual variation in call rate would likely yield the most benefits in tightening the precision in density estimates.
diff --git a/vignettes/web-only/points/pointtransects-distill.Rmd b/vignettes/web-only/points/pointtransects-distill.Rmd
index 01e9ce0..28e5b9f 100644
--- a/vignettes/web-only/points/pointtransects-distill.Rmd
+++ b/vignettes/web-only/points/pointtransects-distill.Rmd
@@ -138,7 +138,7 @@ On calling the `ds` function, information is provided to the screen reminding th
summary(sasp.hn)
```
-Visually inspect the fitted detection function with the `plot()` function, specifying the cutpoints histogram with argument `breaks`. Add the argument `pdf` so the plot shows the probability densiy function rather than the detection function. The probability density function is preferred for assessing model fit because the PDF incorporates information about the availability of animals to be detected. There are few animals available to be detected at small distances, therefore lack of fit at small distances is not as consequential for points as it is for lines (Figure \@ref(fig:modelfit)).
+Visually inspect the fitted detection function with the `plot()` function, specifying the cutpoints histogram with argument `breaks`. Add the argument `pdf` so the plot shows the probability density function rather than the detection function. The probability density function is preferred for assessing model fit because the PDF incorporates information about the availability of animals to be detected. There are few animals available to be detected at small distances, therefore lack of fit at small distances is not as consequential for points as it is for lines (Figure \@ref(fig:modelfit)).
```{r, modelfit, fig.dim=c(7,5), fig.cap="Fit of half normal detection function to savannah sparrow data."}
cutpoints <- c(0,5,10,15,20,30,40,max(Savannah_sparrow_1980$distance, na.rm=TRUE))
@@ -206,6 +206,6 @@ Key differences between analysis of line transect data and point transect data
- argument `transect` in `ds()` must be set to `"point"`,
- histogram of radial detection distances is characteristically "humped" because few individuals are available to be detected near the points,
- because of the hump shape (Figure \@ref(fig:basichist)), plotting to assess fit of data to detection distribution usually assessed via `pdf=TRUE` argument added to `plot()` function,
-- for the Arapaho National Refuge Savannah sparrow data, the three candidate models all provide adequeate fit to the data and produce comparable estimates of $P_a$.
+- for the Arapaho National Refuge Savannah sparrow data, the three candidate models all provide adequate fit to the data and produce comparable estimates of $P_a$.
# References
\ No newline at end of file
diff --git a/vignettes/web-only/strata/strata-distill.Rmd b/vignettes/web-only/strata/strata-distill.Rmd
index feb9904..39cb1f9 100644
--- a/vignettes/web-only/strata/strata-distill.Rmd
+++ b/vignettes/web-only/strata/strata-distill.Rmd
@@ -137,7 +137,7 @@ Further exploration of analyses involving stratification can be found in the [ex
# Comments
-Note there is a difference of `r round(model.pooled.AIC$AIC - model.separate.AIC)` AIC units between the model using stratum-specific detection functions and the model using a pooled detection function, with the stratum-specific detection function model being preferrable. To be thorough, absolute goodness of fit for the three stratum-specific detection functions is checked, and all models fit the data adequately.
+Note there is a difference of `r round(model.pooled.AIC$AIC - model.separate.AIC)` AIC units between the model using stratum-specific detection functions and the model using a pooled detection function, with the stratum-specific detection function model being preferable. To be thorough, absolute goodness of fit for the three stratum-specific detection functions is checked, and all models fit the data adequately.
This vignette focuses upon use of stratum-specific detection functions as a model selection exercise. Consequently, the vignette does not examine stratum-specific abundance or density estimates. That output is not included in this example analysis, but can easily be produced by continuing the analysis begun in this example.
From 82ec41f20d8b0efe76f61dd6fb003594e6c4da21 Mon Sep 17 00:00:00 2001
From: Laura Marshall
Date: Sat, 5 Jul 2025 03:11:07 +0100
Subject: [PATCH 5/7] pkgdown build
---
docs/404.html | 2 +-
docs/LICENSE-text.html | 2 +-
docs/articles/index.html | 2 +-
.../web-only/cues/cuecounts-distill.html | 13 +++++------
.../figure-html/fit-1.png | Bin 6643 -> 6643 bytes
.../figure-html/gof-1.png | Bin 4503 -> 4510 bytes
.../figure-html/hist-1.png | Bin 5211 -> 5213 bytes
.../points/pointtransects-distill.html | 8 +++----
.../web-only/strata/strata-distill.html | 7 +++---
docs/authors.html | 2 +-
docs/index.html | 2 +-
docs/pkgdown.yml | 6 ++---
docs/reference/AIC.dsmodel.html | 2 +-
docs/reference/ClusterExercise.html | 2 +-
docs/reference/CueCountingExample.html | 2 +-
docs/reference/Distance-package.html | 2 +-
docs/reference/DuikerCameraTraps.html | 2 +-
docs/reference/ETP_Dolphin.html | 2 +-
docs/reference/LTExercise.html | 2 +-
docs/reference/PTExercise.html | 2 +-
docs/reference/QAIC.html | 2 +-
docs/reference/Savannah_sparrow_1980.html | 2 +-
docs/reference/Stratify_example.html | 2 +-
docs/reference/Systematic_variance_1.html | 2 +-
docs/reference/add_df_covar_line.html | 2 +-
docs/reference/amakihi.html | 2 +-
docs/reference/bootdht.html | 2 +-
docs/reference/bootdht_Dhat_summarize.html | 2 +-
docs/reference/bootdht_Nhat_summarize.html | 2 +-
docs/reference/capercaillie.html | 2 +-
docs/reference/checkdata.html | 2 +-
docs/reference/convert_units.html | 2 +-
docs/reference/create.bins.html | 2 +-
docs/reference/create_bins.html | 2 +-
docs/reference/dht2.html | 2 +-
docs/reference/ds.gof.html | 2 +-
docs/reference/ds.html | 21 +++++++++---------
docs/reference/ducknest.html | 2 +-
docs/reference/dummy_ddf.html | 2 +-
docs/reference/flatfile.html | 2 +-
docs/reference/gof_ds.html | 2 +-
docs/reference/golftees.html | 2 +-
docs/reference/index.html | 2 +-
docs/reference/logLik.dsmodel.html | 2 +-
docs/reference/make_activity_fn.html | 2 +-
docs/reference/minke.html | 2 +-
docs/reference/p_dist_table.html | 2 +-
docs/reference/plot.dsmodel.html | 2 +-
docs/reference/predict.dsmodel.html | 2 +-
docs/reference/predict.fake_ddf.html | 2 +-
docs/reference/print.dht_result.html | 2 +-
docs/reference/print.dsmodel.html | 2 +-
docs/reference/print.summary.dsmodel.html | 2 +-
docs/reference/sikadeer.html | 2 +-
docs/reference/summarize_ds_models.html | 2 +-
docs/reference/summary.dht_bootstrap.html | 2 +-
docs/reference/summary.dsmodel.html | 2 +-
docs/reference/unflatten.html | 2 +-
docs/reference/unimak.html | 2 +-
docs/reference/units_table.html | 2 +-
docs/reference/wren.html | 2 +-
61 files changed, 79 insertions(+), 82 deletions(-)
diff --git a/docs/404.html b/docs/404.html
index ea24eec..11cfd02 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -28,7 +28,7 @@
Distance
- 2.0.0.9010
+ 2.0.1
-
+
Figure 1: Montrave study area; white circles are point count stations.
This vignette uses the function dht2 because that function knows how to incorporate multipliers such as cue rates and propogate the uncertainty in cue rate into overall uncertainty in density and abundance. Because there is uncertainty coming not only from encounter rate variability and uncertainty in detection function parameters, but also from cue rate variability, the relative contribution of each source of uncertainty is tablated. This is the last table produced by printing the wren.estimate object. For the Montrave winter wren data, only 4% of the uncertainty in the density estimate is attributable to the detection function, 24% attributable to encounter rate variability and 71% attributable to between-individual variability in call rate.
+
This vignette uses the function dht2 because that function knows how to incorporate multipliers such as cue rates and propagate the uncertainty in cue rate into overall uncertainty in density and abundance. Because there is uncertainty coming not only from encounter rate variability and uncertainty in detection function parameters, but also from cue rate variability, the relative contribution of each source of uncertainty is tabulated. This is the last table produced by printing the wren.estimate object. For the Montrave winter wren data, only 4% of the uncertainty in the density estimate is attributable to the detection function, 24% attributable to encounter rate variability and 71% attributable to between-individual variability in call rate.
This insight suggests that if this survey was to be repeated, exerting more effort in measuring between-individual variation in call rate would likely yield the most benefits in tightening the precision in density estimates.
Also note the poor fit of the model to the data; the P-value for the Cramer von-Mises test is <<0.05. This is caused by over-dispersion in the distribution of detected call distances. A single individual may sit on a tree branch and emit many song bursts, leading to a jagged distribution of call distances that is not well fitted by a smooth detection function. That over-dispersion will not bias the density estimates.
Fitting a simple dete
## Density:## Label Estimate se cv lcl ucl df## 1 Total 2.674253 0.2625745 0.09818612 2.206266 3.241509 598.5905
-
Visually inspect the fitted detection function with the plot() function, specifying the cutpoints histogram with argument breaks. Add the argument pdf so the plot shows the probability densiy function rather than the detection function. The probability density function is preferred for assessing model fit because the PDF incorporates information about the availability of animals to be detected. There are few animals available to be detected at small distances, therefore lack of fit at small distances is not as consequential for points as it is for lines (Figure 3).
+
Visually inspect the fitted detection function with the plot() function, specifying the cutpoints histogram with argument breaks. Add the argument pdf so the plot shows the probability density function rather than the detection function. The probability density function is preferred for assessing model fit because the PDF incorporates information about the availability of animals to be detected. There are few animals available to be detected at small distances, therefore lack of fit at small distances is not as consequential for points as it is for lines (Figure 3).
cutpoints<-c(0,5,10,15,20,30,40,max(Savannah_sparrow_1980$distance, na.rm=TRUE))plot(sasp.hn, breaks=cutpoints, pdf=TRUE, main="Savannah sparrow point transect data.")
histogram of radial detection distances is characteristically “humped” because few individuals are available to be detected near the points,
because of the hump shape (Figure 2), plotting to assess fit of data to detection distribution usually assessed via pdf=TRUE argument added to plot() function,
-
for the Arapaho National Refuge Savannah sparrow data, the three candidate models all provide adequeate fit to the data and produce comparable estimates of \(P_a\).
+
for the Arapaho National Refuge Savannah sparrow data, the three candidate models all provide adequate fit to the data and produce comparable estimates of \(P_a\).
Note there is a difference of 14 AIC units between the model using stratum-specific detection functions and the model using a pooled detection function, with the stratum-specific detection function model being preferrable. To be thorough, absolute goodness of fit for the three stratum-specific detection functions is checked, and all models fit the data adequately.
+
Note there is a difference of 14 AIC units between the model using stratum-specific detection functions and the model using a pooled detection function, with the stratum-specific detection function model being preferable. To be thorough, absolute goodness of fit for the three stratum-specific detection functions is checked, and all models fit the data adequately.
This vignette focuses upon use of stratum-specific detection functions as a model selection exercise. Consequently, the vignette does not examine stratum-specific abundance or density estimates. That output is not included in this example analysis, but can easily be produced by continuing the analysis begun in this example.
either truncation distance (numeric, e.g. 5) or percentage
-(as a string, e.g. "15%"). Can be supplied as a list with elements left
-and right if left truncation is required (e.g. list(left=1,right=20) or
-list(left="1%",right="15%") or even list(left="1",right="15%")). By
+(as a string, e.g. "15%","15"). Can be supplied as a list with elements
+left and right if left truncation is required (e.g. list(left=1,right=20)
+or list(left="1%",right="15%") or even list(left="1",right="15%")). By
default for exact distances the maximum observed distance is used as the
right truncation. When the data is binned, the right truncation is the
largest bin end point. Default left truncation is set to zero.