From dce95a13c8257c5cd3e9c80969258eb6bb615d0c Mon Sep 17 00:00:00 2001 From: VisruthSK Date: Wed, 17 Jun 2026 15:47:56 -0700 Subject: [PATCH] Added sorted log ratio path for loo --- NAMESPACE | 1 + R/pareto_smooth.R | 109 ++++++++++++++++++++-------- man/ps_tail_sorted_log_ratios.Rd | 25 +++++++ tests/testthat/test-pareto_smooth.R | 14 ---- 4 files changed, 105 insertions(+), 44 deletions(-) create mode 100644 man/ps_tail_sorted_log_ratios.Rd diff --git a/NAMESPACE b/NAMESPACE index 216cf1ce..f72712e6 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -493,6 +493,7 @@ export(ps_khat_threshold) export(ps_min_ss) export(ps_tail) export(ps_tail_length) +export(ps_tail_sorted_log_ratios) export(qgeneralized_pareto) export(quantile2) export(r_scale) diff --git a/R/pareto_smooth.R b/R/pareto_smooth.R index 7de50fa9..52dacc86 100644 --- a/R/pareto_smooth.R +++ b/R/pareto_smooth.R @@ -449,7 +449,6 @@ pareto_convergence_rate.rvar <- function(x, ...) { ps_convergence_rate(k, ndraws(x)) } - #' Pareto smooth tail #' function to Pareto smooth the tail of a vector. Exported #' for usage in other packages, not by users. @@ -481,14 +480,6 @@ ps_tail <- function(x, ... ) { - if (ndraws_tail < 5) { - warning_no_call( - "Can't fit generalized Pareto distribution ", - "because ndraws_tail is less than 5." - ) - return(list(x = x, k = NA)) - } - if (are_log_weights) { # shift log values for safe exponentiation x <- x - max(x) @@ -518,38 +509,96 @@ ps_tail <- function(x, cutoff <- cutoff - .Machine$double.eps } - max_tail <- max(draws_tail) - if (are_log_weights) { - draws_tail <- exp(draws_tail) - cutoff <- exp(cutoff) - } - fit <- gpdfit(draws_tail - cutoff, sort_x = FALSE, ...) - k <- fit$k - sigma <- fit$sigma - if (is.finite(k) && smooth_draws) { - p <- (seq_len(ndraws_tail) - 0.5) / ndraws_tail - smoothed <- qgeneralized_pareto(p = p, mu = cutoff, k = k, sigma = sigma) - if (are_log_weights) { - smoothed <- log(smoothed) - } + smoothed <- .ps_tail_sorted( + x = exp(draws_tail), + cutoff = exp(cutoff), + smooth_draws = smooth_draws, + ... + ) + smoothed$tail <- log(smoothed$tail) } else { - smoothed <- NULL + smoothed <- .ps_tail_sorted( + x = draws_tail, + cutoff = cutoff, + smooth_draws = smooth_draws, + ... + ) } - # truncate at max of raw draws - if (!is.null(smoothed)) { - smoothed[smoothed > max_tail] <- max_tail - x[ord$ix[tail_ids]] <- smoothed + k <- smoothed$k + + if (is.finite(k) && smooth_draws) { + x[ord$ix[tail_ids]] <- smoothed$tail } if (tail == "left") { x <- -x } - out <- list(x = x, k = k) + list(x = x, k = k) +} - return(out) +#' Pareto smooth sorted tail of log ratios +#' +#' Function to Pareto smooth a sorted tail of log ratios. Exported +#' for usage in other packages, not by users. +#' +#' @param x Vector of tail elements already sorted in ascending order. Already shifted to account for overflow. +#' @param cutoff Largest value smaller than the tail values. +#' @inheritParams ps_tail +#' @seealso [`ps_tail`] for the general tail smoothing function. +#' +#' @export +ps_tail_sorted_log_ratios <- function(x, cutoff, smooth_draws = TRUE, ...) { + if (is_constant(x)) { + return(list(tail = x, k = NA)) + } + + smoothed <- .ps_tail_sorted( + x = exp(x), + cutoff = exp(cutoff), + smooth_draws = smooth_draws, + ... + ) + + smoothed$tail <- log(smoothed$tail) + + smoothed +} + +#' Pareto smooth sorted tail +#' +#' Internal function to Pareto smooth a sorted tail. +#' +#' @param x Vector of tail elements already sorted in ascending order. +#' @param cutoff Largest value smaller than the tail values. +#' @param smooth_draws Should the tail values be smoothed? +#' @param ... Passed to [gpdfit()]. +#' +#' @noRd +.ps_tail_sorted <- function(x, cutoff, smooth_draws = TRUE, ...) { + len <- length(x) + + fit <- gpdfit(x - cutoff, sort_x = FALSE, ...) + k <- fit$k + sigma <- fit$sigma + + if (is.finite(k) && smooth_draws) { + tail <- qgeneralized_pareto( + p = (seq_len(len) - 0.5) / len, + mu = cutoff, + k = k, + sigma = sigma + ) + } else { + tail <- x + } + + # truncate at max of raw draws + tail[tail > max(x)] <- max(x) + + list(tail = tail, k = k) } #' Extra Pareto-k diagnostics diff --git a/man/ps_tail_sorted_log_ratios.Rd b/man/ps_tail_sorted_log_ratios.Rd new file mode 100644 index 00000000..caf7952d --- /dev/null +++ b/man/ps_tail_sorted_log_ratios.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pareto_smooth.R +\name{ps_tail_sorted_log_ratios} +\alias{ps_tail_sorted_log_ratios} +\title{Pareto smooth sorted tail of log ratios} +\usage{ +ps_tail_sorted_log_ratios(x, cutoff, smooth_draws = TRUE, ...) +} +\arguments{ +\item{x}{Vector of tail elements already sorted in ascending order. Already shifted to account for overflow.} + +\item{cutoff}{Largest value smaller than the tail values.} + +\item{smooth_draws}{(logical) Should the tails be smoothed? Default is +\code{TRUE}. If \code{FALSE}, \code{k} will be calculated but \code{x} will remain untouched.} + +\item{...}{Arguments passed to individual methods (if applicable).} +} +\description{ +Function to Pareto smooth a sorted tail of log ratios. Exported +for usage in other packages, not by users. +} +\seealso{ +\code{\link{ps_tail}} for the general tail smoothing function. +} diff --git a/tests/testthat/test-pareto_smooth.R b/tests/testthat/test-pareto_smooth.R index b4203097..ec53adb8 100644 --- a/tests/testthat/test-pareto_smooth.R +++ b/tests/testthat/test-pareto_smooth.R @@ -249,20 +249,6 @@ test_that("pareto_smooth works for log_weights", { }) -test_that("check ps_tail behavior for ndraws_tail less than 5", { - w <- c(1:25, 1e3, 1e3, 1e3) - lw <- log(w) - - # prints correct warning - expect_warning( - tail <- ps_tail(lw, ndraws_tail = 4, tail = "right"), - "Can't fit generalized Pareto distribution because ndraws_tail is less than 5." - ) - # output has expected shape and k = NA - expect_equal(names(tail), c("x", "k")) - expect_true(is.na(tail$k)) -}) - test_that("check ps_tail behavior for constant draws_tail", { x <- log(replicate(10, 0.3))