stan-dev · vinniott · Mar 17, 2026 · Mar 17, 2026 · Mar 22, 2026 · Mar 22, 2026
diff --git a/NAMESPACE b/NAMESPACE
@@ -154,6 +154,7 @@ export(psislw)
 export(relative_eff)
 export(scrps)
 export(sis)
+export(srs_diff_est)
 export(stacking_weights)
 export(tis)
 export(waic)

diff --git a/NEWS.md b/NEWS.md
@@ -29,6 +29,7 @@
 * Added contribution section. by @VisruthSK in #286
 * Update LOO uncertainty paper to use BA doi by @avehtari in #311
 * Update documentation for `E_loo()` function by @avehtari in #312
+* Export `srs_diff_est()` function by @vinniott and @avehtari in #340
 
 
 # loo 2.8.0

diff --git a/R/loo_subsample.R b/R/loo_subsample.R
@@ -24,7 +24,7 @@
 #'   same length containing the posterior density and the approximation density
 #'   for the individual draws.
 #'
-#' @seealso [loo()], [psis()], [loo_compare()]
+#' @seealso [loo()], [psis()], [loo_compare()], [srs_diff_est()]
 #' @template loo-large-data-references
 #'
 #' @export loo_subsample loo_subsample.function
@@ -1166,12 +1166,86 @@ loo_subsample_estimation_diff_srs <- function(x) {
   update_psis_loo_ss_estimates(x)
 }
 
-#' Difference estimation using SRS-WOR sampling (Magnusson et al., 2020)
-#' @noRd
-#' @param y_approx Approximated values of all observations.
-#' @param y The values observed.
-#' @param y_idx The index of `y` in `y_approx`.
-#' @return A list with estimates.
+#' Difference estimator with simple random sampling without replacement.
+#'
+#' The difference estimator `srs_diff_est()` estimates
+#' the expectation \eqn{n E[y]} when we have \eqn{n} approximate values \eqn{\tilde{y}_i},
+#' \eqn{i = 1, \ldots, n} and \eqn{m < n} accurate values \eqn{y_j}, \eqn{j \in \mathcal{S}},
+#' where \eqn{m} is the subsample size and \eqn{\mathcal{S}} is
+#' a simple random subsample without replacement.  The original
+#' approach is by Cochran (1977) and we follow the equations 7--9 by
+#' Magnusson et al. (2020).
+#'
+#' @details In Magnusson et al. (2020) Eq (9) first row, the second `+` should
+#'   be a `-`; Supplementary Material Eq (6) has this correct.
+#'   As `srs_diff_est()` in the `loo` package is used for \eqn{n E[y]}, there is
+#'   a proportional difference of \eqn{1/n} compared to the paper.
+#'
+#' @param y_approx (numeric) `n` approximated values.
+#' @param y (numeric) `m<n` subsampled values.
+#' @param y_idx (integerish) The index of `y` in `y_approx`.
-#' @param y_idx (integerish) The index of `y` in `y_approx`.
+#' @param y_idx (integerish) The positions in `y_approx` corresponding to the subsampled values in `y`.
-#' @param y_idx (integerish) The index of `y` in `y_approx`.
+#' @param y_idx (integerish) The positions in `y_approx` corresponding to the subsampled values in `y`.
+#'
+#' @return A named list containing numeric values:
+#' * `y_hat`: estimated mean of \eqn{y} (Eq 7),
+#' * `v_y_hat`: variance of the mean estimate (Eq 8), and
+#' * `hat_v_y`: estimated variance of \eqn{y} (Eq 9).
+#'
+#' @references
+#' Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020).
+#' Leave-One-Out Cross-Validation for Model Comparison in Large Data.
+#' In _Proceedings of the 23rd International Conference on Artificial
+#' Intelligence and Statistics (AISTATS)_, PMLR 108:341-351.
+#'
+#' Cochran, W. G. (1977). _Sampling Techniques, 3rd Edition_. John Wiley.
+#'
+#' Cortez, P., Cerdeira, A.L., Almeida, F., Matos, T., & Reis, J. (2009).
+#' Modeling wine preferences by data mining from physicochemical properties.
+#' _Decis. Support Syst._, _47_, 547-553.
+#'
+#' @seealso [loo_subsample()]
+#'
+#' @examples
+#' ## This example predicts wine quality (data from Cortez et al., 2009).
+#' \dontrun{
+#' library(brms)
+#' options(brms.backend = "cmdstanr")
+#' options(mc.cores = 4)
+#'
+#' wine_scaled <- read.delim(
+#'   "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
+#'   sep = ";"
+#' ) |> unique() |> scale() |> as.data.frame()
+#'
+#' fitos <- brm(ordered(quality) ~ .,
+#'              family = cumulative("logit"),
+#'              prior = prior(R2D2(mean_R2 = 1/3, prec_R2 = 3)),
+#'              data = wine_scaled,
+#'              seed = 1,
+#'              silent = 2,
+#'              refresh = 0)
+#'
+#' log_lik_matrix <- log_lik(fitos)
+#'
+#' N <- nrow(wine_scaled)
+#' Nsub <- 100
+#'
+#' # posterior log-score
+#' lpd <- elpd(log_lik_matrix)
+#' sum(lpd$pointwise[,"elpd"])
+#'
+#' # Use PSIS-LOO for subsample of 100 randomly selected observations
+#' set.seed(1)
+#' idx <- sample(1:N, Nsub)
+#' elpd_loo_sub <- loo(log_lik_matrix[,idx])
+#' sum(elpd_loo_sub$pointwise[,"elpd_loo"]) / Nsub * N
+#'
+#' # Use difference estimator to combine fast result and subsampled accurate result
+#' srs_diff_est(lpd$pointwise[,"elpd"], elpd_loo_sub$pointwise[,"elpd_loo"], idx)
+#'
+#' # Comparison to using PSIS-LOO for all observations
+#' loo(log_lik_matrix)
+#' }
+#' @export
 srs_diff_est <- function(y_approx, y, y_idx) {
   checkmate::assert_numeric(y_approx)
   checkmate::assert_numeric(y, max.len = length(y_approx))

diff --git a/man/loo_subsample.Rd b/man/loo_subsample.Rd
diff --git a/man/srs_diff_est.Rd b/man/srs_diff_est.Rd