diff --git a/.gitignore b/.gitignore
index 9c69942e..95150d9c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,8 @@ loo.Rproj
 # produced vignettes
 vignettes/loo2-lfo_cache/*
 vignettes/loo2-non-factorizable_cache/*
+vignettes/articles-online-only/data-for-vignettes/
+vignettes/articles-online-only/*.html
 vignettes/*.html
 vignettes/*.pdf
 inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
index d64412da..775c5a02 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -36,6 +36,7 @@ Depends:
     R (>= 3.1.2)
 Imports:
     checkmate,
+    cli,
     matrixStats (>= 0.52),
     parallel,
     posterior (>= 1.7.0),
diff --git a/NAMESPACE b/NAMESPACE
index 3405d737..dc12a9df 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -21,15 +21,11 @@ S3method(as.psis_loo,psis_loo)
 S3method(as.psis_loo,psis_loo_ss)
 S3method(as.psis_loo_ss,psis_loo)
 S3method(as.psis_loo_ss,psis_loo_ss)
-S3method(crps,matrix)
-S3method(crps,numeric)
 S3method(dim,importance_sampling)
 S3method(dim,kfold)
 S3method(dim,loo)
 S3method(dim,psis_loo)
 S3method(dim,waic)
-S3method(elpd,array)
-S3method(elpd,matrix)
 S3method(importance_sampling,array)
 S3method(importance_sampling,default)
 S3method(importance_sampling,matrix)
@@ -41,11 +37,9 @@ S3method(loo_approximate_posterior,array)
 S3method(loo_approximate_posterior,matrix)
 S3method(loo_compare,default)
 S3method(loo_compare,psis_loo_ss_list)
-S3method(loo_crps,matrix)
 S3method(loo_model_weights,default)
 S3method(loo_moment_match,default)
 S3method(loo_predictive_metric,matrix)
-S3method(loo_scrps,matrix)
 S3method(loo_subsample,"function")
 S3method(nobs,psis_loo_ss)
 S3method(plot,loo)
@@ -57,8 +51,11 @@ S3method(print,compare.loo_ss)
 S3method(print,importance_sampling)
 S3method(print,importance_sampling_loo)
 S3method(print,kfold)
+S3method(print,kfold_pred_measure)
 S3method(print,loo)
+S3method(print,loo_pred_measure)
 S3method(print,pareto_k_table)
+S3method(print,pred_measure)
 S3method(print,pseudobma_bb_weights)
 S3method(print,pseudobma_weights)
 S3method(print,psis)
@@ -83,8 +80,6 @@ S3method(relative_eff,array)
 S3method(relative_eff,default)
 S3method(relative_eff,importance_sampling)
 S3method(relative_eff,matrix)
-S3method(scrps,matrix)
-S3method(scrps,numeric)
 S3method(sis,array)
 S3method(sis,default)
 S3method(sis,matrix)
@@ -100,14 +95,19 @@ export(.compute_point_estimate)
 export(.ndraws)
 export(.thin_draws)
 export(E_loo)
+export(acc)
+export(bacc)
+export(brier)
 export(compare)
-export(crps)
 export(elpd)
+export(elpd.array)
+export(elpd.matrix)
 export(example_loglik_array)
 export(example_loglik_matrix)
 export(extract_log_lik)
 export(find_model_names)
 export(gpdfit)
+export(insample_pred_measure)
 export(is.kfold)
 export(is.loo)
 export(is.psis)
@@ -116,6 +116,7 @@ export(is.sis)
 export(is.tis)
 export(is.waic)
 export(kfold)
+export(kfold_pred_measure)
 export(kfold_split_grouped)
 export(kfold_split_random)
 export(kfold_split_stratified)
@@ -128,17 +129,19 @@ export(loo_approximate_posterior.array)
 export(loo_approximate_posterior.function)
 export(loo_approximate_posterior.matrix)
 export(loo_compare)
-export(loo_crps)
 export(loo_i)
 export(loo_model_weights)
 export(loo_model_weights.default)
 export(loo_moment_match)
 export(loo_moment_match.default)
+export(loo_pred_measure)
 export(loo_predictive_metric)
-export(loo_scrps)
 export(loo_subsample)
 export(loo_subsample.function)
+export(mae)
 export(mcse_loo)
+export(mlpd)
+export(mse)
 export(nlist)
 export(obs_idx)
 export(pareto_k_ids)
@@ -146,15 +149,22 @@ export(pareto_k_influence_values)
 export(pareto_k_table)
 export(pareto_k_values)
 export(pointwise)
+export(pred_measure)
 export(print_dims)
 export(pseudobma_weights)
 export(psis)
 export(psis_n_eff_values)
 export(psislw)
+export(ptw_log_pred_density)
+export(r2)
 export(relative_eff)
-export(scrps)
+export(rmse)
+export(rps)
 export(sis)
+export(srps)
 export(stacking_weights)
+export(supported_measures_list)
+export(test_pred_measure)
 export(tis)
 export(waic)
 export(waic.array)
diff --git a/R/crps.R b/R/crps.R
deleted file mode 100644
index ff1c9e25..00000000
--- a/R/crps.R
+++ /dev/null
@@ -1,218 +0,0 @@
-#' Continuously ranked probability score
-#'
-#' The `crps()` and `scrps()` functions and their `loo_*()` counterparts can be
-#' used to compute the continuously ranked probability score (CRPS) and scaled
-#' CRPS (SCRPS) (as defined by Bolin and Wallin, 2023). CRPS is a proper scoring rule, and
-#' strictly proper when the first moment of the predictive distribution is
-#' finite. Both can be expressed in terms of samples form the predictive
-#' distribution. See, for example, a paper by Gneiting and Raftery (2007)
-#' for a comprehensive discussion on CRPS.
-#'
-#' To compute (S)CRPS, the user needs to provide two sets of draws, `x` and
-#' `x2`, from the predictive distribution. This is due to the fact that formulas
-#' used to compute CRPS involve an expectation of the absolute difference of `x`
-#' and `x2`, both having the same distribution. See the `permutations` argument,
-#' as well as Gneiting and Raftery (2007) for details.
-#'
-#' @export
-#' @param x A `S` by `N` matrix (draws by observations), or a vector of length
-#'   `S` when only single observation is provided in `y`.
-#' @param x2 Independent draws from the same distribution as draws in `x`.
-#'   Should be of the identical dimension.
-#' @param y A vector of observations or a single value.
-#' @param permutations An integer, with default value of 1,  specifying how many
-#'   times the expected value of  |X - X'| (`|x - x2|`) is computed. The row
-#'   order of `x2` is shuffled as elements `x` and `x2` are typically drawn
-#'   given the same values of parameters. This happens, e.g., when one calls
-#'   `posterior_predict()` twice for a fitted \pkg{rstanarm} or \pkg{brms}
-#'   model. Generating more permutations is expected to decrease the variance of
-#'   the computed expected value.
-#' @param ... Passed on to [E_loo()] in the `loo_*()` version of these
-#'   functions.
-#'
-#' @return A list containing two elements: `estimates` and `pointwise`.
-#'   The former reports estimator and standard error and latter the pointwise
-#'   values. Following Bolin & Wallin (2023), a larger value is better.
-#'
-#' @examples
-#' \dontrun{
-#' # An example using rstanarm
-#' library(rstanarm)
-#' data("kidiq")
-#' fit <- stan_glm(kid_score ~ mom_hs + mom_iq, data = kidiq)
-#' ypred1 <- posterior_predict(fit)
-#' ypred2 <- posterior_predict(fit)
-#' crps(ypred1, ypred2, y = fit$y)
-#' loo_crps(ypred1, ypred2, y = fit$y, log_lik = log_lik(fit))
-#' }
-#'
-#' @references
-#' Bolin, D., & Wallin, J. (2023). Local scale invariance and robustness of
-#' proper scoring rules. Statistical Science, 38(1):140-159.
-#'
-#' Gneiting, T., & Raftery, A. E. (2007). Strictly Proper Scoring Rules,
-#' Prediction, and Estimation. Journal of the American Statistical Association,
-#' 102(477), 359–378.
-crps <- function(x, ...) {
-  UseMethod("crps")
-}
-
-
-#' @rdname crps
-#' @export
-scrps <- function(x, ...) {
-  UseMethod("scrps")
-}
-
-#' @rdname crps
-#' @export
-loo_crps <- function(x, ...) {
-  UseMethod("loo_crps")
-}
-
-#' @rdname crps
-#' @export
-loo_scrps <- function(x, ...) {
-  UseMethod("loo_scrps")
-}
-
-
-#' @rdname crps
-#' @export
-crps.matrix <- function(x, x2, y, ..., permutations = 1) {
-  validate_crps_input(x, x2, y)
-  repeats <- replicate(permutations, EXX_compute(x, x2), simplify = F)
-  EXX <- Reduce(`+`, repeats) / permutations
-  EXy <- colMeans(abs(sweep(x, 2, y)))
-  crps_output(.crps_fun(EXX, EXy))
-}
-
-
-#' Method for a single data point
-#' @rdname crps
-#' @export
-crps.numeric <- function(x, x2, y, ..., permutations = 1) {
-  stopifnot(length(x) == length(x2),
-            length(y) == 1)
-  crps.matrix(as.matrix(x), as.matrix(x2), y, permutations)
-}
-
-
-#' @rdname crps
-#' @export
-#' @param log_lik A log-likelihood matrix the same size as `x`.
-#' @param r_eff An optional vector of relative effective sample size estimates
-#'   containing one element per observation. See [psis()] for details.
-#' @param cores The number of cores to use for parallelization of `[psis()]`.
-#'   See [psis()] for details.
-loo_crps.matrix <-
-  function(x,
-           x2,
-           y,
-           log_lik,
-           ...,
-           permutations = 1,
-           r_eff = 1,
-           cores = getOption("mc.cores", 1)) {
-  validate_crps_input(x, x2, y, log_lik)
-  repeats <- replicate(permutations,
-                       EXX_loo_compute(x, x2, log_lik, r_eff = r_eff, ...),
-                       simplify = F)
-  EXX <- Reduce(`+`, repeats) / permutations
-  psis_obj <- psis(-log_lik, r_eff = r_eff, cores = cores)
-  EXy <- E_loo(abs(sweep(x, 2, y)), psis_obj, log_ratios = -log_lik, ...)$value
-  crps_output(.crps_fun(EXX, EXy))
-}
-
-
-#' @rdname crps
-#' @export
-scrps.matrix <- function(x, x2, y, ..., permutations = 1) {
-  validate_crps_input(x, x2, y)
-  repeats <- replicate(permutations, EXX_compute(x, x2), simplify = F)
-  EXX <- Reduce(`+`, repeats) / permutations
-  EXy <- colMeans(abs(sweep(x, 2, y)))
-  crps_output(.crps_fun(EXX, EXy, scale = TRUE))
-}
-
-#' @rdname crps
-#' @export
-scrps.numeric <- function(x, x2, y, ..., permutations = 1) {
-  stopifnot(length(x) == length(x2),
-            length(y) == 1)
-  scrps.matrix(as.matrix(x), as.matrix(x2), y, permutations)
-}
-
-
-#' @rdname crps
-#' @export
-loo_scrps.matrix <-
-  function(
-    x,
-    x2,
-    y,
-    log_lik,
-    ...,
-    permutations = 1,
-    r_eff = 1,
-    cores = getOption("mc.cores", 1)) {
-  validate_crps_input(x, x2, y, log_lik)
-  repeats <- replicate(permutations,
-                       EXX_loo_compute(x, x2, log_lik, r_eff = r_eff, ...),
-                       simplify = F)
-  EXX <- Reduce(`+`, repeats) / permutations
-  psis_obj <- psis(-log_lik, r_eff = r_eff, cores = cores)
-  EXy <- E_loo(abs(sweep(x, 2, y)), psis_obj, log_ratios = -log_lik, ...)$value
-  crps_output(.crps_fun(EXX, EXy, scale = TRUE))
-}
-
-# ------------ Internals ----------------
-
-
-EXX_compute <- function(x, x2) {
-  S <- nrow(x)
-  colMeans(abs(x - x2[sample(1:S),]))
-}
-
-
-EXX_loo_compute <- function(x, x2, log_lik, r_eff = 1, ...) {
-  S <- nrow(x)
-  shuffle <- sample (1:S)
-  x2 <- x2[shuffle,]
-  log_lik2 <- log_lik[shuffle,]
-  psis_obj_joint <- psis(-log_lik - log_lik2 , r_eff = r_eff)
-  E_loo(abs(x - x2), psis_obj_joint, log_ratios = -log_lik - log_lik2, ...)$value
-}
-
-
-#' Function to compute crps and scrps
-#' @noRd
-.crps_fun <- function(EXX, EXy, scale = FALSE) {
-  if (scale) return(-EXy/EXX - 0.5 * log(EXX))
-  0.5 * EXX - EXy
-}
-
-#' Compute output data for crps functions
-#' @noRd
-crps_output <- function(crps_pw) {
-  n <- length(crps_pw)
-  out <- list()
-  out$estimates <- c(mean(crps_pw), sd(crps_pw) / sqrt(n))
-  names(out$estimates) <- c('Estimate', 'SE')
-  out$pointwise <- crps_pw
-  out
-}
-
-#' Validate input of CRPS functions
-#'
-#' Check that predictive draws and observed data are of compatible shape
-#' @noRd
-validate_crps_input <- function(x, x2, y, log_lik = NULL) {
-  stopifnot(is.numeric(x),
-            is.numeric(x2),
-            is.numeric(y),
-            identical(dim(x), dim(x2)),
-            ncol(x) == length(y),
-            ifelse(is.null(log_lik), TRUE, identical(dim(log_lik), dim(x)))
-            )
-}
diff --git a/R/elpd.R b/R/elpd.R
index 20724e71..a132ed71 100644
--- a/R/elpd.R
+++ b/R/elpd.R
@@ -4,7 +4,6 @@
 #' pointwise predictive density for a new dataset or the log pointwise
 #' predictive density of the observed data (an overestimate of the elpd).
 #'
-#' @export
 #' @param x A log-likelihood array or matrix. The **Methods (by class)**
 #'   section, below, has detailed descriptions of how to specify the inputs for
 #'   each method.
@@ -19,8 +18,8 @@
 #'
 #' @examples
 #' # Calculate the lpd of the observed data
-#' LLarr <- example_loglik_array()
-#' elpd(LLarr)
+#' # LLarr <- example_loglik_array()
+#' # elpd(LLarr)
 #'
 elpd <- function(x, ...) {
   UseMethod("elpd")
diff --git a/R/measures.R b/R/measures.R
new file mode 100644
index 00000000..9caab4d9
--- /dev/null
+++ b/R/measures.R
@@ -0,0 +1,821 @@
+## Density scores -------------------------------------------------------------
+
+#' Pointwise log predictive density (`lppd_i`)
+#'
+#' Computes pointwise log predictive density contributions from a matrix of
+#' log predictive densities/probabilities for posterior draws. When PSIS
+#' log-weights are supplied, they are used to form a weighted log-sum-exp per
+#' observation.
+#'
+#' @param ylp A numeric matrix of log predictive densities/probabilities with
+#'   dimensions draws x observations.
+#' @param psis_log_weights Optional numeric matrix of normalized PSIS log
+#'   weights with the same dimensions as `ylp`. Each column must sum to 1 on
+#'   the probability scale.
+#'
+#' @returns A numeric vector of length `ncol(ylp)` with pointwise log
+#'   predictive density values.
+#'
+#' @examples
+#' ylp <- matrix(log(c(0.2, 0.4, 0.3, 0.8)), nrow = 2)
+#' ptw_log_pred_density(ylp)
+#'
+#' lw <- matrix(log(c(0.7, 0.3, 0.6, 0.4)), nrow = 2)
+#' ptw_log_pred_density(ylp, lw)
+#' @export
+ptw_log_pred_density <- function(ylp, psis_log_weights = NULL) {
+  .validate_numeric_matrix(ylp, arg = "ylp")
+  n_draws <- dim(ylp)[1]
+  
+  if (is.null(psis_log_weights)) {
+    return(matrixStats::colLogSumExps(ylp) - log(n_draws))
+  }
+  .validate_numeric_matrix(
+    psis_log_weights,
+    arg = "psis_log_weights",
+    nrow = n_draws,
+    ncol = ncol(ylp)
+  )
+  
+  col_sums <- colSums(exp(psis_log_weights))
+  if (!isTRUE(all.equal(col_sums, rep(1, ncol(psis_log_weights))))) {
+    cli::cli_abort(c(
+      "'psis_log_weights' must be normalized (column sums equal to 1).",
+      "i" = "Range of current column sums: [{min(col_sums)}, {max(col_sums)}]."
+    ))
+  }
+  
+  matrixStats::colLogSumExps(ylp + psis_log_weights)
+}
+
+#' Expected log pointwise predictive density (`elpd`)
+#'
+#' Computes ELPD as the sum of pointwise log predictive density contributions
+#' (lppd_i).
+#'
+#' @param ylp A numeric matrix of log predictive densities/probabilities with
+#'   dimensions draws x observations.
+#' @param log_weights Optional numeric matrix of unnormalized log-weights
+#'   with dimensions draws x observations.
+#' @param pointwise Optional numeric vector of precomputed pointwise
+#'   contributions. If provided, `ylp` and `log_weights` are ignored.
+#' @param revert_sign Logical; if `TRUE`, multiply the estimate and pointwise
+#'   values by -1 before returning.
+#'
+#' @returns A named list with elements `estimate` (sum over lppd_i), `se`
+#'   (standard error across lppd_i), and `pointwise` (lppd_i).
+#'
+#' @examples
+#' ylp <- matrix(log(c(0.2, 0.4, 0.3, 0.8)), nrow = 2)
+#' elpd(ylp)
+#' @export
+elpd <- function(
+  ylp, log_weights = NULL, pointwise = NULL, revert_sign = FALSE
+) {  
+  if (!is.null(pointwise)) {
+    .validate_numeric_vector(pointwise, arg = "pointwise")
+    .inform_ignored_inputs(
+      pointwise,
+      ignored_args = list(ylp = ylp, log_weights = log_weights),
+      fun_name = "elpd"
+    )
+    lppd_i <- pointwise
+  } else {
+    .validate_numeric_matrix(ylp, arg = "ylp")
+    if (!is.null(log_weights)) {
+      log_weights <- .normalize_and_validate_log_weights(
+        log_weights = log_weights,
+        n_draws = nrow(ylp),
+        n_obs = ncol(ylp)
+      )
+    }
+    lppd_i <- ptw_log_pred_density(ylp, log_weights)
+  }
+  
+  if (length(lppd_i) == 1L) {
+    cli::cli_warn("Only one pointwise value supplied; standard error is set to 0.")
+  }
+  
+  res <- list(
+    estimate = sum(lppd_i),
+    se = if (length(lppd_i) == 1L) 0 else sqrt(length(lppd_i) * var(lppd_i)),
+    pointwise = lppd_i
+  )
+  
+  .measure_output(res, revert_sign)
+}
+
+#' Mean log pointwise predictive density (`mlpd`)
+#'
+#' Computes MLPD as the average of pointwise log predictive density (lppd_i) 
+#' values. Inputs follow the same conventions as [elpd()].
+#'
+#' @param ylp A numeric matrix of log predictive densities/probabilities with
+#'   dimensions draws x observations.
+#' @param log_weights Optional numeric matrix of unnormalized log-weights
+#'   with dimensions draws x observations.
+#' @param pointwise Optional numeric vector of precomputed pointwise
+#'   contributions. If provided, `ylp` and `log_weights` are ignored.
+#' @param revert_sign Logical; if `TRUE`, multiply the estimate and pointwise
+#'   values by -1 before returning.
+#'
+#' @returns A named list with elements `estimate` (mean over lppd_i), `se`
+#'   (standard error), and `pointwise` (lppd_i).
+#'
+#' @examples
+#' ylp <- matrix(log(c(0.2, 0.4, 0.3, 0.8)), nrow = 2)
+#' mlpd(ylp)
+#' @export
+mlpd <- function(
+  ylp, log_weights = NULL, pointwise = NULL, revert_sign = FALSE
+) {
+  if (!is.null(pointwise)) {
+    .validate_numeric_vector(pointwise, arg = "pointwise")
+    .inform_ignored_inputs(
+      pointwise,
+      ignored_args = list(ylp = ylp, log_weights = log_weights),
+      fun_name = "mlpd"
+    )
+    lppd_i <- pointwise
+  } else {
+    .validate_numeric_matrix(ylp, arg = "ylp")
+    if (!is.null(log_weights)) {
+      log_weights <- .normalize_and_validate_log_weights(
+        log_weights = log_weights,
+        n_draws = nrow(ylp),
+        n_obs = ncol(ylp)
+      )
+    }
+    lppd_i <- ptw_log_pred_density(ylp, log_weights)
+  }
+  
+  n_obs <- length(lppd_i)
+  if (n_obs == 1L) {
+    cli::cli_warn("Only one pointwise value supplied; standard error is set to 0.")
+  }
+  
+  res <- list(
+    estimate = sum(lppd_i) / n_obs,
+    se = if (n_obs == 1L) 0 else sqrt(n_obs * var(lppd_i)) / n_obs,
+    pointwise = lppd_i
+  )
+  .measure_output(res, revert_sign)
+}
+
+#' Classification accuracy (`acc`)
+#'
+#' Computes pointwise and average classification accuracy for binary or
+#' multiclass outcomes from posterior predictive class assignments. For binary
+#' outcomes, each draw is thresholded at 0.5. For multiclass outcomes, each
+#' draw is mapped to the most likely category via `which.max()`.
+#'
+#' @param y An integer vector of observed class labels.
+#' @param mupred A numeric array of posterior predictive means. For binary
+#'   outcomes use a draws x observations matrix. For multiclass outcomes use a
+#'   draws x observations x categories array.
+#' @param log_weights Optional numeric matrix of unnormalized log-weights
+#'   with dimensions draws x observations.
+#' @param pointwise Optional numeric vector of precomputed pointwise accuracy
+#'   contributions. If provided, `y`, `mupred`, and `log_weights` are ignored.
+#' @param revert_sign Logical; if `TRUE`, multiply the estimate and pointwise
+#'   values by -1 before returning.
+#'
+#' @returns A named list with elements `estimate` (mean accuracy), `se`
+#'   (standard error), and `pointwise` (pointwise accuracy).
+#'
+#' @examples
+#' y <- c(1L, 0L, 1L)
+#' mupred <- matrix(c(0.8, 0.3, 0.7, 0.6, 0.4, 0.9), nrow = 2)
+#' loo::acc(y, mupred)
+#' @export
+acc <- function(
+  y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE
+) {
+  if (!is.null(pointwise)) {
+    .inform_ignored_inputs(
+      pointwise,
+      ignored_args = list(y = y, mupred = mupred, log_weights = log_weights),
+      fun_name = "acc"
+    )
+    acc_i <- pointwise
+  } else {
+    .validate_numeric_vector(y, arg = "y")
+    if (!is.numeric(mupred) || (length(dim(mupred)) != 2 && length(dim(mupred)) != 3)) {
+      cli::cli_abort("{.arg mupred} must be a numeric matrix or 3D numeric array.")
+    }
+    .validate_probs(mupred, arg = "mupred")
+    
+    if (!is.null(log_weights)) {
+      weights <- exp(.normalize_and_validate_log_weights(
+        log_weights = log_weights,
+        n_draws = nrow(mupred),
+        n_obs = dim(mupred)[2]
+      ))
+    } else {
+      weights <- rep(1 / nrow(mupred), nrow(mupred))
+    }
+    
+    if (length(dim(mupred)) == 3) {
+      # Multiclass: (draws × obs × categories) > argmax over categories
+      ypred <- apply(mupred, c(1, 2), which.max)
+    } else {
+      .validate_numeric_matrix(mupred, arg = "mupred")
+      # Binary: (draws × obs) > threshold at 0.5
+      ypred <- (mupred > 0.5) * 1L
+    }
+    acc_i <- colSums(sweep(ypred, 2, y, `==`) * weights)
+  }
+  
+  res <- list(
+    estimate = mean(acc_i),
+    se = sqrt(var(acc_i) / length(acc_i)),
+    pointwise = acc_i
+  )
+  .measure_output(res, revert_sign)
+}
+
+#' Balanced classification accuracy (`bacc`)
+#'
+#' Computes balanced accuracy by averaging class-specific mean accuracy, giving
+#' each observed class equal weight regardless of class frequency.
+#'
+#' @inheritParams acc
+#'
+#' @returns A named list with elements `estimate` (balanced accuracy), `se`
+#'   (standard error based on balanced pointwise contributions), and
+#'   `pointwise` (pointwise balanced accuracy terms).
+#'
+#' @examples
+#' y <- c(1L, 1L, 2L, 2L)
+#' mupred <- array(
+#'   c(0.8, 0.2, 0.7, 0.3, 0.3, 0.7, 0.2, 0.8),
+#'   dim = c(1, 4, 2)
+#' )
+#' bacc(y, mupred)
+#' @export
+bacc <- function(
+  y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE
+) {
+  if (!is.null(pointwise)) {
+    .inform_ignored_inputs(
+      pointwise,
+      ignored_args = list(mupred = mupred, log_weights = log_weights),
+      fun_name = "bacc"
+    )
+    acc_i <- pointwise
+  } else {
+    .validate_numeric_vector(y, arg = "y")
+    classes <- sort(unique(y))
+    K <- length(classes)
+    if (K < 2) {
+      cli::cli_abort("{.fn bacc} requires at least two outcome classes.")
+    }
+    if (!is.numeric(mupred) || (length(dim(mupred)) != 2 && length(dim(mupred)) != 3)) {
+      cli::cli_abort("{.arg mupred} must be a numeric matrix or 3D numeric array.")
+    }
+    .validate_probs(mupred, arg = "mupred")
+    
+    if (!is.null(log_weights)) {
+      weights <- exp(.normalize_and_validate_log_weights(
+        log_weights = log_weights,
+        n_draws = nrow(mupred),
+        n_obs = dim(mupred)[2]
+      ))
+    } else {
+      weights <- rep(1 / nrow(mupred), nrow(mupred))
+    }
+    if (length(dim(mupred)) == 3) {
+      # Multiclass: (draws × obs × categories) > argmax over categories
+      ypred <- apply(mupred, c(1, 2), which.max)
+    } else {
+      .validate_numeric_matrix(mupred, arg = "mupred")
+      # Binary: (draws × obs) > threshold at 0.5
+      ypred <- (mupred > 0.5) * 1L
+    }
+    acc_i <- colSums(sweep(ypred, 2, y, `==`) * weights)
+  }
+  
+  acc_c <- vapply(classes, function(c) mean(acc_i[y == c]), numeric(1))
+  n_c <- tabulate(match(y, classes))
+  bacc_i <- acc_i / (K * n_c[match(y, classes)])
+  
+  res <- list(
+    estimate = mean(acc_c),
+    se = sqrt(var(bacc_i) / length(bacc_i)),
+    pointwise = acc_i
+  )
+  .measure_output(res, revert_sign)
+}
+
+#' Brier score (`brier`)
+#'
+#' Computes the Brier score for binary outcomes as squared error between the
+#' observed label and predicted event probability.
+#'
+#' @param y A numeric vector of binary outcomes coded as 0 or 1.
+#' @param ypred A numeric matrix of posterior predictive probabilities with
+#'   dimensions draws x observations.
+#' @param log_weights Optional numeric matrix of unnormalized log-weights
+#'   with dimensions draws x observations.
+#' @param pointwise Optional numeric vector of precomputed pointwise Brier
+#'   scores. If provided, `y`, `ypred`, and `log_weights` are ignored.
+#' @param revert_sign Logical; if `TRUE`, multiply the estimate and pointwise
+#'   values by -1 before returning.
+#'
+#' @returns A named list with elements `estimate` (mean Brier score), `se`
+#'   (standard error), and `pointwise` (pointwise brier score).
+#'
+#' @examples
+#' y <- c(1, 0, 1)
+#' ypred <- matrix(c(0.8, 0.2, 0.7, 0.9, 0.4, 0.6), nrow = 2)
+#' brier(y, ypred)
+#' @export
+brier <- function(
+  y, ypred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE
+) {
+  if (!is.null(pointwise)) {
+    .inform_ignored_inputs(
+      pointwise,
+      ignored_args = list(y = y, ypred = ypred, log_weights = log_weights),
+      fun_name = "brier"
+    )
+    bs_i <- pointwise
+  } else {
+    .validate_numeric_vector(y, arg = "y")
+    if (any(y != 0 & y != 1)) {
+      cli::cli_abort(c(
+        "The brier score expects binary data 'y'.",
+        "i" = "Observed range: [{min(y)}, {max(y)}]",
+        "x" = "All elements of {.arg y} must be 0 or 1."
+      ))
+    }
+    .validate_numeric_matrix(ypred, arg = "ypred", ncol = length(y))
+    .validate_probs(ypred, arg = "ypred")
+    if (!is.null(log_weights)) {
+      weights <- exp(.normalize_and_validate_log_weights(
+        log_weights = log_weights,
+        n_draws = nrow(ypred),
+        n_obs = ncol(ypred)
+      ))
+      prob_i <- colSums(ypred * weights)
+    } else {
+      prob_i <- colMeans(ypred)
+    }
+    bs_i <- (prob_i - y)^2  
+  }
+  
+  res <- list(
+    estimate = mean(bs_i),
+    se = sqrt(var(bs_i) / length(bs_i)),
+    pointwise = bs_i
+  )
+  .measure_output(res, revert_sign)
+}
+
+#' Mean absolute error (`mae`)
+#'
+#' Computes MAE between observed outcomes and posterior predictive point
+#' predictions. Point predictions are obtained by averaging `mupred` draws, or
+#' by PSIS-weighted averaging when `log_weights` is provided.
+#'
+#' @param y A numeric vector of observed outcomes.
+#' @param mupred A numeric matrix of posterior expected predictions with
+#'   dimensions draws x observations. A length-`n` vector is also accepted.
+#' @param log_weights Optional numeric matrix of unnormalized log-weights
+#'   with dimensions draws x observations.
+#' @param pointwise Optional numeric vector of precomputed pointwise absolute
+#'   errors. If provided, `y`, `mupred`, and `log_weights` are ignored.
+#' @param revert_sign Logical; if `TRUE`, multiply the estimate and pointwise
+#'   values by -1 before returning.
+#'
+#' @returns A named list with elements `estimate` (mean absolute error), `se`
+#'   (standard error), and `pointwise` (pointwise absolute errors).
+#'
+#' @examples
+#' y <- c(1, 2, 3)
+#' mupred <- matrix(c(0.9, 2.1, 2.8, 1.2, 1.9, 3.1), nrow = 2)
+#' mae(y, mupred)
+#' @export
+mae <- function(
+  y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE
+) {
+  if (!is.null(pointwise)) {
+    .inform_ignored_inputs(
+      pointwise,
+      ignored_args = list(mupred = mupred, log_weights = log_weights),
+      fun_name = "mae"
+    )
+    mae_i <- pointwise
+  } else {
+    .validate_numeric_vector(y, arg = "y")
+    if (!is.null(mupred) && !is.matrix(mupred)){
+      .validate_numeric_vector(mupred, arg = "mupred", len = length(y))
+      cli::cli_inform(
+        "Coercing {.arg mupred} from vector to 1 x n matrix for {.fn mae}."
+      )
+      mupred <- matrix(mupred, nrow = 1, ncol = length(mupred))
+    }
+    .validate_numeric_matrix(mupred, arg = "mupred", ncol = length(y))
+    if (is.null(log_weights)) {
+      mae_i <- abs(y - colMeans(mupred))
+    } else {
+      weights <- exp(.normalize_and_validate_log_weights(
+        log_weights = log_weights,
+        n_draws = nrow(mupred),
+        n_obs = ncol(mupred)
+      ))
+      mae_i <- abs(y - colSums(weights * mupred))
+    }
+  }
+  
+  res <- list(
+    estimate = mean(mae_i),
+    se = sqrt(var(mae_i) / length(mae_i)),
+    pointwise = mae_i
+  )
+  .measure_output(res, revert_sign)
+}
+
+#' Mean squared error (`mse`)
+#'
+#' Computes MSE between observed outcomes and posterior predictive point
+#' predictions. Point predictions are obtained by averaging `mupred` draws, or
+#' by PSIS-weighted averaging when `log_weights` is provided.
+#'
+#' @inheritParams mae
+#'
+#' @returns A named list with elements `estimate` (mean squared error), `se`
+#'   (standard error), and `pointwise` (pointwise squared errors).
+#'
+#' @examples
+#' y <- c(1, 2, 3)
+#' mupred <- matrix(c(0.9, 2.1, 2.8, 1.2, 1.9, 3.1), nrow = 2)
+#' mse(y, mupred)
+#' @export
+mse <- function(
+  y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE
+) {  
+  if (!is.null(pointwise)) {
+    .inform_ignored_inputs(
+      pointwise,
+      ignored_args = list(mupred = mupred, log_weights = log_weights),
+      fun_name = "mse"
+    )
+    sqe_i <- pointwise
+  } else {
+    .validate_numeric_vector(y, arg = "y")
+    if (!is.null(mupred) && !is.matrix(mupred)){
+      .validate_numeric_vector(mupred, arg = "mupred", len = length(y))
+      cli::cli_inform(
+        "Coercing {.arg mupred} from vector to 1 x n matrix for {.fn mse}."
+      )
+      mupred <- matrix(mupred, nrow = 1, ncol = length(mupred))
+    }
+    .validate_numeric_matrix(mupred, arg = "mupred", ncol = length(y))
+    if (is.null(log_weights)) {
+      sqe_i <- (y - colMeans(mupred))^2
+    } else {
+      weights <- exp(.normalize_and_validate_log_weights(
+        log_weights = log_weights,
+        n_draws = nrow(mupred),
+        n_obs = ncol(mupred)
+      ))
+      sqe_i <- (y - colSums(weights * mupred))^2
+    }
+  }
+
+  res <- list(
+    estimate = mean(sqe_i),
+    se = sqrt(var(sqe_i) / length(sqe_i)),
+    pointwise = sqe_i
+  )
+  .measure_output(res, revert_sign)
+}
+
+#' Root mean squared error (`rmse`)
+#'
+#' Computes RMSE as the square root of MSE and propagates uncertainty via a
+#' first-order delta-method approximation.
+#'
+#' @inheritParams mae
+#'
+#' @returns A named list with elements `estimate` (root mean squared error),
+#'   `se` (standard error), and `pointwise` (pointwise squared errors).
+#'
+#' @examples
+#' y <- c(1, 2, 3)
+#' mupred <- matrix(c(0.9, 2.1, 2.8, 1.2, 1.9, 3.1), nrow = 2)
+#' rmse(y, mupred)
+#' @export
+rmse <- function(
+  y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE
+) {
+  mse_res <- mse(
+    y = y, mupred = mupred, log_weights = log_weights, 
+    pointwise = pointwise
+  )
+  sqe_i <- mse_res$pointwise
+  rmse_est <- sqrt(mse_res$estimates[1])
+  if (rmse_est == 0) {
+    rmse_se <- 0
+  } else {
+    rmse_se <- mse_res$estimates[2] / (2 * rmse_est)
+  }
+
+  res <- list(
+    estimate = rmse_est,
+    se = rmse_se,
+    pointwise = sqe_i
+  )
+  .measure_output(res, revert_sign)
+}
+
+#' Predictive R-squared (`r2`)
+#'
+#' Computes predictive R-squared as one minus the ratio of prediction MSE to
+#' the empirical variance of `y`. The standard error is computed with a
+#' first-order delta-method approximation.
+#'
+#' @inheritParams mae
+#'
+#' @returns A named list with elements `estimate` (predictive R2), `se`
+#'   (standard error), and `pointwise` (pointwise squared errors).
+#'
+#' @examples
+#' y <- c(1, 2, 3)
+#' mupred <- matrix(c(0.9, 2.1, 2.8, 1.2, 1.9, 3.1), nrow = 2)
+#' r2(y, mupred)
+#' @export
+r2 <- function(
+  y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE
+) {
+  .validate_numeric_vector(y, arg = "y")
+  if (var(y) == 0) {
+    cli::cli_abort(
+      "{.fn r2} is undefined when {.arg y} has zero variance."
+    )
+  }
+  
+  mse_res <- mse(
+    y = y, 
+    mupred = mupred, 
+    log_weights = log_weights,
+    pointwise = pointwise
+  )
+  mse_hat <- mse_res$estimate
+  sqe_i <- mse_res$pointwise
+  n_obs <- length(sqe_i)
+  
+  mse_y_i <- (y - mean(y))^2
+  mse_y_hat <- mean(mse_y_i)
+   
+  var_mse_hat <- mse_res$se^2     
+  cov_mse_msey <- stats::cov(sqe_i, mse_y_i) / n_obs              
+  var_mse_y_hat <- var(mse_y_i) / n_obs 
+  
+  t1 <- var_mse_hat
+  t2 <- -2 * (mse_hat / mse_y_hat) * cov_mse_msey
+  t3 <- (mse_hat^2 / mse_y_hat^2) * var_mse_y_hat
+  se_r2 <- sqrt(t1 + t2 + t3) * (1 / mse_y_hat)
+  
+  est_r2 <- 1 - mse_hat / mse_y_hat
+  
+  res <- list(
+    estimate = est_r2,
+    se = se_r2,
+    pointwise = sqe_i
+  )
+  .measure_output(res, revert_sign)
+}
+
+#' Ranked Probability Score (RPS, SRPS, CRPS, SCRPS)
+#'
+#' Computes proper scoring rules based on the ranked probability score family,
+#' covering both discrete and continuous outcomes, with optional scaling.
+#' The specific score computed depends on the type of `y` and `ypred` and the
+#' value of `scaled`:
+#'
+#' | `y`/`ypred` type | `scaled = FALSE` | `scaled = TRUE` |
+#' |------------------|-----------------|-----------------|
+#' | Discrete         | RPS             | SRPS            |
+#' | Continuous       | CRPS            | SCRPS           |
+#'
+#' **Scoring rules:**
+#'
+#' - **RPS** (Epstein, 1969): Compares predictive and observed cumulative
+#'   distributions for ordered discrete outcomes.
+#' - **CRPS** (Matheson & Winkler, 1976; Gneiting & Raftery, 2007): Generalizes
+#'   RPS to continuous outcomes. Defined as
+#'   \deqn{\mathrm{CRPS}(X; y) = \frac{1}{2} E[|X - X'|] - E[|X - y|],}
+#'   where \eqn{X, X'} are independent draws from the predictive distribution.
+#' - **SRPS/SCRPS** (Bolin & Wallin, 2023): Scaled variants that are invariant
+#'   to the scale of the predictive distribution. Defined as
+#'   \deqn{\mathrm{SCRPS}(X; y) = -\frac{E[|X - y|]}{E[|X - X'|]} -
+#'     \frac{1}{2} \log E[|X - X'|].}
+#'
+#' **Estimation:**
+#'
+#' Scores are estimated using the probability-weighted moment (PWM) estimator
+#' (Taillardat et al., 2016; Zamo & Naveau, 2018), which requires only a single
+#' set of predictive draws unlike permutation-based estimators, which require
+#' two independent draw sets. The PWM estimator is unbiased and generally more
+#' accurate than single-permutation estimators. The same estimator is used for
+#' both discrete and continuous outcomes; see Hosking (1990, 1996) for
+#' theoretical justification in the discrete case.
+#'
+#' If log-weights (`log_weights`) are provided (e.g., PSIS weights
+#' for LOO cross-validation), a weighted PWM estimator is used instead, which
+#' accounts for the importance weights when estimating expectations.
+#'
+#' **Sign convention:**
+#'
+#' Unscaled scores are returned as utilities (higher is better), consistent 
+#' with the sign convention of log score / ELPD. Scaled scores are negatively
+#' oriented (lower is better). Use `revert_sign = TRUE` to obtain the
+#' loss convention (lower is better) used in some references.
+#'
+#' @param y A numeric vector of \eqn{n} observed outcomes. May be integer-valued
+#'   (for RPS/SRPS) or continuous (for CRPS/SCRPS).
+#' @param ypred A numeric matrix of posterior predictive draws with dimensions
+#'   \eqn{S \times n} (draws × observations).
+#' @param log_weights Optional numeric matrix of log-importance-
+#'   weights with dimensions \eqn{S \times n}. When provided, a weighted PWM
+#'   estimator is used. Useful for LOO cross-validation via importance sampling
+#'   (e.g., PSIS-LOO).
+#' @param scaled Logical; if `TRUE`, computes the scaled variant (SRPS for
+#'   discrete outcomes, SCRPS for continuous outcomes). Default is `FALSE`.
+#' @param revert_sign Logical; if `TRUE`, multiplies scores by \eqn{-1} to
+#'   return the loss convention (lower is better) rather than the utility
+#'   convention. Default is `FALSE`.
+#'
+#' @return A named list with:
+#'   \describe{
+#'     \item{`estimate`}{Mean score across all observations.}
+#'     \item{`se`}{Standard error of the mean score.}
+#'     \item{`pointwise`}{Numeric vector of length \eqn{n} with per-observation
+#'       scores.}
+#'   }
+#'
+#' @examples
+#' # Discrete outcomes: RPS
+#' y <- c(2L, 1L, 3L)
+#' ypred <- matrix(c(2, 1, 2, 3, 1, 3), nrow = 2)
+#' rps(y, ypred)
+#'
+#' # Discrete outcomes: SRPS (scaled)
+#' rps(y, ypred, scaled = TRUE)
+#'
+#' # Continuous outcomes: CRPS
+#' y_cont <- c(0.5, -1.2, 2.3)
+#' ypred_cont <- matrix(rnorm(200), nrow = 100, ncol = 3)
+#' rps(y_cont, ypred_cont)
+#'
+#' # With importance weights: LOO-CRPS
+#' log_weights <- matrix(rnorm(200), nrow = 100, ncol = 3)
+#' rps(y_cont, ypred_cont, log_weights = log_weights)
+#'
+#' @references
+#' Bolin, D. and Wallin, J. (2023). Local scale invariance and robustness of
+#' proper scoring rules. *Statistical Science*, 38(1):140–159.
+#'
+#' Epstein, E. S. (1969). A scoring system for probability forecasts of ranked
+#' categories. *Journal of Applied Meteorology*, 8(6):985–987.
+#'
+#' Gneiting, T. and Raftery, A. E. (2007). Strictly proper scoring rules,
+#' prediction, and estimation. *Journal of the American Statistical
+#' Association*, 102(477):359–378.
+#'
+#' Hosking, J. R. M. (1990). L-moments: Analysis and estimation of
+#' distributions using linear combinations of order statistics. *Journal of
+#' the Royal Statistical Society Series B*, 52(1):105–124.
+#'
+#' Hosking, J. R. M. (1996). Some theoretical results concerning L-moments.
+#' Research report RC 14492. IBM Thomas J. Watson Research Division.
+#'
+#' Matheson, J. E. and Winkler, R. L. (1976). Scoring rules for continuous
+#' probability distributions. *Management Science*, 22(10):1087–1096.
+#'
+#' Taillardat, M., Mestre, O., Zamo, M., and Naveau, P. (2016). Calibrated
+#' ensemble forecasts using quantile regression forests and ensemble model
+#' output statistics. *Monthly Weather Review*, 144(6):2375–2393.
+#'
+#' Zamo, M. and Naveau, P. (2018). Estimation of the continuous ranked
+#' probability score with limited information and applications to ensemble
+#' weather forecasts. *Mathematical Geosciences*, 50:209–234.
+#'
+#' @export
+rps <- function(y, ypred, log_weights = NULL, scaled = FALSE, revert_sign = FALSE) {
+  if (is.null(log_weights)) {
+    EXy <- colMeans(abs(sweep(ypred, 2, y)))
+    ypred_sorted <- apply(ypred, 2, sort)
+    S <- nrow(ypred)
+    EXX  <- colMeans(ypred_sorted * ((1:S) * (4 / (S - 1)) - 2))
+    if (scaled) {
+      # Scaled version by Bolin & Wallin (2023)
+      rps_i <- -EXy/EXX - 0.5 * log(EXX)
+    } else {
+      # Gneiting & Raftery (2007)
+      rps_i <- - 0.5 * EXX + EXy
+    }
+  } else {
+    S <- nrow(ypred)
+    n <- ncol(ypred)
+    w <- exp(.normalize_log_weights(log_weights))
+    w_csum <- sapply(1:n, function(j) {
+      perm <- order(ypred[,j])
+      result <- numeric(S)
+      result[perm] <- cumsum(w[perm, j])
+      result
+    })
+    
+    EXX <- 2 * colSums(ypred * w * (1 - (2*(1 - w_csum)) / (1 - w)))
+    EXy <- colSums(w * abs(sweep(ypred, 2, y)))
+    if (scaled) {
+      # Scaled version by Bolin & Wallin (2023)
+      rps_i <- -EXy/EXX - 0.5 * log(EXX)
+    } else {
+      # Gneiting & Raftery (2007)
+      rps_i <- EXy - 0.5 * EXX
+    }
+  }
+  res <- list(
+    estimate = mean(rps_i),
+    se = sqrt(var(rps_i) / length(rps_i)),
+    pointwise = rps_i
+  )
+  .measure_output(res, revert_sign)
+}
+
+#' Scaled Ranked Probability Score (SRPS, SCRPS)
+#'
+#' A convenience wrapper around [rps()] with `scaled = TRUE`. Computes the
+#' scaled ranked probability score (SRPS) for discrete outcomes or the scaled
+#' continuously ranked probability score (SCRPS) for continuous outcomes.
+#' Scaling makes the score invariant to the spread of the predictive
+#' distribution, which can be useful when comparing models across observations
+#' with very different predictive uncertainties.
+#'
+#' See [rps()] for full details on arguments, estimation, and references.
+#'
+#' @inheritParams rps
+#'
+#' @inherit rps return
+#'
+#' @examples
+#' y <- c(2L, 1L, 3L)
+#' ypred <- matrix(c(2, 1, 2, 3, 1, 3), nrow = 2)
+#' srps(y, ypred)
+#'
+#' @export
+srps <- function(y, ypred, log_weights = NULL, revert_sign = FALSE) {
+  rps(
+    y = y, ypred = ypred, log_weights = log_weights, 
+    scaled = TRUE, revert_sign = revert_sign
+  )
+}
+
+# measure overview -----------------------------
+#
+# Built-in measures are registered in `.measure_spec`. Users can also pass
+# custom functions to `measure` in the pred_measure family; those functions
+# must set `attr(fun, "measure_name")` and return `estimate`, `se`, and
+# `pointwise` (see `?insample_pred_measure`).
+
+# internal function to get the measure specification
+# @noRd
+# @param measure The measure used.
+# @return The measure specification.
+.measure_spec <- list(
+  elpd = elpd,
+  mlpd = mlpd,
+  mae = mae,
+  r2 = r2,
+  rmse = rmse,
+  mse = mse,
+  acc = acc,
+  bacc = bacc,
+  rps = rps,
+  srps = srps
+)
+
+#' Supported predictive measure names
+#'
+#' A character vector of measure names that can be passed to the `measure`
+#' argument of [insample_pred_measure()], [loo_pred_measure()],
+#' [kfold_pred_measure()], [test_pred_measure()], and [pred_measure()].
+#'
+#' @export
+supported_measures_list <- names(.measure_spec)
+
+# internal function that produces output format for measures
+.measure_output <- function(res, revert_sign) {
+  if (isTRUE(revert_sign)) {
+    res$estimate <- -res$estimate
+    res$pointwise <- -res$pointwise
+  }
+  out <- list()
+  out$estimates <- c(res$estimate, res$se)
+  names(out$estimates) <- c('Estimate', 'SE')
+  out$pointwise <- res$pointwise
+  out
+}
\ No newline at end of file
diff --git a/R/pred_measure.R b/R/pred_measure.R
new file mode 100644
index 00000000..44df8691
--- /dev/null
+++ b/R/pred_measure.R
@@ -0,0 +1,690 @@
+#' Shared parameters for predictive measure wrappers
+#'
+#' @description
+#' Parameter definitions shared by the user-facing entry points and the
+#' internal engine [pred_measure_engine()].
+#'
+#' @param y Vector of observed values (`n`). Required for distributional and
+#'   point-prediction measures such as `crps`, `mae`, and `acc`.
+#' @param ypred Matrix of posterior predictive draws (`S` draws × `n`
+#'   observations), typically from [brms::posterior_predict()]. Required for
+#'   distributional measures such as `crps`, `rps`, and `scrps`.
+#' @param mupred Matrix of posterior expected values (`S` × `n`), typically from
+#'   [brms::posterior_epred()]. Required for point-prediction measures such as
+#'   `mae`, `rmse`, `r2`, and `acc`.
+#' @param ylp Matrix of pointwise log predictive densities or probabilities
+#'   (`S` × `n`), typically from [brms::log_lik()]. Required for density-based
+#'   summaries (`elpd`, `mlpd`, `ic`).
+#' @param ylp_test Matrix of pointwise log predictive densities for holdout
+#'   observations (`S` × `n_test`), typically from
+#'   `brms::log_lik(fit, newdata = test_data)`. Used with `ylp` (from the
+#'   training fit) in [test_pred_measure()] to score genuinely new data.
+#' @param predperf An existing predictive measure object (class
+#'   `"pred_measure"`) to update. When supplied, base density summaries and
+#'   (for LOO) PSIS weights are reused instead of recomputed.
+#' @param measure Additional measures beyond the base summaries (`elpd` and
+#'   `ic`, which are always included). Can be:
+#'   \itemize{
+#'     \item A **character vector** of built-in names; see
+#'       [supported_measures_list()].
+#'     \item A **function** with attribute `"measure_name"` for one custom measure.
+#'     \item A **list** mixing character scalars (built-in names) and named
+#'       functions (custom measures), e.g. `list("rps", my_metric = my_fun)`.
+#'   }
+#'   Custom functions are called with any of `y`, `ypred`, `mupred`, `ylp`, and
+#'   `log_weights` that appear in their formals, plus arguments from `control`.
+#'   They must return a list with  `estimates` and `pointwise`.
+#' @param measure_name For a single custom function, set
+#'   `attr(my_fun, "measure_name") <- "my_metric"` before passing `my_fun` to
+#'   `measure`.
+#' @param group_ids Optional vector of group identifiers for grouped summaries
+#'   (reserved; not yet implemented).
+#' @param loo A [loo::loo()] result, computed with
+#'   `save_psis = TRUE` so that PSIS weights are available for additional
+#'   measures. See [loo_pred_measure()].
+#' @param kfold A `kfold` object from [brms::kfold()]. Supplies ELPD summaries
+#'   and fold structure for [kfold_pred_measure()]; pass `y`, `ypred`, and/or
+#'   `mupred` when requesting additional measures.
+#' @param psis_object A `psis` object with LOO importance weights. An
+#'   alternative to passing a full `loo` object; must be supplied together with
+#'   `ylp` when computing `elpd`.
+#' @param save_psis Logical. If `TRUE`, store the `psis` object in the result
+#'   so that additional measures can be added later with [pred_measure()] without
+#'   recomputing PSIS weights.
+#' @param control Named list of per-measure settings. Each name must match an
+#'   element of `measure`; the value is a list of arguments passed to that
+#'   measure's summary function (e.g. `list(new_measure = list(add_arg = 10))`).
+#' @param source Character string indicating the evaluation mode: `"insample"`,
+#'   `"loo"`, `"kfold"`, or `"test"`. Set automatically by the wrapper
+#'   functions; required when calling [pred_measure_engine()] directly.
+#'
+#' @keywords internal
+#' @name pred_measure_params
+pred_measure_engine <- function(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  ylp_test = NULL,
+  measure = NULL,
+  predperf = NULL,
+  loo = NULL,
+  kfold = NULL,
+  group_ids = NULL,
+  psis_object = NULL,
+  save_psis = FALSE,
+  source = NULL,
+  control = list()
+) {
+  # input validation ---------------------------------------------------
+  checkmate::assert_list(control, types = "list", names = "named")
+  measures <- .prepare_measures(measure, predperf, supported_measures_list)
+  
+  if (source == "loo") {
+    if (is.null(predperf)) {
+      if (!is.null(loo) && is.null(loo$psis_object)) {
+        cli::cli_abort(c(
+          "No `psis_object` found in `loo` object. Did you run loo(..., save_psis = 'TRUE')."
+        ))
+      }
+      if (is.null(ylp) && !is.null(psis_object)) {
+        cli::cli_abort(c(
+          "For computation of `elpd` it is required to pass `ylp` besides `psis_object`."
+        ))
+      }
+    } else {
+      if (is.null(psis_object) && !is.null(predperf$psis_object)) {
+        cli::cli_inform("Using psis_object for LOO CV from `predperf`")
+        psis_object <- predperf$psis_object
+      }
+    }
+    checkmate::assert_null(ylp_test)
+  } else if (source == "insample") {
+    checkmate::assert_null(ylp_test)
+    checkmate::assert_null(psis_object)
+  } else if (source == "test") {
+    checkmate::assert_null(psis_object)
+  } else { # kfold
+    checkmate::assert_null(psis_object)
+  }
+
+  # core logic ---------------------------------------------------
+  if (source == "loo") {
+    psis_object <- .get_psis_object(
+      loo = loo,
+      predperf = predperf,
+      psis_object = psis_object,
+      ylp = ylp,
+      r_eff = 1
+    )
+  }
+  
+  base_measure <- .compute_base_measure(
+    ylp = ylp,
+    ylp_test = ylp_test,
+    loo = loo,
+    kfold = kfold,
+    predperf = predperf,
+    psis_object = psis_object,
+    source = source
+  )
+
+  estimates <- base_measure$estimates
+  pointwise <- base_measure$pointwise
+
+  log_weights <- if (!is.null(psis_object)) psis_object$log_weights else NULL
+
+  for (entry in measures) {
+    sel_measure <- .compute_measure(
+      y = y,
+      ypred = ypred,
+      mupred = mupred,
+      ylp = ylp,
+      measure_entry = entry,
+      log_weights = log_weights,
+      control = control
+    )
+    estimates <- .merge_matrix(
+      source = source,
+      mat = estimates,
+      name = entry$name,
+      values = .measure_estimate_se(sel_measure),
+      margin = 1
+    )
+    pointwise <- .merge_matrix(
+      source = source,
+      mat = pointwise,
+      name = entry$name,
+      values = sel_measure$pointwise,
+      margin = 2
+    )
+  }
+
+  predperf_res <- .build_pred_measure(
+    estimates = estimates,
+    pointwise = pointwise,
+    diagnostics = base_measure$diagnostics,
+    psis_object = psis_object,
+    save_psis = save_psis
+  )
+  
+  .add_attributes(
+    save_psis,
+    predperf_res, 
+    y, 
+    ypred, 
+    mupred, 
+    ylp,
+    ylp_test,
+    kfold, 
+    loo, 
+    predperf, 
+    source
+  )
+  }
+
+# internal helper functions ---------------------------------------------------
+
+#' Resolve or compute the PSIS object for LOO scoring
+#'
+#' @description
+#' Selects a PSIS object from the available inputs, or computes one from `ylp`
+#' when no precomputed weights are supplied (assuming `r_eff = 1`).
+#'
+#' Resolution order:
+#' \enumerate{
+#'   \item If both `psis_object` and `loo` are provided, return `psis_object`
+#'     after verifying it matches `loo$psis_object`.
+#'   \item Extract from `loo$psis_object` when `loo` is provided.
+#'   \item Use the supplied `psis_object`.
+#'   \item Reuse `predperf$psis_object` when accumulating measures.
+#'   \item Compute from `ylp` via [loo::psis()] on `-ylp` log ratios.
+#' }
+#'
+#' @param ylp Matrix of pointwise log predictive densities (`S` × `n`).
+#' @param loo Optional [loo::loo()] result containing a `psis_object`.
+#' @param predperf Optional existing measure object with a stored `psis_object`.
+#' @param psis_object Optional precomputed PSIS object.
+#' @param r_eff Relative effective sample size passed to [loo::psis()];
+#'   default `1`.
+#'
+#' @return A `psis` object with `log_weights` and `diagnostics`.
+#'
+#' @note See developer notes on computation of the `psis_object` for details.
+#' @noRd
+.get_psis_object <- function(
+  ylp,
+  loo,
+  predperf,
+  psis_object,
+  r_eff = 1
+) {
+  # psis_object + loo are both provided -> return psis_object
+  if (!is.null(psis_object) && !is.null(loo)) {
+    psis_equal_loo <- all(psis_object == loo$psis_object)
+    if (!psis_equal_loo) {
+      cli::cli_abort(
+        "Provided `psis_object` and `loo$psis_object` are not identical."
+      )
+    } 
+    return(psis_object)
+  # loo is provided
+  } else if (!is.null(loo)) {
+    return(loo$psis_object)
+  # psis_object is provided
+  } else if (!is.null(psis_object)) {
+    return(psis_object)
+  # predperf with psis_object is provided
+  } else if (!is.null(predperf$psis_object)) {
+    return(predperf$psis_object)
+  # ylp is provided
+  } else if (is.null(loo) && is.null(psis_object) && !is.null(ylp)) {
+    cli::cli_inform(
+      "Compute `psis_object` internally from `ylp` assuming `r_eff = 1`."
+    )
+    log_ratios <- -1 * ylp
+    return(psis(log_ratios, r_eff = r_eff))
+  # nothing is provided
+  } else {
+    cli::cli_abort(c(
+      "psis_object can not be computed, either of `psis_object`, `loo`, or",
+      "`ylp` needs to be provided."
+    ))
+  }}
+
+#' Compute a single predictive measure
+#'
+#' @description
+#' Dispatches one requested predictive measure to the appropriate summary
+#' function. The measure's `family` in `.measure_spec` determines which inputs
+#' are passed through:
+#'
+#' \describe{
+#'   \item{`metrics`}{`y` and `mupred` (e.g. MAE, RMSE, accuracy).}
+#'   \item{`rank_scores`}{`y` and `ypred` (e.g. RPS, CRPS).}
+#'   \item{`density_scores`}{`ylp` (e.g. ELPD, MLPD).}
+#' }
+#'
+#' @param y Vector of observed values (n).
+#' @param ypred Matrix of posterior predictive draws (S × n).
+#' @param mupred Matrix of posterior point predictions (S × n).
+#' @param ylp Matrix of pointwise log predictive densities (S × n).
+#' @param measure_entry A normalized measure entry with elements 
+#'   `name`, `type` (`"builtin"` or `"custom"`), and `key`.
+#' @param log_weights Matrix of log-weights (S × n), as returned by 
+#'   `.compute_log_weights()`.
+#' @param control Named list of per-measure settings passed from
+#'   [pred_measure()]; the active slice is `control[[measure_entry$name]]`.
+#'
+#' @return A named list with three elements:
+#'   \describe{
+#'     \item{`estimate`}{Scalar point estimate for the measure.}
+#'     \item{`se`}{Scalar standard error for the estimate.}
+#'     \item{`pointwise`}{Length-`n` vector of observation-level contributions.}
+#'   }
+#'
+#' Extract estimate and SE from a measure result
+#'
+#' Supports `estimate`/`se` (pred_measure functions) and `estimates` (CRPS).
+#'
+#' @noRd
+.measure_estimate_se <- function(res) {
+  if (!is.null(res$estimates)) {
+    return(res$estimates)
+  }
+  c(res$estimate, res$se)
+}
+
+#' @noRd
+.compute_measure <- function(
+    y,
+    ypred,
+    mupred,
+    ylp,
+    measure_entry,
+    log_weights,
+    control = list()
+) {
+  if (measure_entry$type == "builtin") {
+    measure_fun <- .measure_spec[[measure_entry$key]]
+    if (is.null(measure_fun)) {
+      cli::cli_abort("Unknown built-in measure {.val {measure_entry$key}}.")
+    }
+  } else {
+    measure_fun <- measure_entry$key
+  }
+
+  measure_control <- control[[measure_entry$name]]
+  if (is.null(measure_control)) {
+    measure_control <- list()
+  }
+
+  pool <- c(
+    list(
+      y = y,
+      ypred = ypred,
+      mupred = mupred,
+      ylp = ylp,
+      log_weights = log_weights
+    ),
+    measure_control
+  )
+  args <- pool[intersect(names(formals(measure_fun)), names(pool))]
+  res <- do.call(measure_fun, args)
+  if (measure_entry$type == "custom") {
+    n_obs <- .measure_n_obs(y, ypred, mupred, ylp)
+    res <- .validate_measure_result(res, measure_entry$name, n_obs = n_obs)
+  }
+  res
+}
+
+#' Compute base density summaries for a predictive measure object
+#'
+#' @description
+#' Forms the default block of log-density summaries (`elpd`, `ic`, and for LOO
+#' `p_loo`) that underlie every [pred_measure()] result. Additional measures
+#' requested via `measure` are merged into the returned matrices later.
+#'
+#' When `predperf` is provided (incremental update), returns `predperf` unchanged.
+#' For `source = "kfold"` or `"loo"` with a precomputed object, extracts existing
+#' summaries from `kfold` or `loo`.
+#'
+#' Otherwise ELPD is computed from:
+#' \itemize{
+#'   \item `insample`: `ylp` directly (in-sample log predictive density).
+#'   \item `loo`: `ylp` reweighted with PSIS log weights from `psis_object`.
+#'   \item `test`: `ylp_test` on holdout observations.
+#' }
+#'
+#' `ic` is always \eqn{-2 \times} ELPD on the same scale as LOOIC. For
+#' `source = "loo"`, `p_loo` (effective number of parameters) is also included,
+#' computed by \code{.compute_effective_param()} as the difference between
+#' in-sample and LOO log predictive density; see `p_loo` in [loo::loo()] and the
+#' [CV-FAQ on p_loo](https://users.aalto.fi/~ave/CV-FAQ.html#p_loo).
+#'
+#' @param ylp Matrix of pointwise log predictive densities for training data
+#'   (`S` × `n`).
+#' @param ylp_test Matrix of pointwise log predictive densities for holdout
+#'   data (`S` × `n_test`; `test` source only).
+#' @param loo Optional [loo::loo()] result. When supplied for `source = "loo"`,
+#'   base summaries are taken from the `loo` object.
+#' @param kfold Optional `kfold` object. When supplied for `source = "kfold"`,
+#'   base summaries are taken from the `kfold` object.
+#' @param predperf Existing [pred_measure()] object. When not `NULL`, returned
+#'   unchanged so base summaries are not recomputed.
+#' @param psis_object PSIS object with LOO log weights and diagnostics.
+#' @param source Character string; one of `"insample"`, `"loo"`, `"kfold"`, or
+#'   `"test"`.
+#'
+#' @return A named list with:
+#' \describe{
+#'   \item{`estimates`}{Matrix with rows `elpd`, optionally `p_loo` (LOO only),
+#'     and `ic`, plus a source suffix when applicable (`_loo`, `_kfold`,
+#'     `_test`).}
+#'   \item{`pointwise`}{Matrix of observation-level contributions.}
+#'   \item{`diagnostics`}{From `psis_object$diagnostics` when LOO weights are
+#'     used; otherwise `NULL` or taken from the input `loo`/`kfold` object.}
+#' }
+#'
+#' @noRd
+.compute_base_measure <- function(
+  ylp,
+  ylp_test = ylp_test,
+  loo,
+  kfold,
+  predperf,
+  psis_object,
+  source
+) {
+  if (!is.null(predperf)) {
+    return(predperf)
+  }
+  if (source == "kfold") {
+    if ("diagnostics" %in% names(kfold)) {
+      return(kfold[c("estimates", "pointwise", "diagnostics")])
+    } else {
+      return(kfold[c("estimates", "pointwise")])
+    }
+  }
+  if (source == "loo" && !is.null(loo)) {
+    return(loo[c("estimates", "pointwise", "diagnostics")])
+  }
+  
+  elpd_res <- switch(
+    source,
+    insample = elpd(ylp = ylp),
+    loo = elpd(ylp = ylp, log_weights = psis_object$log_weights),
+    test = elpd(ylp_test)
+  )
+  if (!is.null(elpd_res$estimates)) {
+    elpd_res <- list(
+      estimate = unname(elpd_res$estimates["Estimate"]),
+      se = unname(elpd_res$estimates["SE"]),
+      pointwise = elpd_res$pointwise
+    )
+  }
+  suffix <- if (source == "insample") "" else paste0("_", source)
+  add_p_eff <- source == "loo"
+  est_mat <- rbind(
+    elpd = c(elpd_res$estimate, elpd_res$se),
+    ic = c(-2 * elpd_res$estimate, 2 * elpd_res$se)
+  )
+  pw_mat <- cbind(
+    elpd = elpd_res$pointwise,
+    ic = -2 * elpd_res$pointwise
+  )
+  if (add_p_eff) {
+    p_eff_res <- .compute_effective_param(ylp, elpd_res$pointwise)
+    est_mat <- rbind(
+      est_mat["elpd", , drop = FALSE],
+      p_eff = c(p_eff_res$estimate, p_eff_res$se),
+      est_mat["ic", , drop = FALSE]
+    )
+    pw_mat <- cbind(
+      elpd = pw_mat[, "elpd"],
+      p_eff = p_eff_res$pointwise,
+      ic = pw_mat[, "ic"]
+    )
+  }
+  
+  rownames(est_mat) <- paste0(rownames(est_mat), suffix)
+  colnames(est_mat) <- c("Estimate", "SE")
+  colnames(pw_mat) <- paste0(colnames(pw_mat), suffix)
+  
+  list(
+    estimates = est_mat, 
+    pointwise = pw_mat,
+    diagnostics = psis_object$diagnostics
+  )
+}
+
+# compute the effective number of parameters
+#' Compute effective number of parameters (`p_loo`)
+#'
+#' @description
+#' Per-observation effective number of parameters as the difference between
+#' the log posterior predictive density (`lpd`) and the cross-validated log
+#' predictive density (`elpd`). Summed across observations, this matches
+#' `p_loo` from the **loo** package: it describes how much harder it is to
+#' predict held-out data than the data used to fit the model.
+#'
+#' @param ylp Matrix of pointwise log predictive densities (`S` × `n`).
+#' @param elpd_cv_i Numeric vector of length `n` with LOO pointwise ELPD
+#'   contributions.
+#'
+#' @return A named list with `estimate` (total `p_loo`), `se`, and `pointwise`
+#'   (`p_loo` per observation).
+#'
+#' @references See the **loo** package glossary (`vignette("loo2", package = "loo")`)
+#'   and \url{https://users.aalto.fi/~ave/CV-FAQ.html#p_loo}.
+#'
+#' @noRd
+.compute_effective_param <- function(ylp, elpd_cv_i) {
+  lpd_i <- matrixStats::colLogSumExps(ylp) - log(nrow(ylp))
+  p_eff_i <- lpd_i - elpd_cv_i
+  
+  list(
+      estimate = sum(p_eff_i),
+      se = sqrt(ncol(ylp) * var(p_eff_i)),
+      pointwise = p_eff_i
+    )
+}
+
+#' Add or update a row or column in a summary matrix
+#'
+#' @description
+#' Builds the `estimates` and `pointwise` matrices used in
+#' [pred_measure()] results. When `margin = 1`, appends or updates a **row**
+#' (for estimates). When `margin = 2`, appends or updates a **column** 
+#' (for pointwise).
+#'
+#' If `mat` is `NULL`, returns a new one-row or one-column matrix for `name`.
+#' If `name` is already present along that margin, the update is skipped with a
+#' warning. Otherwise the new slice is bound with [rbind()] or [cbind()].
+#'
+#' @param source Character string; one of `"loo"`, `"insample"`, `"kfold"`, or
+#'   `"test"`.
+#' @param mat Existing matrix, or `NULL` when adding the first measure.
+#' @param name Character label for the row (`margin = 1`) or column
+#'   (`margin = 2`).
+#' @param values Numeric vector to store. For `margin = 1`, length-2 vector
+#'   `(estimate, se)`; for `margin = 2`, length-`n` pointwise vector.
+#' @param margin `1` to merge along rows (estimates table), `2` along columns
+#'   (pointwise table).
+#'
+#' @return Updated matrix with `name` as a row or column name.
+#'
+#' @noRd
+.merge_matrix <- function(source, mat, name, values, margin) {
+  is_row <- margin == 1
+  along <- if (is_row) rownames else colnames
+  bind_fn <- if (is_row) rbind else cbind
+  name_updated <- switch(
+    source,
+    kfold = paste0(name, "_kfold"),
+    loo = paste0(name, "_loo"),
+    test = paste0(name, "_test"),
+    insample = name
+  )
+  
+  new_slice <- if (is_row) {
+    matrix(values, nrow = 1, dimnames = list(name_updated, c("Estimate", "SE")))
+  } else {
+    matrix(values, ncol = 1, dimnames = list(NULL, name_updated))
+  }
+
+  if (is.null(mat)) return(new_slice)
+  # && margin == 1 condition ensures that warning is shown only once
+  if (name_updated %in% along(mat) && margin == 1) {
+    # TODO: Check whether this behavior is wanted
+    cli::cli_warn(
+      c(
+        "{.field {name_updated}} already present in results. Skipping the update."
+      )
+    )
+    return(mat)
+  }
+  bind_fn(mat, new_slice)
+}
+
+#' Construct the S3 predictive measure result object
+#'
+#' @description
+#' Wraps computed summaries into the list structure returned by the
+#' [pred_measure()] pipeline. S3 classes and attributes are attached later by
+#' \code{.add_attributes()}.
+#'
+#' When `save_psis = TRUE`, the `psis_object` is stored in the result; otherwise
+#' that slot is omitted. When a `psis_object` is available, its `log_weights` are
+#' copied to the result.
+#'
+#' @param estimates Matrix of overall estimates and standard errors (rows =
+#'   measures, columns = `Estimate` and `SE`).
+#' @param pointwise Matrix of observation-level contributions (columns =
+#'   measures).
+#' @param diagnostics Optional PSIS or other diagnostic information, or `NULL`.
+#' @param psis_object PSIS object with LOO weights and diagnostics, or `NULL`.
+#' @param save_psis Logical; if `TRUE`, include `psis_object` in the result.
+#'
+#' @return A list with elements `estimates`, `pointwise`, and optionally
+#'   `diagnostics`, `psis_object`, and `log_weights`. Class attributes are added
+#'   by \code{.add_attributes()}.
+#'
+#' @noRd
+.build_pred_measure <- function(
+  estimates,
+  pointwise,
+  diagnostics = NULL,
+  psis_object,
+  save_psis
+) {
+  output_list <- list(
+    estimates = estimates,
+    pointwise = pointwise
+  )
+  if (!is.null(diagnostics)) {
+    output_list$diagnostics <- diagnostics
+  }
+  if (isTRUE(save_psis)) {
+    output_list$psis_object <- psis_object
+  }
+  if (!is.null(psis_object)) {
+    output_list$log_weights <- psis_object$log_weights
+  }
+  
+  structure(output_list)
+}
+
+#' Attach S3 classes and metadata attributes to a result
+#'
+#' @description
+#' Sets `class`, `source`, and `dims` attributes on a predictive measure object.
+#'
+#' When updating an existing result (`predperf` is not `NULL`), copies attributes
+#' from `predperf` and refreshes `dims` from newly supplied input matrices.
+#' When `save_psis = FALSE`, clears any stored `psis_object` from the prior
+#' result.
+#'
+#' For new objects, copies relevant attributes from `loo` or `kfold` inputs
+#' (e.g. `yhash`, `model_name`, fold structure) and assigns a source-specific
+#' subclass (`"insample_pred_measure"`, `"loo_pred_measure"`, etc.).
+#'
+#' @param save_psis Logical; when `FALSE` and accumulating, clears stored
+#'   `psis_object` from the prior result.
+#' @param predperf_res List returned by \code{.build_pred_measure()}.
+#' @param y Vector of observed values; used indirectly via matrix `dims`.
+#' @param ypred Matrix of posterior predictive draws; used to set `dims`.
+#' @param mupred Matrix of posterior expected values; used to set `dims`.
+#' @param ylp Matrix of pointwise log predictive densities; used to set `dims`.
+#' @param ylp_test Matrix of holdout log predictive densities; sets `dims` for
+#'   `source = "test"`.
+#' @param kfold Optional `kfold` object whose attributes are inherited.
+#' @param loo Optional [loo::loo()] object whose attributes are inherited.
+#' @param predperf Existing object when accumulating measures, or `NULL`.
+#' @param source Character evaluation mode (`"insample"`, `"loo"`, `"kfold"`,
+#'   or `"test"`).
+#'
+#' @return The updated `predperf_res` with class and attributes set.
+#'
+#' @noRd
+.add_attributes <- function(save_psis, predperf_res, y, ypred, mupred, ylp, ylp_test, kfold, loo, predperf, source) {
+  if (!is.null(predperf)) {
+    if (isFALSE(save_psis)) {
+      predperf$psis_object <- NULL
+    }
+    attributes(predperf_res) <- attributes(predperf)
+    
+    dims <- if (!is.null(ypred)) {
+      dim(ypred)
+    } else if (!is.null(mupred)) {
+      dim(mupred)
+    } else if (!is.null(ylp)) {
+      dim(ylp)
+    }
+    attr(predperf_res, "dims") <- dims
+    
+    return(predperf_res)
+  }
+  
+  predperf_res <- switch(
+    source,
+    kfold = .copy_attrs(
+      predperf_res,
+      kfold,
+      setdiff(names(attributes(kfold)), "names")
+    ),
+    loo = .copy_attrs(
+      predperf_res,
+      loo,
+      setdiff(names(attributes(loo)), "names")
+    ),
+    test = , # fall through (same as insample)
+    insample = predperf_res
+  )
+  
+  if (source %in% c("insample", "test") || 
+    (is.null(attr(predperf_res, "dims")) && !is.null(ylp))){
+    # make attribute names consistent between pred_measure classes
+    if (source == "test") {
+      attr(predperf_res, "dims") <- attr(ylp_test, "dim")
+    } else {
+      attr(predperf_res, "dims") <- attr(ylp, "dim")  
+    } 
+  }
+  
+  attr(predperf_res, "class") <- c(
+    "pred_measure",
+    switch(
+      source,
+      loo = "loo_pred_measure",
+      insample = "insample_pred_measure",
+      kfold = "kfold_pred_measure",
+      test = "test_pred_measure",
+      "pred_measure"
+    ),
+    attr(predperf_res, "class")
+  )
+  attr(predperf_res, "source") <- source
+  
+  return(predperf_res)
+}
\ No newline at end of file
diff --git a/R/pred_measure_helpers.R b/R/pred_measure_helpers.R
new file mode 100644
index 00000000..efb3fac8
--- /dev/null
+++ b/R/pred_measure_helpers.R
@@ -0,0 +1,420 @@
+#' Normalize the `measure` argument to an internal list
+#'
+#' @description
+#' Converts `measure` (character, function, list, or `NULL`) into a list of
+#' entries with elements `name`, `type` (`"builtin"` or `"custom"`), and `key`
+#' (built-in name or function).
+#'
+#' @param measure User-supplied `measure` argument.
+#'
+#' @return A list of normalized measure entries, or an empty list when
+#'   `measure` is `NULL`.
+#'
+#' @noRd
+.normalize_measure <- function(measure) {
+  if (is.null(measure)) {
+    return(list())
+  }
+
+  if (is.function(measure)) {
+    entries <- list(.measure_entry_custom(measure))
+    .check_measure_entry_names(entries)
+    return(entries)
+  }
+
+  if (is.character(measure)) {
+    entries <- lapply(measure, function(nm) {
+      list(name = nm, type = "builtin", key = nm)
+    })
+    .check_measure_entry_names(entries)
+    return(entries)
+  }
+
+  if (is.list(measure)) {
+    if (length(measure) == 0L) {
+      return(list())
+    }
+    entries <- lapply(seq_along(measure), function(i) {
+      el <- measure[[i]]
+      nm <- names(measure)[i]
+      if (is.character(el) && length(el) == 1L) {
+        list(name = el, type = "builtin", key = el)
+      } else if (is.function(el)) {
+        if (is.null(nm) || !nzchar(nm)) {
+          cli::cli_abort(c(
+            "Each custom function in {.arg measure} must be named.",
+            "i" = "Use {.code measure = list(my_metric = my_fun)}."
+          ))
+        }
+        list(name = nm, type = "custom", key = el)
+      } else {
+        cli::cli_abort(c(
+          "Each element of {.arg measure} must be a character scalar (built-in",
+          "name) or a function (custom measure).",
+          "i" = "Element {i} has type {.cls {class(el)[1]}}."
+        ))
+      }
+    })
+    .check_measure_entry_names(entries)
+    return(entries)
+  }
+
+  cli::cli_abort(c(
+    "{.arg measure} must be a character vector, a function, a list, or",
+    "{.code NULL}.",
+    "i" = "Got an object of class {.cls {class(measure)[1]}}."
+  ))
+}
+
+#' Build a custom measure entry from a function
+#'
+#' @param fun Function implementing a custom measure.
+#' @noRd
+.measure_entry_custom <- function(fun) {
+  name <- attr(fun, "measure_name", exact = TRUE)
+  if (is.null(name) || length(name) != 1L || !nzchar(name)) {
+    cli::cli_abort(c(
+      "A custom function passed to {.arg measure} must have attribute",
+      "{.code measure_name}.",
+      "i" = "Set {.code attr(my_fun, \"measure_name\") <- \"my_metric\"}."
+    ))
+  }
+  list(name = name, type = "custom", key = fun)
+}
+
+#' Check duplicate and reserved measure names
+#'
+#' @param entries List of normalized measure entries from `.normalize_measure()`.
+#' @noRd
+.check_measure_entry_names <- function(entries) {
+  names <- vapply(entries, `[[`, "", "name")
+  dups <- names[duplicated(names)]
+  if (length(dups) > 0L) {
+    cli::cli_abort(c(
+      "Duplicate measure names in {.arg measure}: {.val {unique(dups)}}",
+      "i" = "Each measure may appear only once."
+    ))
+  }
+  invisible(NULL)
+}
+
+#' Normalize, validate, and filter the `measure` argument
+#'
+#' @description
+#' Converts `measure` via `.normalize_measure()`, validates built-in names and
+#' custom functions, and drops entries already present in `predperf`.
+#'
+#' @param measure User-supplied `measure` argument (see `.normalize_measure()`).
+#' @param predperf Existing pred_measure object used when accumulating measures.
+#' @param supported_measures_list Character vector of allowed built-in names.
+#'
+#' @return A list of normalized measure entries ready for computation.
+#'
+#' @noRd
+.prepare_measures <- function(measure, predperf, supported_measures_list) {
+  entries <- .normalize_measure(measure)
+  if (length(entries) == 0L) {
+    return(entries)
+  }
+
+  is_builtin <- vapply(entries, function(e) e$type == "builtin", logical(1L))
+  builtin_keys <- vapply(entries[is_builtin], function(e) e$key, character(1L))
+  invalid <- setdiff(builtin_keys, supported_measures_list)
+  if (length(invalid) > 0L) {
+    cli::cli_abort(c(
+      "Invalid measure{?s}: {paste(shQuote(invalid), collapse = ', ')}",
+      "i" = "Built-in measures must be one of:",
+      " " = "{paste(shQuote(supported_measures_list), collapse = ', ')}"
+    ))
+  }
+
+  is_custom <- !is_builtin
+  if (any(is_custom)) {
+    for (entry in entries[is_custom]) {
+      checkmate::assert_function(entry$key, .var.name = "measure")
+    }
+  }
+
+  if (!is.null(predperf)) {
+    existing_measures <- rownames(predperf$estimates)
+    entry_names <- vapply(entries, function(e) e$name, character(1L))
+    dups <- intersect(entry_names, existing_measures)
+    if (length(dups) > 0L) {
+      cli::cli_warn(c(
+        "!" = "{cli::qty(length(dups))} Measure{?s} {.val {dups}} {?is/are}",
+        "already present in {.arg predperf} and will be skipped."
+      ))
+    }
+    keep <- !vapply(
+      entries,
+      function(e) e$name %in% existing_measures,
+      logical(1L)
+    )
+    entries <- entries[keep]
+  }
+
+  entries
+}
+
+#' Infer number of observations from measure inputs
+#'
+#' @noRd
+.measure_n_obs <- function(y, ypred, mupred, ylp) {
+  if (!is.null(y)) {
+    return(length(y))
+  }
+  if (!is.null(ypred)) {
+    return(ncol(ypred))
+  }
+  if (!is.null(mupred)) {
+    return(if (length(dim(mupred)) == 3L) dim(mupred)[2L] else ncol(mupred))
+  }
+  if (!is.null(ylp)) {
+    return(ncol(ylp))
+  }
+  NULL
+}
+
+#' Validate the return value of a custom measure function
+#'
+#' @param res Object returned by a custom measure function.
+#' @param measure_name Label used in error messages.
+#' @param n_obs Expected length of `pointwise`, or `NULL` to skip.
+#'
+#' @return `res`, invisibly.
+#'
+#' @noRd
+.validate_measure_result <- function(res, measure_name, n_obs = NULL) {
+  if (!is.list(res)) {
+    cli::cli_abort(c(
+      "Custom measure {.val {measure_name}} must return a list.",
+      "i" = "Got an object of class {.cls {class(res)[1]}}."
+    ))
+  }
+
+  if (!is.null(res$estimates)) {
+    if (!is.numeric(res$estimates) || length(res$estimates) != 2L) {
+      cli::cli_abort(c(
+        "{.field estimates} from custom measure {.val {measure_name}} must be",
+        "a numeric vector of length 2 (estimate and SE)."
+      ))
+    }
+  } else {
+    missing <- setdiff(c("estimate", "se", "pointwise"), names(res))
+    if (length(missing) > 0L) {
+      cli::cli_abort(c(
+        "Custom measure {.val {measure_name}} must return a list with",
+        "{.field estimate}, {.field se}, and {.field pointwise}.",
+        "x" = "Missing: {.field {missing}}"
+      ))
+    }
+    if (!is.numeric(res$estimate) || length(res$estimate) != 1L) {
+      cli::cli_abort(
+        "{.field estimate} from custom measure {.val {measure_name}} must be a numeric scalar."
+      )
+    }
+    if (!is.numeric(res$se) || length(res$se) != 1L) {
+      cli::cli_abort(
+        "{.field se} from custom measure {.val {measure_name}} must be a numeric scalar."
+      )
+    }
+  }
+
+  if (is.null(res$pointwise)) {
+    cli::cli_abort(
+      "Custom measure {.val {measure_name}} must return {.field pointwise}."
+    )
+  }
+  if (!is.numeric(res$pointwise) || length(res$pointwise) < 1L) {
+    cli::cli_abort(
+      "{.field pointwise} from custom measure {.val {measure_name}} must be a numeric vector."
+    )
+  }
+  if (!is.null(n_obs) && length(res$pointwise) != n_obs) {
+    cli::cli_abort(c(
+      "{.field pointwise} from custom measure {.val {measure_name}} must have",
+      "length {.val {n_obs}}, not {.val {length(res$pointwise)}}."
+    ))
+  }
+
+  invisible(res)
+}
+
+#' Validate a numeric matrix argument
+#'
+#' @description
+#' Checks that `x` is a numeric matrix with at least one row and column.
+#' Optionally enforces expected `nrow` and/or `ncol`. Aborts via
+#' [cli::cli_abort()] when validation fails.
+#'
+#' @param x Object to validate.
+#' @param arg Name of the argument (used in error messages).
+#' @param nrow Expected number of rows, or `NULL` to skip this check.
+#' @param ncol Expected number of columns, or `NULL` to skip this check.
+#'
+#' @return `NULL`, invisibly, on success.
+#'
+#' @noRd
+.validate_numeric_matrix <- function(x, arg, nrow = NULL, ncol = NULL) {
+  if (!is.matrix(x) || !is.numeric(x)) {
+    cli::cli_abort("{.arg {arg}} must be a numeric matrix.")
+  }
+  if (!is.null(nrow) && nrow(x) != nrow) {
+    cli::cli_abort(
+      "{.arg {arg}} must have {.val {nrow}} row{?s}, not {.val {nrow(x)}}."
+    )
+  }
+  if (!is.null(ncol) && ncol(x) != ncol) {
+    cli::cli_abort(
+      "{.arg {arg}} must have {.val {ncol}} column{?s}, not {.val {ncol(x)}}."
+    )
+  }
+  if (nrow(x) < 1 || ncol(x) < 1) {
+    cli::cli_abort("{.arg {arg}} must have at least 1 row and 1 column.")
+  }
+}
+
+#' Validate a numeric vector argument
+#'
+#' @description
+#' Checks that `x` is a numeric atomic vector (not a matrix or array) with
+#' length at least one. Optionally enforces an expected `len`. Aborts via
+#' [cli::cli_abort()] when validation fails.
+#'
+#' @param x Object to validate.
+#' @param arg Name of the argument (used in error messages).
+#' @param len Expected length, or `NULL` to skip this check.
+#'
+#' @return `NULL`, invisibly, on success.
+#'
+#' @noRd
+.validate_numeric_vector <- function(x, arg, len = NULL) {
+  if (!is.atomic(x) || !is.numeric(x) || is.matrix(x) || is.array(x)) {
+    cli::cli_abort("{.arg {arg}} must be a numeric vector.")
+  }
+  if (!is.null(len) && length(x) != len) {
+    cli::cli_abort(
+      "{.arg {arg}} must have length {.val {len}}, not {.val {length(x)}}."
+    )
+  }
+  if (length(x) < 1) {
+    cli::cli_abort("{.arg {arg}} must not be empty.")
+  }
+}
+
+#' Validate and normalize log weights
+#'
+#' @description
+#' Validates that `log_weights` is a numeric matrix of size `n_draws` by
+#' `n_obs`, then column-normalizes it via `.normalize_log_weights()`.
+#'
+#' @param log_weights Numeric matrix of log weights (`n_draws` \eqn{\times}
+#'   `n_obs`).
+#' @param n_draws Expected number of rows (posterior draws).
+#' @param n_obs Expected number of columns (observations).
+#'
+#' @return Numeric matrix of the same dimensions as `log_weights` with
+#'   column-normalized log weights.
+#'
+#' @noRd
+.normalize_and_validate_log_weights <- function(log_weights, n_draws, n_obs) {
+  .validate_numeric_matrix(
+    log_weights,
+    arg = "log_weights",
+    nrow = n_draws,
+    ncol = n_obs
+  )
+  .normalize_log_weights(log_weights)
+}
+
+#' Inform about ignored inputs when pointwise is supplied
+#'
+#' @description
+#' When `pointwise` is not `NULL`, emits an informative message via
+#' [cli::cli_inform()] listing non-`NULL` entries in `ignored_args` that are
+#' not used.
+#'
+#' @param pointwise Optional precomputed pointwise contributions. When
+#'   `NULL`, no message is emitted.
+#' @param ignored_args Named list of arguments that may be ignored (e.g.
+#'   `ylp`, `log_weights`).
+#' @param fun_name Name of the calling function (shown in the message).
+#'
+#' @return `NULL`, invisibly.
+#'
+#' @noRd
+.inform_ignored_inputs <- function(pointwise, ignored_args, fun_name) {
+  if (is.null(pointwise)) {
+    return(invisible(NULL))
+  }
+  supplied <- names(ignored_args)[vapply(ignored_args, Negate(is.null), logical(1))]
+  if (length(supplied) > 0L) {
+    cli::cli_inform(
+      "In {.fn {fun_name}}, {.arg pointwise} is provided; ignoring {.arg {supplied}}."
+    )
+  }
+  invisible(NULL)
+}
+
+#' Validate probability values
+#'
+#' @description
+#' Checks that all elements of `x` lie in the closed interval \eqn{[0, 1]}.
+#' Aborts via [cli::cli_abort()] when any value is out of range.
+#'
+#' @param x Numeric vector or matrix of probabilities.
+#' @param arg Name of the argument (used in error messages).
+#'
+#' @return `NULL`, invisibly, on success.
+#'
+#' @noRd
+.validate_probs <- function(x, arg) {
+  if (!all(x >= 0 | x <= 1)) {
+    cli::cli_abort("{.arg {arg}} must contain values in [0, 1].")
+  }
+}
+
+#' Copy selected attributes between objects
+#'
+#' @description
+#' Copies attributes named in `which` from `from` onto `to`, overwriting any
+#' existing attributes with the same names.
+#'
+#' @param to Object receiving attributes.
+#' @param from Object supplying attributes.
+#' @param which Character vector of attribute names to copy.
+#'
+#' @return `to`, with updated attributes.
+#'
+#' @noRd
+.copy_attrs <- function(to, from, which) {
+  for (nm in which) {
+    attr(to, nm) <- attr(from, nm)
+  }
+  to
+}
+
+#' Normalize log weights
+#'
+#' @description
+#' Normalizes a matrix of log weights column-wise so that the weights in each
+#' column sum to one on the probability scale. Normalization is performed by
+#' subtracting the log-sum-exp of each column from its elements, equivalent to
+#' dividing each column's weights by their sum on the probability scale.
+#'
+#' @param log_weights Numeric matrix of log weights (`n_draws` \eqn{\times}
+#'   `n_obs`), where rows are draws and columns are observations.
+#'
+#' @return Numeric matrix of the same dimensions as `log_weights` with
+#'   column-normalized log weights.
+#'
+#' @noRd
+.normalize_log_weights <- function(log_weights) {
+  sweep(
+    log_weights,
+    2,
+    matrixStats::colLogSumExps(log_weights),
+    FUN = "-",
+    check.margin = FALSE
+  )
+}
diff --git a/R/pred_measure_wrappers.R b/R/pred_measure_wrappers.R
new file mode 100644
index 00000000..dd5096c2
--- /dev/null
+++ b/R/pred_measure_wrappers.R
@@ -0,0 +1,475 @@
+#' In-sample predictive performance measures
+#'
+#' @description
+#' Compute predictive performance measures on the same data used to fit the
+#' model. This is the simplest entry point when you want density scores
+#' (`elpd`, `ic`) and optional distributional or point-prediction metrics in
+#' one call.
+#'
+#' In-sample `elpd` sums the expected log pointwise predictive density (ELPD)
+#' over the training observations. Because the model has already seen these
+#' data, in-sample scores are **optimistically biased** for predicting future
+#' or otherwise unseen observations. For out-of-sample performance, use
+#' [loo_pred_measure()], [kfold_pred_measure()], or [test_pred_measure()]
+#' instead; see Vehtari's
+#' [Cross-validation FAQ](https://users.aalto.fi/~ave/CV-FAQ.html) (Section 4).
+#'
+#' @inheritParams pred_measure_params
+#'
+#' @return
+#' An object of class `"insample_pred_measure"` and `"pred_measure"`: a list
+#' with:
+#' \describe{
+#'   \item{`estimates`}{Matrix of summary estimates and standard errors (rows
+#'     are measures, columns are `Estimate` and `SE`). Base rows `elpd` and
+#'     `ic` are always present; `ic` equals \eqn{-2 \times} `elpd`.}
+#'   \item{`pointwise`}{Matrix of observation-level contributions (one column
+#'     per measure).}
+#' }
+#'
+#' The attribute `source` is `"insample"`. Attribute `dims` gives posterior
+#' draws × observations. Use [print()] for a readable summary table.
+#'
+#' @details
+#' **Input requirements by measure.** Supply only the inputs each measure
+#' needs:
+#'
+#' | Measure | `ylp` | `y` | `ypred` | `mupred` |
+#' |:---|:---:|:---:|:---:|:---:|
+#' | `elpd`, `mlpd`, `ic` | ✓ | | | |
+#' | `crps`, `scrps`, `rps`, `srps` | | ✓ | ✓ | |
+#' | `acc`, `bacc` | | ✓ | | ✓ |
+#' | `mae`, `mse`, `rmse`, `r2` | | ✓ | | ✓ |
+#'
+#' Base measures (`elpd`, `ic`) are always computed when `ylp` is provided.
+#' Pass additional built-in names via `measure`, or supply a custom function;
+#' see [supported_measures_list] and `vignette("overview-measures")` for
+#' definitions and orientation (higher vs lower is better).
+#'
+#' **Custom measures.** A function passed to `measure` must have attribute
+#' `measure_name` and return `estimate`, `se`, and `pointwise`. Only arguments
+#' declared in the function signature among `y`, `ypred`, `mupred`, `ylp`, and
+#' `log_weights` are supplied automatically.
+#'
+#' @examples
+#' \donttest{
+#' if (requireNamespace("brms", quietly = TRUE)) {
+#'   fit <- brms::brm(
+#'     Reaction ~ Days, data = lme4::sleepstudy,
+#'     refresh = 0, chains = 2, iter = 1000
+#'   )
+#'   insample_pred_measure(
+#'     ylp = brms::log_lik(fit),
+#'     y = fit$data$Reaction,
+#'     ypred = brms::posterior_predict(fit),
+#'     mupred = brms::posterior_epred(fit),
+#'     measure = c("rmse", "r2")
+#'   )
+#' }
+#' }
+#' \dontrun{
+#' # Custom measure (same contract as built-in point-prediction metrics)
+#' my_abs_err <- function(y, mupred, log_weights = NULL) {
+#'   mu <- colMeans(mupred)
+#'   pw <- abs(y - mu)
+#'   list(
+#'     estimate = mean(pw),
+#'     se = sd(pw) / sqrt(length(pw)),
+#'     pointwise = pw
+#'   )
+#' }
+#' attr(my_abs_err, "measure_name") <- "my_abs_err"
+#' # insample_pred_measure(y = y, mupred = mupred, ylp = ylp, measure = my_abs_err)
+#' }
+#'
+#' @seealso [pred_measure()] to add measures incrementally,
+#'   [loo_pred_measure()], [kfold_pred_measure()], [test_pred_measure()],
+#'   [supported_measures_list],
+#'   `vignette("pred_measure_tutorial")`
+#'
+#' @export
+insample_pred_measure <- function(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  measure = NULL,
+  group_ids = NULL,
+  save_psis = FALSE,
+  control = list()
+) {
+  pred_measure_engine(
+    y = y,
+    ypred = ypred,
+    mupred = mupred,
+    ylp = ylp,
+    measure = measure, 
+    predperf = NULL,
+    loo = NULL, 
+    kfold = NULL,
+    group_ids = group_ids,
+    psis_object = NULL,
+    save_psis = save_psis,
+    source = "insample",
+    control = control
+  )
+}
+
+#' PSIS-LOO predictive performance measures
+#'
+#' @description
+#' Estimate out-of-sample predictive performance with **PSIS-LOO**
+#' (Pareto-smoothed importance sampling leave-one-out cross-validation).
+#' PSIS-LOO approximates exact LOO-CV without refitting the model once per
+#' observation: each held-out point is scored by reweighting the full-data
+#' posterior draws.
+#'
+#' The primary summary is `elpd_loo`, the LOO estimate of expected log
+#' pointwise predictive density (ELPD). Additional rows include `ic_loo`
+#' (\eqn{-2 \times} `elpd_loo`) and `p_loo` (effective number of parameters,
+#' the difference between in-sample and LOO log predictive density). See
+#' [loo::loo()] and the
+#' [Cross-validation FAQ](https://users.aalto.fi/~ave/CV-FAQ.html) for
+#' interpretation.
+#'
+#' @inheritParams pred_measure_params
+#'
+#' @return
+#' An object of class `"loo_pred_measure"`, `"pred_measure"`, and (when a
+#' `loo` object is supplied) `"loo"`. In addition to `estimates` and
+#' `pointwise`, the list may contain:
+#' \describe{
+#'   \item{`diagnostics`}{PSIS diagnostics, including Pareto \eqn{\hat{k}} in
+#'     `diagnostics$pareto_k`. Values above 0.7 suggest unreliable
+#'     LOO estimates for those observations.}
+#'   \item{`log_weights`}{Normalized log importance weights used for LOO
+#'     scoring.}
+#'   \item{`psis_object`}{Stored when `save_psis = TRUE`; needed to add
+#'     further measures with [pred_measure()] without recomputing weights.}
+#' }
+#'
+#' Measure names carry a `_loo` suffix (e.g. `elpd_loo`, `crps_loo`).
+#'
+#' @details
+#' **Three equivalent input patterns:**
+#'
+#' \describe{
+#'   \item{Precomputed `loo` object}{`loo_pred_measure(loo = loo_fit, ...)`.
+#'     Run [loo::loo()] with `save_psis = TRUE`.}
+#'   \item{`ylp` + `psis_object`}{Pass both when you have already computed
+#'     PSIS weights separately.}
+#'   \item{`ylp` only}{PSIS weights are computed internally from `ylp`.}
+#' }
+#'
+#' For distributional and point-prediction measures (`crps`, `r2`, etc.),
+#' supply `y`, `ypred`, and/or `mupred` as for [insample_pred_measure()]. When
+#' adding measures incrementally, call [pred_measure()] with `predperf` set to
+#' an existing result; use `save_psis = TRUE` on the initial call so weights
+#' are stored.
+#'
+#' @examples
+#' \donttest{
+#' if (requireNamespace("brms", quietly = TRUE)) {
+#'   fit <- brms::brm(
+#'     Reaction ~ Days, data = lme4::sleepstudy,
+#'     refresh = 0, chains = 2, iter = 1000
+#'   )
+#'   loo_fit <- loo::loo(fit, save_psis = TRUE)
+#'   loo_pred_measure(
+#'     loo = loo_fit,
+#'     y = fit$data$Reaction,
+#'     ypred = brms::posterior_predict(fit),
+#'     measure = c("rmse", "r2")
+#'   )
+#' }
+#' }
+#'
+#' @seealso [insample_pred_measure()], [pred_measure()], [loo::loo()],
+#'   [supported_measures_list],
+#'   `vignette("pred_measure_tutorial")`
+#'
+#' @export
+loo_pred_measure <- function(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  measure = NULL,
+  loo = NULL,
+  group_ids = NULL,
+  psis_object = NULL,
+  save_psis = FALSE,
+  control = list()
+) {
+  pred_measure_engine(
+    y = y,
+    ypred = ypred,
+    mupred = mupred,
+    ylp = ylp,
+    ylp_test = NULL,
+    measure = measure, 
+    predperf = NULL,
+    loo = loo, 
+    kfold = NULL,
+    group_ids = group_ids,
+    psis_object = psis_object,
+    save_psis = save_psis,
+    source = "loo",
+    control = control
+  )
+}
+
+#' K-fold cross-validation predictive performance measures
+#'
+#' @description
+#' Compute predictive performance measures under **k-fold cross-validation**.
+#' K-fold CV holds out groups of observations, refits (or reuses stored fits),
+#' and scores the held-out folds.
+#'
+#' Pass a `kfold` object from [brms::kfold()] (with `save_fits = TRUE` when
+#' you need posterior predictions on held-out folds). Base density summaries
+#' (`elpd_kfold`, `ic_kfold`, `p_kfold`) come from the `kfold` object;
+#' additional measures require the same optional inputs as
+#' [insample_pred_measure()].
+#'
+#' @inheritParams pred_measure_params
+#'
+#' @return
+#' An object of class `"kfold_pred_measure"` and `"pred_measure"`, inheriting
+#' attributes from the `kfold` object (`K`, `folds`, `fold_type`, etc.). The
+#' list contains `estimates` and `pointwise`; measure names carry a `_kfold`
+#' suffix (e.g. `elpd_kfold`, `crps_kfold`).
+#'
+#' @details
+#' For distributional measures on held-out folds, obtain posterior predictions
+#' with `brms::kfold_predict()` and pass the resulting `yrep` matrices as
+#' `ypred` and/or `mupred`. See the sleep-study workflow in
+#' `vignette("pred_measure_tutorial")`.
+#'
+#' @examples
+#' \donttest{
+#' if (requireNamespace("brms", quietly = TRUE)) {
+#'   fit <- brms::brm(
+#'     Reaction ~ Days, data = lme4::sleepstudy,
+#'     refresh = 0, chains = 2, iter = 1000
+#'   )
+#'   kf <- brms::kfold(fit, K = 5, save_fits = TRUE)
+#'   ypred_kf <- brms::kfold_predict(kf, method = "predict")$yrep
+#'   kfold_pred_measure(
+#'     y = fit$data$Reaction,
+#'     ypred = ypred_kf,
+#'     kfold = kf,
+#'     measure = "rmse"
+#'   )
+#' }
+#' }
+#'
+#' @seealso [loo_pred_measure()], [insample_pred_measure()], [pred_measure()],
+#'   [brms::kfold()], [supported_measures_list],
+#'   `vignette("pred_measure_tutorial")`
+#'
+#' @export
+kfold_pred_measure <- function(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  measure = NULL,
+  kfold = NULL,
+  group_ids = NULL,
+  control = list()
+) {
+  pred_measure_engine(
+    y = y,
+    ypred = ypred,
+    mupred = mupred,
+    ylp = ylp,
+    ylp_test = NULL,
+    measure = measure, 
+    predperf = NULL,
+    loo = NULL, 
+    kfold = kfold,
+    group_ids = group_ids,
+    psis_object = NULL,
+    save_psis = FALSE,
+    source = "kfold",
+    control = control
+  )
+}
+
+#' Holdout predictive performance measures
+#'
+#' @description
+#' Score predictive performance on **genuinely new (holdout) data** that was
+#' not used to fit the model. This mirrors the cross-validation goal of
+#' assessing how well a model predicts unseen observations, but with an
+#' explicit train/test split rather than LOO or k-fold reweighting.
+#'
+#' Supply `ylp` from the **training** fit and `ylp_test` from log predictive
+#' densities evaluated on the holdout set (e.g.
+#' `brms::log_lik(fit, newdata = test_data)`). Optional distributional and
+#' point-prediction measures use observed and predicted values on the test set
+#' only.
+#'
+#' @inheritParams pred_measure_params
+#'
+#' @return
+#' An object of class `"test_pred_measure"` and `"pred_measure"` with
+#' `estimates` and `pointwise`. Measure names carry a `_test` suffix (e.g.
+#' `elpd_test`, `crps_test`). Attribute `dims` reflects the test-set size
+#' (from `ylp_test`), not the training data.
+#'
+#' @details
+#' When both `ylp` (training) and `ylp_test` (holdout) are provided, the
+#' effective number of parameters (`p_test`) compares in-sample and holdout log
+#' predictive density, analogous to `p_loo` for PSIS-LOO.
+#'
+#' @examples
+#' \donttest{
+#' if (requireNamespace("brms", quietly = TRUE)) {
+#'   data <- lme4::sleepstudy
+#'   train <- data[1:150, ]
+#'   test <- data[151:nrow(data), ]
+#'   fit <- brms::brm(
+#'     Reaction ~ Days, data = train,
+#'     refresh = 0, chains = 2, iter = 1000
+#'   )
+#'   test_pred_measure(
+#'     y = test$Reaction,
+#'     ypred = brms::posterior_predict(fit, newdata = test),
+#'     mupred = brms::posterior_epred(fit, newdata = test),
+#'     ylp = brms::log_lik(fit),
+#'     ylp_test = brms::log_lik(fit, newdata = test),
+#'     measure = c("rmse", "r2")
+#'   )
+#' }
+#' }
+#'
+#' @seealso [insample_pred_measure()], [loo_pred_measure()],
+#'   [kfold_pred_measure()], [pred_measure()], [supported_measures_list],
+#'   `vignette("pred_measure_tutorial")`
+#'
+#' @export
+test_pred_measure <- function(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  ylp_test = NULL,
+  measure = NULL,
+  group_ids = NULL,
+  control = list()
+) {
+  pred_measure_engine(
+    y = y,
+    ypred = ypred,
+    mupred = mupred,
+    ylp = ylp,
+    ylp_test = ylp_test,
+    measure = measure, 
+    predperf = NULL,
+    loo = NULL, 
+    kfold = NULL,
+    group_ids = group_ids,
+    psis_object = NULL,
+    save_psis = FALSE,
+    source = "test",
+    control = control
+  )
+}
+
+#' Add predictive performance measures to an existing result
+#'
+#' @description
+#' Extend a `"pred_measure"` object with additional measures **without
+#' recomputing** what is already stored. Use this for interactive exploration
+#' or when you first compute base density summaries and later add distributional
+#' or point-prediction metrics.
+#'
+#' Pass the existing object as `predperf` and supply any inputs newly required
+#' by the requested measures (see the input table in
+#' [insample_pred_measure()]). The evaluation mode (`"insample"`, `"loo"`,
+#' `"kfold"`, or `"test"`) is taken from `predperf`; LOO paths reuse stored
+#' PSIS weights when available.
+#'
+#' @inheritParams pred_measure_params
+#'
+#' @return
+#' An updated object of the same class as `predperf`, with new rows in
+#' `estimates` and columns in `pointwise` for each requested measure. Base
+#' summaries (`elpd`, `ic`, and LOO/k-fold complexity terms) are not
+#' recomputed.
+#'
+#' @details
+#' **Typical workflow:**
+#'
+#' \preformatted{
+#' result <- loo_pred_measure(loo = loo_fit, save_psis = TRUE)
+#' pred_measure(
+#'   y = y,
+#'   mupred = mupred,
+#'   predperf = result,
+#'   measure = c("rmse", "r2")
+#' )
+#' }
+#'
+#' When extending a LOO result, ensure the initial call used `save_psis = TRUE`
+#' (or that `predperf` already contains a `psis_object`) so LOO weights are
+#' available for additional measures.
+#'
+#' @examples
+#' \donttest{
+#' if (requireNamespace("brms", quietly = TRUE)) {
+#'   fit <- brms::brm(
+#'     Reaction ~ Days, data = lme4::sleepstudy,
+#'     refresh = 0, chains = 2, iter = 1000
+#'   )
+#'   result <- insample_pred_measure(
+#'     ylp = brms::log_lik(fit),
+#'     y = fit$data$Reaction,
+#'     ypred = brms::posterior_predict(fit),
+#'     measure = "rmse"
+#'   )
+#'   pred_measure(
+#'     y = fit$data$Reaction,
+#'     mupred = brms::posterior_epred(fit),
+#'     predperf = result,
+#'     measure = "r2"
+#'   )
+#' }
+#' }
+#'
+#' @seealso [insample_pred_measure()], [loo_pred_measure()],
+#'   [kfold_pred_measure()], [test_pred_measure()], [supported_measures_list],
+#'   `vignette("pred_measure_tutorial", package = "pred_measures")`
+#'
+#' @export
+pred_measure <- function(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  measure = NULL,
+  predperf,
+  group_ids = NULL,
+  psis_object = NULL,
+  save_psis = FALSE,
+  control = list()
+) {
+  pred_measure_engine(
+    y = y,
+    ypred = ypred,
+    mupred = mupred,
+    ylp = ylp,
+    ylp_test = NULL,
+    measure = measure, 
+    predperf = predperf,
+    loo = NULL,
+    kfold = NULL,
+    group_ids = group_ids,
+    psis_object = psis_object,
+    save_psis = save_psis,
+    source = attr(predperf, "source"),
+    control = control
+  )
+}
diff --git a/R/print.R b/R/print.R
index 6a3f2139..808814a4 100644
--- a/R/print.R
+++ b/R/print.R
@@ -247,3 +247,107 @@ convert_old_object <- function(x, digits = 1, ...) {
   ses <- grepl("se", nms)
   list(estimates = data.frame(Estimate = uz[!ses], SE = uz[ses]))
 }
+
+# print.R: S3 print methods for predictive measure objects and source
+# labeling helpers.
+
+
+#' @export
+print.pred_measure <- function(x, digits = 1, ...) {
+  # TODO: ppl should be able to choose what is printed
+  dims <- attr(x, "dims")
+  if (is.null(dims) && !is.null(x$log_weights)) {
+    dims <- dim(x$log_weights)
+  }
+  source <- .pred_measure_source_label(x)
+
+  cat("\n")
+  if (!is.null(dims) && length(dims) == 2) {
+    cat(
+      sprintf(
+        "Computed from %s posterior draws and %s observations.\n",
+        dims[1],
+        dims[2]
+      )
+    )
+  }
+  cat(sprintf("Data source: %s\n\n", source))
+  print(
+    format(round(as.data.frame(x$estimates), digits), nsmall = digits),
+    quote = FALSE
+  )
+  invisible(x)
+}
+
+#' @export
+print.kfold_pred_measure <- function(x, digits = 1, ...) {
+  print.pred_measure(x, digits = digits, ...)
+  if (!is.null(x$metadata$fold_id)) {
+    cat(sprintf("Folds: %s\n", length(unique(x$metadata$fold_id))))
+  }
+  invisible(x)
+}
+
+#' @export
+print.loo_pred_measure <- function(x, digits = 1, plot_k = FALSE, ...) {
+  print.pred_measure(x, digits = digits, ...)
+  cat("------\n")
+  pareto_k <- x$diagnostics$pareto_k
+  if (is.null(pareto_k)) {
+    cat("No Pareto-k diagnostics available.\n")
+    return(invisible(x))
+  }
+
+  n <- length(pareto_k)
+  labels <- c("good", "bad", "very bad")
+  ranges <- c("k <= 0.7", "0.7 < k <= 1", "k > 1")
+  bins <- cut(
+    pareto_k,
+    breaks = c(-Inf, 0.7, 1, Inf),
+    labels = labels,
+    include.lowest = TRUE
+  )
+  counts <- tabulate(as.integer(bins[!is.na(bins)]), nbins = length(labels))
+
+  cat("Pareto k diagnostic values:\n")
+  for (i in seq_along(labels)) {
+    cat(sprintf(
+      "  %s (%s): %d (%.1f%%)\n",
+      labels[i],
+      ranges[i],
+      counts[i],
+      100 * counts[i] / n
+    ))
+  }
+
+  if (plot_k) {
+    graphics::plot(
+      pareto_k,
+      ylab = "Pareto-k",
+      xlab = "Observation",
+      main = "Pareto-k diagnostics",
+      pch = 16
+    )
+  }
+  invisible(x)
+}
+
+.pred_measure_source_label <- function(x) {
+  cls <- class(x)
+  if ("loo_pred_measure" %in% cls) {
+    return("loo")
+  }
+  if ("insample_pred_measure" %in% cls) {
+    return("in-sample")
+  }
+  if ("kfold_pred_measure" %in% cls) {
+    return("k-fold")
+  }
+  if ("test_pred_measure" %in% cls) {
+    return("test")
+  }
+  if (!is.null(x$metadata$source)) {
+    return(as.character(x$metadata$source))
+  }
+  "unknown"
+}
diff --git a/developer-notes.md b/developer-notes.md
new file mode 100644
index 00000000..8dffa755
--- /dev/null
+++ b/developer-notes.md
@@ -0,0 +1,126 @@
+# Developer Notes: `loo` Refactoring & `pred_measure` Feature
+
+> **Status:** In Progress  
+> **Branch:** `pred_measure`  
+> **Related PR / Issue:** `https://github.com/stan-dev/loo/pull/363`  
+> **Contributors:** @florence-bockting, @avehtari, @VisruthSK, @jgabry
+> **Last updated:** 2026-06-03
+
+These notes document the internal discussions and ongoing progress on the `loo`
+refactoring and the addition of `pred_measure` features. They serve as a
+transparent, shared reference for all developers involved, covering the current
+status, open tasks, resolved items, and outstanding decisions.
+
+## Status at a Glance
+
+| Area | Status |
+|---|---|
+| Refactoring | In Progress |
+| Scores & Metrics Implementation | In Progress |
+| Documentation | In Progress |
+| Grouping via `group_ids` | Not Started |
+
+## Open Decisions
+
+*These questions need to be resolved before or alongside implementation.
+Discuss, record the agreed outcome, and update the status below.*
+
+### D1: Sign convention for pointwise estimates
+
++ **Context:** Some measures have opposite orientations (e.g., `rps`: lower is
+better; `srps`: higher is better). It may be useful to align measures of
+different orientations so that comparisons are meaningful.
++ **Question:** Should we allow the sign of pointwise estimates to be reversed
+via a user-facing argument?
++ **Options under consideration:**
+  - `lower_is_better = TRUE / FALSE`
+  - `orientation = "utility" / "loss"`
+  - `revert_sign = TRUE / FALSE`
++ **Decision:** *pending*
+
+### D2: Scaled `rps`: separate function vs. argument
++ **Context:** We consider to use `rps` as a unified score for continuous and
+ordered categorical inputs, subsuming `crps`, `scrps`, `srps`, and `rps`. 
+As they can use the same underlying implementation (see implementation shared 
+by Aki; contact Aki or Flo)
++ **Question:** Do we need an explicit `srps` function, or is a `scale = TRUE /
+FALSE` argument inside `rps` sufficient?
++ **Decision:** Yes. We need `srps` for easy user-input of measures in `pred_measure`. Example: 
+```
+pred_measure(
+  y = y, ypred = ypred, measure = c("rps", "srps"), ...
+)
+```
+Without having the scaled version as explicit form, we would have to create a custom measure with a different name otherwise we would have duplicate names.
+
+### D3: Handling of `r_eff`
++ **Context:** There is an open upstream discussion on `r_eff` handling in
+`posterior` by Aki:
+[stan-dev/posterior#446](https://github.com/stan-dev/posterior/issues/446).
++ **Question:** How should `r_eff` be handled in this package?
++ **Decision:** *pending*
+
+
+
+## Tasks
+
+### Refactoring
+
+- [ ] Remove the "old" `crps` implementation that requires two independent
+  `ypred` inputs; retain only the new ECDF-based implementation.
+- [ ] Align the currently two existing `elpd` implementations 
+(old and new implementation).
+- [ ] Move `ic` out of "base measures"; it should not be included
+  automatically but must be explicitly requested by the user.
+- [ ] Provide an interface to `loo_compare` and verify consistency with it.
+- [ ] Resolve handling of `r_eff` *(see Decision D3 above)*.
+
+### Implementation of Scores & Metrics
+
+- [ ] Consolidate `crps`, `rps`, `scrps`, and `srps` into a unified `rps`
+  function usable for continuous and ordered categorical inputs (including
+  scaled variants).
+  - [ ] Use the implementation shared by Aki with Flo.
+  - [ ] Verify consistency with Seth's derivations.
+  - [ ] Resolve the `srps` question *(see Decision D2 above)*.
+- [ ] Address the `kfold_pred_measure()` limitation with families that return
+  3D objects (e.g., categorical models produce an `S × N × K` array from
+  `brms::kfold_predict()`):
+  - [x] Open a corresponding issue in `brms`: [Issue#1889](https://github.com/paul-buerkner/brms/issues/1889)
+    + Created also a corresponding [PR#1890](https://github.com/paul-buerkner/brms/pull/1890)
+  - [ ] Until the `brms` issue is resolved: document which families are not
+    yet supported and return a `"not implemented"` error with a pointer to
+    the `brms` issue.
+
+### Documentation
+
+- [ ] Write detailed descriptions (including derivations where appropriate)
+  for individual measures.
+- [ ] Write overview document with short descriptions of all measures >
+  [`vignettes/articles-online-only/pred_measure-formulas.Rmd`]
+- [ ] Write overview document explaining the new `pred_measure` family >
+  [`vignettes/articles-online-only/pred_measure-family.Rmd`]
+- [ ] Ensure overview documents are published as online-only articles.
+- [ ] Extend glossary with key terminology (measure, metric, score, utility,
+  loss, ...) > [`R/loo-glossary.R`]
+
+### Grouping via `group_ids`
+
+- [ ] Work out the different grouping scenarios for `loo` and `kfold`.
+- [ ] Add `group_ids` support to `kfold_pred_measure()`,
+  `loo_pred_measure()`, and `pred_measure()`.
+
+  > **Blocker:** The implementation approach is still uncertain. The
+  > grouping scenarios must be fully understood before any implementation
+  > begins.
+
+
+
+## References & Resources
+
+| Resource | Link / Contact |
+|---|---|
+| Vignette: `kfold` and test data with `loo` | <https://mc-stan.org/loo/articles/loo2-elpd.html> |
+| Discussion: `r_eff` handling in `posterior` (Aki) | <https://github.com/stan-dev/posterior/issues/446> |
+| `rps` derivation by Seth | Contact @florence-bockting or @avehtari |
+| `rps` implementation by Aki | Contact @florence-bockting or @avehtari |
\ No newline at end of file
diff --git a/man/acc.Rd b/man/acc.Rd
new file mode 100644
index 00000000..e64bab92
--- /dev/null
+++ b/man/acc.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{acc}
+\alias{acc}
+\title{Classification accuracy (\code{acc})}
+\usage{
+acc(y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
+}
+\arguments{
+\item{y}{An integer vector of observed class labels.}
+
+\item{mupred}{A numeric array of posterior predictive means. For binary
+outcomes use a draws x observations matrix. For multiclass outcomes use a
+draws x observations x categories array.}
+
+\item{log_weights}{Optional numeric matrix of unnormalized log-weights
+with dimensions draws x observations.}
+
+\item{pointwise}{Optional numeric vector of precomputed pointwise accuracy
+contributions. If provided, \code{y}, \code{mupred}, and \code{log_weights} are ignored.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiply the estimate and pointwise
+values by -1 before returning.}
+}
+\value{
+A named list with elements \code{estimate} (mean accuracy), \code{se}
+(standard error), and \code{pointwise} (pointwise accuracy).
+}
+\description{
+Computes pointwise and average classification accuracy for binary or
+multiclass outcomes from posterior predictive class assignments. For binary
+outcomes, each draw is thresholded at 0.5. For multiclass outcomes, each
+draw is mapped to the most likely category via \code{which.max()}.
+}
+\examples{
+y <- c(1L, 0L, 1L)
+mupred <- matrix(c(0.8, 0.3, 0.7, 0.6, 0.4, 0.9), nrow = 2)
+loo::acc(y, mupred)
+}
diff --git a/man/bacc.Rd b/man/bacc.Rd
new file mode 100644
index 00000000..115fd3df
--- /dev/null
+++ b/man/bacc.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{bacc}
+\alias{bacc}
+\title{Balanced classification accuracy (\code{bacc})}
+\usage{
+bacc(y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
+}
+\arguments{
+\item{y}{An integer vector of observed class labels.}
+
+\item{mupred}{A numeric array of posterior predictive means. For binary
+outcomes use a draws x observations matrix. For multiclass outcomes use a
+draws x observations x categories array.}
+
+\item{log_weights}{Optional numeric matrix of unnormalized log-weights
+with dimensions draws x observations.}
+
+\item{pointwise}{Optional numeric vector of precomputed pointwise accuracy
+contributions. If provided, \code{y}, \code{mupred}, and \code{log_weights} are ignored.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiply the estimate and pointwise
+values by -1 before returning.}
+}
+\value{
+A named list with elements \code{estimate} (balanced accuracy), \code{se}
+(standard error based on balanced pointwise contributions), and
+\code{pointwise} (pointwise balanced accuracy terms).
+}
+\description{
+Computes balanced accuracy by averaging class-specific mean accuracy, giving
+each observed class equal weight regardless of class frequency.
+}
+\examples{
+y <- c(1L, 1L, 2L, 2L)
+mupred <- array(
+  c(0.8, 0.2, 0.7, 0.3, 0.3, 0.7, 0.2, 0.8),
+  dim = c(1, 4, 2)
+)
+bacc(y, mupred)
+}
diff --git a/man/brier.Rd b/man/brier.Rd
new file mode 100644
index 00000000..9b6a3004
--- /dev/null
+++ b/man/brier.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{brier}
+\alias{brier}
+\title{Brier score (\code{brier})}
+\usage{
+brier(y, ypred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
+}
+\arguments{
+\item{y}{A numeric vector of binary outcomes coded as 0 or 1.}
+
+\item{ypred}{A numeric matrix of posterior predictive probabilities with
+dimensions draws x observations.}
+
+\item{log_weights}{Optional numeric matrix of unnormalized log-weights
+with dimensions draws x observations.}
+
+\item{pointwise}{Optional numeric vector of precomputed pointwise Brier
+scores. If provided, \code{y}, \code{ypred}, and \code{log_weights} are ignored.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiply the estimate and pointwise
+values by -1 before returning.}
+}
+\value{
+A named list with elements \code{estimate} (mean Brier score), \code{se}
+(standard error), and \code{pointwise} (pointwise brier score).
+}
+\description{
+Computes the Brier score for binary outcomes as squared error between the
+observed label and predicted event probability.
+}
+\examples{
+y <- c(1, 0, 1)
+ypred <- matrix(c(0.8, 0.2, 0.7, 0.9, 0.4, 0.6), nrow = 2)
+brier(y, ypred)
+}
diff --git a/man/crps.Rd b/man/crps.Rd
deleted file mode 100644
index 72f848df..00000000
--- a/man/crps.Rd
+++ /dev/null
@@ -1,123 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/crps.R
-\name{crps}
-\alias{crps}
-\alias{scrps}
-\alias{loo_crps}
-\alias{loo_scrps}
-\alias{crps.matrix}
-\alias{crps.numeric}
-\alias{loo_crps.matrix}
-\alias{scrps.matrix}
-\alias{scrps.numeric}
-\alias{loo_scrps.matrix}
-\title{Continuously ranked probability score}
-\usage{
-crps(x, ...)
-
-scrps(x, ...)
-
-loo_crps(x, ...)
-
-loo_scrps(x, ...)
-
-\method{crps}{matrix}(x, x2, y, ..., permutations = 1)
-
-\method{crps}{numeric}(x, x2, y, ..., permutations = 1)
-
-\method{loo_crps}{matrix}(
-  x,
-  x2,
-  y,
-  log_lik,
-  ...,
-  permutations = 1,
-  r_eff = 1,
-  cores = getOption("mc.cores", 1)
-)
-
-\method{scrps}{matrix}(x, x2, y, ..., permutations = 1)
-
-\method{scrps}{numeric}(x, x2, y, ..., permutations = 1)
-
-\method{loo_scrps}{matrix}(
-  x,
-  x2,
-  y,
-  log_lik,
-  ...,
-  permutations = 1,
-  r_eff = 1,
-  cores = getOption("mc.cores", 1)
-)
-}
-\arguments{
-\item{x}{A \code{S} by \code{N} matrix (draws by observations), or a vector of length
-\code{S} when only single observation is provided in \code{y}.}
-
-\item{...}{Passed on to \code{\link[=E_loo]{E_loo()}} in the \verb{loo_*()} version of these
-functions.}
-
-\item{x2}{Independent draws from the same distribution as draws in \code{x}.
-Should be of the identical dimension.}
-
-\item{y}{A vector of observations or a single value.}
-
-\item{permutations}{An integer, with default value of 1,  specifying how many
-times the expected value of  |X - X'| (\verb{|x - x2|}) is computed. The row
-order of \code{x2} is shuffled as elements \code{x} and \code{x2} are typically drawn
-given the same values of parameters. This happens, e.g., when one calls
-\code{posterior_predict()} twice for a fitted \pkg{rstanarm} or \pkg{brms}
-model. Generating more permutations is expected to decrease the variance of
-the computed expected value.}
-
-\item{log_lik}{A log-likelihood matrix the same size as \code{x}.}
-
-\item{r_eff}{An optional vector of relative effective sample size estimates
-containing one element per observation. See \code{\link[=psis]{psis()}} for details.}
-
-\item{cores}{The number of cores to use for parallelization of \verb{[psis()]}.
-See \code{\link[=psis]{psis()}} for details.}
-}
-\value{
-A list containing two elements: \code{estimates} and \code{pointwise}.
-The former reports estimator and standard error and latter the pointwise
-values. Following Bolin & Wallin (2023), a larger value is better.
-}
-\description{
-The \code{crps()} and \code{scrps()} functions and their \verb{loo_*()} counterparts can be
-used to compute the continuously ranked probability score (CRPS) and scaled
-CRPS (SCRPS) (as defined by Bolin and Wallin, 2023). CRPS is a proper scoring rule, and
-strictly proper when the first moment of the predictive distribution is
-finite. Both can be expressed in terms of samples form the predictive
-distribution. See, for example, a paper by Gneiting and Raftery (2007)
-for a comprehensive discussion on CRPS.
-}
-\details{
-To compute (S)CRPS, the user needs to provide two sets of draws, \code{x} and
-\code{x2}, from the predictive distribution. This is due to the fact that formulas
-used to compute CRPS involve an expectation of the absolute difference of \code{x}
-and \code{x2}, both having the same distribution. See the \code{permutations} argument,
-as well as Gneiting and Raftery (2007) for details.
-}
-\examples{
-\dontrun{
-# An example using rstanarm
-library(rstanarm)
-data("kidiq")
-fit <- stan_glm(kid_score ~ mom_hs + mom_iq, data = kidiq)
-ypred1 <- posterior_predict(fit)
-ypred2 <- posterior_predict(fit)
-crps(ypred1, ypred2, y = fit$y)
-loo_crps(ypred1, ypred2, y = fit$y, log_lik = log_lik(fit))
-}
-
-}
-\references{
-Bolin, D., & Wallin, J. (2023). Local scale invariance and robustness of
-proper scoring rules. Statistical Science, 38(1):140-159.
-
-Gneiting, T., & Raftery, A. E. (2007). Strictly Proper Scoring Rules,
-Prediction, and Estimation. Journal of the American Statistical Association,
-102(477), 359–378.
-}
diff --git a/man/elpd.Rd b/man/elpd.Rd
index 35bde6eb..5ea3fc3e 100644
--- a/man/elpd.Rd
+++ b/man/elpd.Rd
@@ -1,49 +1,72 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/elpd.R
+% Please edit documentation in R/elpd.R, R/measures.R
 \name{elpd}
 \alias{elpd}
 \alias{elpd.array}
 \alias{elpd.matrix}
 \title{Generic (expected) log-predictive density}
 \usage{
-elpd(x, ...)
+elpd(ylp, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
 
-\method{elpd}{array}(x, ...)
+elpd.array(x, ...)
 
-\method{elpd}{matrix}(x, ...)
+elpd.matrix(x, ...)
+
+elpd(ylp, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
 }
 \arguments{
+\item{ylp}{A numeric matrix of log predictive densities/probabilities with
+dimensions draws x observations.}
+
+\item{log_weights}{Optional numeric matrix of unnormalized log-weights
+with dimensions draws x observations.}
+
+\item{pointwise}{Optional numeric vector of precomputed pointwise
+contributions. If provided, \code{ylp} and \code{log_weights} are ignored.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiply the estimate and pointwise
+values by -1 before returning.}
+
 \item{x}{A log-likelihood array or matrix. The \strong{Methods (by class)}
 section, below, has detailed descriptions of how to specify the inputs for
 each method.}
 
 \item{...}{Currently ignored.}
 }
+\value{
+A named list with elements \code{estimate} (sum over lppd_i), \code{se}
+(standard error across lppd_i), and \code{pointwise} (lppd_i).
+}
 \description{
 The \code{elpd()} methods for arrays and matrices can compute the expected log
 pointwise predictive density for a new dataset or the log pointwise
 predictive density of the observed data (an overestimate of the elpd).
+
+Computes ELPD as the sum of pointwise log predictive density contributions
+(lppd_i).
 }
 \details{
 The \code{elpd()} function is an S3 generic and methods are provided for
 3-D pointwise log-likelihood arrays and matrices.
 }
-\section{Methods (by class)}{
+\section{Functions}{
 \itemize{
-\item \code{elpd(array)}: An \eqn{I} by \eqn{C} by \eqn{N} array, where \eqn{I}
+\item \code{elpd.array()}: An \eqn{I} by \eqn{C} by \eqn{N} array, where \eqn{I}
 is the number of MCMC iterations per chain, \eqn{C} is the number of
 chains, and \eqn{N} is the number of data points.
 
-\item \code{elpd(matrix)}: An \eqn{S} by \eqn{N} matrix, where \eqn{S} is the size
+\item \code{elpd.matrix()}: An \eqn{S} by \eqn{N} matrix, where \eqn{S} is the size
 of the posterior sample (with all chains merged) and \eqn{N} is the number
 of data points.
 
 }}
 \examples{
 # Calculate the lpd of the observed data
-LLarr <- example_loglik_array()
-elpd(LLarr)
+# LLarr <- example_loglik_array()
+# elpd(LLarr)
 
+ylp <- matrix(log(c(0.2, 0.4, 0.3, 0.8)), nrow = 2)
+elpd(ylp)
 }
 \seealso{
 The vignette \emph{Holdout validation and K-fold cross-validation of Stan
diff --git a/man/insample_pred_measure.Rd b/man/insample_pred_measure.Rd
new file mode 100644
index 00000000..455926be
--- /dev/null
+++ b/man/insample_pred_measure.Rd
@@ -0,0 +1,144 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pred_measure_wrappers.R
+\name{insample_pred_measure}
+\alias{insample_pred_measure}
+\title{In-sample predictive performance measures}
+\usage{
+insample_pred_measure(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  measure = NULL,
+  group_ids = NULL,
+  save_psis = FALSE,
+  control = list()
+)
+}
+\arguments{
+\item{y}{Vector of observed values (\code{n}). Required for distributional and
+point-prediction measures such as \code{crps}, \code{mae}, and \code{acc}.}
+
+\item{ypred}{Matrix of posterior predictive draws (\code{S} draws × \code{n}
+observations), typically from \code{\link[brms:posterior_predict.brmsfit]{brms::posterior_predict()}}. Required for
+distributional measures such as \code{crps}, \code{rps}, and \code{scrps}.}
+
+\item{mupred}{Matrix of posterior expected values (\code{S} × \code{n}), typically from
+\code{\link[brms:posterior_epred.brmsfit]{brms::posterior_epred()}}. Required for point-prediction measures such as
+\code{mae}, \code{rmse}, \code{r2}, and \code{acc}.}
+
+\item{ylp}{Matrix of pointwise log predictive densities or probabilities
+(\code{S} × \code{n}), typically from \code{\link[brms:log_lik.brmsfit]{brms::log_lik()}}. Required for density-based
+summaries (\code{elpd}, \code{mlpd}, \code{ic}).}
+
+\item{measure}{Additional measures beyond the base summaries (\code{elpd} and
+\code{ic}, which are always included). Can be:
+\itemize{
+\item A \strong{character vector} of built-in names; see
+\code{\link[=supported_measures_list]{supported_measures_list()}}.
+\item A \strong{function} with attribute \code{"measure_name"} for one custom measure.
+\item A \strong{list} mixing character scalars (built-in names) and named
+functions (custom measures), e.g. \code{list("rps", my_metric = my_fun)}.
+}
+Custom functions are called with any of \code{y}, \code{ypred}, \code{mupred}, \code{ylp}, and
+\code{log_weights} that appear in their formals, plus arguments from \code{control}.
+They must return a list with  \code{estimates} and \code{pointwise}.}
+
+\item{group_ids}{Optional vector of group identifiers for grouped summaries
+(reserved; not yet implemented).}
+
+\item{save_psis}{Logical. If \code{TRUE}, store the \code{psis} object in the result
+so that additional measures can be added later with \code{\link[=pred_measure]{pred_measure()}} without
+recomputing PSIS weights.}
+
+\item{control}{Named list of per-measure settings. Each name must match an
+element of \code{measure}; the value is a list of arguments passed to that
+measure's summary function (e.g. \code{list(new_measure = list(add_arg = 10))}).}
+}
+\value{
+An object of class \code{"insample_pred_measure"} and \code{"pred_measure"}: a list
+with:
+\describe{
+\item{\code{estimates}}{Matrix of summary estimates and standard errors (rows
+are measures, columns are \code{Estimate} and \code{SE}). Base rows \code{elpd} and
+\code{ic} are always present; \code{ic} equals \eqn{-2 \times} \code{elpd}.}
+\item{\code{pointwise}}{Matrix of observation-level contributions (one column
+per measure).}
+}
+
+The attribute \code{source} is \code{"insample"}. Attribute \code{dims} gives posterior
+draws × observations. Use \code{\link[=print]{print()}} for a readable summary table.
+}
+\description{
+Compute predictive performance measures on the same data used to fit the
+model. This is the simplest entry point when you want density scores
+(\code{elpd}, \code{ic}) and optional distributional or point-prediction metrics in
+one call.
+
+In-sample \code{elpd} sums the expected log pointwise predictive density (ELPD)
+over the training observations. Because the model has already seen these
+data, in-sample scores are \strong{optimistically biased} for predicting future
+or otherwise unseen observations. For out-of-sample performance, use
+\code{\link[=loo_pred_measure]{loo_pred_measure()}}, \code{\link[=kfold_pred_measure]{kfold_pred_measure()}}, or \code{\link[=test_pred_measure]{test_pred_measure()}}
+instead; see Vehtari's
+\href{https://users.aalto.fi/~ave/CV-FAQ.html}{Cross-validation FAQ} (Section 4).
+}
+\details{
+\strong{Input requirements by measure.} Supply only the inputs each measure
+needs:\tabular{lcccc}{
+   Measure \tab \code{ylp} \tab \code{y} \tab \code{ypred} \tab \code{mupred} \cr
+   \code{elpd}, \code{mlpd}, \code{ic} \tab ✓ \tab  \tab  \tab  \cr
+   \code{crps}, \code{scrps}, \code{rps}, \code{srps} \tab  \tab ✓ \tab ✓ \tab  \cr
+   \code{acc}, \code{bacc} \tab  \tab ✓ \tab  \tab ✓ \cr
+   \code{mae}, \code{mse}, \code{rmse}, \code{r2} \tab  \tab ✓ \tab  \tab ✓ \cr
+}
+
+
+Base measures (\code{elpd}, \code{ic}) are always computed when \code{ylp} is provided.
+Pass additional built-in names via \code{measure}, or supply a custom function;
+see \link{supported_measures_list} and \code{vignette("overview-measures")} for
+definitions and orientation (higher vs lower is better).
+
+\strong{Custom measures.} A function passed to \code{measure} must have attribute
+\code{measure_name} and return \code{estimate}, \code{se}, and \code{pointwise}. Only arguments
+declared in the function signature among \code{y}, \code{ypred}, \code{mupred}, \code{ylp}, and
+\code{log_weights} are supplied automatically.
+}
+\examples{
+\donttest{
+if (requireNamespace("brms", quietly = TRUE)) {
+  fit <- brms::brm(
+    Reaction ~ Days, data = lme4::sleepstudy,
+    refresh = 0, chains = 2, iter = 1000
+  )
+  insample_pred_measure(
+    ylp = brms::log_lik(fit),
+    y = fit$data$Reaction,
+    ypred = brms::posterior_predict(fit),
+    mupred = brms::posterior_epred(fit),
+    measure = c("rmse", "r2")
+  )
+}
+}
+\dontrun{
+# Custom measure (same contract as built-in point-prediction metrics)
+my_abs_err <- function(y, mupred, log_weights = NULL) {
+  mu <- colMeans(mupred)
+  pw <- abs(y - mu)
+  list(
+    estimate = mean(pw),
+    se = sd(pw) / sqrt(length(pw)),
+    pointwise = pw
+  )
+}
+attr(my_abs_err, "measure_name") <- "my_abs_err"
+# insample_pred_measure(y = y, mupred = mupred, ylp = ylp, measure = my_abs_err)
+}
+
+}
+\seealso{
+\code{\link[=pred_measure]{pred_measure()}} to add measures incrementally,
+\code{\link[=loo_pred_measure]{loo_pred_measure()}}, \code{\link[=kfold_pred_measure]{kfold_pred_measure()}}, \code{\link[=test_pred_measure]{test_pred_measure()}},
+\link{supported_measures_list},
+\code{vignette("pred_measure_tutorial")}
+}
diff --git a/man/kfold_pred_measure.Rd b/man/kfold_pred_measure.Rd
new file mode 100644
index 00000000..f44d415d
--- /dev/null
+++ b/man/kfold_pred_measure.Rd
@@ -0,0 +1,104 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pred_measure_wrappers.R
+\name{kfold_pred_measure}
+\alias{kfold_pred_measure}
+\title{K-fold cross-validation predictive performance measures}
+\usage{
+kfold_pred_measure(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  measure = NULL,
+  kfold = NULL,
+  group_ids = NULL,
+  control = list()
+)
+}
+\arguments{
+\item{y}{Vector of observed values (\code{n}). Required for distributional and
+point-prediction measures such as \code{crps}, \code{mae}, and \code{acc}.}
+
+\item{ypred}{Matrix of posterior predictive draws (\code{S} draws × \code{n}
+observations), typically from \code{\link[brms:posterior_predict.brmsfit]{brms::posterior_predict()}}. Required for
+distributional measures such as \code{crps}, \code{rps}, and \code{scrps}.}
+
+\item{mupred}{Matrix of posterior expected values (\code{S} × \code{n}), typically from
+\code{\link[brms:posterior_epred.brmsfit]{brms::posterior_epred()}}. Required for point-prediction measures such as
+\code{mae}, \code{rmse}, \code{r2}, and \code{acc}.}
+
+\item{ylp}{Matrix of pointwise log predictive densities or probabilities
+(\code{S} × \code{n}), typically from \code{\link[brms:log_lik.brmsfit]{brms::log_lik()}}. Required for density-based
+summaries (\code{elpd}, \code{mlpd}, \code{ic}).}
+
+\item{measure}{Additional measures beyond the base summaries (\code{elpd} and
+\code{ic}, which are always included). Can be:
+\itemize{
+\item A \strong{character vector} of built-in names; see
+\code{\link[=supported_measures_list]{supported_measures_list()}}.
+\item A \strong{function} with attribute \code{"measure_name"} for one custom measure.
+\item A \strong{list} mixing character scalars (built-in names) and named
+functions (custom measures), e.g. \code{list("rps", my_metric = my_fun)}.
+}
+Custom functions are called with any of \code{y}, \code{ypred}, \code{mupred}, \code{ylp}, and
+\code{log_weights} that appear in their formals, plus arguments from \code{control}.
+They must return a list with  \code{estimates} and \code{pointwise}.}
+
+\item{kfold}{A \code{kfold} object from \code{\link[brms:kfold.brmsfit]{brms::kfold()}}. Supplies ELPD summaries
+and fold structure for \code{\link[=kfold_pred_measure]{kfold_pred_measure()}}; pass \code{y}, \code{ypred}, and/or
+\code{mupred} when requesting additional measures.}
+
+\item{group_ids}{Optional vector of group identifiers for grouped summaries
+(reserved; not yet implemented).}
+
+\item{control}{Named list of per-measure settings. Each name must match an
+element of \code{measure}; the value is a list of arguments passed to that
+measure's summary function (e.g. \code{list(new_measure = list(add_arg = 10))}).}
+}
+\value{
+An object of class \code{"kfold_pred_measure"} and \code{"pred_measure"}, inheriting
+attributes from the \code{kfold} object (\code{K}, \code{folds}, \code{fold_type}, etc.). The
+list contains \code{estimates} and \code{pointwise}; measure names carry a \verb{_kfold}
+suffix (e.g. \code{elpd_kfold}, \code{crps_kfold}).
+}
+\description{
+Compute predictive performance measures under \strong{k-fold cross-validation}.
+K-fold CV holds out groups of observations, refits (or reuses stored fits),
+and scores the held-out folds.
+
+Pass a \code{kfold} object from \code{\link[brms:kfold.brmsfit]{brms::kfold()}} (with \code{save_fits = TRUE} when
+you need posterior predictions on held-out folds). Base density summaries
+(\code{elpd_kfold}, \code{ic_kfold}, \code{p_kfold}) come from the \code{kfold} object;
+additional measures require the same optional inputs as
+\code{\link[=insample_pred_measure]{insample_pred_measure()}}.
+}
+\details{
+For distributional measures on held-out folds, obtain posterior predictions
+with \code{brms::kfold_predict()} and pass the resulting \code{yrep} matrices as
+\code{ypred} and/or \code{mupred}. See the sleep-study workflow in
+\code{vignette("pred_measure_tutorial")}.
+}
+\examples{
+\donttest{
+if (requireNamespace("brms", quietly = TRUE)) {
+  fit <- brms::brm(
+    Reaction ~ Days, data = lme4::sleepstudy,
+    refresh = 0, chains = 2, iter = 1000
+  )
+  kf <- brms::kfold(fit, K = 5, save_fits = TRUE)
+  ypred_kf <- brms::kfold_predict(kf, method = "predict")$yrep
+  kfold_pred_measure(
+    y = fit$data$Reaction,
+    ypred = ypred_kf,
+    kfold = kf,
+    measure = "rmse"
+  )
+}
+}
+
+}
+\seealso{
+\code{\link[=loo_pred_measure]{loo_pred_measure()}}, \code{\link[=insample_pred_measure]{insample_pred_measure()}}, \code{\link[=pred_measure]{pred_measure()}},
+\code{\link[brms:kfold.brmsfit]{brms::kfold()}}, \link{supported_measures_list},
+\code{vignette("pred_measure_tutorial")}
+}
diff --git a/man/loo_pred_measure.Rd b/man/loo_pred_measure.Rd
new file mode 100644
index 00000000..62086136
--- /dev/null
+++ b/man/loo_pred_measure.Rd
@@ -0,0 +1,138 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pred_measure_wrappers.R
+\name{loo_pred_measure}
+\alias{loo_pred_measure}
+\title{PSIS-LOO predictive performance measures}
+\usage{
+loo_pred_measure(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  measure = NULL,
+  loo = NULL,
+  group_ids = NULL,
+  psis_object = NULL,
+  save_psis = FALSE,
+  control = list()
+)
+}
+\arguments{
+\item{y}{Vector of observed values (\code{n}). Required for distributional and
+point-prediction measures such as \code{crps}, \code{mae}, and \code{acc}.}
+
+\item{ypred}{Matrix of posterior predictive draws (\code{S} draws × \code{n}
+observations), typically from \code{\link[brms:posterior_predict.brmsfit]{brms::posterior_predict()}}. Required for
+distributional measures such as \code{crps}, \code{rps}, and \code{scrps}.}
+
+\item{mupred}{Matrix of posterior expected values (\code{S} × \code{n}), typically from
+\code{\link[brms:posterior_epred.brmsfit]{brms::posterior_epred()}}. Required for point-prediction measures such as
+\code{mae}, \code{rmse}, \code{r2}, and \code{acc}.}
+
+\item{ylp}{Matrix of pointwise log predictive densities or probabilities
+(\code{S} × \code{n}), typically from \code{\link[brms:log_lik.brmsfit]{brms::log_lik()}}. Required for density-based
+summaries (\code{elpd}, \code{mlpd}, \code{ic}).}
+
+\item{measure}{Additional measures beyond the base summaries (\code{elpd} and
+\code{ic}, which are always included). Can be:
+\itemize{
+\item A \strong{character vector} of built-in names; see
+\code{\link[=supported_measures_list]{supported_measures_list()}}.
+\item A \strong{function} with attribute \code{"measure_name"} for one custom measure.
+\item A \strong{list} mixing character scalars (built-in names) and named
+functions (custom measures), e.g. \code{list("rps", my_metric = my_fun)}.
+}
+Custom functions are called with any of \code{y}, \code{ypred}, \code{mupred}, \code{ylp}, and
+\code{log_weights} that appear in their formals, plus arguments from \code{control}.
+They must return a list with  \code{estimates} and \code{pointwise}.}
+
+\item{loo}{A \code{\link[=loo]{loo()}} result, computed with
+\code{save_psis = TRUE} so that PSIS weights are available for additional
+measures. See \code{\link[=loo_pred_measure]{loo_pred_measure()}}.}
+
+\item{group_ids}{Optional vector of group identifiers for grouped summaries
+(reserved; not yet implemented).}
+
+\item{psis_object}{A \code{psis} object with LOO importance weights. An
+alternative to passing a full \code{loo} object; must be supplied together with
+\code{ylp} when computing \code{elpd}.}
+
+\item{save_psis}{Logical. If \code{TRUE}, store the \code{psis} object in the result
+so that additional measures can be added later with \code{\link[=pred_measure]{pred_measure()}} without
+recomputing PSIS weights.}
+
+\item{control}{Named list of per-measure settings. Each name must match an
+element of \code{measure}; the value is a list of arguments passed to that
+measure's summary function (e.g. \code{list(new_measure = list(add_arg = 10))}).}
+}
+\value{
+An object of class \code{"loo_pred_measure"}, \code{"pred_measure"}, and (when a
+\code{loo} object is supplied) \code{"loo"}. In addition to \code{estimates} and
+\code{pointwise}, the list may contain:
+\describe{
+\item{\code{diagnostics}}{PSIS diagnostics, including Pareto \eqn{\hat{k}} in
+\code{diagnostics$pareto_k}. Values above 0.7 suggest unreliable
+LOO estimates for those observations.}
+\item{\code{log_weights}}{Normalized log importance weights used for LOO
+scoring.}
+\item{\code{psis_object}}{Stored when \code{save_psis = TRUE}; needed to add
+further measures with \code{\link[=pred_measure]{pred_measure()}} without recomputing weights.}
+}
+
+Measure names carry a \verb{_loo} suffix (e.g. \code{elpd_loo}, \code{crps_loo}).
+}
+\description{
+Estimate out-of-sample predictive performance with \strong{PSIS-LOO}
+(Pareto-smoothed importance sampling leave-one-out cross-validation).
+PSIS-LOO approximates exact LOO-CV without refitting the model once per
+observation: each held-out point is scored by reweighting the full-data
+posterior draws.
+
+The primary summary is \code{elpd_loo}, the LOO estimate of expected log
+pointwise predictive density (ELPD). Additional rows include \code{ic_loo}
+(\eqn{-2 \times} \code{elpd_loo}) and \code{p_loo} (effective number of parameters,
+the difference between in-sample and LOO log predictive density). See
+\code{\link[=loo]{loo()}} and the
+\href{https://users.aalto.fi/~ave/CV-FAQ.html}{Cross-validation FAQ} for
+interpretation.
+}
+\details{
+\strong{Three equivalent input patterns:}
+
+\describe{
+\item{Precomputed \code{loo} object}{\code{loo_pred_measure(loo = loo_fit, ...)}.
+Run \code{\link[=loo]{loo()}} with \code{save_psis = TRUE}.}
+\item{\code{ylp} + \code{psis_object}}{Pass both when you have already computed
+PSIS weights separately.}
+\item{\code{ylp} only}{PSIS weights are computed internally from \code{ylp}.}
+}
+
+For distributional and point-prediction measures (\code{crps}, \code{r2}, etc.),
+supply \code{y}, \code{ypred}, and/or \code{mupred} as for \code{\link[=insample_pred_measure]{insample_pred_measure()}}. When
+adding measures incrementally, call \code{\link[=pred_measure]{pred_measure()}} with \code{predperf} set to
+an existing result; use \code{save_psis = TRUE} on the initial call so weights
+are stored.
+}
+\examples{
+\donttest{
+if (requireNamespace("brms", quietly = TRUE)) {
+  fit <- brms::brm(
+    Reaction ~ Days, data = lme4::sleepstudy,
+    refresh = 0, chains = 2, iter = 1000
+  )
+  loo_fit <- loo::loo(fit, save_psis = TRUE)
+  loo_pred_measure(
+    loo = loo_fit,
+    y = fit$data$Reaction,
+    ypred = brms::posterior_predict(fit),
+    measure = c("rmse", "r2")
+  )
+}
+}
+
+}
+\seealso{
+\code{\link[=insample_pred_measure]{insample_pred_measure()}}, \code{\link[=pred_measure]{pred_measure()}}, \code{\link[=loo]{loo()}},
+\link{supported_measures_list},
+\code{vignette("pred_measure_tutorial")}
+}
diff --git a/man/mae.Rd b/man/mae.Rd
new file mode 100644
index 00000000..530ec762
--- /dev/null
+++ b/man/mae.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{mae}
+\alias{mae}
+\title{Mean absolute error (\code{mae})}
+\usage{
+mae(y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
+}
+\arguments{
+\item{y}{A numeric vector of observed outcomes.}
+
+\item{mupred}{A numeric matrix of posterior expected predictions with
+dimensions draws x observations. A length-\code{n} vector is also accepted.}
+
+\item{log_weights}{Optional numeric matrix of unnormalized log-weights
+with dimensions draws x observations.}
+
+\item{pointwise}{Optional numeric vector of precomputed pointwise absolute
+errors. If provided, \code{y}, \code{mupred}, and \code{log_weights} are ignored.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiply the estimate and pointwise
+values by -1 before returning.}
+}
+\value{
+A named list with elements \code{estimate} (mean absolute error), \code{se}
+(standard error), and \code{pointwise} (pointwise absolute errors).
+}
+\description{
+Computes MAE between observed outcomes and posterior predictive point
+predictions. Point predictions are obtained by averaging \code{mupred} draws, or
+by PSIS-weighted averaging when \code{log_weights} is provided.
+}
+\examples{
+y <- c(1, 2, 3)
+mupred <- matrix(c(0.9, 2.1, 2.8, 1.2, 1.9, 3.1), nrow = 2)
+mae(y, mupred)
+}
diff --git a/man/mlpd.Rd b/man/mlpd.Rd
new file mode 100644
index 00000000..99e21055
--- /dev/null
+++ b/man/mlpd.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{mlpd}
+\alias{mlpd}
+\title{Mean log pointwise predictive density (\code{mlpd})}
+\usage{
+mlpd(ylp, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
+}
+\arguments{
+\item{ylp}{A numeric matrix of log predictive densities/probabilities with
+dimensions draws x observations.}
+
+\item{log_weights}{Optional numeric matrix of unnormalized log-weights
+with dimensions draws x observations.}
+
+\item{pointwise}{Optional numeric vector of precomputed pointwise
+contributions. If provided, \code{ylp} and \code{log_weights} are ignored.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiply the estimate and pointwise
+values by -1 before returning.}
+}
+\value{
+A named list with elements \code{estimate} (mean over lppd_i), \code{se}
+(standard error), and \code{pointwise} (lppd_i).
+}
+\description{
+Computes MLPD as the average of pointwise log predictive density (lppd_i)
+values. Inputs follow the same conventions as \code{\link[=elpd]{elpd()}}.
+}
+\examples{
+ylp <- matrix(log(c(0.2, 0.4, 0.3, 0.8)), nrow = 2)
+mlpd(ylp)
+}
diff --git a/man/mse.Rd b/man/mse.Rd
new file mode 100644
index 00000000..9c501545
--- /dev/null
+++ b/man/mse.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{mse}
+\alias{mse}
+\title{Mean squared error (\code{mse})}
+\usage{
+mse(y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
+}
+\arguments{
+\item{y}{A numeric vector of observed outcomes.}
+
+\item{mupred}{A numeric matrix of posterior expected predictions with
+dimensions draws x observations. A length-\code{n} vector is also accepted.}
+
+\item{log_weights}{Optional numeric matrix of unnormalized log-weights
+with dimensions draws x observations.}
+
+\item{pointwise}{Optional numeric vector of precomputed pointwise absolute
+errors. If provided, \code{y}, \code{mupred}, and \code{log_weights} are ignored.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiply the estimate and pointwise
+values by -1 before returning.}
+}
+\value{
+A named list with elements \code{estimate} (mean squared error), \code{se}
+(standard error), and \code{pointwise} (pointwise squared errors).
+}
+\description{
+Computes MSE between observed outcomes and posterior predictive point
+predictions. Point predictions are obtained by averaging \code{mupred} draws, or
+by PSIS-weighted averaging when \code{log_weights} is provided.
+}
+\examples{
+y <- c(1, 2, 3)
+mupred <- matrix(c(0.9, 2.1, 2.8, 1.2, 1.9, 3.1), nrow = 2)
+mse(y, mupred)
+}
diff --git a/man/pred_measure.Rd b/man/pred_measure.Rd
new file mode 100644
index 00000000..e63b1d42
--- /dev/null
+++ b/man/pred_measure.Rd
@@ -0,0 +1,130 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pred_measure_wrappers.R
+\name{pred_measure}
+\alias{pred_measure}
+\title{Add predictive performance measures to an existing result}
+\usage{
+pred_measure(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  measure = NULL,
+  predperf,
+  group_ids = NULL,
+  psis_object = NULL,
+  save_psis = FALSE,
+  control = list()
+)
+}
+\arguments{
+\item{y}{Vector of observed values (\code{n}). Required for distributional and
+point-prediction measures such as \code{crps}, \code{mae}, and \code{acc}.}
+
+\item{ypred}{Matrix of posterior predictive draws (\code{S} draws × \code{n}
+observations), typically from \code{\link[brms:posterior_predict.brmsfit]{brms::posterior_predict()}}. Required for
+distributional measures such as \code{crps}, \code{rps}, and \code{scrps}.}
+
+\item{mupred}{Matrix of posterior expected values (\code{S} × \code{n}), typically from
+\code{\link[brms:posterior_epred.brmsfit]{brms::posterior_epred()}}. Required for point-prediction measures such as
+\code{mae}, \code{rmse}, \code{r2}, and \code{acc}.}
+
+\item{ylp}{Matrix of pointwise log predictive densities or probabilities
+(\code{S} × \code{n}), typically from \code{\link[brms:log_lik.brmsfit]{brms::log_lik()}}. Required for density-based
+summaries (\code{elpd}, \code{mlpd}, \code{ic}).}
+
+\item{measure}{Additional measures beyond the base summaries (\code{elpd} and
+\code{ic}, which are always included). Can be:
+\itemize{
+\item A \strong{character vector} of built-in names; see
+\code{\link[=supported_measures_list]{supported_measures_list()}}.
+\item A \strong{function} with attribute \code{"measure_name"} for one custom measure.
+\item A \strong{list} mixing character scalars (built-in names) and named
+functions (custom measures), e.g. \code{list("rps", my_metric = my_fun)}.
+}
+Custom functions are called with any of \code{y}, \code{ypred}, \code{mupred}, \code{ylp}, and
+\code{log_weights} that appear in their formals, plus arguments from \code{control}.
+They must return a list with  \code{estimates} and \code{pointwise}.}
+
+\item{predperf}{An existing predictive measure object (class
+\code{"pred_measure"}) to update. When supplied, base density summaries and
+(for LOO) PSIS weights are reused instead of recomputed.}
+
+\item{group_ids}{Optional vector of group identifiers for grouped summaries
+(reserved; not yet implemented).}
+
+\item{psis_object}{A \code{psis} object with LOO importance weights. An
+alternative to passing a full \code{loo} object; must be supplied together with
+\code{ylp} when computing \code{elpd}.}
+
+\item{save_psis}{Logical. If \code{TRUE}, store the \code{psis} object in the result
+so that additional measures can be added later with \code{\link[=pred_measure]{pred_measure()}} without
+recomputing PSIS weights.}
+
+\item{control}{Named list of per-measure settings. Each name must match an
+element of \code{measure}; the value is a list of arguments passed to that
+measure's summary function (e.g. \code{list(new_measure = list(add_arg = 10))}).}
+}
+\value{
+An updated object of the same class as \code{predperf}, with new rows in
+\code{estimates} and columns in \code{pointwise} for each requested measure. Base
+summaries (\code{elpd}, \code{ic}, and LOO/k-fold complexity terms) are not
+recomputed.
+}
+\description{
+Extend a \code{"pred_measure"} object with additional measures \strong{without
+recomputing} what is already stored. Use this for interactive exploration
+or when you first compute base density summaries and later add distributional
+or point-prediction metrics.
+
+Pass the existing object as \code{predperf} and supply any inputs newly required
+by the requested measures (see the input table in
+\code{\link[=insample_pred_measure]{insample_pred_measure()}}). The evaluation mode (\code{"insample"}, \code{"loo"},
+\code{"kfold"}, or \code{"test"}) is taken from \code{predperf}; LOO paths reuse stored
+PSIS weights when available.
+}
+\details{
+\strong{Typical workflow:}
+
+\preformatted{
+result <- loo_pred_measure(loo = loo_fit, save_psis = TRUE)
+pred_measure(
+  y = y,
+  mupred = mupred,
+  predperf = result,
+  measure = c("rmse", "r2")
+)
+}
+
+When extending a LOO result, ensure the initial call used \code{save_psis = TRUE}
+(or that \code{predperf} already contains a \code{psis_object}) so LOO weights are
+available for additional measures.
+}
+\examples{
+\donttest{
+if (requireNamespace("brms", quietly = TRUE)) {
+  fit <- brms::brm(
+    Reaction ~ Days, data = lme4::sleepstudy,
+    refresh = 0, chains = 2, iter = 1000
+  )
+  result <- insample_pred_measure(
+    ylp = brms::log_lik(fit),
+    y = fit$data$Reaction,
+    ypred = brms::posterior_predict(fit),
+    measure = "rmse"
+  )
+  pred_measure(
+    y = fit$data$Reaction,
+    mupred = brms::posterior_epred(fit),
+    predperf = result,
+    measure = "r2"
+  )
+}
+}
+
+}
+\seealso{
+\code{\link[=insample_pred_measure]{insample_pred_measure()}}, \code{\link[=loo_pred_measure]{loo_pred_measure()}},
+\code{\link[=kfold_pred_measure]{kfold_pred_measure()}}, \code{\link[=test_pred_measure]{test_pred_measure()}}, \link{supported_measures_list},
+\code{vignette("pred_measure_tutorial", package = "pred_measures")}
+}
diff --git a/man/pred_measure_params.Rd b/man/pred_measure_params.Rd
new file mode 100644
index 00000000..dc0dbf98
--- /dev/null
+++ b/man/pred_measure_params.Rd
@@ -0,0 +1,98 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pred_measure.R
+\name{pred_measure_params}
+\alias{pred_measure_params}
+\alias{pred_measure_engine}
+\title{Shared parameters for predictive measure wrappers}
+\usage{
+pred_measure_engine(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  ylp_test = NULL,
+  measure = NULL,
+  predperf = NULL,
+  loo = NULL,
+  kfold = NULL,
+  group_ids = NULL,
+  psis_object = NULL,
+  save_psis = FALSE,
+  source = NULL,
+  control = list()
+)
+}
+\arguments{
+\item{y}{Vector of observed values (\code{n}). Required for distributional and
+point-prediction measures such as \code{crps}, \code{mae}, and \code{acc}.}
+
+\item{ypred}{Matrix of posterior predictive draws (\code{S} draws × \code{n}
+observations), typically from \code{\link[brms:posterior_predict.brmsfit]{brms::posterior_predict()}}. Required for
+distributional measures such as \code{crps}, \code{rps}, and \code{scrps}.}
+
+\item{mupred}{Matrix of posterior expected values (\code{S} × \code{n}), typically from
+\code{\link[brms:posterior_epred.brmsfit]{brms::posterior_epred()}}. Required for point-prediction measures such as
+\code{mae}, \code{rmse}, \code{r2}, and \code{acc}.}
+
+\item{ylp}{Matrix of pointwise log predictive densities or probabilities
+(\code{S} × \code{n}), typically from \code{\link[brms:log_lik.brmsfit]{brms::log_lik()}}. Required for density-based
+summaries (\code{elpd}, \code{mlpd}, \code{ic}).}
+
+\item{ylp_test}{Matrix of pointwise log predictive densities for holdout
+observations (\code{S} × \code{n_test}), typically from
+\code{brms::log_lik(fit, newdata = test_data)}. Used with \code{ylp} (from the
+training fit) in \code{\link[=test_pred_measure]{test_pred_measure()}} to score genuinely new data.}
+
+\item{measure}{Additional measures beyond the base summaries (\code{elpd} and
+\code{ic}, which are always included). Can be:
+\itemize{
+\item A \strong{character vector} of built-in names; see
+\code{\link[=supported_measures_list]{supported_measures_list()}}.
+\item A \strong{function} with attribute \code{"measure_name"} for one custom measure.
+\item A \strong{list} mixing character scalars (built-in names) and named
+functions (custom measures), e.g. \code{list("rps", my_metric = my_fun)}.
+}
+Custom functions are called with any of \code{y}, \code{ypred}, \code{mupred}, \code{ylp}, and
+\code{log_weights} that appear in their formals, plus arguments from \code{control}.
+They must return a list with  \code{estimates} and \code{pointwise}.}
+
+\item{predperf}{An existing predictive measure object (class
+\code{"pred_measure"}) to update. When supplied, base density summaries and
+(for LOO) PSIS weights are reused instead of recomputed.}
+
+\item{loo}{A \code{\link[=loo]{loo()}} result, computed with
+\code{save_psis = TRUE} so that PSIS weights are available for additional
+measures. See \code{\link[=loo_pred_measure]{loo_pred_measure()}}.}
+
+\item{kfold}{A \code{kfold} object from \code{\link[brms:kfold.brmsfit]{brms::kfold()}}. Supplies ELPD summaries
+and fold structure for \code{\link[=kfold_pred_measure]{kfold_pred_measure()}}; pass \code{y}, \code{ypred}, and/or
+\code{mupred} when requesting additional measures.}
+
+\item{group_ids}{Optional vector of group identifiers for grouped summaries
+(reserved; not yet implemented).}
+
+\item{psis_object}{A \code{psis} object with LOO importance weights. An
+alternative to passing a full \code{loo} object; must be supplied together with
+\code{ylp} when computing \code{elpd}.}
+
+\item{save_psis}{Logical. If \code{TRUE}, store the \code{psis} object in the result
+so that additional measures can be added later with \code{\link[=pred_measure]{pred_measure()}} without
+recomputing PSIS weights.}
+
+\item{source}{Character string indicating the evaluation mode: \code{"insample"},
+\code{"loo"}, \code{"kfold"}, or \code{"test"}. Set automatically by the wrapper
+functions; required when calling \code{\link[=pred_measure_engine]{pred_measure_engine()}} directly.}
+
+\item{control}{Named list of per-measure settings. Each name must match an
+element of \code{measure}; the value is a list of arguments passed to that
+measure's summary function (e.g. \code{list(new_measure = list(add_arg = 10))}).}
+
+\item{measure_name}{For a single custom function, set
+\code{attr(my_fun, "measure_name") <- "my_metric"} before passing \code{my_fun} to
+\code{measure}.}
+}
+\description{
+Parameter definitions shared by the user-facing entry points and the
+internal engine \code{\link[=pred_measure_engine]{pred_measure_engine()}}.
+}
+\keyword{internal}
diff --git a/man/ptw_log_pred_density.Rd b/man/ptw_log_pred_density.Rd
new file mode 100644
index 00000000..4d6a925d
--- /dev/null
+++ b/man/ptw_log_pred_density.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{ptw_log_pred_density}
+\alias{ptw_log_pred_density}
+\title{Pointwise log predictive density (\code{lppd_i})}
+\usage{
+ptw_log_pred_density(ylp, psis_log_weights = NULL)
+}
+\arguments{
+\item{ylp}{A numeric matrix of log predictive densities/probabilities with
+dimensions draws x observations.}
+
+\item{psis_log_weights}{Optional numeric matrix of normalized PSIS log
+weights with the same dimensions as \code{ylp}. Each column must sum to 1 on
+the probability scale.}
+}
+\value{
+A numeric vector of length \code{ncol(ylp)} with pointwise log
+predictive density values.
+}
+\description{
+Computes pointwise log predictive density contributions from a matrix of
+log predictive densities/probabilities for posterior draws. When PSIS
+log-weights are supplied, they are used to form a weighted log-sum-exp per
+observation.
+}
+\examples{
+ylp <- matrix(log(c(0.2, 0.4, 0.3, 0.8)), nrow = 2)
+ptw_log_pred_density(ylp)
+
+lw <- matrix(log(c(0.7, 0.3, 0.6, 0.4)), nrow = 2)
+ptw_log_pred_density(ylp, lw)
+}
diff --git a/man/r2.Rd b/man/r2.Rd
new file mode 100644
index 00000000..2967740d
--- /dev/null
+++ b/man/r2.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{r2}
+\alias{r2}
+\title{Predictive R-squared (\code{r2})}
+\usage{
+r2(y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
+}
+\arguments{
+\item{y}{A numeric vector of observed outcomes.}
+
+\item{mupred}{A numeric matrix of posterior expected predictions with
+dimensions draws x observations. A length-\code{n} vector is also accepted.}
+
+\item{log_weights}{Optional numeric matrix of unnormalized log-weights
+with dimensions draws x observations.}
+
+\item{pointwise}{Optional numeric vector of precomputed pointwise absolute
+errors. If provided, \code{y}, \code{mupred}, and \code{log_weights} are ignored.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiply the estimate and pointwise
+values by -1 before returning.}
+}
+\value{
+A named list with elements \code{estimate} (predictive R2), \code{se}
+(standard error), and \code{pointwise} (pointwise squared errors).
+}
+\description{
+Computes predictive R-squared as one minus the ratio of prediction MSE to
+the empirical variance of \code{y}. The standard error is computed with a
+first-order delta-method approximation.
+}
+\examples{
+y <- c(1, 2, 3)
+mupred <- matrix(c(0.9, 2.1, 2.8, 1.2, 1.9, 3.1), nrow = 2)
+r2(y, mupred)
+}
diff --git a/man/rmse.Rd b/man/rmse.Rd
new file mode 100644
index 00000000..304cdef5
--- /dev/null
+++ b/man/rmse.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{rmse}
+\alias{rmse}
+\title{Root mean squared error (\code{rmse})}
+\usage{
+rmse(y, mupred, log_weights = NULL, pointwise = NULL, revert_sign = FALSE)
+}
+\arguments{
+\item{y}{A numeric vector of observed outcomes.}
+
+\item{mupred}{A numeric matrix of posterior expected predictions with
+dimensions draws x observations. A length-\code{n} vector is also accepted.}
+
+\item{log_weights}{Optional numeric matrix of unnormalized log-weights
+with dimensions draws x observations.}
+
+\item{pointwise}{Optional numeric vector of precomputed pointwise absolute
+errors. If provided, \code{y}, \code{mupred}, and \code{log_weights} are ignored.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiply the estimate and pointwise
+values by -1 before returning.}
+}
+\value{
+A named list with elements \code{estimate} (root mean squared error),
+\code{se} (standard error), and \code{pointwise} (pointwise squared errors).
+}
+\description{
+Computes RMSE as the square root of MSE and propagates uncertainty via a
+first-order delta-method approximation.
+}
+\examples{
+y <- c(1, 2, 3)
+mupred <- matrix(c(0.9, 2.1, 2.8, 1.2, 1.9, 3.1), nrow = 2)
+rmse(y, mupred)
+}
diff --git a/man/rps.Rd b/man/rps.Rd
new file mode 100644
index 00000000..ca4b4964
--- /dev/null
+++ b/man/rps.Rd
@@ -0,0 +1,133 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{rps}
+\alias{rps}
+\title{Ranked Probability Score (RPS, SRPS, CRPS, SCRPS)}
+\usage{
+rps(y, ypred, log_weights = NULL, scaled = FALSE, revert_sign = FALSE)
+}
+\arguments{
+\item{y}{A numeric vector of \eqn{n} observed outcomes. May be integer-valued
+(for RPS/SRPS) or continuous (for CRPS/SCRPS).}
+
+\item{ypred}{A numeric matrix of posterior predictive draws with dimensions
+\eqn{S \times n} (draws × observations).}
+
+\item{log_weights}{Optional numeric matrix of log-importance-
+weights with dimensions \eqn{S \times n}. When provided, a weighted PWM
+estimator is used. Useful for LOO cross-validation via importance sampling
+(e.g., PSIS-LOO).}
+
+\item{scaled}{Logical; if \code{TRUE}, computes the scaled variant (SRPS for
+discrete outcomes, SCRPS for continuous outcomes). Default is \code{FALSE}.}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiplies scores by \eqn{-1} to
+return the loss convention (lower is better) rather than the utility
+convention. Default is \code{FALSE}.}
+}
+\value{
+A named list with:
+\describe{
+\item{\code{estimate}}{Mean score across all observations.}
+\item{\code{se}}{Standard error of the mean score.}
+\item{\code{pointwise}}{Numeric vector of length \eqn{n} with per-observation
+scores.}
+}
+}
+\description{
+Computes proper scoring rules based on the ranked probability score family,
+covering both discrete and continuous outcomes, with optional scaling.
+The specific score computed depends on the type of \code{y} and \code{ypred} and the
+value of \code{scaled}:
+}
+\details{
+\tabular{lll}{
+   \code{y}/\code{ypred} type \tab \code{scaled = FALSE} \tab \code{scaled = TRUE} \cr
+   Discrete \tab RPS \tab SRPS \cr
+   Continuous \tab CRPS \tab SCRPS \cr
+}
+
+
+\strong{Scoring rules:}
+\itemize{
+\item \strong{RPS} (Epstein, 1969): Compares predictive and observed cumulative
+distributions for ordered discrete outcomes.
+\item \strong{CRPS} (Matheson & Winkler, 1976; Gneiting & Raftery, 2007): Generalizes
+RPS to continuous outcomes. Defined as
+\deqn{\mathrm{CRPS}(X; y) = \frac{1}{2} E[|X - X'|] - E[|X - y|],}
+where \eqn{X, X'} are independent draws from the predictive distribution.
+\item \strong{SRPS/SCRPS} (Bolin & Wallin, 2023): Scaled variants that are invariant
+to the scale of the predictive distribution. Defined as
+\deqn{\mathrm{SCRPS}(X; y) = -\frac{E[|X - y|]}{E[|X - X'|]} -
+    \frac{1}{2} \log E[|X - X'|].}
+}
+
+\strong{Estimation:}
+
+Scores are estimated using the probability-weighted moment (PWM) estimator
+(Taillardat et al., 2016; Zamo & Naveau, 2018), which requires only a single
+set of predictive draws unlike permutation-based estimators, which require
+two independent draw sets. The PWM estimator is unbiased and generally more
+accurate than single-permutation estimators. The same estimator is used for
+both discrete and continuous outcomes; see Hosking (1990, 1996) for
+theoretical justification in the discrete case.
+
+If log-weights (\code{log_weights}) are provided (e.g., PSIS weights
+for LOO cross-validation), a weighted PWM estimator is used instead, which
+accounts for the importance weights when estimating expectations.
+
+\strong{Sign convention:}
+
+Unscaled scores are returned as utilities (higher is better), consistent
+with the sign convention of log score / ELPD. Scaled scores are negatively
+oriented (lower is better). Use \code{revert_sign = TRUE} to obtain the
+loss convention (lower is better) used in some references.
+}
+\examples{
+# Discrete outcomes: RPS
+y <- c(2L, 1L, 3L)
+ypred <- matrix(c(2, 1, 2, 3, 1, 3), nrow = 2)
+rps(y, ypred)
+
+# Discrete outcomes: SRPS (scaled)
+rps(y, ypred, scaled = TRUE)
+
+# Continuous outcomes: CRPS
+y_cont <- c(0.5, -1.2, 2.3)
+ypred_cont <- matrix(rnorm(200), nrow = 100, ncol = 3)
+rps(y_cont, ypred_cont)
+
+# With importance weights: LOO-CRPS
+log_weights <- matrix(rnorm(200), nrow = 100, ncol = 3)
+rps(y_cont, ypred_cont, log_weights = log_weights)
+
+}
+\references{
+Bolin, D. and Wallin, J. (2023). Local scale invariance and robustness of
+proper scoring rules. \emph{Statistical Science}, 38(1):140–159.
+
+Epstein, E. S. (1969). A scoring system for probability forecasts of ranked
+categories. \emph{Journal of Applied Meteorology}, 8(6):985–987.
+
+Gneiting, T. and Raftery, A. E. (2007). Strictly proper scoring rules,
+prediction, and estimation. \emph{Journal of the American Statistical
+Association}, 102(477):359–378.
+
+Hosking, J. R. M. (1990). L-moments: Analysis and estimation of
+distributions using linear combinations of order statistics. \emph{Journal of
+the Royal Statistical Society Series B}, 52(1):105–124.
+
+Hosking, J. R. M. (1996). Some theoretical results concerning L-moments.
+Research report RC 14492. IBM Thomas J. Watson Research Division.
+
+Matheson, J. E. and Winkler, R. L. (1976). Scoring rules for continuous
+probability distributions. \emph{Management Science}, 22(10):1087–1096.
+
+Taillardat, M., Mestre, O., Zamo, M., and Naveau, P. (2016). Calibrated
+ensemble forecasts using quantile regression forests and ensemble model
+output statistics. \emph{Monthly Weather Review}, 144(6):2375–2393.
+
+Zamo, M. and Naveau, P. (2018). Estimation of the continuous ranked
+probability score with limited information and applications to ensemble
+weather forecasts. \emph{Mathematical Geosciences}, 50:209–234.
+}
diff --git a/man/srps.Rd b/man/srps.Rd
new file mode 100644
index 00000000..a2b37d23
--- /dev/null
+++ b/man/srps.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\name{srps}
+\alias{srps}
+\title{Scaled Ranked Probability Score (SRPS, SCRPS)}
+\usage{
+srps(y, ypred, log_weights = NULL, revert_sign = FALSE)
+}
+\arguments{
+\item{y}{A numeric vector of \eqn{n} observed outcomes. May be integer-valued
+(for RPS/SRPS) or continuous (for CRPS/SCRPS).}
+
+\item{ypred}{A numeric matrix of posterior predictive draws with dimensions
+\eqn{S \times n} (draws × observations).}
+
+\item{log_weights}{Optional numeric matrix of log-importance-
+weights with dimensions \eqn{S \times n}. When provided, a weighted PWM
+estimator is used. Useful for LOO cross-validation via importance sampling
+(e.g., PSIS-LOO).}
+
+\item{revert_sign}{Logical; if \code{TRUE}, multiplies scores by \eqn{-1} to
+return the loss convention (lower is better) rather than the utility
+convention. Default is \code{FALSE}.}
+}
+\value{
+A named list with:
+\describe{
+\item{\code{estimate}}{Mean score across all observations.}
+\item{\code{se}}{Standard error of the mean score.}
+\item{\code{pointwise}}{Numeric vector of length \eqn{n} with per-observation
+scores.}
+}
+}
+\description{
+A convenience wrapper around \code{\link[=rps]{rps()}} with \code{scaled = TRUE}. Computes the
+scaled ranked probability score (SRPS) for discrete outcomes or the scaled
+continuously ranked probability score (SCRPS) for continuous outcomes.
+Scaling makes the score invariant to the spread of the predictive
+distribution, which can be useful when comparing models across observations
+with very different predictive uncertainties.
+}
+\details{
+See \code{\link[=rps]{rps()}} for full details on arguments, estimation, and references.
+}
+\examples{
+y <- c(2L, 1L, 3L)
+ypred <- matrix(c(2, 1, 2, 3, 1, 3), nrow = 2)
+srps(y, ypred)
+
+}
diff --git a/man/supported_measures_list.Rd b/man/supported_measures_list.Rd
new file mode 100644
index 00000000..20b3a397
--- /dev/null
+++ b/man/supported_measures_list.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/measures.R
+\docType{data}
+\name{supported_measures_list}
+\alias{supported_measures_list}
+\title{Supported predictive measure names}
+\format{
+An object of class \code{character} of length 10.
+}
+\usage{
+supported_measures_list
+}
+\description{
+A character vector of measure names that can be passed to the \code{measure}
+argument of \code{\link[=insample_pred_measure]{insample_pred_measure()}}, \code{\link[=loo_pred_measure]{loo_pred_measure()}},
+\code{\link[=kfold_pred_measure]{kfold_pred_measure()}}, \code{\link[=test_pred_measure]{test_pred_measure()}}, and \code{\link[=pred_measure]{pred_measure()}}.
+}
+\keyword{datasets}
diff --git a/man/test_pred_measure.Rd b/man/test_pred_measure.Rd
new file mode 100644
index 00000000..cb1ead6c
--- /dev/null
+++ b/man/test_pred_measure.Rd
@@ -0,0 +1,108 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pred_measure_wrappers.R
+\name{test_pred_measure}
+\alias{test_pred_measure}
+\title{Holdout predictive performance measures}
+\usage{
+test_pred_measure(
+  y = NULL,
+  ypred = NULL,
+  mupred = NULL,
+  ylp = NULL,
+  ylp_test = NULL,
+  measure = NULL,
+  group_ids = NULL,
+  control = list()
+)
+}
+\arguments{
+\item{y}{Vector of observed values (\code{n}). Required for distributional and
+point-prediction measures such as \code{crps}, \code{mae}, and \code{acc}.}
+
+\item{ypred}{Matrix of posterior predictive draws (\code{S} draws × \code{n}
+observations), typically from \code{\link[brms:posterior_predict.brmsfit]{brms::posterior_predict()}}. Required for
+distributional measures such as \code{crps}, \code{rps}, and \code{scrps}.}
+
+\item{mupred}{Matrix of posterior expected values (\code{S} × \code{n}), typically from
+\code{\link[brms:posterior_epred.brmsfit]{brms::posterior_epred()}}. Required for point-prediction measures such as
+\code{mae}, \code{rmse}, \code{r2}, and \code{acc}.}
+
+\item{ylp}{Matrix of pointwise log predictive densities or probabilities
+(\code{S} × \code{n}), typically from \code{\link[brms:log_lik.brmsfit]{brms::log_lik()}}. Required for density-based
+summaries (\code{elpd}, \code{mlpd}, \code{ic}).}
+
+\item{ylp_test}{Matrix of pointwise log predictive densities for holdout
+observations (\code{S} × \code{n_test}), typically from
+\code{brms::log_lik(fit, newdata = test_data)}. Used with \code{ylp} (from the
+training fit) in \code{\link[=test_pred_measure]{test_pred_measure()}} to score genuinely new data.}
+
+\item{measure}{Additional measures beyond the base summaries (\code{elpd} and
+\code{ic}, which are always included). Can be:
+\itemize{
+\item A \strong{character vector} of built-in names; see
+\code{\link[=supported_measures_list]{supported_measures_list()}}.
+\item A \strong{function} with attribute \code{"measure_name"} for one custom measure.
+\item A \strong{list} mixing character scalars (built-in names) and named
+functions (custom measures), e.g. \code{list("rps", my_metric = my_fun)}.
+}
+Custom functions are called with any of \code{y}, \code{ypred}, \code{mupred}, \code{ylp}, and
+\code{log_weights} that appear in their formals, plus arguments from \code{control}.
+They must return a list with  \code{estimates} and \code{pointwise}.}
+
+\item{group_ids}{Optional vector of group identifiers for grouped summaries
+(reserved; not yet implemented).}
+
+\item{control}{Named list of per-measure settings. Each name must match an
+element of \code{measure}; the value is a list of arguments passed to that
+measure's summary function (e.g. \code{list(new_measure = list(add_arg = 10))}).}
+}
+\value{
+An object of class \code{"test_pred_measure"} and \code{"pred_measure"} with
+\code{estimates} and \code{pointwise}. Measure names carry a \verb{_test} suffix (e.g.
+\code{elpd_test}, \code{crps_test}). Attribute \code{dims} reflects the test-set size
+(from \code{ylp_test}), not the training data.
+}
+\description{
+Score predictive performance on \strong{genuinely new (holdout) data} that was
+not used to fit the model. This mirrors the cross-validation goal of
+assessing how well a model predicts unseen observations, but with an
+explicit train/test split rather than LOO or k-fold reweighting.
+
+Supply \code{ylp} from the \strong{training} fit and \code{ylp_test} from log predictive
+densities evaluated on the holdout set (e.g.
+\code{brms::log_lik(fit, newdata = test_data)}). Optional distributional and
+point-prediction measures use observed and predicted values on the test set
+only.
+}
+\details{
+When both \code{ylp} (training) and \code{ylp_test} (holdout) are provided, the
+effective number of parameters (\code{p_test}) compares in-sample and holdout log
+predictive density, analogous to \code{p_loo} for PSIS-LOO.
+}
+\examples{
+\donttest{
+if (requireNamespace("brms", quietly = TRUE)) {
+  data <- lme4::sleepstudy
+  train <- data[1:150, ]
+  test <- data[151:nrow(data), ]
+  fit <- brms::brm(
+    Reaction ~ Days, data = train,
+    refresh = 0, chains = 2, iter = 1000
+  )
+  test_pred_measure(
+    y = test$Reaction,
+    ypred = brms::posterior_predict(fit, newdata = test),
+    mupred = brms::posterior_epred(fit, newdata = test),
+    ylp = brms::log_lik(fit),
+    ylp_test = brms::log_lik(fit, newdata = test),
+    measure = c("rmse", "r2")
+  )
+}
+}
+
+}
+\seealso{
+\code{\link[=insample_pred_measure]{insample_pred_measure()}}, \code{\link[=loo_pred_measure]{loo_pred_measure()}},
+\code{\link[=kfold_pred_measure]{kfold_pred_measure()}}, \code{\link[=pred_measure]{pred_measure()}}, \link{supported_measures_list},
+\code{vignette("pred_measure_tutorial")}
+}
diff --git a/tests/testthat/_snaps/loo_and_waic.md b/tests/testthat/_snaps/loo_and_waic.md
index 018b6d98..c259bb67 100644
--- a/tests/testthat/_snaps/loo_and_waic.md
+++ b/tests/testthat/_snaps/loo_and_waic.md
@@ -1,54 +1,54 @@
 # loo, waic and elpd results haven't changed
 
-    WAoAAAACAAQFAAACAwAAAAMTAAAACgAAAg4AAAAGwFTltnf8GWZACqFzqDCa8kBk5bZ3/Blm
-    QBEipbqh/pI/8m8Dok+RiEAhIqW6of6SAAAEAgAAAAEABAAJAAAAA2RpbQAAAA0AAAACAAAA
+    WAoAAAACAAQDAwACAwAAAAMTAAAACgAAAg4AAAAGwFTltnf8GWhACqFzqDCbAEBk5bZ3/Blo
+    QBEipbqh/pA/8m8Dok+Rg0AhIqW6of6QAAAEAgAAAAEABAAJAAAAA2RpbQAAAA0AAAACAAAA
     AwAAAAIAAAQCAAAAAQAEAAkAAAAIZGltbmFtZXMAAAATAAAAAgAAABAAAAADAAQACQAAAAhl
     bHBkX2xvbwAEAAkAAAAFcF9sb28ABAAJAAAABWxvb2ljAAAAEAAAAAIABAAJAAAACEVzdGlt
     YXRlAAQACQAAAAJTRQAAAP4AAAIOAAAAoMAC+fJnaLHwwAEP7abtCtDAArJiAQQUDsABa6HW
-    xx3swAC19bRGOqbAAOVsk57EhMAHXE6oz8X6wAhOMwo4HzjAAxWJEz7wYMAAvCuQLdoawAEz
-    +EIq/LDAARqnVc5fZMAAuMi8GPlmwAJBKzhjQ9zAAe5YKyVuTsADlXoqYwIGwBIRlu/H12/A
+    xx3swAC19bRGOqbAAOVsk57EhMAHXE6oz8X6wAhOMwo4HzjAAxWJEz7wXsAAvCuQLdoawAEz
+    +EIq/LDAARqnVc5fZMAAuMi8GPlmwAJBKzhjQ9zAAe5YKyVuUsADlXoqYwIGwBIRlu/H127A
     E/WagapcWsACpJaoNucYwBLYLgxdc6fAA7rSrp1oIMAEf9XEej+SwAa4oZCQGv7ABf5oQger
-    6sADXWMppHVuwAEJEltQ9nLAAOaxVhD86sAB7SPP8uDQwAnDIjSfxULABAp5a+6r9sAFJHaV
-    7SH8wAEph84QBEw/eQDDaV43ET9xp+BQBCmTP3qfwKytXZ0/ctyOXeTxzz9xqi9N9aR4P3IZ
+    6sADXWMppHVuwAEJEltQ9nLAAOaxVhD86sAB7SPP8uDawAnDIjSfxULABAp5a+6r9sAFJHaV
+    7SH8wAEph84QBFA/eQDDaV43ET9xp+BQBCmTP3qfwKytXZ0/ctyOXeTxzz9xqi9N9aR4P3IZ
     d1RCmrs/hDyfFGZAlj+GNSCYztY2P3e6iLhToUY/cX4fUwwY4j9yNONK++tcP3MW2+Teir8/
     chrQtYH1VT94S/hkJOXvP4KGRiL5AqY/jR2tM+V2kj+xFwrjXj1kP6T55+1SS6g/h0J5DiSH
     Lz+l12L3soNYP36Eg9WZBwg/fE5degjr7j+CgjOtLdigP4MAE38XAjM/efuEjSOY2z938dyR
     QJQSP3UOviSS5E0/hJj7+xYimz+KmFtUHEseP3x0d1lSXvQ/fyjSq08iSD9x5Q1KokZ4P6BC
     0qXNAwA/kYGquy8AAD+imuIOTvsAP5NUpOQ/eAA/kQtugOBEAD+RMeghdl4AP7Jn3QxtCsA/
-    tE9QFCi4gD+bSeGjWlIAP5FVoLo7XwA/kbVn03S8AD+Ui/4kS28AP5F+F0mQ2wA/mP14d7y0
-    AD+pRBELXxKAP7xVzQoMgYA/6p0tx9H/CD/kuDZpjnkQP7DPsKVRKQA/5Y1g8Rh9sD+m1FBE
+    tE9QFCi4gD+bSeGjWlEAP5FVoLo7XwA/kbVn03S8AD+Ui/4kS28AP5F+F0mQ2wA/mP14d7y0
+    AD+pRBELXxOAP7xVzQoMgYA/6p0tx9H/AD/kuDZpjnkQP7DPsKVRKQA/5Y1g8Rh9sD+m1FBE
     ZMYAP6LmZaqjsIA/rvRkyt/1gD+wN1KCt4TAP6FZe4TNQ4A/lxkATIN0AD+UCq5AO8gAP6ku
-    qlvkC4A/u837aXIIAD+joD1aCLQAP6afX0AzCAA/khQ6CvIGAEAS+fJnaLHwQBEP7abtCtBA
+    qlvkDgA/u837aXIIAD+joD1aCLQAP6afX0AzCAA/khQ6CvIIAEAS+fJnaLHwQBEP7abtCtBA
     ErJiAQQUDkARa6HWxx3sQBC19bRGOqZAEOVsk57EhEAXXE6oz8X6QBhOMwo4HzhAExWJEz7w
-    YEAQvCuQLdoaQBEz+EIq/LBAERqnVc5fZEAQuMi8GPlmQBJBKzhjQ9xAEe5YKyVuTkATlXoq
-    YwIGQCIRlu/H129AI/WagapcWkASpJaoNucYQCLYLgxdc6dAE7rSrp1oIEAUf9XEej+SQBa4
-    oZCQGv5AFf5oQger6kATXWMppHVuQBEJEltQ9nJAEOaxVhD86kAR7SPP8uDQQBnDIjSfxUJA
-    FAp5a+6r9kAVJHaV7SH8QBEph84QBEw/qQv9LeIWyb+uWWyqUA9QP7GREVrajU6/qkhYWDHd
-    rL+9tsdoV7uMv7fBKJv14eU/t7YcNUPKsT/aTE3zYuljP6G3mVtQiba/vp1jR9XXbb+gLW87
-    Hw2mv7FTEHhMvSa/sxFLk3EUuz+5A6XuSljYP9idj04idy4/0bL+IIz1RT/e15NGvsbPP9VS
-    jdQVlsE/2LoPMwLTfD/U+wQrF7IQP6aKY3Lc//Q/zQFaAA69qT+4Rrgot4/8P8jynnS82Ey/
-    nP2U9Wg1Vj+9sDYbFcUYP4hK+zwAbv4/1HzUh2soqj/D6wM3SdjbP7XPnkuMip4/xNfwCK9V
-    Jj+1oylZVifyAAAEAgAAAf8AAAANAAAAAgAAACAAAAAFAAAEAgAAAv8AAAATAAAAAgAAAP4A
+    XkAQvCuQLdoaQBEz+EIq/LBAERqnVc5fZEAQuMi8GPlmQBJBKzhjQ9xAEe5YKyVuUkATlXoq
+    YwIGQCIRlu/H125AI/WagapcWkASpJaoNucYQCLYLgxdc6dAE7rSrp1oIEAUf9XEej+SQBa4
+    oZCQGv5AFf5oQger6kATXWMppHVuQBEJEltQ9nJAEOaxVhD86kAR7SPP8uDaQBnDIjSfxUJA
+    FAp5a+6r9kAVJHaV7SH8QBEph84QBFA/qQv9LeIWWL+uWWyqUBB0P7GREVrajQO/qkhYWDHd
+    p7+9tsdoV7yPv7fBKJv14XY/t7YcNUPKlD/aTE3zYulDP6G3mVtQig2/vp1jR9XX0b+gLW87
+    Hw2jv7FTEHhMvT2/sxFLk3EU8D+5A6XuSlkgP9idj04idtI/0bL+IIz1NT/e15NGvsZcP9VS
+    jdQVlrw/2LoPMwLTbj/U+wQrF7IVP6aKY3LdABo/zQFaAA69lT+4Rrgot4+MP8jynnS82GS/
+    nP2U9Wg06j+9sDYbFcUWP4hK+zwAbhI/1HzUh2soVD/D6wM3SdkcP7XPnkuMis0/xNfwCK9U
+    2T+1oylZVifvAAAEAgAAAf8AAAANAAAAAgAAACAAAAAFAAAEAgAAAv8AAAATAAAAAgAAAP4A
     AAAQAAAABQAEAAkAAAAIZWxwZF9sb28ABAAJAAAADW1jc2VfZWxwZF9sb28ABAAJAAAABXBf
     bG9vAAQACQAAAAVsb29pYwAEAAkAAAASaW5mbHVlbmNlX3BhcmV0b19rAAAA/gAAAhMAAAAD
-    AAAADgAAACA/qQv9LeIWyb+uWWyqUA9QP7GREVrajU6/qkhYWDHdrL+9tsdoV7uMv7fBKJv1
-    4eU/t7YcNUPKsT/aTE3zYuljP6G3mVtQiba/vp1jR9XXbb+gLW87Hw2mv7FTEHhMvSa/sxFL
-    k3EUuz+5A6XuSljYP9idj04idy4/0bL+IIz1RT/e15NGvsbPP9VSjdQVlsE/2LoPMwLTfD/U
-    +wQrF7IQP6aKY3Lc//Q/zQFaAA69qT+4Rrgot4/8P8jynnS82Ey/nP2U9Wg1Vj+9sDYbFcUY
-    P4hK+zwAbv4/1HzUh2soqj/D6wM3SdjbP7XPnkuMip4/xNfwCK9VJj+1oylZVifyAAAADgAA
-    ACBAjGtR4vOUSECNSIXMZdptQI1OzHDwC35AjCl3nEfMhECMV+BQfP7MQItWfnwINa1Aipoa
-    +DhXG0CJ4jnKlWa8QIp1d1J1ZG1AjUahmmkFkkCL5UfgNu4DQI2abt7PHcNAi6rgACII5UCH
-    F8533XLIQIiixsC750JAiRWGP5WHBUBkAZfpVM1eQHdzaroEDUlAhbbxJuuvP0B2G3GedgHa
+    AAAADgAAACA/qQv9LeIWWL+uWWyqUBB0P7GREVrajQO/qkhYWDHdp7+9tsdoV7yPv7fBKJv1
+    4XY/t7YcNUPKlD/aTE3zYulDP6G3mVtQig2/vp1jR9XX0b+gLW87Hw2jv7FTEHhMvT2/sxFL
+    k3EU8D+5A6XuSlkgP9idj04idtI/0bL+IIz1NT/e15NGvsZcP9VSjdQVlrw/2LoPMwLTbj/U
+    +wQrF7IVP6aKY3LdABo/zQFaAA69lT+4Rrgot4+MP8jynnS82GS/nP2U9Wg06j+9sDYbFcUW
+    P4hK+zwAbhI/1HzUh2soVD/D6wM3SdkcP7XPnkuMis0/xNfwCK9U2T+1oylZVifvAAAADgAA
+    ACBAjGtR4vOUSECNSIXMZdptQI1OzHDwC35AjCl3nEfMgkCMV+BQfP7EQItWfnwINa1Aipoa
+    +DhXG0CJ4jnKlWa8QIp1d1J1ZHBAjUahmmkFkkCL5UfgNu4DQI2abt7PHcNAi6rgACII5UCH
+    F8533XLHQIiixsC750RAiRWGP5WHBUBkAZfpVM1YQHdzaroEDU5AhbbxJuuvP0B2G3GedgHa
     QIurDxL70Z1AimK5GPqZQ0CKb2AkeCpMQIqX6UZQkVlAjPSaSuWUT0CGV1Ue/HaIQIgmSdq8
-    SDtAhKKy2922NECIf2FmaiksQIrvm5gPU99AioGA+OUNHUCNhdwLjHhJAAAADgAAACA/7iIj
-    iprtpD/uhgpFxKUzP+9C2+HteTo/7WeuRVdLxz/thpx9PZyZP+x8NVYA5gc/7bgj+feDuD/t
-    VJY/ijqGP+vk6tOVjqc/7oFMRnLCAz/tFUhNscONP+71S5Zdfro/7NXCRLX3pz/oSat+OPKu
-    P+rzPc6+kK4/7rBYQ1wZPj/h+KYWRg06P/BP+OJ7Frg/6GyNUDLmvD/v8/5XaLpuP+3Om11x
+    SDtAhKKy2922MkCIf2FmaiksQIrvm5gPU99AioGA+OUNHUCNhdwLjHhJAAAADgAAACA/7iIj
+    iprtpD/uhgpFxKUzP+9C2+HteTo/7WeuRVdLxz/thpx9PZyRP+x8NVYA5gc/7bgj+feDuD/t
+    VJY/ijqGP+vk6tOVjqo/7oFMRnLCAz/tFUhNscONP+71S5Zdfro/7NXCRLX3pz/oSat+OPKu
+    P+rzPc6+kK4/7rBYQ1wZPj/h+KYWRg06P/BP+OJ7Frs/6GyNUDLmvD/v8/5XaLpuP+3Om11x
     ZMc/7CaGH4VipT/tF1NNe4n9P+1mF6TWIdc/7s96p0MNpT/ndID3UIhJP+lAQASVPCI/5qSP
-    laOkhD/s9t8Xe5GFP+zJwNIA1oY/7It81CYJlj/uynC4minDAAAEAgAAAAEABAAJAAAABW5h
+    laOkgj/s9t8Xe5GFP+zJwNIA1oY/7It81CYJlj/uynC4minDAAAEAgAAAAEABAAJAAAABW5h
     bWVzAAAAEAAAAAMABAAJAAAACHBhcmV0b19rAAQACQAAAAVuX2VmZgAEAAkAAAAFcl9lZmYA
-    AAD+AAAA/gAAAA4AAAABwFTltnf8GWYAAAAOAAAAAUAKoXOoMJryAAAADgAAAAFAZOW2d/wZ
-    ZgAAAA4AAAABQBEipbqh/pIAAAAOAAAAAT/ybwOiT5GIAAAADgAAAAFAISKluqH+kgAABAIA
+    AAD+AAAA/gAAAA4AAAABwFTltnf8GWgAAAAOAAAAAUAKoXOoMJsAAAAADgAAAAFAZOW2d/wZ
+    aAAAAA4AAAABQBEipbqh/pAAAAAOAAAAAT/ybwOiT5GDAAAADgAAAAFAISKluqH+kAAABAIA
     AAP/AAAAEAAAAAoABAAJAAAACWVzdGltYXRlcwAEAAkAAAAJcG9pbnR3aXNlAAQACQAAAAtk
     aWFnbm9zdGljcwAEAAkAAAALcHNpc19vYmplY3QABAAJAAAACGVscGRfbG9vAAQACQAAAAVw
     X2xvbwAEAAkAAAAFbG9vaWMABAAJAAAAC3NlX2VscGRfbG9vAAQACQAAAAhzZV9wX2xvbwAE
@@ -58,7 +58,7 @@
 
 ---
 
-    WAoAAAACAAQFAAACAwAAAAMTAAAACAAAAg4AAAAGwFTh8N3JQlhACijAYdW5U0Bk4fDdyUJY
+    WAoAAAACAAQDAwACAwAAAAMTAAAACAAAAg4AAAAGwFTh8N3JQlhACijAYdW5U0Bk4fDdyUJY
     QBEIPbMRcF8/8f1l7HLzXkAhCD2zEXBfAAAEAgAAAAEABAAJAAAAA2RpbQAAAA0AAAACAAAA
     AwAAAAIAAAQCAAAAAQAEAAkAAAAIZGltbmFtZXMAAAATAAAAAgAAABAAAAADAAQACQAAAAll
     bHBkX3dhaWMABAAJAAAABnBfd2FpYwAEAAkAAAAEd2FpYwAAABAAAAACAAQACQAAAAhFc3Rp
@@ -88,7 +88,7 @@
 
 ---
 
-    WAoAAAACAAQFAAACAwAAAAMTAAAAAgAAAg4AAAAEwFQQqtq6lJBAZBCq2rqUkEAJ4d/NRDUI
+    WAoAAAACAAQDAwACAwAAAAMTAAAAAgAAAg4AAAAEwFQQqtq6lJBAZBCq2rqUkEAJ4d/NRDUI
     QBnh381ENQgAAAQCAAAAAQAEAAkAAAADZGltAAAADQAAAAIAAAACAAAAAgAABAIAAAABAAQA
     CQAAAAhkaW1uYW1lcwAAABMAAAACAAAAEAAAAAIABAAJAAAABGVscGQABAAJAAAAAmljAAAA
     EAAAAAIABAAJAAAACEVzdGltYXRlAAQACQAAAAJTRQAAAP4AAAIOAAAAQMACuOcc0X3kwADs
diff --git a/tests/testthat/_snaps/print.md b/tests/testthat/_snaps/print.md
new file mode 100644
index 00000000..2c3cc8f5
--- /dev/null
+++ b/tests/testthat/_snaps/print.md
@@ -0,0 +1,288 @@
+# loo_pred_measure print snapshots
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    r2_loo        0.2    0.8
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    r2_loo        0.2    0.8
+    rmse_loo     44.6    4.6
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    r2_loo        0.2    0.8
+    rmse_loo     44.6    4.6
+    mse_loo    1991.3  413.8
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    r2_loo        0.2    0.8
+    rmse_loo     44.6    4.6
+    mse_loo    1991.3  413.8
+    mae_loo      24.6    2.3
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    r2_loo        0.2    0.8
+    rmse_loo     44.6    4.6
+    mse_loo    1991.3  413.8
+    mae_loo      24.6    2.3
+    rps_loo      22.0    2.3
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+              Estimate     SE
+    elpd_loo   -5440.7  690.2
+    p_loo        225.4   45.5
+    looic      10881.4 1380.3
+    r2_loo         0.2    0.8
+    rmse_loo      44.6    4.6
+    mse_loo     1991.3  413.8
+    mae_loo       24.6    2.3
+    rps_loo       22.0    2.3
+    crps_loo     21.3    2.2
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+              Estimate     SE
+    elpd_loo   -5440.7  690.2
+    p_loo        225.4   45.5
+    looic      10881.4 1380.3
+    r2_loo         0.2    0.8
+    rmse_loo      44.6    4.6
+    mse_loo     1991.3  413.8
+    mae_loo       24.6    2.3
+    rps_loo       22.0    2.3
+    crps_loo     21.3    2.2
+    srps_loo      -5.6    0.6
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+               Estimate     SE
+    elpd_loo    -5440.7  690.2
+    p_loo         225.4   45.5
+    looic       10881.4 1380.3
+    r2_loo          0.2    0.8
+    rmse_loo       44.6    4.6
+    mse_loo      1991.3  413.8
+    mae_loo        24.6    2.3
+    rps_loo        22.0    2.3
+    crps_loo      21.3    2.2
+    srps_loo       -5.6    0.6
+    scrps_loo     -4.5    0.3
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+               Estimate     SE
+    elpd_loo    -5440.7  690.2
+    p_loo         225.4   45.5
+    looic       10881.4 1380.3
+    r2_loo          0.2    0.8
+    rmse_loo       44.6    4.6
+    mse_loo      1991.3  413.8
+    mae_loo        24.6    2.3
+    rps_loo        22.0    2.3
+    crps_loo      21.3    2.2
+    srps_loo       -5.6    0.6
+    scrps_loo     -4.5    0.3
+    mlpd_loo      -20.8    2.6
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    mlpd_loo    -20.8    2.6
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    mlpd_loo    -20.8    2.6
+    mae_loo      24.6    2.3
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    mlpd_loo    -20.8    2.6
+    mae_loo      24.6    2.3
+    srps_loo     -5.6    0.6
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    mlpd_loo    -20.8    2.6
+    mae_loo      24.6    2.3
+    srps_loo     -5.6    0.6
+    r2_loo        0.2    0.8
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+             Estimate     SE
+    elpd_loo  -5440.7  690.2
+    p_loo       225.4   45.5
+    looic     10881.4 1380.3
+    mlpd_loo    -20.8    2.6
+    mae_loo      24.6    2.3
+    srps_loo     -5.6    0.6
+    r2_loo        0.2    0.8
+    rmse_loo     44.6    4.6
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+              Estimate     SE
+    elpd_loo   -5440.7  690.2
+    p_loo        225.4   45.5
+    looic      10881.4 1380.3
+    mlpd_loo     -20.8    2.6
+    mae_loo       24.6    2.3
+    srps_loo      -5.6    0.6
+    r2_loo         0.2    0.8
+    rmse_loo      44.6    4.6
+    crps_loo     21.3    2.2
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+              Estimate     SE
+    elpd_loo   -5440.7  690.2
+    p_loo        225.4   45.5
+    looic      10881.4 1380.3
+    mlpd_loo     -20.8    2.6
+    mae_loo       24.6    2.3
+    srps_loo      -5.6    0.6
+    r2_loo         0.2    0.8
+    rmse_loo      44.6    4.6
+    crps_loo     21.3    2.2
+    mse_loo     1991.3  413.8
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+               Estimate     SE
+    elpd_loo    -5440.7  690.2
+    p_loo         225.4   45.5
+    looic       10881.4 1380.3
+    mlpd_loo      -20.8    2.6
+    mae_loo        24.6    2.3
+    srps_loo       -5.6    0.6
+    r2_loo          0.2    0.8
+    rmse_loo       44.6    4.6
+    crps_loo      21.3    2.2
+    mse_loo      1991.3  413.8
+    scrps_loo     -4.5    0.3
+
+---
+
+    
+    Computed from 400 posterior draws and 262 observations.
+    Data source: loo
+    
+               Estimate     SE
+    elpd_loo    -5440.7  690.2
+    p_loo         225.4   45.5
+    looic       10881.4 1380.3
+    mlpd_loo      -20.8    2.6
+    mae_loo        24.6    2.3
+    srps_loo       -5.6    0.6
+    r2_loo          0.2    0.8
+    rmse_loo       44.6    4.6
+    crps_loo      21.3    2.2
+    mse_loo      1991.3  413.8
+    scrps_loo     -4.5    0.3
+    rps_loo        22.0    2.3
+
diff --git a/tests/testthat/data-for-tests/generate-data.R b/tests/testthat/data-for-tests/generate-data.R
new file mode 100644
index 00000000..16fd3e0b
--- /dev/null
+++ b/tests/testthat/data-for-tests/generate-data.R
@@ -0,0 +1,242 @@
+# this file includes a data-generation pipeline for test data specifically
+# used in the testthat tests for pred_measure* files and for
+# vignettes in articles-online-only
+library(rstanarm)
+SEED <- 42
+
+postprocess_res <- function(model, fit, chains = 2, draws = 200) {
+  ypred <- brms::posterior_predict(fit)
+  mupred <- brms::posterior_epred(fit)
+  ylp <- log_lik(fit)
+  log_ratios <- -1 * ylp
+  r_eff <- relative_eff(exp(-log_ratios), chain_id = rep(1:chains, each = draws))
+  psis_object <- psis(log_ratios, r_eff = r_eff, cores = 2)
+  kfold2 <- brms::kfold(fit, save_fits = FALSE)
+  if (model %in% c("roaches", "binary", "binomial", "sleep")) {
+    kfold <- brms::kfold(fit, save_fits = TRUE)
+    mupred_kfold <- brms::kfold_predict(kfold, method = "fitted")$yrep
+    ypred_kfold <- brms::kfold_predict(kfold, method = "predict")$yrep
+    loo <- brms::loo(fit, save_psis = TRUE)
+    mupred_loo <- loo::E_loo(ypred, psis_object, type = "mean")$value
+    predperf <- insample_pred_measure(y = fit$data$y, mupred = mupred, 
+      measure = "r2", ylp = ylp)
+  }
+  
+  if (model == "roaches") {
+    list(
+      y = fit$data$y,
+      ypred = ypred,
+      mupred = mupred,
+      ylp = ylp,
+      log_weights = psis_object$log_weights,
+      kfold = kfold2,
+      loo = loo,
+      predperf = predperf
+    )
+  } else if (model == "binomial") {
+    list(
+      y = fit$data$y,
+      ypred = ypred,
+      log_weights = psis_object$log_weights,
+      ypred_kfold = ypred_kfold
+    )
+  } else if (model == "binary") {
+    list(
+      y = fit$data$y,
+      ypred = ypred,
+      log_weights = psis_object$log_weights,
+      ypred_kfold = ypred_kfold
+    )
+  } else if (model == "categorical") {
+    list(
+      y = fit$data$y,
+      mupred = mupred,
+      log_weights = psis_object$log_weights
+    )
+  } else if (model == "sleep") {
+    list(
+      y = fit$data$y,
+      ypred = ypred,
+      log_weights = psis_object$log_weights,
+      ypred_kfold = ypred_kfold,
+      mupred = mupred,
+      mupred_loo = mupred_loo,
+      mupred_kfold = mupred_kfold
+    )
+  }
+}
+
+get_binary_res <- function() {
+  set.seed(SEED)
+  df_binary <- data.frame(y = rbinom(50, 1, 0.3))
+
+  fit_binary <- brms::brm(formula = "y ~ 1",
+    data = df_binary,
+    family = bernoulli,
+    chains = 2,
+    iter = 400,
+    seed = SEED,
+    refresh = 0
+  )
+  list(
+    fit = fit_binary,
+    res = postprocess_res("binary", fit_binary)
+  )
+}
+
+get_roaches_res <- function() {
+  data(roaches, package = "rstanarm")
+  roaches$sqrt_roach1 <- sqrt(roaches$roach1)
+  
+  fit_roaches <- brm(
+    y ~ sqrt_roach1 + treatment + senior + offset(log(exposure2)),
+    data = roaches,
+    family = poisson,
+    prior = prior(normal(0, 1), class = b),
+    chains = 2,
+    iter = 400,
+    refresh = 0,
+    seed = SEED
+  )
+  list(
+    fit = fit_roaches,
+    res = postprocess_res("roaches", fit_roaches)
+  )
+}
+
+get_sleep_test_train_res <- function() {
+  # specifically for testing test_pred_measure
+  data("sleepstudy")
+  conditions <- make_conditions(sleepstudy, "Subject", incl_vars = FALSE)
+  sleepstudy <- sleepstudy |>
+    filter(Days >= 2) |>
+    mutate(
+      Days = Days - 2,
+      y = Reaction
+    )
+  
+  test_subjects <- sample(unique(sleepstudy$Subject), size = 5)
+  train_data <- sleepstudy |>
+    dplyr::filter(!Subject %in% test_subjects)
+  test_data <- sleepstudy |>
+    dplyr::filter(Subject %in% test_subjects)
+
+  prior_lin_base <- prior(normal(200, 100), class = b, coef = "Intercept") +
+    prior(normal(0, 20), class = b, coef = "Days") +
+    prior(exponential(0.02), class = sigma)
+
+  fit_sleep_train <- brm(
+    y ~ 0 + Intercept + Days, 
+    data = train_data,
+    family = gaussian(),
+    prior = prior_lin_base,
+    chains = 2, 
+    iter = 400,
+    seed = SEED
+  )
+  
+  list(
+    fit = fit_sleep_train,
+    res = list(
+      y_test = test_data$y,
+      ypred_test = brms::posterior_predict(fit_sleep_train, newdata = test_data),
+      mupred_test = brms::posterior_epred(fit_sleep_train, newdata = test_data),
+      ylp_test = brms::log_lik(fit_sleep_train, newdata = test_data),
+      ylp_train = brms::log_lik(fit_sleep_train)
+    )
+  )
+}
+
+get_sleep_res <- function() {
+  data("sleepstudy")
+  conditions <- make_conditions(sleepstudy, "Subject", incl_vars = FALSE)
+  sleepstudy <- sleepstudy |>
+    dplyr::filter(Days >= 2) |>
+    dplyr::mutate(Days = Days - 2, y = Reaction)
+
+  prior_lin_base <- prior(normal(200, 100), class = b, coef = "Intercept") +
+    prior(normal(0, 20), class = b, coef = "Days") +
+    prior(exponential(0.02), class = sigma)
+
+  fit_sleepstudy <- brm(
+    y ~ 0 + Intercept + Days, 
+    data = sleepstudy,
+    family = gaussian(),
+    prior = prior_lin_base,
+    chains = 2,
+    iter = 400,
+    refresh = 0,
+    seed = SEED
+  )
+  list(
+    fit = fit_sleepstudy,
+    res = postprocess_res("sleep", fit_sleepstudy)
+  )
+}
+
+get_penguins_res <- function() {
+  data("penguins", package = "palmerpenguins")
+  penguins <- subset(penguins, complete.cases(penguins))
+  penguins$y <- penguins$species
+  
+  fit <- brm(
+    y ~ bill_length_mm + bill_depth_mm,
+    data = penguins,
+    family = categorical(),
+    chains = 2,
+    iter = 400,
+    cores = 2,
+    seed = SEED
+  )
+  list(
+    fit = fit,
+    res = postprocess_res("categorical", fit)
+  )
+}
+
+get_binomial_res <- function() {
+  set.seed(SEED)
+  df <- data.frame(
+    y = rbinom(50, 10, 0.3), 
+    n = 10
+  )
+
+  fit <- brms::brm(formula = "y | trials(n) ~ 1",
+    data = df,
+    family = binomial,
+    chains = 2,
+    iter = 400,
+    seed = SEED,
+    refresh = 0
+  )
+  list(
+    fit = fit,
+    res = postprocess_res("binomial", fit)
+  )
+}
+
+full_roaches <- get_roaches_res()     
+full_binary <- get_binary_res()
+full_penguins <- get_penguins_res()
+full_binomial <- get_binomial_res() 
+full_sleep <- get_sleep_res() 
+full_sleep_test <- get_sleep_test_train_res()
+
+# for tests --------------------------------------------------
+
+test_path <- "tests/testthat/data-for-tests/"
+saveRDS(full_roaches$res, paste0(test_path, "res_roaches.Rds"))
+saveRDS(full_binary$res, paste0(test_path, "res_binary.Rds"))
+saveRDS(full_penguins$res, paste0(test_path, "res_penguins.Rds"))
+saveRDS(full_binomial$res, paste0(test_path, "res_binomial.Rds"))
+saveRDS(full_sleep$res, paste0(test_path, "res_sleep.Rds"))
+saveRDS(full_sleep_test$res, paste0(test_path, "res_sleep_test.Rds"))
+
+# for vignettes --------------------------------------------------
+
+vignette_path <- "vignettes/articles-online-only/data-for-vignettes/"
+saveRDS(full_roaches$fit, paste0(vignette_path, "fit_roaches.Rds"))
+saveRDS(full_binary$fit, paste0(vignette_path, "fit_binary.Rds"))
+saveRDS(full_penguins$fit, paste0(vignette_path, "fit_penguins.Rds"))
+saveRDS(full_binomial$fit, paste0(vignette_path, "fit_binomial.Rds"))
+saveRDS(full_sleep$fit, paste0(vignette_path, "fit_sleep.Rds"))
\ No newline at end of file
diff --git a/tests/testthat/data-for-tests/res_binary.Rds b/tests/testthat/data-for-tests/res_binary.Rds
new file mode 100644
index 00000000..0f55a5ed
Binary files /dev/null and b/tests/testthat/data-for-tests/res_binary.Rds differ
diff --git a/tests/testthat/data-for-tests/res_binomial.Rds b/tests/testthat/data-for-tests/res_binomial.Rds
new file mode 100644
index 00000000..d90c6c81
Binary files /dev/null and b/tests/testthat/data-for-tests/res_binomial.Rds differ
diff --git a/tests/testthat/data-for-tests/res_penguins.Rds b/tests/testthat/data-for-tests/res_penguins.Rds
new file mode 100644
index 00000000..d0b8e299
Binary files /dev/null and b/tests/testthat/data-for-tests/res_penguins.Rds differ
diff --git a/tests/testthat/data-for-tests/res_roaches.Rds b/tests/testthat/data-for-tests/res_roaches.Rds
new file mode 100644
index 00000000..869fab16
Binary files /dev/null and b/tests/testthat/data-for-tests/res_roaches.Rds differ
diff --git a/tests/testthat/data-for-tests/res_sleep.Rds b/tests/testthat/data-for-tests/res_sleep.Rds
new file mode 100644
index 00000000..b987ee69
Binary files /dev/null and b/tests/testthat/data-for-tests/res_sleep.Rds differ
diff --git a/tests/testthat/data-for-tests/res_sleep_test.Rds b/tests/testthat/data-for-tests/res_sleep_test.Rds
new file mode 100644
index 00000000..55fd89a5
Binary files /dev/null and b/tests/testthat/data-for-tests/res_sleep_test.Rds differ
diff --git a/tests/testthat/test_crps.R b/tests/testthat/test_crps.R
deleted file mode 100644
index ce523476..00000000
--- a/tests/testthat/test_crps.R
+++ /dev/null
@@ -1,58 +0,0 @@
-set.seed(123456789)
-n <- 10
-S <- 100
-y <- rnorm(n)
-x1 <- matrix(rnorm(n * S), nrow = S)
-x2 <- matrix(rnorm(n * S), nrow = S)
-ll <- matrix(rnorm(n * S) * 0.1 - 1, nrow = S)
-
-with_seed <- function(seed, code) {
-  code <- substitute(code)
-  orig.seed <- .Random.seed
-  on.exit(.Random.seed <<- orig.seed)
-  set.seed(seed)
-  eval.parent(code)
-}
-
-test_that("crps computation is correct", {
-  expect_equal(.crps_fun(2.0, 1.0), 0.0)
-  expect_equal(.crps_fun(1.0, 2.0), -1.5)
-  expect_equal(.crps_fun(pi, pi^2), 0.5 * pi - pi^2)
-
-  expect_equal(.crps_fun(1.0, 0.0, scale = TRUE), 0.0)
-  expect_equal(.crps_fun(1.0, 2.0, scale = TRUE), -2.0)
-  expect_equal(.crps_fun(pi, pi^2, scale = TRUE), -pi^2/pi - 0.5 * log(pi))
-})
-
-test_that("crps matches snapshots", {
-  expect_snapshot_value(with_seed(1, crps(x1, x2, y)), style = "serialize")
-  expect_snapshot_value(with_seed(1, scrps(x1, x2, y)), style = "serialize")
-  expect_snapshot_value(with_seed(1, loo_crps(x1, x2, y, ll)), style = "serialize")
-  expect_snapshot_value(with_seed(1, loo_scrps(x1, x2, y, ll)), style = "serialize")
-})
-
-test_that("input validation throws correct errors", {
-  expect_error(validate_crps_input(as.character(x1), x2, y),
-               "is.numeric(x) is not TRUE",
-               fixed = TRUE)
-  expect_error(validate_crps_input(x1, as.character(x2), y),
-               "is.numeric(x2) is not TRUE",
-               fixed = TRUE)
-  expect_error(validate_crps_input(x1, x2, c('a', 'b')),
-               "is.numeric(y) is not TRUE",
-               fixed = TRUE)
-  expect_error(validate_crps_input(x1, t(x2), y),
-               "identical(dim(x), dim(x2)) is not TRUE",
-               fixed = TRUE)
-  expect_error(validate_crps_input(x1, x2, c(1, 2)),
-               "ncol(x) == length(y) is not TRUE",
-               fixed = TRUE)
-  expect_error(validate_crps_input(x1, x2, y, t(ll)),
-               "ifelse(is.null(log_lik), TRUE, identical(dim(log_lik), dim(x))) is not TRUE",
-               fixed = TRUE)
-})
-
-test_that("methods for single data point don't error", {
-  expect_silent(crps(x1[,1], x2[,1], y[1]))
-  expect_silent(scrps(x1[,1], x2[,1], y[1]))
-})
diff --git a/tests/testthat/test_loo_and_waic.R b/tests/testthat/test_loo_and_waic.R
index 5cdc823e..232f74c6 100644
--- a/tests/testthat/test_loo_and_waic.R
+++ b/tests/testthat/test_loo_and_waic.R
@@ -10,7 +10,7 @@ r_eff_mat <- relative_eff(exp(LLmat), chain_id = chain_id)
 
 loo1 <- suppressWarnings(loo(LLarr, r_eff = r_eff_arr))
 waic1 <- suppressWarnings(waic(LLarr))
-elpd1 <- suppressWarnings(elpd(LLarr))
+elpd1 <- suppressWarnings(elpd(llarray_to_matrix(LLarr)))
 
 test_that("using loo.cores is deprecated", {
   options(mc.cores = NULL)
@@ -135,8 +135,8 @@ test_that("waic.array and waic.matrix give same result", {
 })
 
 test_that("elpd.array and elpd.matrix give same result", {
-  elpd2 <- suppressWarnings(elpd(LLmat))
-  expect_identical(elpd1, elpd2)
+  elpd <- suppressWarnings(elpd(LLmat))
+  expect_identical(elpd1, elpd)
 })
 
 test_that("loo, waic, and elpd error with vector input", {
diff --git a/tests/testthat/test_measures.R b/tests/testthat/test_measures.R
new file mode 100644
index 00000000..36406dd3
--- /dev/null
+++ b/tests/testthat/test_measures.R
@@ -0,0 +1,307 @@
+# load test data --------------------------------------
+res_roaches <- readRDS("data-for-tests/res_roaches.Rds")
+res_sleep <- readRDS("data-for-tests/res_sleep.Rds")
+res_binom <- readRDS("data-for-tests/res_binomial.Rds")
+res_binary <- readRDS("data-for-tests/res_binary.Rds")
+res_cat <- readRDS("data-for-tests/res_penguins.Rds")
+
+# ptw_log_pred_density ------------------------
+testthat::test_that("ptw_log_pred_density() works as expected", {
+  res <- ptw_log_pred_density(ylp = res_roaches$ylp, psis_log_weights = NULL)
+
+  expect_equal(length(res), dim(res_roaches$ylp)[2])
+  expect_equal(res, matrixStats::colLogSumExps(res_roaches$ylp) - log(dim(res_roaches$ylp)[1]))
+})
+
+testthat::test_that("ptw_log_pred_density() with psis_log_weights works as expected", {
+  norm_log_weights <- .normalize_log_weights(res_roaches$log_weights)
+  res <- ptw_log_pred_density(
+    ylp = res_roaches$ylp,
+    psis_log_weights = norm_log_weights
+  )
+
+  expect_equal(length(res), dim(res_roaches$ylp)[2])
+  expect_equal(res, matrixStats::colLogSumExps(res_roaches$ylp + norm_log_weights)
+  )
+})
+
+testthat::test_that("ptw_log_pred_density() returns error when weights are not normalized", {
+  expect_error(
+    ptw_log_pred_density(
+      ylp = res_roaches$ylp,
+      psis_log_weights = res_roaches$log_weights
+    ),
+    regexp = "Range of current column sums"
+  )
+})
+
+# elpd() -----------------------------------
+
+testthat::test_that("elpd() works as expected", {
+  res <- elpd(ylp = res_roaches$ylp, log_weights = NULL)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates[1]), 1)
+  expect_equal(length(res$estimates[2]), 1)
+  expect_equal(length(res$pointwise), dim(res_roaches$ylp)[2])
+})
+
+testthat::test_that("elpd() with unnormalized log-weights works as expected", {
+  log_weights <- res_roaches$log_weights
+  res <- elpd(ylp = res_roaches$ylp, log_weights = log_weights)
+
+  expect_false(all(.normalize_log_weights(log_weights) == log_weights))
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates[1]), 1)
+  expect_equal(length(res$estimates[2]), 1)
+  expect_equal(length(res$pointwise), dim(res_roaches$ylp)[2])
+})
+
+testthat::test_that("elpd() with normalized log-weights works as expected", {
+  res <- elpd(ylp = res_roaches$ylp, log_weights = res_roaches$predperf_loo$log_weights)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates[1]), 1)
+  expect_equal(length(res$estimates[2]), 1)
+  expect_equal(length(res$pointwise), dim(res_roaches$ylp)[2])
+})
+
+# mlpd() -----------------------------------
+
+testthat::test_that("mlpd() works as expected", {
+  res <- mlpd(ylp = res_roaches$ylp, log_weights = NULL)
+  res_elpd <- elpd(ylp = res_roaches$ylp, log_weights = NULL)
+  n_obs <- dim(res_roaches$ylp)[2]
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates[1]), 1)
+  expect_equal(length(res$estimates[2]), 1)
+  expect_equal(length(res$pointwise), n_obs)
+  expect_equal(res$estimates, res_elpd$estimates / n_obs)
+  expect_equal(res$pointwise, res_elpd$pointwise)
+})
+
+# rps() -------------------------------------
+
+testthat::test_that("rps() with ordered categorial data works as expected", {
+  res <- rps(
+    y = res_binom$y,
+    ypred = res_binom$ypred
+  )
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_binom$y))
+})
+
+testthat::test_that("rps() scaled version with categorical data works as expected", {
+  res <- rps(
+    y = res_binom$y,
+    ypred = res_binom$ypred,
+    scaled = TRUE
+  )
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_binom$y))
+  expect_true(all(res$pointwise < 0))
+})
+
+testthat::test_that("rps() for categorical data with log-weights works as expected", {
+  res <- rps(
+    y = res_binom$y,
+    ypred = res_binom$ypred,
+    log_weights = res_binom$log_weights
+  )
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_binary$y))
+  expect_true(all(res$pointwise >= 0))
+})
+
+testthat::test_that("rps() with continuous data works as expected", {
+  res <- rps(res_sleep$y, res_sleep$ypred)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_sleep$y))
+})
+
+testthat::test_that("rps() with continuous data and log-weights works as expected", {
+  res <- rps(
+    res_sleep$y, 
+    res_sleep$ypred, 
+    log_weights = res_sleep$log_weights
+  )
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_sleep$y))
+})
+
+testthat::test_that("rps() with continuous data and scaled version works as expected", {
+  res <- rps(res_sleep$y, res_sleep$ypred, scaled = TRUE)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_sleep$y))
+  expect_true(all(res$pointwise < 0))
+})
+
+
+# brier() ---------------------------------------
+
+testthat::test_that("brier() works as expected", {
+  res_brier <- brier(y = res_binary$y, ypred = res_binary$ypred, log_weights = NULL)
+
+  expect_equal(names(res_brier), c("estimates", "pointwise"))
+  expect_equal(length(res_brier$estimates), 2)
+  expect_equal(length(res_brier$pointwise), length(res_binary$y))
+  expect_true(all(res_brier$pointwise >= 0 & res_brier$pointwise <= 1))
+})
+
+testthat::test_that("brier() with log-weights works as expected", {
+  res_brier <- brier(
+    y = res_binary$y,
+    ypred = res_binary$ypred,
+    log_weights = res_binary$log_weights
+  )
+
+  expect_equal(names(res_brier), c("estimates", "pointwise"))
+  expect_equal(length(res_brier$estimates), 2)
+  expect_equal(length(res_brier$pointwise), length(res_binary$y))
+  expect_true(all(res_brier$pointwise >= 0 & res_brier$pointwise <= 1))
+
+  res_brier2 <- brier(
+    y = res_binary$y,
+    ypred = res_binary$ypred,
+    log_weights = .normalize_log_weights(res_binary$log_weights)
+  )
+  expect_equal(res_brier2$pointwise, res_brier$pointwise)
+})
+
+testthat::test_that("brier() errors when y not binary", {
+  expect_error(
+    brier(
+      y = res_binom$y,
+      ypred = res_binary$ypred,
+      log_weights = res_binary$log_weights
+    ),
+    regexp = "The brier score expects binary data 'y'."
+  )
+})
+
+# mae() ------------------------------------------------
+testthat::test_that("mae() works as expected", {
+  res <- mae(y = res_roaches$y, mupred = res_roaches$mupred,
+    log_weights = NULL)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_roaches$y))
+})
+
+testthat::test_that("mae() with log_weights works as expected", {
+  res <- mae(y = res_roaches$y, mupred = res_roaches$mupred,
+    log_weights = res_roaches$loo1$psis_object$log_weights)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_roaches$y))
+})
+
+# rmse() / mse() -----------------------------------------
+testthat::test_that("mse() and rmse() work as expected", {
+  res_mse <- mse(y = res_roaches$y, mupred = res_roaches$mupred,
+    log_weights = NULL)
+
+  res_rmse <- rmse(y = res_roaches$y, mupred = res_roaches$mupred,
+    log_weights = NULL)
+
+  expect_equal(names(res_mse), c("estimates", "pointwise"))
+  expect_equal(length(res_mse$estimates), 2)
+  expect_equal(length(res_mse$pointwise), length(res_roaches$y))
+
+  expect_equal(sqrt(abs(res_mse$estimates[1])), res_rmse$estimates[1])
+  expect_equal(res_mse$estimates[2]/(2*sqrt(abs(res_mse$estimates[1]))), res_rmse$estimates[2])
+})
+
+testthat::test_that("rmse() works with se=0", {
+  mupred0 <- t(replicate(4000, res_roaches$y))
+
+  res_rmse <- rmse(y = res_roaches$y, mupred = mupred0,
+    log_weights = NULL)
+
+  expect_equal(names(res_rmse), c("estimates", "pointwise"))
+  expect_equal(length(res_rmse$estimates), 2)
+  expect_equal(length(res_rmse$pointwise), length(res_roaches$y))
+  expect_equal(unname(res_rmse$estimates[2]), 0)
+})
+
+# r2() ---------------------------------------------------
+testthat::test_that("r2() works as expected", {
+  res <- r2(y = res_roaches$y, mupred = res_roaches$mupred, log_weights = NULL)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_roaches$y))
+  expect_true(all(res$estimates[1] >= 0 & res$estimates[1] <= 1))
+})
+
+testthat::test_that("r2() with log_weights works as expected", {
+  res <- r2(y = res_roaches$y, mupred = res_roaches$mupred,
+    log_weights = res_roaches$loo1$psis_object$log_weights)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_roaches$y))
+  expect_true(all(res$estimates[1] >= 0 & res$estimates[1] <= 1))
+})
+
+# acc() / bacc() -----------------------------------------------------
+testthat::skip_if_not_installed("brms")
+
+testthat::test_that("acc() works as expected", {
+  res <- acc(y = as.integer(res_cat$y), mupred = res_cat$mupred, log_weights = NULL)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_cat$y))
+  expect_true(all(res$pointwise >= 0 & res$pointwise <= 1))
+})
+
+testthat::test_that("acc() with log-weights works as expected", {
+  res <- acc(
+    y = as.integer(res_cat$y),
+    mupred = res_cat$mupred,
+    log_weights = res_cat$loo$psis_object$log_weights
+  )
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_cat$y))
+  expect_true(!all(res$pointwise < 0 | res$pointwise > 1))
+})
+
+testthat::test_that("bacc() works as expected", {
+  res <- bacc(y = as.integer(res_cat$y), mupred = res_cat$mupred, log_weights = NULL)
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_cat$y))
+  expect_true(!all(res$pointwise < 0 | res$pointwise > 1))
+})
+
+testthat::test_that("bacc() with log-weights works as expected", {
+  res <- bacc(
+    y = as.integer(res_cat$y),
+    mupred = res_cat$mupred,
+    log_weights = res_cat$loo$psis_object$log_weights
+  )
+
+  expect_equal(names(res), c("estimates", "pointwise"))
+  expect_equal(length(res$estimates), 2)
+  expect_equal(length(res$pointwise), length(res_cat$y))
+  expect_true(!all(res$pointwise < 0 | res$pointwise > 1))
+})
diff --git a/tests/testthat/test_pred_measure.R b/tests/testthat/test_pred_measure.R
new file mode 100644
index 00000000..ed971fdf
--- /dev/null
+++ b/tests/testthat/test_pred_measure.R
@@ -0,0 +1,382 @@
+# load data -----------------------------
+res <- readRDS("data-for-tests/res_roaches.Rds")
+
+y <- res$y
+ypred <- res$ypred
+mupred <- res$mupred
+ylp <- res$ylp
+kfold1 <- res$kfold
+loo1 <- res$loo
+predperf <- res$predperf
+
+# unit tests ----------------------
+## .compute_measure() --------------------
+
+.builtin_entry <- function(name) {
+  list(name = name, type = "builtin", key = name)
+}
+
+test_that(".compute_measure() with r2 works as expected", {
+  measure_res <- .compute_measure(
+    y = y,
+    ypred = NULL,
+    mupred = mupred,
+    ylp = ylp,
+    measure_entry = .builtin_entry("r2"),
+    log_weights = NULL
+  )
+
+  expect_equal(names(measure_res), c("estimates", "pointwise"))
+})
+
+test_that(".compute_measure() with crps works as expected", {
+  measure_res <- .compute_measure(
+    y = y,
+    ypred = ypred,
+    mupred = NULL,
+    ylp = ylp,
+    measure_entry = .builtin_entry("crps"),
+    log_weights = NULL
+  )
+
+  expect_equal(names(measure_res), c("estimates", "pointwise"))
+  expect_equal(names(measure_res$estimates), c("Estimate", "SE"))
+})
+
+test_that(".compute_measure() with elpd works as expected", {
+  measure_res <- .compute_measure(
+    y = NULL,
+    ypred = NULL,
+    mupred = NULL,
+    ylp = ylp,
+    measure_entry = .builtin_entry("elpd"),
+    log_weights = NULL
+  )
+
+  expect_equal(names(measure_res), c("estimates", "pointwise"))
+})
+
+test_that(".compute_measure() fails if insufficient input is provided", {
+  expect_error(
+    .compute_measure(
+      y = y,
+      ypred = ypred,
+      mupred = NULL,
+      ylp = ylp,
+      measure_entry = .builtin_entry("r2"),
+      log_weights = NULL
+    ),
+    regexp = "`mupred` must be a numeric matrix."
+  )
+})
+
+## .compute_base_measure() -------------------------
+
+test_that(".compute_base_measure() returns predperf if already existent", {
+  res <- .compute_base_measure(
+    ylp = NULL,
+    ylp_test = NULL,
+    loo = NULL,
+    kfold = NULL,
+    predperf = predperf,
+    psis_object = NULL,
+    source = "insample"
+  )
+  expect_equal(res, predperf)
+})
+
+test_that(".compute_base_measure() errors if missing input", {
+  expect_error(
+    .compute_base_measure(
+      ylp = NULL,
+      ylp_test = NULL,
+      loo = NULL,
+      kfold = NULL,
+      predperf = NULL,
+      psis_object = NULL,
+      source = "insample"
+    ),
+    regexp = "`ylp` must be a numeric matrix."
+  )
+})
+
+test_that(".compute_base_measure() computes elpd as expected", {
+  base_measure <- .compute_base_measure(
+    ylp = ylp,
+    ylp_test = NULL,
+    loo = NULL,
+    kfold = NULL,
+    predperf = NULL,
+    psis_object = NULL,
+    source = "insample"
+  )
+
+  expect_equal(names(base_measure), c("estimates", "pointwise", "diagnostics"))
+  expect_null(base_measure$diagnostics)
+  expect_equal(rownames(base_measure$estimates), c("elpd", "ic"))
+  expect_equal(colnames(base_measure$estimates), c("Estimate", "SE"))
+  expect_equal(dimnames(base_measure$pointwise)[[2]], c("elpd", "ic"))
+})
+
+test_that(".compute_base_measure() computes elpd_loo as expected", {
+  base_measure <- .compute_base_measure(
+    ylp = ylp,
+    ylp_test = NULL,
+    loo = NULL,
+    kfold = NULL,
+    predperf = NULL,
+    psis_object = loo1$psis_object,
+    source = "loo"
+  )
+
+  expect_equal(
+    rownames(base_measure$estimates),
+    c("elpd_loo", "p_eff_loo", "ic_loo")
+  )
+  expect_equal(
+    dimnames(base_measure$pointwise)[[2]],
+    c("elpd_loo", "p_eff_loo", "ic_loo")
+  )
+})
+
+test_that(".compute_base_measure() computes elpd_kfold as expected", {
+  base_measure <- .compute_base_measure(
+    ylp = ylp,
+    ylp_test = NULL,
+    loo = NULL,
+    kfold = kfold1,
+    predperf = NULL,
+    psis_object = NULL,
+    source = "kfold"
+  )
+
+  expect_equal(
+    rownames(base_measure$estimates),
+    c("elpd_kfold", "p_kfold", "kfoldic")
+  )
+  expect_equal(
+    dimnames(base_measure$pointwise)[[2]],
+    c("elpd_kfold", "p_kfold", "kfoldic")
+  )
+})
+
+## .merge_matrix() ---------------------------
+
+test_that(".merge_matrix() works as expected", {
+  mat <- matrix(c(1, 2, 3, 4), ncol = 2, nrow = 2)
+  val <- c(5, 6)
+  expected_name <- "test"
+
+  res <- .merge_matrix(
+    source = "insample", mat = mat, name = expected_name,
+    values = val, margin = 1
+  )
+
+  expect_equal(rownames(res)[[3]], "test")
+  expect_equal(dim(res), c(3, 2))
+  expect_equal(colnames(res), c("Estimate", "SE"))
+
+  res <- .merge_matrix(
+    source = "insample", mat = mat, name = expected_name,
+    values = val, margin = 2
+  )
+
+  expect_equal(dim(res), c(2, 3))
+  expect_equal(colnames(res)[3], "test")
+})
+
+test_that(".merge_matrix() with mat = NULL works as expected", {
+  res <- .merge_matrix(
+    source = "insample", mat = NULL, name = "test",
+    values = c(1, 2), margin = 1
+  )
+
+  expect_equal(rownames(res), "test")
+  expect_equal(colnames(res), c("Estimate", "SE"))
+  expect_equal(dim(res), c(1, 2))
+})
+
+test_that(".merge_matrix() with loo showes correct names", {
+  res <- .merge_matrix(
+    source = "loo", mat = NULL, name = "test",
+    values = c(1, 2), margin = 1
+  )
+
+  expect_equal(rownames(res), "test_loo")
+  expect_equal(colnames(res), c("Estimate", "SE"))
+  expect_equal(dim(res), c(1, 2))
+})
+
+test_that(".merge_matrix() with kfold showes correct names", {
+  res <- .merge_matrix(
+    source = "kfold", mat = NULL, name = "test",
+    values = c(1, 2), margin = 1
+  )
+
+  expect_equal(rownames(res), "test_kfold")
+  expect_equal(colnames(res), c("Estimate", "SE"))
+  expect_equal(dim(res), c(1, 2))
+})
+
+test_that(".merge_matrix() with duplicate naming is skipped", {
+  mat <- matrix(c(1, 2, 3, 4), ncol = 2, nrow = 2)
+  rownames(mat) <- c("elpd", "mse")
+
+  res <- .merge_matrix(
+    source = "insample", mat = mat, name = "rmse",
+    values = c(5, 6), margin = 1
+  )
+
+  expect_equal(c("elpd", "mse", "rmse"), rownames(res))
+  
+  expect_warning(
+    .merge_matrix(
+      source = "insample", mat = mat, name = "elpd",
+      values = c(5, 6), margin = 1
+    ),
+    regexp = "already present in results. Skipping the update."
+  )
+})
+
+# integration tests ------------------------------
+## loo_pred_measure() / pred_measure() / kfold_pred_measure() ---------
+
+test_that("pred_measure() updates loo results as expected", {
+  predperf_loo <- loo_pred_measure(
+    loo = loo1,
+    y = y,
+    mupred = mupred,
+    ylp = ylp,
+    measure = c("r2", "mse"),
+    save_psis = TRUE
+  )
+
+  updated_predperf <- pred_measure(
+    y = y,
+    mupred = mupred,
+    predperf = predperf_loo,
+    measure = "mae"
+  )
+
+  expect_equal(
+    rownames(updated_predperf$estimates),
+    c("elpd_loo", "p_loo", "looic", "r2_loo", "mse_loo", "mae_loo")
+  )
+  expect_equal(dim(updated_predperf$estimates), c(6, 2))
+})
+
+test_that("pred_measure() provides warning for duplicate measure", {
+  predperf_loo <- loo_pred_measure(
+    loo = loo1,
+    y = y,
+    mupred = mupred,
+    ylp = ylp,
+    measure = "r2",
+    save_psis = TRUE
+  )
+
+  expect_warning(
+    pred_measure(
+      y = y,
+      mupred = mupred,
+      predperf = predperf_loo,
+      measure = "r2"
+    ),
+    regexp = "already present in results. Skipping the update."
+  )
+
+  expect_error(
+    loo_pred_measure(
+      y = y,
+      mupred = mupred,
+      ylp = ylp,
+      loo = loo1,
+      measure = c("mse", "r2", "r2")
+    ),
+    regexp = "Duplicate measure"
+  )
+})
+
+test_that("loo_pred_measure() computes expected measures", {
+  predperf1 <- loo_pred_measure(
+    loo = loo1,
+    y = y,
+    mupred = mupred,
+    ylp = ylp,
+    measure = c("r2", "mse")
+  )
+
+  expect_equal(
+    rownames(predperf1$estimates),
+    c("elpd_loo", "p_loo", "looic", "r2_loo", "mse_loo")
+  )
+  expect_equal(dim(predperf1$estimates), c(5, 2))
+})
+
+test_that("kfold_pred_measure() works with rps as expected", {
+  res <- kfold_pred_measure(
+    y = y,
+    ypred = ypred,
+    mupred = mupred,
+    ylp = ylp,
+    measure = c("rps", "srps"),
+    kfold = kfold1,
+    control = list(
+      rps = list(size = max(ypred)),
+      srps = list(size = max(ypred))
+    )
+  )
+
+  expect_equal(
+    rownames(res$estimates),
+    c("elpd_kfold", "p_kfold", "kfoldic", "rps_kfold", "srps_kfold")
+  )
+})
+
+# pred_measure() with custom function ------------------------------
+test_that("insample_pred_measure() accepts a custom measure function", {
+  set.seed(42)
+  S <- 4L
+  n <- 8L
+  y <- rnorm(n)
+  mupred <- matrix(rnorm(S * n), nrow = S, ncol = n)
+  ylp <- matrix(rnorm(S * n), nrow = S, ncol = n)
+
+  custom_rmse <- function(y, mupred, log_weights = NULL) {
+    rmse(y, mupred, log_weights = log_weights)
+  }
+  attr(custom_rmse, "measure_name") <- "custom_rmse"
+
+  res <- insample_pred_measure(
+    y = y,
+    mupred = mupred,
+    ylp = ylp,
+    measure = custom_rmse
+  )
+
+  expect_true("custom_rmse" %in% rownames(res$estimates))
+  expect_true("custom_rmse" %in% colnames(res$pointwise))
+})
+
+test_that("insample_pred_measure() accepts mixed built-in and custom measures", {
+  set.seed(1)
+  S <- 4L
+  n <- 8L
+  y <- rnorm(n)
+  mupred <- matrix(rnorm(S * n), nrow = S, ncol = n)
+  ylp <- matrix(rnorm(S * n), nrow = S, ncol = n)
+
+  custom_rmse <- function(y, mupred, log_weights = NULL) {
+    rmse(y, mupred, log_weights = log_weights)
+  }
+  attr(custom_rmse, "measure_name") <- "custom_rmse"
+
+  res <- insample_pred_measure(
+    y = y,
+    mupred = mupred,
+    ylp = ylp,
+    measure = list("r2", custom_rmse = custom_rmse)
+  )
+
+  expect_true(all(c("r2", "custom_rmse") %in% rownames(res$estimates)))
+})
\ No newline at end of file
diff --git a/tests/testthat/test_pred_measures_helpers.R b/tests/testthat/test_pred_measures_helpers.R
new file mode 100644
index 00000000..c7c54817
--- /dev/null
+++ b/tests/testthat/test_pred_measures_helpers.R
@@ -0,0 +1,113 @@
+# load data -----------------------------
+res <- readRDS("data-for-tests/res_roaches.Rds")
+
+supported_measures_list <- getFromNamespace("supported_measures_list", "loo")
+
+# helpers for tests ------------------------------------------------
+.builtin_entry <- function(name) {
+  list(name = name, type = "builtin", key = name)
+}
+
+# .normalize_measure() ----------------------------------------------
+
+test_that(".normalize_measure() handles NULL and character input", {
+  expect_equal(.normalize_measure(NULL), list())
+  entries <- .normalize_measure(c("mse", "rps"))
+  expect_length(entries, 2)
+  expect_equal(entries[[1]], .builtin_entry("mse"))
+})
+
+test_that(".normalize_measure() handles a custom function", {
+  f <- function(y, mupred) list(estimate = 1, se = 0, pointwise = y)
+  attr(f, "measure_name") <- "custom_mae"
+  entries <- .normalize_measure(f)
+  expect_length(entries, 1)
+  expect_equal(entries[[1]]$type, "custom")
+  expect_equal(entries[[1]]$name, "custom_mae")
+})
+
+test_that(".normalize_measure() handles a mixed list", {
+  f <- function(y, mupred) list(estimate = 1, se = 0, pointwise = y)
+  attr(f, "measure_name") <- "custom_mae"
+  entries <- .normalize_measure(list("r2", custom_mae = f))
+  expect_length(entries, 2)
+  expect_equal(entries[[1]]$name, "r2")
+  expect_equal(entries[[2]]$name, "custom_mae")
+})
+
+test_that(".normalize_measure() errors on duplicate names", {
+  expect_error(
+    .normalize_measure(c("mse", "mse")),
+    regexp = "Duplicate measure"
+  )
+})
+
+test_that(".normalize_measure() errors on unnamed list function", {
+  f <- function(y, mupred) list(estimate = 1, se = 0, pointwise = y)
+  expect_error(
+    .normalize_measure(list(f)),
+    regexp = "must be named"
+  )
+})
+
+# .prepare_measures() -----------------------------------------------
+
+test_that(".prepare_measures() errors on invalid built-in names", {
+  expect_error(
+    .prepare_measures("pps", res$predperf, supported_measures_list),
+    regexp = "Invalid measure"
+  )
+})
+
+test_that(".prepare_measures() filters measures already in predperf", {  
+  expect_warning(
+    .prepare_measures(c("mse", "elpd"), 
+    predperf = res$predperf, supported_measures_list),
+    regexp = "already present in"
+  )
+
+  expect_warning(
+    .prepare_measures(c("mse", "elpd", "r2"), 
+    predperf = res$predperf, supported_measures_list),
+    regexp = "already present in"
+  )
+
+  entries <- .prepare_measures(
+    c("mse", "rps"),
+    predperf = res$predperf,
+    supported_measures_list
+  )
+  expect_equal(vapply(entries, `[[`, "", "name"), c("mse", "rps"))
+
+  entries <- .prepare_measures(
+    c("mse"),
+    predperf = res$predperf,
+    supported_measures_list
+  )
+  expect_equal(vapply(entries, `[[`, "", "name"), "mse")
+})
+
+# .validate_measure_result() ----------------------------------------
+
+test_that(".validate_measure_result() accepts standard and CRPS-style output", {
+  res_std <- list(estimate = 1, se = 0.1, pointwise = c(1, 2))
+  expect_invisible(.validate_measure_result(res_std, "m", n_obs = 2))
+
+  res_crps <- list(estimates = c(1, 0.1), pointwise = c(1, 2))
+  expect_invisible(.validate_measure_result(res_crps, "m", n_obs = 2))
+})
+
+test_that(".validate_measure_result() errors on invalid output", {
+  expect_error(
+    .validate_measure_result(list(estimate = 1), "m"),
+    regexp = "Missing"
+  )
+  expect_error(
+    .validate_measure_result(
+      list(estimate = 1, se = 0.1, pointwise = c(1, 2, 3)),
+      "m",
+      n_obs = 2
+    ),
+    regexp = "length 2, not 3"
+  )
+})
diff --git a/tests/testthat/test_print.R b/tests/testthat/test_print.R
new file mode 100644
index 00000000..234bb4ae
--- /dev/null
+++ b/tests/testthat/test_print.R
@@ -0,0 +1,84 @@
+# load data
+temp <- readRDS("data-for-tests/res_roaches.Rds")
+
+measure_specs <- function(temp) {
+  rank_size <- max(temp$ypred)
+  rank_control <- list(
+    rps = list(size = rank_size),
+    srps = list(size = rank_size)
+  )
+  list(
+    list(measure = "r2", args = list(y = temp$y, mupred = temp$mupred)),
+    list(measure = "rmse", args = list(y = temp$y, mupred = temp$mupred)),
+    list(measure = "mse", args = list(y = temp$y, mupred = temp$mupred)),
+    list(measure = "mae", args = list(y = temp$y, mupred = temp$mupred)),
+    list(
+      measure = "rps",
+      args = list(y = temp$y, ypred = temp$ypred),
+      control = rank_control
+    ),
+    list(measure = "crps", args = list(y = temp$y, ypred = temp$ypred)),
+    list(
+      measure = "srps",
+      args = list(y = temp$y, ypred = temp$ypred),
+      control = rank_control
+    ),
+    list(measure = "scrps", args = list(y = temp$y, ypred = temp$ypred)),
+    list(measure = "mlpd", args = list(ylp = temp$ylp))
+  )
+}
+
+run_measure_snapshots <- function(loo_start, measures) {
+  loo_iter <- loo_start
+  for (measure in measures) {
+    loo_prev <- loo_iter
+    call_args <- c(
+      measure$args,
+      list(
+        measure = measure$measure,
+        loo = loo_iter,
+        save_psis = TRUE
+      )
+    )
+    if (!is.null(measure$control)) {
+      call_args$control <- measure$control
+    }
+    loo_iter <- do.call(loo_pred_measure, call_args)
+    common_rows <- intersect(
+      rownames(loo_prev$estimates),
+      rownames(loo_iter$estimates)
+    )
+    common_cols <- intersect(
+      colnames(loo_prev$pointwise),
+      colnames(loo_iter$pointwise)
+    )
+    expect_equal(
+      loo_iter$estimates[common_rows, , drop = FALSE],
+      loo_prev$estimates[common_rows, , drop = FALSE],
+      info = measure$measure
+    )
+    expect_equal(
+      loo_iter$pointwise[, common_cols, drop = FALSE],
+      loo_prev$pointwise[, common_cols, drop = FALSE],
+      info = measure$measure
+    )
+    expect_snapshot_output(print(loo_iter))
+  }
+  loo_iter
+}
+
+test_that("loo_pred_measure print snapshots", {
+  loo_ordered <- run_measure_snapshots(temp$loo, measure_specs(temp))
+  loo_shuffled <- run_measure_snapshots(
+    temp$loo,
+    with(set.seed(0), sample(measure_specs(temp)))
+  )
+  expect_setequal(
+    rownames(loo_ordered$estimates),
+    rownames(loo_shuffled$estimates)
+  )
+  expect_equal(
+    loo_ordered$estimates[rownames(loo_shuffled$estimates), , drop = FALSE],
+    loo_shuffled$estimates
+  )
+})
diff --git a/vignettes/articles-online-only/pred_measure-family.Rmd b/vignettes/articles-online-only/pred_measure-family.Rmd
new file mode 100644
index 00000000..20659746
--- /dev/null
+++ b/vignettes/articles-online-only/pred_measure-family.Rmd
@@ -0,0 +1,465 @@
+---
+title: "Computing predictive performance: The `pred_measure`-family"
+author: "Florence Bockting"
+output: 
+  rmarkdown::html_vignette:
+    toc: true
+    toc_depth: 3
+params:
+  EVAL: TRUE
+vignette: >
+  %\VignetteIndexEntry{Computing predictive performance: The `pred_measure`-family}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  message = FALSE,
+  warning = FALSE,
+  eval = params$EVAL,
+  fig.width = 7,
+  fig.height = 4,
+  fig.align = "center"
+)
+
+devtools::load_all(".")
+# library(brms)
+devtools::load_all("/u/21/wa.bocktif1/unix/GitHub/brms")
+library(dplyr)
+library(ggplot2)
+```
+
+## Introduction
+
+After fitting a Bayesian model, you often want predictive performance measures
+that are comparable across evaluation strategies: in-sample, PSIS-LOO,
+k-fold, and holdout data. The **pred_measures** package provides a unified
+workflow through four entry points:
+
+- `insample_pred_measure()` for in-sample evaluation
+- `loo_pred_measure()` for PSIS-LOO cross-validation
+- `kfold_pred_measure()` for k-fold cross-validation
+- `test_pred_measure()` for holdout / new data
+
+Each entry point can compute any supported measure in a single call when you
+supply the required inputs and pass the `measure` argument. 
+For the argument `measure` you can either specify a vector with predefined measure names whereby supported measures
+are:
+
+- **Density scores:** `elpd`, `mlpd`
+- **Distributional scores:** `rps`, `srps`
+- **Point metrics:** `mae`, `mse`, `rmse`, `r2`, `acc`, `bacc`
+
+Base measures (`elpd`, `ic`) are always included automatically; pass any
+additional names from the list above through the `measure` argument. You can 
+also obtain a list of supported measures via `supported_measures_list`:
+
+```{r supported-measures}
+supported_measures_list
+```
+
+When you want to compute a measure that is not yet supported, you can pass a custom function to the `measure` argument (see [Passing a custom function for a measure](#passing-a-custom-function-for-a-measure) below). In this case, the argument `measure` should be a list, whereby you can pass both predefined measure names and custom functions (e.g., `list("rps", "my_fun" = my_fun)`). The custom function must accept the same required arguments as the built-in version of that measure (see [Input requirements by measure](#input-requirements-by-measure) below), and you can pass additional arguments through `control`.
+
+For measure definitions and formulas, see the companion vignette 
+"overview-measures" [TODO].
+
+Besides computing all desired measures in one call, `pred_measure()` lets you 
+**add** measures to an existing result without recomputing what is already 
+stored (see [Adding measures incrementally](#adding-measures-incrementally-optional) below).
+
+**Prerequisites.** 
+
+This tutorial assumes a fitted **brms** model and familiarity
+with `brms::log_lik()`, `brms::posterior_predict()`, and
+`brms::posterior_epred()`.
+
+**What you will learn.**
+
+1. Compute all desired measures in one call to an entry point.
+2. Optionally accumulate measures incrementally with `pred_measure()`.
+3. Repeat the workflow under LOO, k-fold, and out-of-sample evaluation.
+4. Compare results across evaluation strategies.
+
+```{r load-data, include=FALSE}
+fit_sleep <- readRDS("data-for-vignettes/fit_sleep.Rds")
+fit_binary <- readRDS("data-for-vignettes/fit_binary.Rds")
+fit_roaches <- readRDS("data-for-vignettes/fit_roaches.Rds")
+fit_penguins <- readRDS("data-for-vignettes/fit_penguins.Rds")
+res_sleep_test <- readRDS("../../tests/testthat/data-for-tests/res_sleep_test.Rds")
+```
+
+## Quick start
+
+The sleep-study model from **lme4** predicts reaction time (ms) from days of
+sleep deprivation. The shortest path from a fitted model to printed measures is
+a single call to an entry point:
+
+```{r quick-start}
+sleep_measure <- insample_pred_measure(
+  ylp = brms::log_lik(fit_sleep),
+  y = fit_sleep$data$y,
+  ypred = brms::posterior_predict(fit_sleep),
+  measure = c("rps", "srps")
+)
+
+print(sleep_measure)
+```
+
+Base measures (`elpd`, `ic`) are always computed; pass additional measure names
+in `measure`. The rest of this tutorial explains how inputs map to measures and
+how to switch evaluation modes.
+
+## Core concepts
+
+### Object structure
+
+Every function returns a **pred_measure** object which is a list with:
+
+| Component | Description |
+| :--- | :--- |
+| `$estimates` | Summary estimate and SE for each measure |
+| `$pointwise` | Observation-level contributions (one column per measure) |
+| `$diagnostics` | PSIS diagnostics (LOO and Kfold-CV only) |
+| `$log_weights` | Log weights used for LOO (LOO-CV only) |
+
+The `print()` method shows `$estimates` in a readable table. Use
+`str(result)` when you need the full structure.
+
+Every object also carries attributes that identify the evaluation mode and data
+dimensions. The table below summarizes what differs across approaches; all
+objects share `attr(result, "dims")` (posterior draws × observations) and
+`attr(result, "source")` (one of `"insample"`, `"loo"`, `"kfold"`, `"test"`).
+
+| | In-sample | PSIS-LOO | K-fold | Holdout (Test) |
+| :--- | :--- | :--- | :--- | :--- |
+| **Class** | `insample_pred_measure` | `loo_pred_measure` (+ classes from `loo` object) | `kfold_pred_measure` (+ classes from `kfold` object) | `test_pred_measure` |
+| **List slots** | `$estimates`, `$pointwise` | `$estimates`, `$pointwise`, `$diagnostics`, `$log_weights`; (`$psis_object`) | `$estimates`, `$pointwise`; (`$diagnostics`) | `$estimates`, `$pointwise` |
+| **Measure suffix** | none (`elpd`, `ic`) | `_loo` (`elpd_loo`, `p_loo`, `ic_loo`, …) | `_kfold` (`elpd_kfold`, `p_kfold`, `kfoldic`, …) | `_test` (`elpd_test`, `ic_test`, …) |
+| **Inherited attrs** | -- | `yhash`, `model_name` from `loo` | `K`, `Ksub`, `folds`, `fold_type`, `joint`, `yhash`, `model_name` from `kfold` | -- |
+
+For holdout objects, `dims` reflects the test-set size (from `ylp_test`), not
+the training data. LOO and Kfold objects expose Pareto-k values in `$diagnostics$pareto_k` to assess the reliability of PSIS weights.
+
+### Input requirements by measure
+
+Supply the inputs each measure needs in a single entry-point call (see the
+[workflow overview](#workflow-overview) below). When adding measures
+incrementally, pass an existing object as `predperf` to `pred_measure()` so
+base estimates and weights are reused.
+
+| Measure | `ylp` | `y` | `ypred` | `mupred` |
+| :---: | :---: | :---: | :---: | :---: |
+| `elpd`, `mlpd`, `ic` | ✓ | | | |
+| `rps`, `srps` | | ✓ | ✓ | |
+| `acc`, `bacc` | | ✓ | | ✓ |
+| `mae`, `mse`, `rmse`, `r2` | | ✓ | | ✓ |
+
+- **`ylp`**: pointwise log predictive density matrix (`S` draws × `n` observations),
+  typically from `brms::log_lik(fit)`.
+- **`ypred`**: posterior predictive draws from `brms::posterior_predict()`.
+- **`mupred`**: posterior expected values from `brms::posterior_epred()`.
+
+### Two ways to compute measures
+
+| Approach | When to use | Example |
+| :--- | :--- | :--- |
+| **One-shot** | You know all measures upfront | `loo_pred_measure(..., measure = c("rps", "r2"))` |
+| **Incremental** | Explore interactively or add measures later | `pred_measure(..., predperf = existing, measure = "r2")` |
+
+### Workflow overview
+
+| Evaluation mode | Entry point | Primary inputs | 
+| :--- | :--- | :--- | :--- |
+| In-sample | `insample_pred_measure()` | `ylp`, plus `y`, `ypred`, `mupred` as needed | 
+| PSIS-LOO | `loo_pred_measure()` | `loo`, or `ylp` + `psis_object`, or `ylp` alone |
+| K-fold | `kfold_pred_measure()` | `kfold`, plus `y`, `ypred`, `mupred` as needed |
+| Holdout | `test_pred_measure()` | `ylp`, `ylp_test`, plus `y`, `ypred`, `mupred` as needed |
+
+Pass all required inputs and the full `measure` vector to the entry point for
+your evaluation mode. Use `pred_measure()` only when you want to extend an
+existing result (see below).
+
+## Continuous outcome: sleep study
+
+We use the pre-fitted Gaussian model on the sleep study throughout this section.
+
+### In-sample evaluation
+
+Pass all required inputs and the measures you want to `insample_pred_measure()`:
+
+```{r insample-all}
+ylp_sleep <- brms::log_lik(fit_sleep)
+ypred_sleep <- brms::posterior_predict(fit_sleep)
+mupred_sleep <- brms::posterior_epred(fit_sleep)
+
+sleep_measure <- insample_pred_measure(
+  ylp = ylp_sleep,
+  y = fit_sleep$data$y,
+  ypred = ypred_sleep,
+  mupred = mupred_sleep,
+  measure = c("rps", "srps", "r2")
+)
+print(sleep_measure)
+```
+
+`elpd` sums the expected log pointwise predictive density across observations;
+`ic` is `-2 × elpd`. Additional rows in the table correspond to the measures
+passed in `measure`.
+
+```{r pointwise-crps, fig.cap="Pointwise CRPS contributions across observations (in-sample)."}
+crps_pw <- sleep_measure$pointwise[, "rps"]
+ggplot(
+  data = data.frame(obs = seq_along(crps_pw), crps = crps_pw)
+) +
+  geom_dotplot(aes(crps), dotsize = 0.5) +
+  labs(
+    x = "CRPS_i",
+    title = "Dotplot for in-sample pointwise CRPS"
+  ) +
+  theme_minimal()
+```
+
+The entire structure of the returned object with corresponding 
+attributes is shown in the following
+
+```{r}
+str(sleep_measure)
+```
+
+#### Passing a custom function for a measure
+
+It is also possible to pass a custom function for any measure. The function must accept the same arguments as the built-in version of that measure, and you can pass additional arguments through `control`:
+
+```{r}
+# custom scoring rule
+hamming_loss <- function(y, ypred, log_weights = NULL, revert_sign = FALSE) {
+  if (is.null(log_weights)) {
+    hamming_i <- colMeans(sweep(ypred, 2, y, "!="))
+  } else {
+    w <- exp(.normalize_log_weights(log_weights))
+    hamming_i <- colSums(w * sweep(ypred, 2, y, "!="))
+  }
+  
+  res <- list(
+    estimate = mean(hamming_i),
+    se = sqrt(var(hamming_i) / length(hamming_i)),
+    pointwise = hamming_i
+  )
+  
+  .measure_output(res, revert_sign)
+}
+```
+
+```{r insample-custom-function}
+(insample_pred_measure(
+  ylp = brms::log_lik(fit_binary),
+  y = fit_binary$data$y,
+  ypred = res_binary$ypred,
+  measure = list("rps", hamming = hamming_loss)
+))
+```
+
+#### Adding measures incrementally
+
+If you already computed measures and want to add more without
+recomputing them, pass the existing object as `predperf` (**pred**icted 
+**perf**ormance measure) to `pred_measure()`:
+
+```{r insample-incremental}
+(pred_measure(
+  y = fit_sleep$data$y,
+  mupred = mupred_sleep,
+  predperf = sleep_measure,
+  measure = c("rmse", "mse")
+))
+```
+
+This two-step pattern is useful when exploring measures interactively.
+
+### PSIS-LOO cross-validation
+
+In-sample `elpd` uses posterior draws from $p(\theta \mid y)$. Comparing
+those predictions to the same data is **optimistically biased** for future or
+otherwise unseen observations: the model has already seen the data it is
+predicting ([Vehtari's CV-FAQ](https://users.aalto.fi/~ave/CV-FAQ.html),
+Section 4).
+
+**Cross-validation** estimates out-of-sample predictive performance by
+holding out part of the data, refitting (or reweighting) without that part,
+and scoring the held-out points. In **leave-one-out cross-validation (LOO-CV)**,
+each observation $y_i$ is predicted using a model fit to $y_{-i}$. The target
+quantity is the expected log pointwise predictive density (elppd), estimated by
+summing held-out log predictive densities across observations (see
+`vignette("overview-measures")` for the full
+definitions of within-sample, LOO, PSIS-LOO, and k-fold variants).
+
+**PSIS-LOO** approximates exact LOO-CV without refitting the model $n$ times.
+It reweights the full-data posterior draws $\theta^{(s)} \sim p(\theta \mid y)$
+with Pareto-smoothed importance sampling (PSIS) weights so that each
+observation $i$ is scored as if it had been left out. The resulting
+`elpd_loo` is an estimate of how well the model would predict new reaction
+times.
+
+Compute LOO measures in one call:
+
+```{r loo-all}
+loo_sleep <- brms::loo(fit_sleep, save_psis = TRUE)
+
+loo_measure <- loo_pred_measure(
+  loo = loo_sleep,
+  y = fit_sleep$data$y,
+  ypred = ypred_sleep,
+  mupred = mupred_sleep,
+  measure = c("rps", "srps", "r2")
+)
+print(loo_measure)
+```
+
+The estimates carry a `_loo` suffix (`elpd_loo`, `ic_loo`). Additional `p_loo` (effective number of parameters) is provided which is the difference between 
+`elpd_loo` and the non-cross-validated log posterior predictive density. 
+Furthermore, we get an additional entry with PSIS diagnostics in `$diagnostics`.
+
+```{r loo-pareto-k, fig.cap="Pareto-k diagnostics for PSIS-LOO. Values above 0.7 suggest unreliable LOO estimates for those observations."}
+str(loo_measure)
+```
+
+**Alternative LOO inputs.** 
+
+`loo_pred_measure()` accepts three equivalent input patterns:
+
+| Input | Example |
+| :--- | :--- |
+| Precomputed `loo` object | `loo_pred_measure(loo = loo_sleep, measure = ...)` |
+| `ylp` + `psis_object` | `loo_pred_measure(ylp = ylp, psis_object = psis_obj, measure = ...)` |
+| `ylp` only (PSIS computed internally) | `loo_pred_measure(ylp = ylp, measure = ...)` |
+
+To add measures incrementally after a base-only LOO call, pass the existing
+object as `predperf` to `pred_measure()`. When
+accumulating after computing only `elpd`/`ic`, use `save_psis = TRUE` in
+`loo_pred_measure()` so PSIS weights are stored for subsequent measures.
+
+Compare the in-sample and LOO ELPD values directly:
+
+```{r elpd-compare-table}
+data.frame(
+  method = c("In-sample", "PSIS-LOO"),
+  elpd = c(
+    sleep_measure$estimates["elpd", "Estimate"],
+    loo_measure$estimates["elpd_loo", "Estimate"]
+  ),
+  se = c(
+    sleep_measure$estimates["elpd", "SE"],
+    loo_measure$estimates["elpd_loo", "SE"]
+  )
+)
+```
+
+### K-fold cross-validation
+
+K-fold CV holds out groups of observations rather than single points.
+The same computation pattern applies to this approach: pass all inputs and 
+measures to `kfold_pred_measure()` with a `kfold` object from `brms::kfold()`:
+
+```{r kfold-measures}
+kfold_sleep <- brms::kfold(fit_sleep, save_fits = TRUE)
+mupred_kfold <- brms::kfold_predict(kfold_sleep, method = "fitted")$yrep
+ypred_kfold <- brms::kfold_predict(kfold_sleep, method = "predict")$yrep
+
+kfold_measure <- kfold_pred_measure(
+  y = fit_sleep$data$y,
+  ypred = ypred_kfold,
+  mupred = mupred_kfold,
+  kfold = kfold_sleep,
+  measure = c("rps", "srps", "r2")
+)
+print(kfold_measure)
+```
+
+Measure names carry a `_kfold` suffix (e.g. `elpd_kfold`, `rps_kfold`).
+The returned object inherits the attributes from the `brms::kfold` object:
+
+```{r}
+str(kfold_measure)
+```
+
+### Holdout evaluation
+
+For genuinely new data, use `test_pred_measure()` in the same one-shot way.
+We load precomputed predictive inputs from the package test fixtures (a random
+train/test split of the sleep study):
+
+```{r test-measures}
+test_measure <- test_pred_measure(
+  y = res_sleep_test$y_test,
+  ypred = res_sleep_test$ypred_test,
+  mupred = res_sleep_test$mupred_test,
+  ylp = res_sleep_test$ylp_train,
+  ylp_test = res_sleep_test$ylp_test,
+  measure = c("rps", "srps", "r2")
+)
+print(test_measure)
+```
+
+Holdout measures use a `_test` suffix and reflect performance on the 50
+held-out observations only.
+
+## Other outcome types
+
+The same pattern applies to non-Gaussian models. Each subsection uses
+a pre-fitted **brms** model.
+
+### Binary outcomes
+
+```{r binary-example}
+binary_measure <- insample_pred_measure(
+  ylp = brms::log_lik(fit_binary),
+  y = fit_binary$data$y,
+  mupred = brms::posterior_epred(fit_binary),
+  measure = c("acc", "bacc")
+)
+print(binary_measure)
+```
+
+### Count outcomes
+
+```{r count-example}
+ypred_roaches <- brms::posterior_predict(fit_roaches)
+
+roaches_measure <- insample_pred_measure(
+  ylp = brms::log_lik(fit_roaches),
+  y = fit_roaches$data$y,
+  ypred = ypred_roaches,
+  measure = c("rps", "srps"),
+  control = list(rps = list(size = as.integer(max(ypred_roaches))),
+                 srps = list(size = as.integer(max(ypred_roaches))))
+)
+print(roaches_measure)
+```
+
+### Categorical outcomes
+
+```{r categorical-example}
+penguins_measure <- insample_pred_measure(
+  ylp = brms::log_lik(fit_penguins),
+  y = as.integer(fit_penguins$data$y),
+  mupred = brms::posterior_epred(fit_penguins),
+  measure = c("acc", "bacc")
+)
+print(penguins_measure)
+```
+
+See `vignette("overview-measures", package = "pred_measures")` for which
+measures apply to each outcome type.
+
+## Further reading
+
+- `?supported_measures_list` — character vector of all supported measure names
+- `?pred_measure` — optional helper for adding measures to an existing predperf
+  object
+- `?insample_pred_measure`, `?loo_pred_measure`, `?kfold_pred_measure`,
+  `?test_pred_measure` — entry points for each evaluation mode
+- `vignette("overview-measures", package = "pred_measures")` — measure
+  definitions, formulas, and orientation (higher vs lower is better)
diff --git a/vignettes/articles-online-only/pred_measure-formulas.Rmd b/vignettes/articles-online-only/pred_measure-formulas.Rmd
new file mode 100644
index 00000000..3fb3f7eb
--- /dev/null
+++ b/vignettes/articles-online-only/pred_measure-formulas.Rmd
@@ -0,0 +1,911 @@
+---
+title: "Overview of scores and metrics"
+author: "Florence Bockting"
+date: "`r Sys.Date()`"
+output: 
+  rmarkdown::html_vignette:
+    toc: true
+    toc_depth: 3
+params:
+  EVAL: FALSE
+vignette: >
+  %\VignetteIndexEntry{Overview of scores and metrics}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+
+## Introduction
+
+In Bayesian model evaluation, predictive performance is assessed by comparing a model's posterior predictive distribution against observed data. Depending on whether the outcome is continuous, binary, or categorical, and whether we evaluate the entire probability distribution or a specific point estimate, different scoring rules and performance metrics are appropriate.
+
+The table below summarizes the core metrics implemented in our framework. 
+
+| Metric Keyword | Full Name | Metric Type | Orientation | Target Outcome Type |
+| :--- | :--- | :--- | :--- | :--- |
+| `lppd` / `elppd` | (Expected) Log Pointwise Predictive Density | Scoring Rule (Distributional) | Higher is Better | Any |
+| `acc` | Accuracy | Proper Scoring Rule | Higher is Better | Nominal Categorical |
+| `bacc` | Balanced Accuracy | Adjusted Classification Metric | Higher is Better | Imbalanced Categorical |
+| `brier` | Brier Score | Proper Scoring Rule | Lower is Better | Binary |
+| `crps` | Continuous Ranked Probability Score | Proper Scoring Rule (Distributional) | Lower is Better | Continuous |
+| `scrps` | Scaled Continuous Ranked Probability Score | Scale-Invariant Proper Scoring Rule | Higher is Better | Continuous |
+| `rps` | Ranked Probability Score | Proper Scoring Rule | Lower is Better | Ordered Categorical |
+| `srps` | Scaled Ranked Probability Score | Scale-Invariant Proper Scoring Rule | Higher is Better | Ordered Categorical |
+| `mae` | Mean Absolute Error | Point Estimation Metric | Lower is Better | Continuous |
+| `mse` | Mean Squared Error | Point Estimation Metric | Lower is Better | Continuous |
+| `rmse` | Root Mean Squared Error | Point Estimation Metric | Lower is Better | Continuous |
+| `r2` | Coefficient of Determination | Summary Fit Metric | Higher is Better | Continuous |
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(eval = TRUE, message = FALSE, warnings = FALSE)
+```
+
+## Data Examples
+```{r, message=FALSE, warnings=FALSE, include = FALSE}
+library(tidyr)
+library(dplyr)
+library(ggplot2)
+# devtools::load_all("../brms")
+```
+
+```{r, include = FALSE}
+plot_ptw <- function(ws, loo, kfold, label, show_estimate = TRUE){
+  
+  # 1. Safe estimation extractor
+  ptw_est <- function(x) {
+    if (is.null(x)) return(NA)
+    if (!is.null(x$estimates[1])) {
+      return(x$estimates[1])
+    }
+    x$estimates["Estimate"]
+  }
+
+  # 2. Build estimates and create the formatted labels right away
+  estimates <- tibble(
+    source = c("ws", "loo", "kfold"),
+    estimate = c(ptw_est(ws), ptw_est(loo), ptw_est(kfold))
+  ) |> 
+    mutate(source_label = if_else(
+      is.na(estimate), 
+      paste0(source, " (no data)"), 
+      paste0(source, " (est. = ", round(estimate, 2), ")")
+    ))
+  
+  # Turn into a factor to preserve the ws -> loo -> kfold order
+  estimates$source_label <- factor(estimates$source_label, levels = estimates$source_label)
+  
+  # 3. Safe data binding for the dots
+  g_data <- dplyr::bind_rows(
+    tibble(source = "ws", value = if (!is.null(ws)) ws$pointwise else as.numeric(NA)),
+    tibble(source = "loo", value = if (!is.null(loo)) loo$pointwise else as.numeric(NA)),
+    tibble(source = "kfold", value = if (!is.null(kfold)) kfold$pointwise else as.numeric(NA))
+  )
+  
+  # 4. Align facet names for the dots data
+  plot_df <- g_data |> 
+    left_join(estimates, by = "source") |>
+    mutate(source = source_label)
+  
+  # 5. Align facet names for the line data so ggplot matches them up perfectly
+  line_data <- estimates |> 
+    mutate(source = source_label)
+  
+  # Plotting
+  g <- ggplot(plot_df, mapping = aes(x = value)) +
+    geom_dotplot()
+  
+  if (isTRUE(show_estimate)) {
+    g <- g +
+      geom_vline(
+        data = line_data, # Uses the aligned facet labels now
+        aes(xintercept = estimate), color = "red", linetype = "solid", linewidth = 1
+      )
+  }
+
+  g + 
+    facet_wrap(~ source, nrow = 1) +
+    theme_classic() +
+    xlab(label)
+}
+```
+
+```{r}
+# see vignettes/articles-online-only/test-data/generate-data.R for code 
+# to generate these objects
+
+# poisson model; count data
+res_roaches <- readRDS("../../tests/testthat/data-for-tests/res_roaches.Rds")
+# normal model; continuous data
+res_sleep <- readRDS("../../tests/testthat/data-for-tests/res_sleep.Rds")
+# bernoulli model; binary data
+res_binary <- readRDS("../../tests/testthat/data-for-tests/res_binary.Rds")
+# categorical model; nominal categories
+res_penguins <- readRDS("../../tests/testthat/data-for-tests/res_penguins.Rds")  
+# binomial model; bounded count data 
+res_binomial <- readRDS("../../tests/testthat/data-for-tests/res_binomial.Rds")
+```
+
+## Log pointwise predictive density (lppd) 
+The log pointwise predictive density aggregates the log posterior predictive
+densities over a pointwise partition of the data $y = (y_1,\ldots,y_n)$:
+$$
+\text{lppd} = \sum_{i=1}^n \log \int p(y_i \mid \theta) p(\theta \mid y) d\theta 
+$$
+Monte Carlo estimator with posterior draws:
+$$
+\widehat{\text{lppd}} = \sum_{i=1}^n \log \left(\frac{1}{S}\sum_{s=1}^S 
+p(y_i\mid \theta^{(s)})\right)
+= \sum_{i=1}^n \underbrace{\Big[\text{log-sum-exp}_s\{\log p(y_i\mid 
+\theta^{(s)})\} - \log S\Big]}_{\text{lppd}_i}
+$$
+
+We use $\log p(y_i\mid \theta^{(s)})$ for numerical stability.
+
+### Conceptual variants
+We define pointwise terms $\text{lppd}_i$ and their sums. 
+For standard errors we use the sample variance across $i$:
+
+- $\widehat{\text{Var}}_i(\text{lppd}_i) = \frac{1}{n-1} \sum_{i=1}^n 
+(\text{lppd}_i - \text{lppd})^2 \text{, with } \text{lppd} = 
+\frac{1}{n}\sum_i \text{lppd}_i$
+- $se(\text{sum}) \approx \sqrt{n \cdot \widehat{\text{Var}}(\text{lppd}_i)}$
+- $se(\text{mean}) \approx \sqrt{\frac{\widehat{\text{Var}}(\text{lppd}_i)}{n}}$
+
+#### Within-sample (training data; optimistic for new data)
++ Draws: $\theta^{(s)} \sim p(\theta \mid y)$
++ Pointwise term: 
+$$
+\text{lppd}_i^\text{ws} = \text{log-sum-exp}_s \big(\log p(y_i 
+\mid \theta^s)\big) - \log(S)
+$$
++ Estimate and Standard error:
+  + $\text{lppd}^\text{sw} = \sum_i^n \text{lppd}_i^\text{ws}$
+  + $\widehat{\text{elppd}}^\text{sw} = \text{lppd}^\text{sw}$ 
+  (optimistically biased for future data)
+  + $se(\text{lppd}^\text{sw}) \approx \sqrt{n \cdot \widehat{\text{Var}}(\text{lppd}_i^\text{ws})}$
+
+#### Leave-one-out cross-validation (LOO-CV; held-out)
++ Draws: $\theta_{−i}^{(s)} \sim p(\theta \mid y_{−i})$
++ Pointwise term: 
+$$
+\text{lppd}_i^\text{loo} = \text{log-sum-exp}_s(\log p(y_i \mid \theta_{-i}^s)) - \log(S)
+$$
++ Estimate and Standard Error:
+  + $\text{lppd}^\text{loo} = \sum_i^n \text{lppd}_i^\text{loo}$, 
+  an estimate of elppd
+  + $se(\text{lppd}^\text{loo}) \approx \sqrt{n \cdot \widehat{\text{Var}}(\text{lppd}_i^\text{loo})}$
+
+#### PSIS-LOO (held-out approximation)
++ Reweights full-data draws $\theta^{(s)} \sim p(\theta \mid y)$ to approximate 
+$p(\theta \mid y_{−i})$ with normalized PSIS weights $\tilde w_i^{(s)}$ ($\sum \tilde w_i^{(s)} = 1$)
++ Pointwise term:
+$$
+\text{lppd}_i^\text{psis} = \text{log-sum-exp}_s\big(\log p(y_i \mid \theta^s) + \log \tilde w_i^{(s)}\big)
+$$
++ Estimate and Standard Error:
+  + $\text{lppd}^\text{psis} = \sum_i^n \text{lppd}_i^\text{psis}$, an estimate 
+  of elppd
+  + $se(\text{lppd}^\text{psis}) \approx \sqrt{n \cdot \widehat{\text{Var}}(\text{lppd}_i^\text{psis})}$
+
+#### K-fold cross-validation
+Partition indices into $K$ disjoint folds, $F_1,\ldots, F_K$. For each fold 
+$m$, fit the model on the training data $y_{-F_m}$, draw parameters from the 
+posterior distribution $\theta^{(m,s)} \sim p(\theta \mid y_{-F_m})$ and 
+evaluate fold $F_m$.
+
++ Draws: $\theta^{(m,s)} \sim p(\theta \mid y_{-F_m})$
++ Pointwise term:
+$$
+\text{lppd}_i^\text{kfold} = \text{log-sum-exp}_s\big(\log p(y_i\mid \theta^{(m,s)})\big) - \log S,\quad i \in F_m
+$$
++ Estimate and Standard Error:
+  + $\text{lppd}^\text{kfold} = \sum_i \text{lppd}_i^\text{kfold}$, an estimate
+   for elppd
+  + $se(\text{lppd}^\text{kfold}) \approx \sqrt{n \cdot \widehat{\text{Var}}(\text{lppd}_i^\text{kfold})}$
+
+## Expected log pointwise predictive density (elppd)
+**elppd** (expected log pointwise predictive density) is the sum, over a chosen 
+pointwise partition of the data, of the expected log posterior predictive 
+density for future observations drawn from the true data-generating 
+distribution $p_t$. Formally, 
+$$ 
+\mathrm{elppd}=\sum_{i=1}^n \mathbb{E}_{\tilde y_i \sim p_t} \left[\log \int p(\tilde y_i \mid \theta) p(\theta \mid y) d\theta \right] =\sum_{i=1}^n \mathbb{E}_{\tilde y_i \sim p_t} \left[\text{lppd}_i \right] 
+$$ 
+
+Because $p_t$ is unknown, **elppd** is estimated from data-either by 
+bias-correcting the in-sample lppd (e.g., WAIC) or via cross-validation 
+(e.g., LOO/Psis-LOO/K-fold) that evaluates predictive densities on held-out 
+data.
+$$
+\widehat{\text{elppd}} \approx \sum_i^n\text{lppd}_i
+$$
+
+
+```{r, warnings=FALSE, message=FALSE}
+# within-sample elpd
+elpd_ws <- elpd(ylp = res_roaches$ylp)
+message("within-sample elpd:")
+invisible(str(elpd_ws))
+
+# psis-loo elpd
+elpd_psis1 <- elpd(
+  ylp = res_roaches$ylp, 
+  log_weights = res_roaches$log_weights
+)
+elpd_psis2 <- elpd(
+  ylp = NULL, 
+  pointwise = res_roaches$loo$pointwise[, "elpd_loo"]
+)
+# expect_equal(elpd_psis1, elpd_psis2)
+message("psis-loo elpd:")
+invisible(str(elpd_psis1))
+
+# kfold elpd
+elpd_kfold <- elpd(
+  ylp = NULL, 
+  pointwise = res_roaches$kfold$pointwise[, "elpd_kfold"]
+)
+message("kfold elpd:")
+invisible(str(elpd_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(elpd_ws, elpd_psis1, elpd_kfold, "lppd_i", show_estimate = FALSE)
+```
+
+### Mean log pointwise predictive density
+The mean log predictive density (**mlpd**) is the average (across observations) 
+of the model's log predictive density.
+
+$$
+\text{mlpd} = \frac{1}{n} \widehat{\text{elppd}} = \frac{1}{n} \sum_i^n\text{lppd}_i
+$$
+The standard error of the mean is then
+$$
+se(\text{mlpd}) = \frac{se(\text{elppd})}{n}
+$$
+
+```{r}
+mlpd_ws <- mlpd(
+  ylp = res_roaches$ylp
+)
+mlpd_loo <- mlpd(
+  ylp = res_roaches$ylp, 
+  log_weights = res_roaches$log_weights
+)
+mlpd_kfold <- mlpd(
+  ylp = NULL, 
+  pointwise = res_roaches$kfold$pointwise[, "elpd_kfold"]
+)
+
+message("within-sample mlpd:")
+invisible(str(mlpd_ws))
+message("psis-loo mlpd:") 
+invisible(str(mlpd_loo))
+message("kfold mlpd:")
+invisible(str(mlpd_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(mlpd_ws, mlpd_loo, mlpd_kfold, "lppd_i")
+```
+
+## Log score vs. elpd
+The log score is the log of the predictive density (or probability) evaluated 
+at the observed value. With $S$ posterior draws $\theta^1, \ldots, \theta^S$ 
+from $p(\theta \mid y)$, the posterior predictive density is approximated by 
+Monte Carlo averaging
+$$
+\log p(y_i \mid y) \approx \log \Big[\frac{1}{S} \sum_{s=1}^S p(y_i \mid \theta^s) \Big]
+$$
+or using the log-sum-exp trick:
+$$
+\log p(y_i \mid y) \approx \Big[\text{log-sum-exp}_s\big(\log p(y_i \mid \theta^{(s)})\big) - \log S\Big]
+$$
+
+This definition looks equivalent to that we used in order to introduce the 
+elpd. What is then the difference between log score and elpd?
+
++ **Log score**: The realized value $\log p(y \mid p(\theta \mid y))$ for an 
+observed data point $y$ under a model's predictive distribution. It is a proper,
+ local scoring rule used to evaluate probabilistic predictions.
++ **elpd**: The expectation of the log score for a future, as-yet-unobserved 
+data point drawn from the data-generating process. It averages the log 
+predictive density over the distribution of future data, and its pointwise 
+sum over $n$ items is elppd; in practice we estimate it via cross-validation 
+or using information criteria that correct in-sample bias.
+
+## Accuracy (`acc`)
+
+Accuracy here is the posterior predictive probability assigned to the observed category, averaged across (weighted) posterior draws.
+Accuracy is a proper scoring rule suitable for nominal categorical outcomes and is positively oriented (higher is better). Accuracy ranges between 0 (all predictions are wrong) and 1 (all predictions are correct).
+
+- Predictive probability of the observed class, Monte Carlo:
+  $$
+  \hat p_i = \frac{1}{S}\sum_{s=1}^S \Pr(Y_i = y_i \mid \theta^{(s)}),\quad
+  \hat p_i^w = \sum_{s=1}^S \tilde w_i^{(s)}\,\Pr(Y_i = y_i \mid \theta^{(s)})
+  $$
+- Pointwise:
+  $$
+  \text{acc}_i = \hat p_i \ \text{ or }\ \hat p_i^w
+  $$
+- Estimate and Standard Error:
+  $$
+  \widehat{\text{acc}} = \frac{1}{n}\sum_{i=1}^n \text{acc}_i,\quad
+  se(\widehat{\text{acc}}) = \sqrt{\frac{\widehat{\text{Var}}(\text{acc}_i)}{n}}
+  $$
+
+::: {.callout-tip}
+
+Instead of drawing a single label per sample (`ypred`), we use the expected probability under the posterior (`mupred`). This is the Bayes-optimal decision rule under 0-1 loss (0-1 loss = 1 - Accuracy).
+:::
+
+```{r}
+acc_ws <- acc(
+  y = as.numeric(res_penguins$y), 
+  mupred = res_penguins$mupred
+)
+acc_loo <- acc(
+  y = as.numeric(res_penguins$y), 
+  mupred = res_penguins$mupred,
+  log_weights = res_penguins$log_weights
+)
+# acc_kfold <- acc(
+#   y = as.numeric(res_penguins$y), 
+#   mupred = res_penguins$mupred_kfold
+# )
+  
+message("within-sample acc:")
+invisible(str(acc_ws))
+message("psis-loo acc:") 
+invisible(str(acc_loo))
+# message("kfold acc:") 
+# invisible(str(acc_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(acc_ws, acc_loo, NULL, "acc_i")
+```
+
+## Balanced Accuracy (`bacc`)
+
+Balanced accuracy averages the within-class mean of the predicted probability assigned to the correct class, giving each class equal weight.
+
+Let $\mathcal{C} = \{1, \ldots, K\}$ be the set of classes and 
+$n_c = |\{i : y_i = c\}|$ the number of observations in class $c$. The 
+class-specific accuracy is the mean predicted probability of the correct 
+category among observations that truly belong to class $c$:
+
+- Class-specific accuracy:
+  $$
+  \text{acc}_c = \frac{1}{n_c} \sum_{i: y_i=c} \hat p_i^w
+  $$
+- Balanced accuracy:
+  $$
+  \widehat{\text{bacc}} = \frac{1}{K}\sum_{c=1}^K \text{acc}_c
+  $$
+- A convenient pointwise contribution that yields the same mean:
+$$
+\text{bacc}_i = \frac{\hat p_i^w}{n_{y_i}}
+$$
+- Estimate and Standard Error:
+  $$
+  \widehat{\text{bacc}} = \frac{1}{K}\sum_{c=1}^K \frac{1}{n_c}\sum_{i: y_i=c} \hat p_i^w,\quad
+  se(\widehat{\text{bacc}}) = \sqrt{\frac{\widehat{\text{Var}}(\text{bacc}_i)}{n}}
+  $$
+
+::: {.callout-note}
+When class frequencies are equal ($n_c = n/K$), balanced accuracy 
+reduces to standard accuracy. For binary outcomes ($K=2$), it equals the mean 
+of sensitivity and specificity.
+:::
+
+```{r}
+print(table(as.numeric(res_penguins$y)))
+
+bacc_ws <- bacc(
+  y = as.numeric(res_penguins$y), 
+  mupred = res_penguins$mupred
+)
+bacc_loo <- bacc(
+  y = as.numeric(res_penguins$y), 
+  mupred = res_penguins$mupred,
+  log_weights = res_penguins$log_weights
+)
+# bacc_kfold <- bacc(
+#   y = as.numeric(res_penguins$y), 
+#   mupred = res_penguins$mupred_kfold
+# )
+  
+message("within-sample bacc:")
+invisible(str(bacc_ws))
+message("psis-loo bacc:") 
+invisible(str(bacc_loo))
+# message("kfold bacc:") 
+# invisible(str(bacc_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(bacc_ws, bacc_loo, NULL, "bacc_i")
+```
+
+## Brier Score (`brier`)
+The Brier score is a strictly proper scoring rule for binary outcomes; it is the mean
+ squared error between the predicted probability and the observed binary outcome. The Brier score is 
+ negatively oriented (lower is better).
+ The brier score ranges within [0, 1].
+
+- Pointwise:
+  $$
+  \widehat{\text{brier}}_i^{\text{raw}} = \big(p_i - y_i\big)^2,\quad
+  p_i = 
+  \begin{cases}
+  \frac{1}{S}\sum_{s=1}^S \Pr(y_i=1 \mid \theta^{(s)}) & \text{(unweighted)}\\[4pt]
+  \sum_{s=1}^S \tilde w_i^{(s)}\,\Pr(y_i=1 \mid \theta^{(s)}) & \text{(PSIS weights)}
+  \end{cases}
+  $$
++ Estimate and Standard Error:
+  $$
+  \widehat{\text{bs}} = \frac{1}{n}\sum_{i=1}^n \text{bs}_i,\quad
+  se(\widehat{\text{bs}}) = \sqrt{\frac{\widehat{\text{Var}}(\text{bs}_i)}{n}}
+  $$
+
+```{r}
+brier_ws <- brier(
+  y = res_binary$y, 
+  ypred = res_binary$ypred
+)
+brier_loo <- brier(
+  y = res_binary$y, 
+  ypred = res_binary$ypred, 
+  log_weights = res_binary$log_weights
+)
+brier_kfold <- brier(
+  y = res_binary$y, 
+  ypred = res_binary$ypred_kfold
+)
+
+message("within-sample brier:")
+invisible(str(brier_ws))
+message("psis-loo brier:") 
+invisible(str(brier_loo))
+message("kfold brier:") 
+invisible(str(brier_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(brier_ws, brier_loo, brier_kfold, "brier_i")
+```
+
+## Continuous Ranked Probability Score (`crps`)
+CRPS is a strictly proper scoring rule for continuous outcomes; it measures the squared CDF distance between the predictive CDF $F$ and the observed step function integrated over the support. It is the continuous limit of rps as $K \rightarrow \infty$. As for rps, also crps is negatively oriented (lower = better). The crps is on the same units as $y$.
+
+With predictive draws $Y^s \sim p(\tilde y \mid \theta^s)$:
+ 
+- Pointwise terms:
+$$
+\text{crps}_i = \int_{-\infty}^{\infty} \left(F_i(x) - \mathbf{1}(x \geq y_i)\right)^2 dx
+$$
+Equivalently, via the energy score representation (when $F_i$ has a finite 
+first moment):
+$$
+\text{crps}_i = \mathbb E_F\big[Y - y_i\big] - \frac{1}{2}\mathbb E_F \big[Y-Y'\big]
+$$
+where $Y, Y'$ are independent draws from the predictive distribution $F_i$. 
+Because computing $\mathbb{E}_F[|Y - Y'|]$ requires two independent sets of 
+predictive draws, we instead use the identity (valid when $F$ is continuous):
+$$
+\text{crps}_i = \mathbb E_F\big[Y - y_i\big] + \mathbb E_F\big[Y\big] - 2\mathbb E_F \big[Y\cdot F(Y)\big]
+$$
+which requires only a single set of draws.
+- Weighted Monte Carlo estimators (PSIS weights $\tilde w_i^{(s)}$; unweighted is the special case $\tilde w = 1/S$):
+$$
+\hat F_i^w(x) = \sum_{s=1}^S \tilde w_i^s \mathbf{1}(Y_i^s \leq x)
+$$
+- Computation:
+$$
+\mathbb{E}_F\big[|Y - y_i|\big] \approx \sum_{s=1}^S \tilde w_i^s \big|Y_i^s - y_i\big|, \qquad
+\mathbb{E}_F[Y] \approx \sum_{s=1}^S \tilde w_i^s Y_i^s,
+$$
+$$
+\mathbb{E}_F\big[Y \cdot F(Y)\big] \approx \sum_{s=1}^S \tilde w_i^s Y_i^s \hat F_i^w\!\left(Y_i^s\right)
+$$
+giving the pointwise estimator
+$$
+\widehat{\text{crps}}_i = \sum_s \tilde w_i^s \big|Y_i^s - y_i\big|
+  + \sum_s \tilde w_i^s Y_i^s
+  - 2 \sum_s \tilde w_i^s Y_i^s \hat F_i^w\!\left(Y_i^s\right)
+$$
++ Estimate and Standard Error:
+  $$
+  \widehat{\text{crps}} = \frac{1}{n}\sum_{i=1}^n \text{crps}_i,\quad
+  se(\widehat{\text{crps}}) = \sqrt{\frac{\widehat{\text{Var}}(\text{crps}_i)}{n}}
+  $$
+
+```{r}
+crps_ws <- crps(
+  y = res_sleep$y, 
+  ypred = res_sleep$ypred
+)
+crps_loo <- crps(
+  y = res_sleep$y, 
+  ypred = res_sleep$ypred, 
+  log_weights = res_sleep$log_weights
+)
+crps_kfold <- crps(
+  y = res_sleep$y, 
+  ypred = res_sleep$ypred_kfold
+)
+
+message("within-sample crps:")
+invisible(str(crps_ws))
+message("psis-loo crps:")
+invisible(str(crps_loo))
+message("kfold crps:")
+invisible(str(crps_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(crps_ws, crps_loo, crps_kfold, "crps_i")
+```
+
+### Scaled CRPS (`scrps`)
+
+SCRPS is a locally scale-invariant proper scoring rule that normalizes CRPS by predictive spread (denominator) and adds a log penalty on spread (second term) [Bolin & Wallin, 2023]:
+
+$$
+\text{scrps}_i = -\frac{\mathbb{E}_F\big[|Y - y_i|\big]}{\mathbb{E}_F\big[|Y - Y'|\big]}
+- \frac{1}{2}\log \mathbb{E}_F\big[|Y - Y'|\big]
+$$
+- Use the same weighted estimators for the two expectations as above.
+- Rather than requiring two independent draw matrices, we recover
+$\mathbb{E}_F[|Y-Y'|]$ from quantities already computed for the crps via the 
+identity
+$$
+\mathbb{E}_F\big[|Y - Y'|\big] = 4\,\mathbb{E}_F\big[Y \cdot F(Y)\big] - 2\,\mathbb{E}_F[Y]
+$$
+- With importance weights this becomes
+$$
+\widehat{\mathbb{E}}_F\big[|Y - Y'|\big] =
+  4\sum_s \tilde w_i^s Y_i^s \hat F_i^w\!\left(Y_i^s\right)
+  - 2\sum_s \tilde w_i^s Y_i^s
+$$
+so the pointwise scrps estimator is
+$$
+\widehat{\text{scrps}}_i =
+  -\frac{\displaystyle\sum_s \tilde w_i^s \big|Y_i^s - y_i\big|}
+        {\widehat{\mathbb{E}}_F\big[|Y-Y'|\big]}
+  - \frac{1}{2}\log\widehat{\mathbb{E}}_F\big[|Y-Y'|\big]
+$$
+- Estimate and Standard Error:
+  $$
+  \widehat{\text{scrps}} = \frac{1}{n}\sum_{i=1}^n \text{scrps}_i,\quad
+  se(\widehat{\text{scrps}}) = \sqrt{\frac{\widehat{\text{Var}}(\text{scrps}_i)}{n}}
+  $$
+
+```{r}
+scrps_ws <- scrps(
+  y = res_sleep$y, 
+  ypred = res_sleep$ypred
+)
+scrps_loo <- scrps(
+  y = res_sleep$y, 
+  ypred = res_sleep$ypred, 
+  log_weights = res_sleep$log_weights
+)
+scrps_kfold <- scrps(
+  y = res_sleep$y, 
+  ypred = res_sleep$ypred_kfold
+)
+
+message("within-sample scrps:")
+invisible(str(scrps_ws))
+message("psis-loo scrps:")
+invisible(str(scrps_loo))
+message("kfold scrps:")
+invisible(str(scrps_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(scrps_ws, scrps_loo, scrps_kfold, "scrps_i")
+```
+
+## Ranked Probability Score (`rps`)
+RPS is a strictly proper scoring rule for ordered categorical outcomes with $K$ categories. It compares the predictive CDF to the observed 
+step CDF and generalizes the Brier score ($K=2$ recovers Brier). The standard 
+definition is negatively oriented (lower is better). The range of rps is $[0, K-1]$.
+
+- Predictive class probabilities and CDFs:
+  $$
+  \hat p_{i,k}^w = \sum_{s=1}^S \tilde w_i^{(s)}\,\Pr(Y_i=k\mid \theta^{(s)}),\quad
+  \hat F_{i,k}^w = \sum_{j=1}^k \hat p_{i,j}^w
+  $$
+- Observed step CDF $O_{i,k} = \mathbf{1}(y_i \le k)$.
+- Pointwise term:
+  $$
+  \widehat{\text{rps}}_i^{\text{raw}} = \sum_{k=1}^{K-1} (\hat F_{i,k}^w - O_{i,k})^2
+  $$
+- Estimate and Standard Error:
+  $$
+  \widehat{\text{rps}} = \frac{1}{n}\sum_{i=1}^n \text{rps}_i,\quad
+  se(\widehat{\text{rps}}) = \sqrt{\frac{\widehat{\text{Var}}(\text{rps}_i)}{n}}
+  $$
+
+```{r}
+rps_ws <- rps(
+  y = res_binomial$y, 
+  ypred = res_binomial$ypred,
+  size = 10
+)
+rps_loo <- rps(
+  y = res_binomial$y, 
+  ypred = res_binomial$ypred, 
+  size = 10,
+  log_weights = res_binomial$log_weights
+)
+rps_kfold <- rps(
+  y = res_binomial$y, 
+  ypred = res_binomial$ypred_kfold, 
+  size = 10
+)
+  
+message("within-sample rps:")
+invisible(str(rps_ws))
+message("psis-loo rps:")
+invisible(str(rps_loo))
+message("kfold rps:")
+invisible(str(rps_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(rps_ws, rps_loo, rps_kfold, "rps_i")
+```
+
+### Scaled RPS (`srps`)
+
+By analogy with SCRPS [Bolin & Wallin, 2023], the scaled RPS is
+
+$$
+\text{srps}_i = -\frac{\mathbb{E}_F\big[|Y - y_i|\big]}{\mathbb{E}_F\big[|Y - Y'|\big]}
+- \frac{1}{2}\log \mathbb{E}_F\big[|Y - Y'|\big]
+$$
+
+For ordered discrete support:
+
+- Weighted mean absolute deviation:
+  $$
+  \widehat{\mathbb{E}}_F[|Y-y_i|] = \sum_{k=1}^K \hat p_{i,k}^w\,|k - y_i|
+  $$
+- Predictive spread:
+  $$
+  \widehat{\mathbb{E}}_F[|Y - Y'|] = 2\sum_{k=1}^{K-1} \hat F_{i,k}^w\big(1 - \hat F_{i,k}^w\big)
+  $$
+Estimation and SE follow the same mean/variance recipe as above. The srps has a positive direction, i.e., "higher is better".
+
+```{r}
+srps_ws <- srps(
+  y = res_binomial$y, 
+  ypred = res_binomial$ypred,
+  size = 10
+)
+srps_loo <- srps(
+  y = res_binomial$y, 
+  ypred = res_binomial$ypred, 
+  size = 10,
+  log_weights = res_binomial$log_weights
+)
+srps_kfold <- srps(
+  y = res_binomial$y, 
+  ypred = res_binomial$ypred_kfold,
+  size = 10
+)
+  
+message("within-sample srps:")
+invisible(str(srps_ws))
+message("psis-loo srps:")
+invisible(str(srps_loo))
+message("kfold srps:")
+invisible(str(srps_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(srps_ws, srps_loo, srps_kfold, "srps_i")
+```
+
+## Mean absolute error (`mae`)
+MAE evaluates point predictions $\hat \mu_i$ against outcomes $y_i$ via 
+absolute loss. It has the same units as $y$ and has orientation "lower is better". 
+
+In our setup, we take `mupred` to be the vector of point predictions (one per 
+observation), obtained from posterior expectations (e.g., via `posterior_epred`)
+ or from PSIS-LOO expected predictions (via `E_loo`), as illustrated below.
+
+- Pointwise: 
+$$
+\text{mae}_i = \mid y_i − \hat \mu_i \mid
+$$ 
+- Point prediction:
+$$
+\hat \mu_i \approx
+\begin{cases}
+\frac{1}{S}\sum_{s=1}^S \mu_i^{(s)}, & \mu_i^{(s)} = \mathbb{E}[\tilde y_i \mid \theta^{(s)}] \quad \text{(within-sample)}\\[6pt]
+\sum_{s=1}^S \tilde w_i^{(s)}\,\mu_i^{(s)}, & \text{(PSIS-LOO held-out approximation)}
+\end{cases}
+$$
+- Estimate and Standard Error: 
+$$
+\widehat{\text{mae}} = \frac{1}{n} \sum_{i=1}^n \text{mae}_i, \quad 
+se(\widehat{\text{mae}}) = \sqrt{\frac{\widehat{\text{Var}}(\text{mae}_i)}{n}}
+$$
+
+```{r}
+mae_ws <- mae(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred
+)
+mae_loo <- mae(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred_loo
+)
+mae_kfold <- mae(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred_kfold
+)
+  
+message("within-sample mae:")
+invisible(str(mae_ws))
+message("psis-loo mae:")
+invisible(str(mae_loo))
+message("kfold mae:")
+invisible(str(mae_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(mae_ws, mae_loo, mae_kfold, "mae_i")
+```
+
+## (Root) mean squared error (`rmse`/`mse`)
+RMSE uses squared loss, has the same units as `y` and has a positive direction 
+("lower is better").
+RMSE evaluates point predictions $\hat\mu_i$ against observed outcomes $y_i$ 
+using squared loss and then takes the square root of the average. It has the 
+same units as $y$ and the direction "lower is better". In our setup, `mupred` 
+is the point prediction per observation, computed as the posterior predictive 
+mean (e.g., via `posterior_epred`) or its PSIS-LOO analogue via `E_loo`. RMSE 
+is the square root of the mean squared error (MSE).
+
+- Pointwise squared error: 
+  $$
+  \text{sqe}_i^{\text{raw}} = (y_i - \hat \mu_i)^2
+  $$
+- Point prediction:
+$$
+\hat \mu_i \approx
+\begin{cases}
+\frac{1}{S}\sum_{s=1}^S \mu_i^{(s)}, & \mu_i^{(s)} = \mathbb{E}[\tilde y_i \mid \theta^{(s)}] \quad \text{(within-sample)}\\[6pt]
+\sum_{s=1}^S \tilde w_i^{(s)}\,\mu_i^{(s)}, & \text{(PSIS-LOO held-out approximation)}
+\end{cases}
+$$
+- Estimates:
+$$
+\text{mse}^{\text{raw}} = \frac{1}{n}\sum_{i=1}^n \text{sqe}_i^{\text{raw}},\quad
+\text{rmse}^{\text{raw}} = \sqrt{\text{mse}^{\text{raw}}}
+$$
+- Standard Errors:
+  - For MSE:
+    $$
+    se(\text{mse}^{\text{raw}}) =
+    \sqrt{\frac{\widehat{\text{Var}}\big(\text{sqe}_i^{\text{raw}}\big)}{n}}
+    $$
+  - For RMSE (delta method):
+    $$
+    se(\text{rmse}^{\text{raw}}) \approx \frac{se(\text{mse}^{\text{raw}})}{2\,\text{rmse}^{\text{raw}}}
+    $$
+    If $\text{rmse} = 0$, set $se(\text{rmse}) = 0$.
+
+```{r}
+rmse_ws <- rmse(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred
+)
+rmse_loo <- rmse(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred_loo
+)
+rmse_kfold <- rmse(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred_kfold
+)
+
+message("within-sample rmse:")
+invisible(str(rmse_ws))
+message("psis-loo rmse:")
+invisible(str(rmse_loo))
+message("kfold rmse:")
+invisible(str(rmse_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(rmse_ws, rmse_loo, rmse_kfold, "squared-error_i", show_estimate = FALSE)
+```
+
+```{r}
+mse_ws <- mse(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred
+)
+mse_loo <- mse(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred_loo
+)
+mse_kfold <- mse(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred_kfold
+)
+  
+message("within-sample mse:")
+invisible(str(mse_ws))
+message("psis-loo mse:")
+invisible(str(mse_loo))
+message("kfold mse:")
+invisible(str(mse_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(mse_ws, mse_loo, mse_kfold, "squared-error_i")
+```
+
+## Coefficient of determination (`r2`)
+R2 summarizes how well point predictions $\hat\mu_i$ explain the variation in 
+observed outcomes $y_i$. We use the same point predictions, $\hat\mu_i$, as in 
+the mae/rmse sections (i.e., the posterior predictive mean per observation). We 
+define a predictive R2 compatible with cross-validation, not Bayesian-`R^2` 
+based on a model’s residual variance parameter.
+
+- Definition:
+  $$
+  R^2 = 1 - \frac{\sum_{i=1}^n (y_i - \hat y_i)^2}{\sum_{i=1}^n (y_i - \bar y)^2},\quad \bar y = \frac{1}{n}\sum_{i=1}^n y_i
+  $$
+  with $\hat y_i = \hat \mu_i$ formed as within-sample means, PSIS-LOO 
+  reweighted means, or K-fold held-out means.
+- Equivalently in mean form:
+  $$
+  \widehat{\text{mse}} = \frac{1}{n}\sum_i (y_i - \hat y_i)^2,\quad
+  \widehat{\text{mse}}(y) = \frac{1}{n}\sum_i (y_i - \bar y)^2,\quad
+  \widehat{R}^2 = 1 - \frac{\widehat{\text{mse}}}{\widehat{\text{mse}}(y)}
+  $$
+- Standard error (first‑order delta method):
+  $$
+  se(\widehat{R}^2) = \left[
+  se(\widehat{\text{mse}})^2
+  - 2\,\frac{\widehat{\text{mse}}}{\widehat{\text{mse}}(y)}\,\frac{\widehat{\text{Cov}}\big((y_i-\hat y_i)^2,\,(y_i-\bar y)^2\big)}{n}
+  + \frac{\widehat{\text{mse}}^2}{\widehat{\text{mse}}(y)^2}\,\frac{\widehat{\text{Var}}\big((y_i-\bar y)^2\big)}{n}
+  \right]^{1/2}\cdot \frac{1}{\widehat{\text{mse}}(y)}
+  $$
+  where $\widehat{\text{mse}} = \frac{1}{n}\sum_{i=1}^n \text{sqe}_i$ and $\widehat{\text{mse}}(y) = \frac{1}{n}\sum_{i=1}^n (y_i - \bar y)^2$.
+
+```{r}
+r2_ws <- r2(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred
+)
+r2_loo <- r2(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred_loo
+)
+r2_kfold <- r2(
+  y = res_sleep$y, 
+  mupred = res_sleep$mupred_kfold
+)
+
+message("within-sample r2:")
+invisible(str(r2_ws))
+message("psis-loo r2:")
+invisible(str(r2_loo))
+message("kfold r2:")
+invisible(str(r2_kfold))
+```
+
+```{r, fig.height=3, fig.width=7, fig.align="center"}
+plot_ptw(r2_ws, r2_loo, r2_kfold, "squared-error_i", show_estimate = FALSE)
+```
\ No newline at end of file