# carbocation/auroc_from_r2.R

## auroc_from_r2.R
#' Compute expected AUROC from R^2 under the liability-threshold model
#'
#' Closed-form binormal approximation to the AUROC of a predictor score `S`,
#' given the proportion of variance it explains on the liability scale (`r2`)
#' and the disease prevalence `p`. It attempts to follow the
#' liability-threshold/binormal ROC derivation of Wray et al. (2010), *The
#' Genetic Interpretation of Area under the ROC Curve in Genomic Profiling*
#' (PLoS Genetics 6(2): e1000864).
#'
#' In their notation, h2_L (heritability on the liability scale) plays the same
#' mathematical role as the input R^2 here: the variance explained in the latent
#' liability `Y` by the predictor score `S`. Assumptions:
#'   - `Y ~ N(0, 1)` is a latent liability with threshold `t` chosen so that
#'     `P(Y > t) = p` (prevalence),
#'   - `S` has unit variance and `Corr(S, Y) = sqrt(r2)`,
#'   - `(S, Y)` are jointly normal.
#'
#' The AUROC is then written as
#'   `AUROC = Phi((mu_pos - mu_neg) / sqrt(varS_pos + varS_neg))`,
#' where `Phi` is the standard normal CDF and `mu_pos`, `mu_neg`, `varS_pos`,
#' `varS_neg` are the class-conditional means and variances of `S` under
#' truncation of `Y` at `t`, computed via standard truncated-normal formulas.
#' This should be equivalent to Equation (3) in Wray et al. (2010) after
#' substituting `r2` for h2_L.
#'
#' The function is vectorised over `r2` and `p` (usual R recycling rules).
#'
#' @param r2 Proportion of variance explained on the liability scale
#'   (`0 <= r2 < 1`).
#' @param p Prevalence of the positive class (`0 < p < 1`).
#' @return Expected AUROC under the liability-threshold model.
#' @examples
#' auroc_from_r2_p(0.1, 0.05)
#' # Compare with Wray et al. (2010), Eq. 3
auroc_from_r2_p <- function(r2, p) {
  stopifnot(r2 >= 0, r2 < 1, p > 0, p < 1)
  rho <- sqrt(r2)

  # Liability threshold with P(Y > threshold) = p. Asking for the upper-tail
  # quantile directly avoids forming 1 - p, which rounds to 1 for very small p
  # and would give an infinite threshold (and a NaN result via Inf * 0).
  threshold <- qnorm(p, lower.tail = FALSE)
  dens <- dnorm(threshold)
  prob_neg <- pnorm(threshold)         # P(Y <= threshold)

  # Mean liability in each class (inverse Mills ratios)
  mean_y_pos <- dens / p               # E[Y | Y > threshold]
  mean_y_neg <- -dens / prob_neg       # E[Y | Y <= threshold]

  # Class-conditional means of S, using E[S | Y] = rho * Y
  mu_pos <- rho * mean_y_pos
  mu_neg <- rho * mean_y_neg

  # Truncated-normal variances of Y in each class
  var_y_pos <- 1 + threshold * mean_y_pos - mean_y_pos^2
  var_y_neg <- 1 + threshold * mean_y_neg - mean_y_neg^2

  # Class-conditional variances of S: residual variance (1 - r2) plus the
  # part transmitted from Y, r2 * Var(Y | class)
  var_s_pos <- (1 - r2) + r2 * var_y_pos
  var_s_neg <- (1 - r2) + r2 * var_y_neg

  # AUROC = P(S_pos > S_neg) for independent draws, normal approximation
  z <- (mu_pos - mu_neg) / sqrt(var_s_pos + var_s_neg)
  pnorm(z)
}
	# NOTE(review): this file defines auroc_from_r2_p twice; when the file is
	# sourced, this later definition is the one that takes effect.
	#' Compute expected AUROC from R^2 under the liability-threshold model
	#'
	#' Closed-form binormal approximation to the AUROC of a predictor score `S`,
	#' given the proportion of variance it explains on the liability scale (`r2`)
	#' and the disease prevalence `p`. It attempts to follow the
	#' liability-threshold/binormal ROC derivation of Wray et al. (2010), *The
	#' Genetic Interpretation of Area under the ROC Curve in Genomic Profiling*
	#' (PLoS Genetics 6(2): e1000864).
	#'
	#' In their notation, h2_L (heritability on the liability scale) plays the same
	#' mathematical role as the input R^2 here: the variance explained in the latent
	#' liability `Y` by the predictor score `S`. Assumptions:
	#'   - `Y ~ N(0, 1)` is a latent liability with threshold `t` chosen so that
	#'     `P(Y > t) = p` (prevalence),
	#'   - `S` has unit variance and `Corr(S, Y) = sqrt(r2)`,
	#'   - `(S, Y)` are jointly normal.
	#'
	#' The AUROC is then written as
	#'   `AUROC = Phi((mu_pos - mu_neg) / sqrt(varS_pos + varS_neg))`,
	#' where `Phi` is the standard normal CDF and `mu_pos`, `mu_neg`, `varS_pos`,
	#' `varS_neg` are the class-conditional means and variances of `S` under
	#' truncation of `Y` at `t`, computed via standard truncated-normal formulas.
	#' This should be equivalent to Equation (3) in Wray et al. (2010) after
	#' substituting `r2` for h2_L.
	#'
	#' The function is vectorised over `r2` and `p` (usual R recycling rules).
	#'
	#' @param r2 Proportion of variance explained on the liability scale
	#'   (`0 <= r2 < 1`).
	#' @param p Prevalence of the positive class (`0 < p < 1`).
	#' @return Expected AUROC under the liability-threshold model.
	#' @examples
	#' auroc_from_r2_p(0.1, 0.05)
	#' # Compare with Wray et al. (2010), Eq. 3
	auroc_from_r2_p <- function(r2, p) {
	  stopifnot(r2 >= 0, r2 < 1, p > 0, p < 1)
	  rho <- sqrt(r2)

	  # Liability threshold with P(Y > threshold) = p. Asking for the upper-tail
	  # quantile directly avoids forming 1 - p, which rounds to 1 for very small p
	  # and would give an infinite threshold (and a NaN result via Inf * 0).
	  threshold <- qnorm(p, lower.tail = FALSE)
	  dens <- dnorm(threshold)
	  prob_neg <- pnorm(threshold)         # P(Y <= threshold)

	  # Mean liability in each class (inverse Mills ratios)
	  mean_y_pos <- dens / p               # E[Y | Y > threshold]
	  mean_y_neg <- -dens / prob_neg       # E[Y | Y <= threshold]

	  # Class-conditional means of S, using E[S | Y] = rho * Y
	  mu_pos <- rho * mean_y_pos
	  mu_neg <- rho * mean_y_neg

	  # Truncated-normal variances of Y in each class
	  var_y_pos <- 1 + threshold * mean_y_pos - mean_y_pos^2
	  var_y_neg <- 1 + threshold * mean_y_neg - mean_y_neg^2

	  # Class-conditional variances of S: residual variance (1 - r2) plus the
	  # part transmitted from Y, r2 * Var(Y | class)
	  var_s_pos <- (1 - r2) + r2 * var_y_pos
	  var_s_neg <- (1 - r2) + r2 * var_y_neg

	  # AUROC = P(S_pos > S_neg) for independent draws, normal approximation
	  z <- (mu_pos - mu_neg) / sqrt(var_s_pos + var_s_neg)
	  pnorm(z)
	}
# No results found