Created
February 25, 2026 19:49
-
-
Save MichelNivard/f494813fe21a6cce33d1bb7e2eb55589 to your computer and use it in GitHub Desktop.
Fast block jackknife for OLS (based on LD score regression)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Fast block jackknife for linear regression (the LDSC trick): | |
| # compute block-wise XtX and Xty once, then get all 200 leave-one-block-out fits | |
| # by subtracting 1 block at a time and re-solving. | |
| block_jackknife_lm <- function(X, y, n_blocks = 200L, separators = NULL) { | |
| X <- as.matrix(X) | |
| y <- matrix(as.numeric(y), ncol = 1) | |
| n <- nrow(X); p <- ncol(X) | |
| stopifnot(nrow(y) == n, ncol(y) == 1, p <= n) | |
| if (is.null(separators)) { | |
| separators <- floor(seq(0, n, length.out = n_blocks + 1L)) | |
| separators[length(separators)] <- n | |
| } | |
| n_blocks <- length(separators) - 1L | |
| stopifnot(n_blocks >= 2L, separators[1] == 0L, separators[length(separators)] == n) | |
| # 1) block values: XtY (p-vector) and XtX (p x p) per block | |
| XtY_blk <- matrix(0, n_blocks, p) | |
| XtX_blk <- array(0, dim = c(n_blocks, p, p)) | |
| for (b in seq_len(n_blocks)) { | |
| i0 <- separators[b] + 1L | |
| i1 <- separators[b + 1L] | |
| Xb <- X[i0:i1, , drop = FALSE] | |
| yb <- y[i0:i1, , drop = FALSE] | |
| XtY_blk[b, ] <- drop(t(Xb) %*% yb) | |
| XtX_blk[b, , ] <- t(Xb) %*% Xb | |
| } | |
| # 2) whole-data estimate | |
| XtY_tot <- colSums(XtY_blk) | |
| XtX_tot <- apply(XtX_blk, c(2, 3), sum) | |
| est <- solve(XtX_tot, XtY_tot) # p-vector | |
| # 3) delete-1-block estimates (leave-one-block-out) | |
| delete_vals <- matrix(0, n_blocks, p) | |
| for (b in seq_len(n_blocks)) { | |
| XtY_del <- XtY_tot - XtY_blk[b, ] | |
| XtX_del <- XtX_tot - XtX_blk[b, , ] | |
| delete_vals[b, ] <- solve(XtX_del, XtY_del) | |
| } | |
| # 4) pseudovalues -> jackknife SE | |
| pseudo <- n_blocks * matrix(est, n_blocks, p, byrow = TRUE) - (n_blocks - 1L) * delete_vals | |
| cov_jk <- stats::cov(pseudo) / n_blocks | |
| se_jk <- sqrt(diag(cov_jk)) | |
| list( | |
| est = est, | |
| se = se_jk, | |
| cov = cov_jk, | |
| delete_values = delete_vals, | |
| separators = separators | |
| ) | |
| } | |
| # tiny demo | |
| set.seed(1) | |
| n <- 20000; p <- 3 | |
| X <- cbind(1, matrix(rnorm(n * (p - 1)), n, p - 1)) | |
| beta_true <- c(0.5, -0.2, 0.1) | |
| y <- drop(X %*% beta_true + rnorm(n)) | |
| fit <- block_jackknife_lm(X, y, n_blocks = 200) | |
| cbind(beta_hat = fit$est, jk_se = fit$se) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment