Skip to content

Instantly share code, notes, and snippets.

@jrosell
Last active September 26, 2025 07:36
Show Gist options
  • Select an option

  • Save jrosell/1f69ea18762785958e947afe13480914 to your computer and use it in GitHub Desktop.

Select an option

Save jrosell/1f69ea18762785958e947afe13480914 to your computer and use it in GitHub Desktop.
Methods for computing Risk Ratio and Risk Difference.
stopifnot(requireNamespace("rlang"))
rlang::check_installed("pak")
pkgs <- rlang::chr(
"tidymodels",
"marginaleffects",
"parameters",
"modelbased",
"dplyr",
"stringr",
"tibble",
"tidyr",
"boot",
"epibasix",
"fixest",
"sandwich",
"emmeans"
)
pak::pak(pkgs)
libs <- ifelse(names(pkgs) == "", pkgs, names(pkgs))
libs <- if (length(libs) == 0) pkgs else libs
lapply(libs, library, quiet = TRUE, character.only = TRUE) |> invisible()
convert_dataframe_2x2 <- \(data, treatment_var, treatment_level, outcome_var) {
if (!treatment_var %in% names(data)) {
stop("Treatment variable not found in data")
}
if (!outcome_var %in% names(data)) {
stop("Outcome variable not found in data")
}
if (!treatment_level %in% data[[treatment_var]]) {
stop("Treatment level not found")
}
counts_matrix <- data |>
mutate(
treatment = ifelse(
.data[[treatment_var]] == treatment_level,
"Experimental",
"Control"
),
outcome = ifelse(.data[[outcome_var]] == 1, "Event", "NoEvent")
) |>
count(treatment, outcome) |>
pivot_wider(
names_from = outcome,
values_from = n
) |>
arrange(match(treatment, c("Experimental", "Control"))) |>
column_to_rownames("treatment") |>
as.matrix()
matrix(
as.numeric(counts_matrix),
nrow = nrow(counts_matrix),
dimnames = dimnames(counts_matrix)
)
}
compute_risks_raw <- function(
data,
treatment_var,
treatment_level,
outcome_var
) {
counts_matrix <- convert_dataframe_2x2(
data,
treatment_var = treatment_var,
treatment_level = treatment_level,
outcome_var = outcome_var
)
epi_measures <- epi2x2(counts_matrix)
epi_measures |>
unclass() |>
enframe() |>
filter(
name %in%
rlang::chr(
"OR",
"OR.CIL",
"OR.CIU",
"RR", # Cohort
"RR.CIL", # Cohort
"RR.CIU", # Cohort
"rdCo", # Cohort
"rdCo.CIL", # Cohort
"rdCo.CIU", # Cohort
# "rdCC", # Case-Control
# "rdCC.CIL", # Case-Control
# "rdCC.CIU" # Case-Control
)
) |>
unnest(value) |>
pivot_wider()
}
compute_risks_bootstraps <- function(
data,
treatment_var,
outcome_var,
treatment_level,
n_boot = 1000,
conf = 0.95
) {
boot_fn <- function(d, i) {
d_boot <- d[i, ]
computed <-
compute_risks_raw(
data = d_boot,
treatment_var = treatment_var,
treatment_level = treatment_level,
outcome_var = outcome_var
) |>
select(OR, RR, rdCo)
res <- as.double(computed)
names(res) <- names(computed)
res
}
set.seed(123)
boot_res <- boot::boot(data, boot_fn, R = n_boot)
alpha <- (1 - conf) / 2
RR <- mean(boot_res$t[, 2])
rdCo <- mean(boot_res$t[, 3])
RR_CI <- quantile(boot_res$t[, 2], probs = c(alpha, 1 - alpha))
rdCo_CI <- quantile(boot_res$t[, 3], probs = c(alpha, 1 - alpha))
tibble(
Method = "bootstraps",
Measure = c("Risk Ratio", "Risk Difference"),
Estimate = c(RR, rdCo),
CI_low = c(RR_CI[1], rdCo_CI[1]),
CI_high = c(RR_CI[2], rdCo_CI[2])
)
}
compute_risks_model_marginal <- function(
model,
treatment_var
) {
# Risk ratio
RR_me <- marginaleffects::avg_comparisons(
model,
variables = treatment_var,
type = "response",
newdata = datagrid(model),
comparison = "ratio"
)
# Risk Difference
RD_me <- marginaleffects::avg_comparisons(
model,
variables = treatment_var,
type = "response",
newdata = datagrid(model),
comparison = "difference"
)
tibble(
Method = "marginal",
Measure = c("Risk Ratio", "Risk Difference"),
Estimate = c(RR_me$estimate, RD_me$estimate),
CI_low = c(RR_me$conf.low, RD_me$conf.low),
CI_high = c(RR_me$conf.high, RD_me$conf.high)
)
}
compute_risks_model <- function(
model,
treatment_var,
method = c("marginal", "adjusted", "mechanistic", "cate"),
newdata = NULL
) {
method <- match.arg(method)
if (method == "marginal") {
return(
compute_risks_model_marginal(model, treatment_var = treatment_var)
)
} else if (method == "adjusted") {
newdata <- model$model
} else if (method == "mechanistic") {
if (is.null(newdata)) stop("Must supply newdata for mechanistic mode")
}
if (method == "cate") {
# Risk Ratio
RR_me <- marginaleffects::comparisons(
model,
variables = treatment_var,
type = "response",
newdata = NULL,
comparison = "ratio"
)
# Risk Difference
RD_me <- marginaleffects::comparisons(
model,
variables = treatment_var,
type = "response",
newdata = NULL,
comparison = "difference"
)
return(tibble::tibble(
Method = method,
Measure = c("Risk Ratio", "Risk Difference"),
Estimate = c(
mean(RR_me$estimate),
mean(RD_me$estimate)
),
CI_low = c(
mean(RR_me$conf.low),
mean(RD_me$conf.low)
),
CI_high = c(
mean(RR_me$conf.high),
mean(RD_me$conf.high)
)
))
}
# Risk Ratio
RR_me <- marginaleffects::avg_comparisons(
model,
variables = treatment_var,
type = "response",
newdata = newdata,
comparison = "ratio"
)
# Risk Difference
RD_me <- marginaleffects::avg_comparisons(
model,
variables = treatment_var,
type = "response",
newdata = newdata,
comparison = "difference"
)
tibble::tibble(
Method = method,
Measure = c("Risk Ratio", "Risk Difference"),
Estimate = c(RR_me$estimate, RD_me$estimate),
CI_low = c(RR_me$conf.low, RD_me$conf.low),
CI_high = c(RR_me$conf.high, RD_me$conf.high)
)
}
## Run
set.seed(123)
n <- 1000
data <-
tibble(
treatment = sample(c("yes", "no"), n, replace = TRUE),
covariate = rnorm(n),
age = round(rnorm(n, mean = 60, sd = 4)),
hospital = sample(c("A", "B", "C"), n, replace = TRUE),
visits = rpois(n, lambda = 5),
outcome = rbinom(
n,
size = 1,
prob = plogis(-1 + 0.5 * (treatment == "yes") + 1.5 * covariate) # covariate has strong effect
)
) |>
glimpse()
treatment_var = "treatment"
outcome_var = "outcome"
treatment_level = "yes"
convert_dataframe_2x2(
data,
treatment_var = treatment_var,
treatment_level = treatment_level,
outcome_var = outcome_var
)
compute_risks_bootstraps(
data = data,
treatment_var = treatment_var,
treatment_level = treatment_level,
outcome_var = outcome_var,
n_boot = 100
)
one_level_mod <- glm(
outcome ~ treatment,
data = data,
family = binomial
)
compute_risks_model(
model = one_level_mod,
treatment_var = treatment_var,
method = "marginal"
)
compute_risks_model(
model = one_level_mod,
treatment_var = treatment_var,
method = "adjusted"
)
compute_risks_model(
model = one_level_mod,
treatment_var = treatment_var,
method = "cate"
)
covariates_mod <- glm(
outcome ~ treatment + covariate,
data = data,
family = binomial
)
compute_risks_model(
model = covariates_mod,
treatment_var = treatment_var,
method = "marginal"
)
compute_risks_model(
model = covariates_mod,
treatment_var = treatment_var,
method = "adjusted"
)
compute_risks_model(
model = covariates_mod,
treatment_var = treatment_var,
method = "cate"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment