jrosell/compute-risk-ratio-risk-difference.R

## compute-risk-ratio-risk-difference.R
stopifnot(requireNamespace("rlang"))
rlang::check_installed("pak")
pkgs <- rlang::chr(
  "tidymodels",
  "marginaleffects",
  "parameters",
  "modelbased",
  "dplyr",
  "stringr",
  "tibble",
  "tidyr",
  "boot",
  "epibasix",
  "fixest",
  "sandwich",
  "emmeans"
)
pak::pak(pkgs)
libs <- ifelse(names(pkgs) == "", pkgs, names(pkgs))
libs <- if (length(libs) == 0) pkgs else libs
lapply(libs, library, quiet = TRUE, character.only = TRUE) |> invisible()

convert_dataframe_2x2 <- \(data, treatment_var, treatment_level, outcome_var) {
  if (!treatment_var %in% names(data)) {
    stop("Treatment variable not found in data")
  }
  if (!outcome_var %in% names(data)) {
    stop("Outcome variable not found in data")
  }
  if (!treatment_level %in% data[[treatment_var]]) {
    stop("Treatment level not found")
  }
  counts_matrix <- data |>
    mutate(
      treatment = ifelse(
        .data[[treatment_var]] == treatment_level,
        "Experimental",
        "Control"
      ),
      outcome = ifelse(.data[[outcome_var]] == 1, "Event", "NoEvent")
    ) |>
    count(treatment, outcome) |>
    pivot_wider(
      names_from = outcome,
      values_from = n
    ) |>
    arrange(match(treatment, c("Experimental", "Control"))) |>
    column_to_rownames("treatment") |>
    as.matrix()

  matrix(
    as.numeric(counts_matrix),
    nrow = nrow(counts_matrix),
    dimnames = dimnames(counts_matrix)
  )
}


compute_risks_raw <- function(
  data,
  treatment_var,
  treatment_level,
  outcome_var
) {
  counts_matrix <- convert_dataframe_2x2(
    data,
    treatment_var = treatment_var,
    treatment_level = treatment_level,
    outcome_var = outcome_var
  )
  epi_measures <- epi2x2(counts_matrix)
  epi_measures |>
    unclass() |>
    enframe() |>
    filter(
      name %in%
        rlang::chr(
          "OR",
          "OR.CIL",
          "OR.CIU",
          "RR", # Cohort
          "RR.CIL", # Cohort
          "RR.CIU", # Cohort
          "rdCo", # Cohort
          "rdCo.CIL", # Cohort
          "rdCo.CIU", # Cohort
          # "rdCC", # Case-Control
          # "rdCC.CIL", # Case-Control
          # "rdCC.CIU" # Case-Control
        )
    ) |>
    unnest(value) |>
    pivot_wider()
}


compute_risks_bootstraps <- function(
  data,
  treatment_var,
  outcome_var,
  treatment_level,
  n_boot = 1000,
  conf = 0.95
) {
  boot_fn <- function(d, i) {
    d_boot <- d[i, ]
    computed <-
      compute_risks_raw(
        data = d_boot,
        treatment_var = treatment_var,
        treatment_level = treatment_level,
        outcome_var = outcome_var
      ) |>
      select(OR, RR, rdCo)
    res <- as.double(computed)
    names(res) <- names(computed)
    res
  }
  set.seed(123)
  boot_res <- boot::boot(data, boot_fn, R = n_boot)
  alpha <- (1 - conf) / 2
  RR <- mean(boot_res$t[, 2])
  rdCo <- mean(boot_res$t[, 3])
  RR_CI <- quantile(boot_res$t[, 2], probs = c(alpha, 1 - alpha))
  rdCo_CI <- quantile(boot_res$t[, 3], probs = c(alpha, 1 - alpha))

  tibble(
    Method = "bootstraps",
    Measure = c("Risk Ratio", "Risk Difference"),
    Estimate = c(RR, rdCo),
    CI_low = c(RR_CI[1], rdCo_CI[1]),
    CI_high = c(RR_CI[2], rdCo_CI[2])
  )
}


compute_risks_model_marginal <- function(
  model,
  treatment_var
) {
  # Risk ratio
  RR_me <- marginaleffects::avg_comparisons(
    model,
    variables = treatment_var,
    type = "response",
    newdata = datagrid(model),
    comparison = "ratio"
  )
  # Risk Difference
  RD_me <- marginaleffects::avg_comparisons(
    model,
    variables = treatment_var,
    type = "response",
    newdata = datagrid(model),
    comparison = "difference"
  )
  tibble(
    Method = "marginal",
    Measure = c("Risk Ratio", "Risk Difference"),
    Estimate = c(RR_me$estimate, RD_me$estimate),
    CI_low = c(RR_me$conf.low, RD_me$conf.low),
    CI_high = c(RR_me$conf.high, RD_me$conf.high)
  )
}


compute_risks_model <- function(
  model,
  treatment_var,
  method = c("marginal", "adjusted", "mechanistic", "cate"),
  newdata = NULL
) {
  method <- match.arg(method)
  if (method == "marginal") {
    return(
      compute_risks_model_marginal(model, treatment_var = treatment_var)
    )
  } else if (method == "adjusted") {
    newdata <- model$model
  } else if (method == "mechanistic") {
    if (is.null(newdata)) stop("Must supply newdata for mechanistic mode")
  }
  if (method == "cate") {
    # Risk Ratio
    RR_me <- marginaleffects::comparisons(
      model,
      variables = treatment_var,
      type = "response",
      newdata = NULL,
      comparison = "ratio"
    )

    # Risk Difference
    RD_me <- marginaleffects::comparisons(
      model,
      variables = treatment_var,
      type = "response",
      newdata = NULL,
      comparison = "difference"
    )

    return(tibble::tibble(
      Method = method,
      Measure = c("Risk Ratio", "Risk Difference"),
      Estimate = c(
        mean(RR_me$estimate),
        mean(RD_me$estimate)
      ),
      CI_low = c(
        mean(RR_me$conf.low),
        mean(RD_me$conf.low)
      ),
      CI_high = c(
        mean(RR_me$conf.high),
        mean(RD_me$conf.high)
      )
    ))
  }
  # Risk Ratio
  RR_me <- marginaleffects::avg_comparisons(
    model,
    variables = treatment_var,
    type = "response",
    newdata = newdata,
    comparison = "ratio"
  )

  # Risk Difference
  RD_me <- marginaleffects::avg_comparisons(
    model,
    variables = treatment_var,
    type = "response",
    newdata = newdata,
    comparison = "difference"
  )

  tibble::tibble(
    Method = method,
    Measure = c("Risk Ratio", "Risk Difference"),
    Estimate = c(RR_me$estimate, RD_me$estimate),
    CI_low = c(RR_me$conf.low, RD_me$conf.low),
    CI_high = c(RR_me$conf.high, RD_me$conf.high)
  )
}


## Run

set.seed(123)
n <- 1000
data <-
  tibble(
    treatment = sample(c("yes", "no"), n, replace = TRUE),
    covariate = rnorm(n),
    age = round(rnorm(n, mean = 60, sd = 4)),
    hospital = sample(c("A", "B", "C"), n, replace = TRUE),
    visits = rpois(n, lambda = 5),
    outcome = rbinom(
      n,
      size = 1,
      prob = plogis(-1 + 0.5 * (treatment == "yes") + 1.5 * covariate) # covariate has strong effect
    )
  ) |>
  glimpse()


treatment_var = "treatment"
outcome_var = "outcome"
treatment_level = "yes"


convert_dataframe_2x2(
  data,
  treatment_var = treatment_var,
  treatment_level = treatment_level,
  outcome_var = outcome_var
)


compute_risks_bootstraps(
  data = data,
  treatment_var = treatment_var,
  treatment_level = treatment_level,
  outcome_var = outcome_var,
  n_boot = 100
)

one_level_mod <- glm(
  outcome ~ treatment,
  data = data,
  family = binomial
)

compute_risks_model(
  model = one_level_mod,
  treatment_var = treatment_var,
  method = "marginal"
)


compute_risks_model(
  model = one_level_mod,
  treatment_var = treatment_var,
  method = "adjusted"
)

compute_risks_model(
  model = one_level_mod,
  treatment_var = treatment_var,
  method = "cate"
)


covariates_mod <- glm(
  outcome ~ treatment + covariate,
  data = data,
  family = binomial
)

compute_risks_model(
  model = covariates_mod,
  treatment_var = treatment_var,
  method = "marginal"
)

compute_risks_model(
  model = covariates_mod,
  treatment_var = treatment_var,
  method = "adjusted"
)

compute_risks_model(
  model = covariates_mod,
  treatment_var = treatment_var,
  method = "cate"
)
	stopifnot(requireNamespace("rlang"))
	rlang::check_installed("pak")
	pkgs <- rlang::chr(
	"tidymodels",
	"marginaleffects",
	"parameters",
	"modelbased",
	"dplyr",
	"stringr",
	"tibble",
	"tidyr",
	"boot",
	"epibasix",
	"fixest",
	"sandwich",
	"emmeans"
	)
	pak::pak(pkgs)
	libs <- ifelse(names(pkgs) == "", pkgs, names(pkgs))
	libs <- if (length(libs) == 0) pkgs else libs
	lapply(libs, library, quiet = TRUE, character.only = TRUE) \|> invisible()

	convert_dataframe_2x2 <- \(data, treatment_var, treatment_level, outcome_var) {
	if (!treatment_var %in% names(data)) {
	stop("Treatment variable not found in data")
	}
	if (!outcome_var %in% names(data)) {
	stop("Outcome variable not found in data")
	}
	if (!treatment_level %in% data[[treatment_var]]) {
	stop("Treatment level not found")
	}
	counts_matrix <- data \|>
	mutate(
	treatment = ifelse(
	.data[[treatment_var]] == treatment_level,
	"Experimental",
	"Control"
	),
	outcome = ifelse(.data[[outcome_var]] == 1, "Event", "NoEvent")
	) \|>
	count(treatment, outcome) \|>
	pivot_wider(
	names_from = outcome,
	values_from = n
	) \|>
	arrange(match(treatment, c("Experimental", "Control"))) \|>
	column_to_rownames("treatment") \|>
	as.matrix()

	matrix(
	as.numeric(counts_matrix),
	nrow = nrow(counts_matrix),
	dimnames = dimnames(counts_matrix)
	)
	}


	compute_risks_raw <- function(
	data,
	treatment_var,
	treatment_level,
	outcome_var
	) {
	counts_matrix <- convert_dataframe_2x2(
	data,
	treatment_var = treatment_var,
	treatment_level = treatment_level,
	outcome_var = outcome_var
	)
	epi_measures <- epi2x2(counts_matrix)
	epi_measures \|>
	unclass() \|>
	enframe() \|>
	filter(
	name %in%
	rlang::chr(
	"OR",
	"OR.CIL",
	"OR.CIU",
	"RR", # Cohort
	"RR.CIL", # Cohort
	"RR.CIU", # Cohort
	"rdCo", # Cohort
	"rdCo.CIL", # Cohort
	"rdCo.CIU", # Cohort
	# "rdCC", # Case-Control
	# "rdCC.CIL", # Case-Control
	# "rdCC.CIU" # Case-Control
	)
	) \|>
	unnest(value) \|>
	pivot_wider()
	}


	compute_risks_bootstraps <- function(
	data,
	treatment_var,
	outcome_var,
	treatment_level,
	n_boot = 1000,
	conf = 0.95
	) {
	boot_fn <- function(d, i) {
	d_boot <- d[i, ]
	computed <-
	compute_risks_raw(
	data = d_boot,
	treatment_var = treatment_var,
	treatment_level = treatment_level,
	outcome_var = outcome_var
	) \|>
	select(OR, RR, rdCo)
	res <- as.double(computed)
	names(res) <- names(computed)
	res
	}
	set.seed(123)
	boot_res <- boot::boot(data, boot_fn, R = n_boot)
	alpha <- (1 - conf) / 2
	RR <- mean(boot_res$t[, 2])
	rdCo <- mean(boot_res$t[, 3])
	RR_CI <- quantile(boot_res$t[, 2], probs = c(alpha, 1 - alpha))
	rdCo_CI <- quantile(boot_res$t[, 3], probs = c(alpha, 1 - alpha))

	tibble(
	Method = "bootstraps",
	Measure = c("Risk Ratio", "Risk Difference"),
	Estimate = c(RR, rdCo),
	CI_low = c(RR_CI[1], rdCo_CI[1]),
	CI_high = c(RR_CI[2], rdCo_CI[2])
	)
	}


	compute_risks_model_marginal <- function(
	model,
	treatment_var
	) {
	# Risk ratio
	RR_me <- marginaleffects::avg_comparisons(
	model,
	variables = treatment_var,
	type = "response",
	newdata = datagrid(model),
	comparison = "ratio"
	)
	# Risk Difference
	RD_me <- marginaleffects::avg_comparisons(
	model,
	variables = treatment_var,
	type = "response",
	newdata = datagrid(model),
	comparison = "difference"
	)
	tibble(
	Method = "marginal",
	Measure = c("Risk Ratio", "Risk Difference"),
	Estimate = c(RR_me$estimate, RD_me$estimate),
	CI_low = c(RR_me$conf.low, RD_me$conf.low),
	CI_high = c(RR_me$conf.high, RD_me$conf.high)
	)
	}


	compute_risks_model <- function(
	model,
	treatment_var,
	method = c("marginal", "adjusted", "mechanistic", "cate"),
	newdata = NULL
	) {
	method <- match.arg(method)
	if (method == "marginal") {
	return(
	compute_risks_model_marginal(model, treatment_var = treatment_var)
	)
	} else if (method == "adjusted") {
	newdata <- model$model
	} else if (method == "mechanistic") {
	if (is.null(newdata)) stop("Must supply newdata for mechanistic mode")
	}
	if (method == "cate") {
	# Risk Ratio
	RR_me <- marginaleffects::comparisons(
	model,
	variables = treatment_var,
	type = "response",
	newdata = NULL,
	comparison = "ratio"
	)

	# Risk Difference
	RD_me <- marginaleffects::comparisons(
	model,
	variables = treatment_var,
	type = "response",
	newdata = NULL,
	comparison = "difference"
	)

	return(tibble::tibble(
	Method = method,
	Measure = c("Risk Ratio", "Risk Difference"),
	Estimate = c(
	mean(RR_me$estimate),
	mean(RD_me$estimate)
	),
	CI_low = c(
	mean(RR_me$conf.low),
	mean(RD_me$conf.low)
	),
	CI_high = c(
	mean(RR_me$conf.high),
	mean(RD_me$conf.high)
	)
	))
	}
	# Risk Ratio
	RR_me <- marginaleffects::avg_comparisons(
	model,
	variables = treatment_var,
	type = "response",
	newdata = newdata,
	comparison = "ratio"
	)

	# Risk Difference
	RD_me <- marginaleffects::avg_comparisons(
	model,
	variables = treatment_var,
	type = "response",
	newdata = newdata,
	comparison = "difference"
	)

	tibble::tibble(
	Method = method,
	Measure = c("Risk Ratio", "Risk Difference"),
	Estimate = c(RR_me$estimate, RD_me$estimate),
	CI_low = c(RR_me$conf.low, RD_me$conf.low),
	CI_high = c(RR_me$conf.high, RD_me$conf.high)
	)
	}


	## Run

	set.seed(123)
	n <- 1000
	data <-
	tibble(
	treatment = sample(c("yes", "no"), n, replace = TRUE),
	covariate = rnorm(n),
	age = round(rnorm(n, mean = 60, sd = 4)),
	hospital = sample(c("A", "B", "C"), n, replace = TRUE),
	visits = rpois(n, lambda = 5),
	outcome = rbinom(
	n,
	size = 1,
	prob = plogis(-1 + 0.5 * (treatment == "yes") + 1.5 * covariate) # covariate has strong effect
	)
	) \|>
	glimpse()


	treatment_var = "treatment"
	outcome_var = "outcome"
	treatment_level = "yes"


	convert_dataframe_2x2(
	data,
	treatment_var = treatment_var,
	treatment_level = treatment_level,
	outcome_var = outcome_var
	)


	compute_risks_bootstraps(
	data = data,
	treatment_var = treatment_var,
	treatment_level = treatment_level,
	outcome_var = outcome_var,
	n_boot = 100
	)

	one_level_mod <- glm(
	outcome ~ treatment,
	data = data,
	family = binomial
	)

	compute_risks_model(
	model = one_level_mod,
	treatment_var = treatment_var,
	method = "marginal"
	)


	compute_risks_model(
	model = one_level_mod,
	treatment_var = treatment_var,
	method = "adjusted"
	)

	compute_risks_model(
	model = one_level_mod,
	treatment_var = treatment_var,
	method = "cate"
	)


	covariates_mod <- glm(
	outcome ~ treatment + covariate,
	data = data,
	family = binomial
	)

	compute_risks_model(
	model = covariates_mod,
	treatment_var = treatment_var,
	method = "marginal"
	)

	compute_risks_model(
	model = covariates_mod,
	treatment_var = treatment_var,
	method = "adjusted"
	)

	compute_risks_model(
	model = covariates_mod,
	treatment_var = treatment_var,
	method = "cate"
	)
No results found