Last active
January 7, 2026 00:25
-
-
Save migurski/d5eb6cb498296d2116a1f2ef400336a4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from scipy.optimize import root_scalar | |
| from scipy.special import expit # inverse logit | |
def logit_shift_baseline(df, ndv_col, nrv_col, target=0.5):
    """Logit-shift two-party vote counts so the overall share hits ``target``.

    Every row's Democratic log-odds, ``log(ndv / nrv)``, is moved by the same
    constant; the constant is chosen so that the turnout-weighted mean
    Democratic share equals ``target``. Each row's total turnout
    (``ndv + nrv``) is left unchanged.

    Args:
        df: DataFrame holding the two vote columns. It is modified in place.
        ndv_col: Name of the Democratic vote column.
        nrv_col: Name of the Republican vote column.
        target: Desired turnout-weighted Democratic share of the two-party
            vote.

    Returns:
        The same ``df`` object with both vote columns overwritten by the
        shifted counts. If total turnout is zero there is nothing to shift
        and ``df`` is returned untouched.

    Raises:
        ValueError: Raised by ``root_scalar`` when no shift can reach
            ``target`` (for example a target outside the open interval
            (0, 1), or data in which one party received every vote).
    """
    ndv = np.asarray(df[ndv_col].values, dtype=float)
    nrv = np.asarray(df[nrv_col].values, dtype=float)
    turn = ndv + nrv

    # With no votes at all the weighted mean is undefined; leave the data as
    # is instead of letting np.average fail on zero weights.
    if turn.sum() == 0:
        return df

    # np.where evaluates both branches for every row, so log(0) is computed
    # even for rows that end up replaced by 0. Those warnings are expected.
    with np.errstate(divide="ignore", invalid="ignore"):
        ldvs = np.where(turn > 0, np.log(ndv) - np.log(nrv), 0.0)

    def objective(shift):
        return np.average(expit(ldvs + shift), weights=turn) - target

    # The objective is non-decreasing in `shift`. Start from [-1, 1] and
    # double whichever end has not yet crossed zero, so that large swings
    # are still bracketed. The cap keeps the loops finite when the target
    # cannot be reached; expit saturates long before |shift| = 1024.
    lower, upper = -1.0, 1.0
    max_abs_shift = 1024.0
    while objective(lower) > 0 and lower > -max_abs_shift:
        lower *= 2
    while objective(upper) < 0 and upper < max_abs_shift:
        upper *= 2

    shift = root_scalar(objective, bracket=[lower, upper]).root

    # Convert shifted log-odds back to counts; turnout per row is preserved.
    ndv_new = turn * expit(ldvs + shift)
    nrv_new = turn - ndv_new
    df[ndv_col] = ndv_new
    df[nrv_col] = nrv_new
    return df
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from scipy.optimize import root_scalar | |
| from scipy.special import expit # inverse logit (logistic function) | |
def logit_shift_baseline(dataframe, dem_votes_col, rep_votes_col, target_vote_share=0.5):
    """
    Adjust baseline election data to hit a target Democratic vote share using logit shift.

    The same constant is added to every row's Democratic log-odds,
    log(D / R), and that constant is solved for so the turnout-weighted
    average Democratic share equals the target. Each row keeps its original
    total turnout (D + R).

    Args:
        dataframe: DataFrame containing vote columns (modified in place)
        dem_votes_col: Column name for Democratic votes
        rep_votes_col: Column name for Republican votes
        target_vote_share: Target Democratic vote share (default 0.5 for 50%)

    Returns:
        The same DataFrame with adjusted vote columns. When total turnout is
        zero there is nothing to shift and the DataFrame is returned as is.

    Raises:
        ValueError: Raised by ``root_scalar`` when no shift can reach the
            target (for example a target outside the open interval (0, 1),
            or data in which one party received every vote).
    """
    dem_votes = np.asarray(dataframe[dem_votes_col].values, dtype=float)
    rep_votes = np.asarray(dataframe[rep_votes_col].values, dtype=float)
    total_turnout = dem_votes + rep_votes

    # A weighted average over all-zero weights is undefined; leave the data
    # untouched rather than failing inside np.average.
    if total_turnout.sum() == 0:
        return dataframe

    # Compute log-odds (logit) of Democratic vote share for each row:
    # log(D/R) = log(D) - log(R) = logit(D/(D+R)).
    # np.where evaluates both branches everywhere, so log(0) is still
    # computed for rows later replaced by 0; silence those expected warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        log_odds_dem = np.where(
            total_turnout > 0,
            np.log(dem_votes) - np.log(rep_votes),
            0.0,
        )

    def objective_function(shift_amount):
        # Shift the log-odds, convert back to shares with the inverse logit,
        # and compare the turnout-weighted mean share against the target.
        shifted_vote_shares = expit(log_odds_dem + shift_amount)
        weighted_avg_vote_share = np.average(shifted_vote_shares, weights=total_turnout)
        return weighted_avg_vote_share - target_vote_share

    # The objective is non-decreasing in the shift. Start from [-1, 1] and
    # double whichever end has not crossed zero yet so large swings are still
    # bracketed. The cap keeps the loops finite for unreachable targets;
    # expit saturates long before |shift| = 1024.
    lower_bound, upper_bound = -1.0, 1.0
    max_abs_shift = 1024.0
    while objective_function(lower_bound) > 0 and lower_bound > -max_abs_shift:
        lower_bound *= 2
    while objective_function(upper_bound) < 0 and upper_bound < max_abs_shift:
        upper_bound *= 2

    optimization_result = root_scalar(objective_function, bracket=[lower_bound, upper_bound])
    optimal_shift = optimization_result.root

    # Apply the optimal shift to get new vote counts; turnout is preserved.
    shifted_vote_shares = expit(log_odds_dem + optimal_shift)
    adjusted_dem_votes = total_turnout * shifted_vote_shares
    adjusted_rep_votes = total_turnout - adjusted_dem_votes

    # Update the dataframe
    dataframe[dem_votes_col] = adjusted_dem_votes
    dataframe[rep_votes_col] = adjusted_rep_votes
    return dataframe
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Logit Shift Baseline Data
#'
#' Adds one constant to every row's Democratic log-odds
#' (`log(ndv) - log(nrv)`) so that the turnout-weighted mean Democratic share
#' equals `target`. Each row's total turnout (`ndv + nrv`) is preserved.
#'
#' @param d_baseline baseline data containing vote columns
#' @param ndv Unquoted Democratic vote column name
#' @param nrv Unquoted Republican vote column name
#' @param target target turnout-weighted Democratic share to logit shift to
#' @param tol convergence tolerance passed to [stats::uniroot()]
#'
#' @returns a data frame with adjusted vote columns. If total turnout is zero,
#'   `d_baseline` is returned unchanged.
#' @export
#'
#' @examples
#' d <- data.frame(dem = c(900, 800), gop = c(100, 200))
#' logit_shift_baseline(d, dem, gop, target = 0.5)
logit_shift_baseline <- function(d_baseline, ndv, nrv,
                                 target = 0.5,
                                 tol = sqrt(.Machine$double.eps)) {
  if (missing(ndv) || missing(nrv)) {
    cli::cli_abort('Both {.arg ndv} and {.arg nrv} must be provided.')
  }

  ndv_q <- rlang::enquo(ndv)
  nrv_q <- rlang::enquo(nrv)

  ndv_vec <- dplyr::pull(d_baseline, !!ndv_q)
  nrv_vec <- dplyr::pull(d_baseline, !!nrv_q)
  turn <- ndv_vec + nrv_vec

  # With no votes at all the weighted mean is undefined; nothing to shift.
  if (sum(turn) == 0) {
    return(d_baseline)
  }

  # Rows without turnout get log-odds 0; their weight is 0 so the value
  # does not influence the solve.
  ldvs <- dplyr::if_else(turn > 0, log(ndv_vec) - log(nrv_vec), 0)

  # The objective is non-decreasing in `shift`, so let uniroot widen the
  # starting interval (extendInt = 'upX') instead of erroring when the
  # required shift lies outside [-1, 1].
  res <- stats::uniroot(
    function(shift) {
      stats::weighted.mean(stats::plogis(ldvs + shift), turn) - target
    },
    c(-1, 1),
    extendInt = 'upX',
    tol = tol
  )

  ldvs <- ldvs + res$root
  ndv_new <- turn * stats::plogis(ldvs)
  nrv_new <- turn - ndv_new

  dplyr::mutate(
    d_baseline,
    !!rlang::as_name(ndv_q) := ndv_new,
    !!rlang::as_name(nrv_q) := nrv_new
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment