## migurski/logit-shift-terse.py

## logit-shift-terse.py
import numpy as np
from scipy.optimize import root_scalar
from scipy.special import expit  # inverse logit

def logit_shift_baseline(df, ndv_col, nrv_col, target=0.5):
    """Logit-shift two-party vote counts so the overall share hits ``target``.

    One constant is added to every row's log-odds of ``ndv`` versus ``nrv``,
    chosen so that the turnout-weighted mean share of ``ndv`` equals
    ``target``. Each row's total (``ndv + nrv``) is preserved.

    Args:
        df: DataFrame holding the two vote columns. It is modified in place.
        ndv_col: Name of the Democratic vote column.
        nrv_col: Name of the Republican vote column.
        target: Desired overall Democratic share of the two-party vote.

    Returns:
        The same ``df`` object with both vote columns overwritten by the
        shifted values. If total turnout is zero, ``df`` is returned
        untouched because there is nothing to shift.

    Raises:
        ValueError: If no shift within [-1024, 1024] brackets the target,
            e.g. when every row with turnout has zero votes in one column
            (such rows cannot be moved by any shift).
    """
    ndv = df[ndv_col].values
    nrv = df[nrv_col].values
    turn = ndv + nrv

    # With no votes at all the weighted average below is undefined
    # (np.average raises ZeroDivisionError), so leave the data as is.
    if turn.sum() == 0:
        return df

    # Compute log-odds where turnout > 0. np.where evaluates both branches,
    # so log(0) is hit for empty or one-sided rows; the resulting -inf/+inf/nan
    # values are intended (or masked), so silence the warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        ldvs = np.where(turn > 0, np.log(ndv) - np.log(nrv), 0)

    # Find shift
    def objective(shift):
        return np.average(expit(ldvs + shift), weights=turn) - target

    # Start from the original [-1, 1] bracket and double it until the
    # objective changes sign; a fixed bracket fails for lopsided baselines.
    lo, hi = -1.0, 1.0
    while objective(lo) * objective(hi) > 0:
        if hi >= 1024:
            raise ValueError(
                "target share is not reachable by any logit shift in [-1024, 1024]"
            )
        lo, hi = 2 * lo, 2 * hi

    shift = root_scalar(objective, bracket=[lo, hi]).root

    # Apply shift
    ndv_new = turn * expit(ldvs + shift)
    nrv_new = turn - ndv_new

    df[ndv_col] = ndv_new
    df[nrv_col] = nrv_new
    return df

## logit-shift-verbose.py
import numpy as np
from scipy.optimize import root_scalar
from scipy.special import expit  # inverse logit (logistic function)

def logit_shift_baseline(dataframe, dem_votes_col, rep_votes_col, target_vote_share=0.5):
    """
    Adjust baseline election data to hit a target Democratic vote share using logit shift.

    A single constant is added to every precinct's log-odds of Democratic
    versus Republican votes, chosen so that the turnout-weighted average
    Democratic share equals the target. Each precinct's total turnout is
    preserved.

    Args:
        dataframe: DataFrame containing vote columns (modified in place)
        dem_votes_col: Column name for Democratic votes
        rep_votes_col: Column name for Republican votes
        target_vote_share: Target Democratic vote share (default 0.5 for 50%)

    Returns:
        DataFrame with adjusted vote columns (the same object that was passed
        in). If total turnout is zero the DataFrame is returned unchanged.

    Raises:
        ValueError: If no shift within [-1024, 1024] brackets the target,
            e.g. when every precinct with turnout has zero votes for one
            party (such precincts cannot be moved by any shift).
    """
    dem_votes = dataframe[dem_votes_col].values
    rep_votes = dataframe[rep_votes_col].values
    total_turnout = dem_votes + rep_votes

    # With zero total turnout the weighted average is undefined
    # (np.average raises ZeroDivisionError), so there is nothing to adjust.
    if total_turnout.sum() == 0:
        return dataframe

    # Compute log-odds (logit) of Democratic vote share for each precinct
    # log(D/R) = log(D) - log(R) = logit(D/(D+R))
    # np.where evaluates both branches, so log(0) is still computed for
    # empty or one-sided precincts; the infinities are intended (share pinned
    # at 0 or 1) and the nan for empty precincts is masked, so the warnings
    # are suppressed.
    with np.errstate(divide="ignore", invalid="ignore"):
        log_odds_dem = np.where(total_turnout > 0,
                                np.log(dem_votes) - np.log(rep_votes),
                                0)

    # Find the shift amount that makes weighted average vote share = target
    def objective_function(shift_amount):
        # Apply shift to log-odds
        shifted_log_odds = log_odds_dem + shift_amount

        # Convert back to vote shares using inverse logit
        shifted_vote_shares = expit(shifted_log_odds)

        # Calculate weighted average (larger precincts weighted more)
        weighted_avg_vote_share = np.average(shifted_vote_shares, weights=total_turnout)

        # Return difference from target (solver finds where this = 0)
        return weighted_avg_vote_share - target_vote_share

    # Start with the range [-1, 1] and double it until the objective changes
    # sign across the bracket; a fixed range fails for lopsided baselines.
    bracket_low, bracket_high = -1.0, 1.0
    while objective_function(bracket_low) * objective_function(bracket_high) > 0:
        if bracket_high >= 1024:
            raise ValueError(
                "target_vote_share is not reachable by any logit shift in [-1024, 1024]"
            )
        bracket_low, bracket_high = 2 * bracket_low, 2 * bracket_high

    # Solve for the shift amount inside the bracket found above
    optimization_result = root_scalar(objective_function,
                                      bracket=[bracket_low, bracket_high])
    optimal_shift = optimization_result.root

    # Apply the optimal shift to get new vote counts
    shifted_log_odds = log_odds_dem + optimal_shift
    shifted_vote_shares = expit(shifted_log_odds)

    adjusted_dem_votes = total_turnout * shifted_vote_shares
    adjusted_rep_votes = total_turnout - adjusted_dem_votes

    # Update the dataframe
    dataframe[dem_votes_col] = adjusted_dem_votes
    dataframe[rep_votes_col] = adjusted_rep_votes

    return dataframe

## logit-shift.r
#' Logit Shift Baseline Data
#'
#' Adds one constant to every row's log-odds of `ndv` versus `nrv`, chosen so
#' that the turnout-weighted mean share of `ndv` equals `target`. Each row's
#' total (`ndv + nrv`) is preserved.
#'
#' @param d_baseline baseline data containing vote columns
#' @param ndv Unquoted Democratic vote column name
#' @param nrv Unquoted Republican vote column name
#' @param target target to logit shift to
#' @param tol convergence tolerance passed to [stats::uniroot()]
#'
#' @returns a data frame with adjusted vote columns. When total turnout is
#'   zero, `d_baseline` is returned unchanged.
#' @export
#'
#' @examples
#' d <- data.frame(dem = c(90, 80, 300), rep = c(10, 20, 100))
#' logit_shift_baseline(d, dem, rep, target = 0.5)
logit_shift_baseline <- function(d_baseline, ndv, nrv,
                                 target = 0.5,
                                 tol = sqrt(.Machine$double.eps)) {
  if (missing(ndv) || missing(nrv)) {
    cli::cli_abort('Both {.arg ndv} and {.arg nrv} must be provided.')
  }
  ndv_q <- rlang::enquo(ndv)
  nrv_q <- rlang::enquo(nrv)

  ndv_vec <- dplyr::pull(d_baseline, !!ndv_q)
  nrv_vec <- dplyr::pull(d_baseline, !!nrv_q)

  turn <- ndv_vec + nrv_vec

  # nothing to shift, and the weighted mean below would be undefined
  if (sum(turn) == 0) {
    return(d_baseline)
  }

  # log-odds per row; rows without turnout get 0 and carry zero weight
  ldvs <- dplyr::if_else(turn > 0, log(ndv_vec) - log(nrv_vec), 0)

  # The objective is non-decreasing in `shift`, so let uniroot widen the
  # starting interval ("upX") instead of erroring when the required shift
  # lies outside [-1, 1].
  res <- stats::uniroot(function(shift) {
    stats::weighted.mean(stats::plogis(ldvs + shift), turn) - target
  }, c(-1, 1), tol = tol, extendInt = "upX")

  ldvs <- ldvs + res$root

  ndv_new <- turn * stats::plogis(ldvs)
  nrv_new <- turn - ndv_new

  dplyr::mutate(
    d_baseline,
    !!rlang::as_name(ndv_q) := ndv_new,
    !!rlang::as_name(nrv_q) := nrv_new
  )
}
	import numpy as np
	from scipy.optimize import root_scalar
	from scipy.special import expit # inverse logit

	def logit_shift_baseline(df, ndv_col, nrv_col, target=0.5):
	    """Logit-shift two-party vote counts so the overall share hits ``target``.

	    One constant is added to every row's log-odds of ``ndv`` versus ``nrv``,
	    chosen so that the turnout-weighted mean share of ``ndv`` equals
	    ``target``. Each row's total (``ndv + nrv``) is preserved.

	    Args:
	        df: DataFrame holding the two vote columns. It is modified in place.
	        ndv_col: Name of the Democratic vote column.
	        nrv_col: Name of the Republican vote column.
	        target: Desired overall Democratic share of the two-party vote.

	    Returns:
	        The same ``df`` object with both vote columns overwritten by the
	        shifted values. If total turnout is zero, ``df`` is returned
	        untouched because there is nothing to shift.

	    Raises:
	        ValueError: If no shift within [-1024, 1024] brackets the target,
	            e.g. when every row with turnout has zero votes in one column
	            (such rows cannot be moved by any shift).
	    """
	    ndv = df[ndv_col].values
	    nrv = df[nrv_col].values
	    turn = ndv + nrv

	    # With no votes at all the weighted average below is undefined
	    # (np.average raises ZeroDivisionError), so leave the data as is.
	    if turn.sum() == 0:
	        return df

	    # Compute log-odds where turnout > 0. np.where evaluates both branches,
	    # so log(0) is hit for empty or one-sided rows; the resulting values are
	    # intended (or masked), so silence the warnings.
	    with np.errstate(divide="ignore", invalid="ignore"):
	        ldvs = np.where(turn > 0, np.log(ndv) - np.log(nrv), 0)

	    # Find shift
	    def objective(shift):
	        return np.average(expit(ldvs + shift), weights=turn) - target

	    # Start from the original [-1, 1] bracket and double it until the
	    # objective changes sign; a fixed bracket fails for lopsided baselines.
	    lo, hi = -1.0, 1.0
	    while objective(lo) * objective(hi) > 0:
	        if hi >= 1024:
	            raise ValueError(
	                "target share is not reachable by any logit shift in [-1024, 1024]"
	            )
	        lo, hi = 2 * lo, 2 * hi

	    shift = root_scalar(objective, bracket=[lo, hi]).root

	    # Apply shift
	    ndv_new = turn * expit(ldvs + shift)
	    nrv_new = turn - ndv_new

	    df[ndv_col] = ndv_new
	    df[nrv_col] = nrv_new
	    return df
	#' Logit Shift Baseline Data
	#'
	#' Adds one constant to every row's log-odds of `ndv` versus `nrv`, chosen so
	#' that the turnout-weighted mean share of `ndv` equals `target`. Each row's
	#' total (`ndv + nrv`) is preserved.
	#'
	#' @param d_baseline baseline data containing vote columns
	#' @param ndv Unquoted Democratic vote column name
	#' @param nrv Unquoted Republican vote column name
	#' @param target target to logit shift to
	#' @param tol convergence tolerance passed to [stats::uniroot()]
	#'
	#' @returns a data frame with adjusted vote columns. When total turnout is
	#'   zero, `d_baseline` is returned unchanged.
	#' @export
	#'
	#' @examples
	#' d <- data.frame(dem = c(90, 80, 300), rep = c(10, 20, 100))
	#' logit_shift_baseline(d, dem, rep, target = 0.5)
	logit_shift_baseline <- function(d_baseline, ndv, nrv,
	                                 target = 0.5,
	                                 tol = sqrt(.Machine$double.eps)) {
	  if (missing(ndv) || missing(nrv)) {
	    cli::cli_abort('Both {.arg ndv} and {.arg nrv} must be provided.')
	  }
	  ndv_q <- rlang::enquo(ndv)
	  nrv_q <- rlang::enquo(nrv)

	  ndv_vec <- dplyr::pull(d_baseline, !!ndv_q)
	  nrv_vec <- dplyr::pull(d_baseline, !!nrv_q)

	  turn <- ndv_vec + nrv_vec

	  # nothing to shift, and the weighted mean below would be undefined
	  if (sum(turn) == 0) {
	    return(d_baseline)
	  }

	  # log-odds per row; rows without turnout get 0 and carry zero weight
	  ldvs <- dplyr::if_else(turn > 0, log(ndv_vec) - log(nrv_vec), 0)

	  # The objective is non-decreasing in `shift`, so let uniroot widen the
	  # starting interval ("upX") instead of erroring when the required shift
	  # lies outside [-1, 1].
	  res <- stats::uniroot(function(shift) {
	    stats::weighted.mean(stats::plogis(ldvs + shift), turn) - target
	  }, c(-1, 1), tol = tol, extendInt = "upX")

	  ldvs <- ldvs + res$root

	  ndv_new <- turn * stats::plogis(ldvs)
	  nrv_new <- turn - ndv_new

	  dplyr::mutate(
	    d_baseline,
	    !!rlang::as_name(ndv_q) := ndv_new,
	    !!rlang::as_name(nrv_q) := nrv_new
	  )
	}