Skip to content

Instantly share code, notes, and snippets.

@diamonaj
Last active October 9, 2025 21:25
Show Gist options
  • Select an option

  • Save diamonaj/57c41208cb3d166cf8ed3ec90de63731 to your computer and use it in GitHub Desktop.

Select an option

Save diamonaj/57c41208cb3d166cf8ed3ec90de63731 to your computer and use it in GitHub Desktop.
#####################################
#####################################
#####################################
# Run a logistic regression and generate expected values of turnout, holding some predictors constant while others vary.
df <- read.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSjMB6H07nhXmgdARkAMkUVWt7QPlzdD-RrDor2g_BSL9vil8V4efJ-iO-nQtZuqPE_klZPi6qNJ9Pw/pub?gid=921072292&single=true&output=csv")
glm2 <- glm(turnout ~ ., data = df, family = binomial)
summary(glm2)
#white
#age
#educate
#income
#agesqrd
#####################################
#####################################
#####################################
# CODE CELL 2
newdata_creator <- function(white = NULL,
age = NULL,
educate = NULL,
income = NULL)
{
dataf <- data.frame(
white = white,
age = age,
educate = educate,
income = income,
agesqrd = age^2 / 100
)
return(dataf)
}
new_data_thisperson <- newdata_creator(0,
38,
12,
4)
predict(glm2, newdata = new_data_thisperson, type = "response")
#74%
new_data_educatedperson <- newdata_creator(0,
38,
20,
4)
predict(glm2, newdata = new_data_educatedperson, type = "response")
#92%
new_data_lastperson <- newdata_creator(mean(df$white),
38,
16,
mean(df$income))
predict(glm2, newdata = new_data_lastperson, type = "response")
#86.7%
#####################################
#####################################
#####################################
# CODE CELL 3
######obtain critical values for 99% confidence interval
###### wider than the 95% confidence interval
qq <- qnorm(c(0.005, 0.995)) # -2.575829 2.575829
# -2.575829 2.575829
new_data_lastperson <- newdata_creator(mean(df$white),
38,
16,
mean(df$income))
# now predict for that person, on the LINEAR SCALE
predict(glm2, newdata = new_data_lastperson, se.fit = TRUE)
## 1.88 is the point-estimate of the linear logistic prediction
## 0.038 is the standard error associated with that prediction
## we need to go 2.57 up and down from 1.88, then apply log f(x)
# THEN CALCULATE THE INTERVAL OF PROBABILITIES using logistic function---the "plogis()" command applies the simple logistic function we learned in class. qq[1] and qq[2] are +/- 2.57
plogis(1.880385 + qq[1]*0.0389619) # we are going down -2.57*
# lower bound 99% CI for prediction = 0.855700051195481
plogis(1.880385 + qq[2]*0.0389619) # we are going up +2.57*
# upper bound 99% CI for prediction = 0.878760472302368
0.878760472302368
###### LOOPING NOW THRU EVERY AGE ##################
storage_vector_undergrad <- matrix(NA, nrow = 2, ncol = 78)
for (age in c(18:95)) {
pr <- predict(glm2,
newdata = newdata_creator(mean(df$white),
age,
16,
mean(df$income)),
se.fit = TRUE)
storage_vector_undergrad[, age - 17] <-
c(plogis(pr$fit + qq[1]*pr$se.fit), # lower bound
plogis(pr$fit + qq[2]*pr$se.fit)) # upper bound
}
# LET'S LOOK AT THE STORAGE MATRIX
storage_vector_undergrad[,1:5]
#####################################
#####################################
#####################################
## CODE CELL 4
storage_vector_undergrad # to see relevant col, it's age - 17 (e.g., 38 - 17)
### REPEAT NOW FOR PEOPLE WITH ONLY 12 YEARS (HS) EDUCATION
storage_vector_highschool <- matrix(NA, nrow = 2, ncol = 78)
for (age in c(18:95)) {
pr <- predict(glm2,
newdata = newdata_creator(
mean(df$white),
age,
12,
mean(df$income)),
se.fit = TRUE)
storage_vector_highschool[, age - 17] <-
c(plogis(pr$fit + qq[1]*pr$se.fit), # lower bound
plogis(pr$fit + qq[2]*pr$se.fit)) # upper bound
}
# ONLY CREATE AXES AND LABELS--NO DATA PLOTTED
plot(x = c(1:100), y = c(1:100), type = "n",
xlim = c(18,95), ylim = c(0,1),
main = "Probability of voting by age",
xlab = "Age of Respondent",
ylab = "Probability of Voting")
#####################################
#####################################
#####################################
# CODE CELL 5
# ACTUALLY PLOT THE LINE SEGMENTS IN THE PLOT
# FIRST, CALL THE PLOT AGAIN
# ONLY CREATE AXES AND LABELS--NO DATA PLOTTED
plot(x = c(1:100), y = c(1:100), type = "n",
xlim = c(18,95), ylim = c(0,1),
main = "Probability of voting by age",
xlab = "Age of Respondent",
ylab = "Probability of Voting")
## THEN DRAW THE SEGMENTS IN THE PLOT
for (age in 18:95) {
segments(
x0 = age,
y0 = storage_vector_undergrad[1, age - 17],
x1 = age,
y1 = storage_vector_undergrad[2, age - 17],
lwd = 2)
}
for (age in 18:95) {
segments(
x0 = age,
y0 = storage_vector_highschool[1, age - 17],
x1 = age,
y1 = storage_vector_highschool[2, age - 17],
lwd = 2)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment