Last active
October 9, 2025 21:25
-
-
Save diamonaj/57c41208cb3d166cf8ed3ec90de63731 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ##################################### | |
| ##################################### | |
| ##################################### | |
| # Run a logistic regression and generate expected values of turnout, holding some predictors constant while others vary. | |
| df <- read.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSjMB6H07nhXmgdARkAMkUVWt7QPlzdD-RrDor2g_BSL9vil8V4efJ-iO-nQtZuqPE_klZPi6qNJ9Pw/pub?gid=921072292&single=true&output=csv") | |
| glm2 <- glm(turnout ~ ., data = df, family = binomial) | |
| summary(glm2) | |
| #white | |
| #age | |
| #educate | |
| #income | |
| #agesqrd | |
| ##################################### | |
| ##################################### | |
| ##################################### | |
| # CODE CELL 2 | |
| newdata_creator <- function(white = NULL, | |
| age = NULL, | |
| educate = NULL, | |
| income = NULL) | |
| { | |
| dataf <- data.frame( | |
| white = white, | |
| age = age, | |
| educate = educate, | |
| income = income, | |
| agesqrd = age^2 / 100 | |
| ) | |
| return(dataf) | |
| } | |
| new_data_thisperson <- newdata_creator(0, | |
| 38, | |
| 12, | |
| 4) | |
| predict(glm2, newdata = new_data_thisperson, type = "response") | |
| #74% | |
| new_data_educatedperson <- newdata_creator(0, | |
| 38, | |
| 20, | |
| 4) | |
| predict(glm2, newdata = new_data_educatedperson, type = "response") | |
| #92% | |
| new_data_lastperson <- newdata_creator(mean(df$white), | |
| 38, | |
| 16, | |
| mean(df$income)) | |
| predict(glm2, newdata = new_data_lastperson, type = "response") | |
| #86.7% | |
| ##################################### | |
| ##################################### | |
| ##################################### | |
| # CODE CELL 3 | |
| ######obtain critical values for 99% confidence interval | |
| ###### wider than the 95% confidence interval | |
| qq <- qnorm(c(0.005, 0.995)) # -2.575829 2.575829 | |
| # -2.575829 2.575829 | |
| new_data_lastperson <- newdata_creator(mean(df$white), | |
| 38, | |
| 16, | |
| mean(df$income)) | |
| # now predict for that person, on the LINEAR SCALE | |
| predict(glm2, newdata = new_data_lastperson, se.fit = TRUE) | |
| ## 1.88 is the point-estimate of the linear logistic prediction | |
| ## 0.038 is the standard error associated with that prediction | |
| ## we need to go 2.57 up and down from 1.88, then apply log f(x) | |
| # THEN CALCULATE THE INTERVAL OF PROBABILITIES using logistic function---the "plogis()" command applies the simple logistic function we learned in class. qq[1] and qq[2] are +/- 2.57 | |
| plogis(1.880385 + qq[1]*0.0389619) # we are going down -2.57* | |
| # lower bound 99% CI for prediction = 0.855700051195481 | |
| plogis(1.880385 + qq[2]*0.0389619) # we are going up +2.57* | |
| # upper bound 99% CI for prediction = 0.878760472302368 | |
| 0.878760472302368 | |
| ###### LOOPING NOW THRU EVERY AGE ################## | |
| storage_vector_undergrad <- matrix(NA, nrow = 2, ncol = 78) | |
| for (age in c(18:95)) { | |
| pr <- predict(glm2, | |
| newdata = newdata_creator(mean(df$white), | |
| age, | |
| 16, | |
| mean(df$income)), | |
| se.fit = TRUE) | |
| storage_vector_undergrad[, age - 17] <- | |
| c(plogis(pr$fit + qq[1]*pr$se.fit), # lower bound | |
| plogis(pr$fit + qq[2]*pr$se.fit)) # upper bound | |
| } | |
| # LET'S LOOK AT THE STORAGE MATRIX | |
| storage_vector_undergrad[,1:5] | |
| ##################################### | |
| ##################################### | |
| ##################################### | |
| ## CODE CELL 4 | |
| storage_vector_undergrad # to see relevant col, it's age - 17 (e.g., 38 - 17) | |
| ### REPEAT NOW FOR PEOPLE WITH ONLY 12 YEARS (HS) EDUCATION | |
| storage_vector_highschool <- matrix(NA, nrow = 2, ncol = 78) | |
| for (age in c(18:95)) { | |
| pr <- predict(glm2, | |
| newdata = newdata_creator( | |
| mean(df$white), | |
| age, | |
| 12, | |
| mean(df$income)), | |
| se.fit = TRUE) | |
| storage_vector_highschool[, age - 17] <- | |
| c(plogis(pr$fit + qq[1]*pr$se.fit), # lower bound | |
| plogis(pr$fit + qq[2]*pr$se.fit)) # upper bound | |
| } | |
| # ONLY CREATE AXES AND LABELS--NO DATA PLOTTED | |
| plot(x = c(1:100), y = c(1:100), type = "n", | |
| xlim = c(18,95), ylim = c(0,1), | |
| main = "Probability of voting by age", | |
| xlab = "Age of Respondent", | |
| ylab = "Probability of Voting") | |
| ##################################### | |
| ##################################### | |
| ##################################### | |
| # CODE CELL 5 | |
| # ACTUALLY PLOT THE LINE SEGMENTS IN THE PLOT | |
| # FIRST, CALL THE PLOT AGAIN | |
| # ONLY CREATE AXES AND LABELS--NO DATA PLOTTED | |
| plot(x = c(1:100), y = c(1:100), type = "n", | |
| xlim = c(18,95), ylim = c(0,1), | |
| main = "Probability of voting by age", | |
| xlab = "Age of Respondent", | |
| ylab = "Probability of Voting") | |
| ## THEN DRAW THE SEGMENTS IN THE PLOT | |
| for (age in 18:95) { | |
| segments( | |
| x0 = age, | |
| y0 = storage_vector_undergrad[1, age - 17], | |
| x1 = age, | |
| y1 = storage_vector_undergrad[2, age - 17], | |
| lwd = 2) | |
| } | |
| for (age in 18:95) { | |
| segments( | |
| x0 = age, | |
| y0 = storage_vector_highschool[1, age - 17], | |
| x1 = age, | |
| y1 = storage_vector_highschool[2, age - 17], | |
| lwd = 2) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment