# jimcrozier/stupid , not advisable simple covid lm

## stupid , not advisable simple covid lm
# If needed, uncomment the following lines to install the required R packages:
# install.packages("dplyr")
# install.packages("poliscidata")
# install.packages("covidregionaldata")
# install.packages("plotly")
library(dplyr)
library(poliscidata)
library(covidregionaldata)
library(plotly)

# Build the covidregionaldata USA dataset object and pull out its results list.
region <- USA$new(get = TRUE, steps = TRUE, verbose = TRUE)
covid_region_lst <- region$return()

# Take the raw state-level table and normalise the join key: remove spaces,
# lower-case, then trim any remaining edge whitespace
# (e.g. "New York" -> "newyork").
covid_region_df <- mutate(
  covid_region_lst$raw$state,
  state = trimws(tolower(gsub(" ", "", state)))
)

# Sanity check on a single state: plot Georgia's case and death series over
# time to see what the raw numbers look like.
ga <- covid_region_df %>%
  filter(state == "georgia") %>%
  arrange(date)
# plotly's scatter `mode` is built from the flags "lines", "markers" and
# "text" (combinable with "+"); "line" is not a recognised flag, so spell
# it "lines".
ga %>% plot_ly(x = ~date, y = ~cases, type = "scatter", mode = "lines")
ga %>% plot_ly(x = ~date, y = ~deaths, type = "scatter", mode = "lines")
# so, looks like cumulative deaths and cases


# Population lookup used to normalise the raw counts: keep only the state
# name and the pop2010 column from poliscidata::states, with the name
# lower-cased and trimmed.
pop <- mutate(
  dplyr::select(poliscidata::states, state, pop2010),
  state = trimws(tolower(state))
)

# Collapse the COVID time series to one row per state and attach population.
# (The time series could be flattened in smarter ways; this just takes totals.)
covid <- covid_region_df %>%
  group_by(state) %>%
  summarise(
    # the series are cumulative, so the max over time is the running total
    all_deaths = max(deaths),
    all_cases = max(cases)
  ) %>%
  left_join(pop, by = "state") %>%
  # Drop rows with any missing value. Per the original note the population
  # table only has the 50 states, so regions without a match fall out here.
  # Base complete.cases() is used because drop_na() lives in tidyr, which
  # this script never attaches.
  filter(complete.cases(.)) %>%
  # Despite the pct_ prefix these are fractions (count / population), not
  # values multiplied by 100.
  mutate(
    pct_deaths = all_deaths / pop2010,
    pct_cases = all_cases / pop2010
  )

# Full poliscidata state table, with `state` lower-cased and trimmed so it
# can serve as a join key against the state-level COVID summary.
poli <- poliscidata::states
poli <- mutate(poli, state = trimws(tolower(state)))

# Attach every poliscidata covariate to the per-state outcome table.
# Both tables carry pop2010, so an implicit natural join would key on
# state *and* the numeric pop2010 column. Join on state only and keep the
# pop2010 already present in `covid`; the resulting columns are unchanged.
analytical_df <- covid %>%
  left_join(dplyr::select(poli, -pop2010), by = "state")


# Naive single-predictor OLS of per-capita cumulative cases on obama_win12.
# Illustrative only -- it probably tells us nothing.
summary(
  lm(formula = pct_cases ~ obama_win12, data = analytical_df)
)

# Call:
#   lm(formula = pct_cases ~ obama_win12, data = analytical_df)
#
# Residuals:
#   Min        1Q    Median        3Q       Max
# -0.069344 -0.012815  0.001071  0.015339  0.048739
#
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)
#   (Intercept)     0.120855   0.004934  24.494  < 2e-16 ***
#   obama_win12Yes -0.024475   0.006842  -3.577 0.000806 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.02417 on 48 degrees of freedom
# Multiple R-squared:  0.2105,	Adjusted R-squared:  0.194
# F-statistic:  12.8 on 1 and 48 DF,  p-value: 0.0008058

# Same naive single-predictor OLS, this time on per-capita cumulative deaths.
# Nobody should use this for anything.
summary(
  lm(formula = pct_deaths ~ obama_win12, data = analytical_df)
)

# Call:
#   lm(formula = pct_deaths ~ obama_win12, data = analytical_df)
#
# Residuals:
#   Min         1Q     Median         3Q        Max
# -0.0013562 -0.0003281  0.0001092  0.0003972  0.0012870
#
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)
#   (Intercept)     0.0018631  0.0001310  14.218   <2e-16 ***
#   obama_win12Yes -0.0001367  0.0001817  -0.753    0.455
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.0006419 on 48 degrees of freedom
# Multiple R-squared:  0.01166,	Adjusted R-squared:  -0.00893
# F-statistic: 0.5663 on 1 and 48 DF,  p-value: 0.4554


# Many other candidate predictors are available; every one the author tried
# told a similar story. List the columns to pick from:
colnames(analytical_df)
# [1] "state"             "pop2010"           "avg_time_pct_cov"  "all_deaths"        "all_cases"         "pct_deaths"        "pct_cases"
# [8] "abort_rank3"       "abortion_rank12"   "adv_or_more"       "ba_or_more"        "cig_tax12"         "cig_tax12_3"       "conserv_advantage"
# [15] "conserv_public"    "dem_advantage"     "govt_worker"       "gun_rank3"         "gun_rank11"        "gun_scale11"       "hr_cons_rank11"
# [22] "hr_conserv11"      "hr_lib_rank11"     "hr_liberal11"      "hs_or_more"        "obama2012"         "obama_win12"       "pop2000"
# [29] "pop2010_hun_thou"  "popchng0010"       "popchngpct"        "pot_policy"        "prochoice"         "prolife"           "relig_cath"
# [36] "relig_prot"        "relig_high"        "relig_low"         "religiosity3"      "romney2012"        "smokers12"         "stateid"
# [43] "to_0812"           "uninsured_pct"     "abort_rate05"      "abort_rate08"      "abortlaw3"         "abortlaw10"        "alcohol"
# [50] "attend_pct"        "battle04"          "blkleg"            "blkpct04"          "blkpct08"          "blkpct10"          "bush00"
# [57] "bush04"            "carfatal"          "carfatal07"        "cig_tax"           "cig_tax_3"         "cigarettes"        "college"
# [64] "conpct_m"          "cons_hr06"         "cons_hr09"         "cook_index"        "cook_index3"       "defexpen"          "demhr11"
# [71] "dem_hr09"          "demnat06"          "dempct_m"          "demstate06"        "demstate09"        "demstate13"        "density"
# [78] "division"          "earmarks_pcap"     "evm"               "evo"               "evo2012"           "evr2012"           "gay_policy"
# [85] "gay_policy2"       "gay_policy_con"    "gay_support"       "gay_support3"      "gb_win00"          "gb_win04"          "gore00"
# [92] "gun_check"         "gun_dealer"        "gun_murder10"      "gun_rank_rev"      "gunlaw_rank"       "gunlaw_rank3_rev"  "gunlaw_scale"
# [99] "hispanic04"        "hispanic08"        "hispanic10"        "indpct_m"          "kerry04"           "libpct_m"          "mccain08"
# [106] "modpct_m"          "nader00"           "obama08"           "obama_win08"       "over64"            "permit"            "pop_18_24"
# [113] "pop_18_24_10"      "prcapinc"          "region"            "relig_import"      "religiosity"       "reppct_m"          "rtw"
# [120] "secularism"        "secularism3"       "seniority_sen2"    "south"             "to_0004"           "to_0408"           "trnout00"
# [127] "trnout04"          "unemploy"          "union04"           "union07"           "union10"           "urban"             "vep00_turnout"
# [134] "vep04_turnout"     "vep08_turnout"     "vep12_turnout"     "womleg_2007"       "womleg_2010"       "womleg_2011"       "womleg_2015"
	# If needed, uncomment the following lines to install the required R packages:
	# install.packages("dplyr")
	# install.packages("poliscidata")
	# install.packages("covidregionaldata")
	# install.packages("plotly")
	library(dplyr)
	library(poliscidata)
	library(covidregionaldata)
	library(plotly)

	# Build the covidregionaldata USA dataset object and pull out its results list.
	region <- USA$new(get = TRUE, steps = TRUE, verbose = TRUE)
	covid_region_lst <- region$return()

	# Take the raw state-level table and normalise the join key: remove spaces,
	# lower-case, then trim any remaining edge whitespace
	# (e.g. "New York" -> "newyork").
	covid_region_df <- mutate(
	  covid_region_lst$raw$state,
	  state = trimws(tolower(gsub(" ", "", state)))
	)

	# Sanity check on a single state: plot Georgia's case and death series over
	# time to see what the raw numbers look like.
	ga <- covid_region_df %>%
	  filter(state == "georgia") %>%
	  arrange(date)
	# plotly's scatter `mode` is built from the flags "lines", "markers" and
	# "text" (combinable with "+"); "line" is not a recognised flag, so spell
	# it "lines".
	ga %>% plot_ly(x = ~date, y = ~cases, type = "scatter", mode = "lines")
	ga %>% plot_ly(x = ~date, y = ~deaths, type = "scatter", mode = "lines")
	# so, looks like cumulative deaths and cases


	# Population lookup used to normalise the raw counts: keep only the state
	# name and the pop2010 column from poliscidata::states, with the name
	# lower-cased and trimmed.
	pop <- mutate(
	  dplyr::select(poliscidata::states, state, pop2010),
	  state = trimws(tolower(state))
	)

	# Collapse the COVID time series to one row per state and attach population.
	# (The time series could be flattened in smarter ways; this just takes totals.)
	covid <- covid_region_df %>%
	  group_by(state) %>%
	  summarise(
	    # the series are cumulative, so the max over time is the running total
	    all_deaths = max(deaths),
	    all_cases = max(cases)
	  ) %>%
	  left_join(pop, by = "state") %>%
	  # Drop rows with any missing value. Per the original note the population
	  # table only has the 50 states, so regions without a match fall out here.
	  # Base complete.cases() is used because drop_na() lives in tidyr, which
	  # this script never attaches.
	  filter(complete.cases(.)) %>%
	  # Despite the pct_ prefix these are fractions (count / population), not
	  # values multiplied by 100.
	  mutate(
	    pct_deaths = all_deaths / pop2010,
	    pct_cases = all_cases / pop2010
	  )

	# Full poliscidata state table, with `state` lower-cased and trimmed so it
	# can serve as a join key against the state-level COVID summary.
	poli <- poliscidata::states
	poli <- mutate(poli, state = trimws(tolower(state)))

	# Attach every poliscidata covariate to the per-state outcome table.
	# Both tables carry pop2010, so an implicit natural join would key on
	# state *and* the numeric pop2010 column. Join on state only and keep the
	# pop2010 already present in `covid`; the resulting columns are unchanged.
	analytical_df <- covid %>%
	  left_join(dplyr::select(poli, -pop2010), by = "state")


	# Naive single-predictor OLS of per-capita cumulative cases on obama_win12.
	# Illustrative only -- it probably tells us nothing.
	summary(
	  lm(formula = pct_cases ~ obama_win12, data = analytical_df)
	)

	# Call:
	# lm(formula = pct_cases ~ obama_win12, data = analytical_df)
	#
	# Residuals:
	# Min 1Q Median 3Q Max
	# -0.069344 -0.012815 0.001071 0.015339 0.048739
	#
	# Coefficients:
	# Estimate Std. Error t value Pr(>|t|)
	# (Intercept) 0.120855 0.004934 24.494 < 2e-16 ***
	# obama_win12Yes -0.024475 0.006842 -3.577 0.000806 ***
	# ---
	# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# Residual standard error: 0.02417 on 48 degrees of freedom
	# Multiple R-squared: 0.2105, Adjusted R-squared: 0.194
	# F-statistic: 12.8 on 1 and 48 DF, p-value: 0.0008058

	# Same naive single-predictor OLS, this time on per-capita cumulative deaths.
	# Nobody should use this for anything.
	summary(
	  lm(formula = pct_deaths ~ obama_win12, data = analytical_df)
	)

	# Call:
	# lm(formula = pct_deaths ~ obama_win12, data = analytical_df)
	#
	# Residuals:
	# Min 1Q Median 3Q Max
	# -0.0013562 -0.0003281 0.0001092 0.0003972 0.0012870
	#
	# Coefficients:
	# Estimate Std. Error t value Pr(>|t|)
	# (Intercept) 0.0018631 0.0001310 14.218 <2e-16 ***
	# obama_win12Yes -0.0001367 0.0001817 -0.753 0.455
	# ---
	# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
	#
	# Residual standard error: 0.0006419 on 48 degrees of freedom
	# Multiple R-squared: 0.01166, Adjusted R-squared: -0.00893
	# F-statistic: 0.5663 on 1 and 48 DF, p-value: 0.4554


	# Many other candidate predictors are available; every one the author tried
	# told a similar story. List the columns to pick from:
	colnames(analytical_df)
	# [1] "state" "pop2010" "avg_time_pct_cov" "all_deaths" "all_cases" "pct_deaths" "pct_cases"
	# [8] "abort_rank3" "abortion_rank12" "adv_or_more" "ba_or_more" "cig_tax12" "cig_tax12_3" "conserv_advantage"
	# [15] "conserv_public" "dem_advantage" "govt_worker" "gun_rank3" "gun_rank11" "gun_scale11" "hr_cons_rank11"
	# [22] "hr_conserv11" "hr_lib_rank11" "hr_liberal11" "hs_or_more" "obama2012" "obama_win12" "pop2000"
	# [29] "pop2010_hun_thou" "popchng0010" "popchngpct" "pot_policy" "prochoice" "prolife" "relig_cath"
	# [36] "relig_prot" "relig_high" "relig_low" "religiosity3" "romney2012" "smokers12" "stateid"
	# [43] "to_0812" "uninsured_pct" "abort_rate05" "abort_rate08" "abortlaw3" "abortlaw10" "alcohol"
	# [50] "attend_pct" "battle04" "blkleg" "blkpct04" "blkpct08" "blkpct10" "bush00"
	# [57] "bush04" "carfatal" "carfatal07" "cig_tax" "cig_tax_3" "cigarettes" "college"
	# [64] "conpct_m" "cons_hr06" "cons_hr09" "cook_index" "cook_index3" "defexpen" "demhr11"
	# [71] "dem_hr09" "demnat06" "dempct_m" "demstate06" "demstate09" "demstate13" "density"
	# [78] "division" "earmarks_pcap" "evm" "evo" "evo2012" "evr2012" "gay_policy"
	# [85] "gay_policy2" "gay_policy_con" "gay_support" "gay_support3" "gb_win00" "gb_win04" "gore00"
	# [92] "gun_check" "gun_dealer" "gun_murder10" "gun_rank_rev" "gunlaw_rank" "gunlaw_rank3_rev" "gunlaw_scale"
	# [99] "hispanic04" "hispanic08" "hispanic10" "indpct_m" "kerry04" "libpct_m" "mccain08"
	# [106] "modpct_m" "nader00" "obama08" "obama_win08" "over64" "permit" "pop_18_24"
	# [113] "pop_18_24_10" "prcapinc" "region" "relig_import" "religiosity" "reppct_m" "rtw"
	# [120] "secularism" "secularism3" "seniority_sen2" "south" "to_0004" "to_0408" "trnout00"
	# [127] "trnout04" "unemploy" "union04" "union07" "union10" "urban" "vep00_turnout"
	# [134] "vep04_turnout" "vep08_turnout" "vep12_turnout" "womleg_2007" "womleg_2010" "womleg_2011" "womleg_2015"
# No results found