Last active
November 30, 2018 14:05
-
-
Save tannenberg/3ab9bc8180c0765870c75202769db066 to your computer and use it in GitHub Desktop.
Age-tenure graph for all prime ministers of Sweden (more web-scraping with rvest, then mostly tidy but also some base R)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #webscraping wiki to create a age-tenure graph of Swedish PMs | |
| #inspired by https://gist.github.com/acoppock | |
| library(rvest) | |
| library(tidyverse) | |
| library(stringr) | |
| library(lubridate) | |
| library(ggrepel) | |
| library(stringr) | |
| # lets scrape data from wiki | |
| df <- read_html("https://en.wikipedia.org/wiki/List_of_Prime_Ministers_of_Sweden") %>% | |
| html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[2]') %>% | |
| purrr::pluck(1) %>% | |
| html_table(fill= TRUE) | |
| head(df) | |
| #that went so so... | |
| # remove and rename cols, don't know how do to this "tidy" | |
| df[1] <- NULL | |
| colnames(df) = df[1, ] # the first row will be the header | |
| df = df[-1, ] | |
| names(df)[3] <- "term_start" | |
| names(df)[4] <- "term_end" | |
| # tidy from now one | |
| df <- df %>% | |
| mutate(name = sub("\\(.*", "", `Name(Birth–Death)`), | |
| born = as.numeric(sub("\\D*(\\d{4}).*", "\\1", `Name(Birth–Death)`)), | |
| term_start = str_replace_all(term_start, "\\[1]|\\[2]", ""), | |
| #term_end = str_replace_all(term_end, "\\[1]|\\[2]", ""), | |
| term_start = as.numeric(str_sub(term_start, start = -4)), | |
| term_end = lead(term_start), | |
| term_end = if_else(is.na(term_end), year(today()), term_end), | |
| parti = `Political Party`, | |
| parti = ifelse(name=="Olof Palme", "Social Democrats", parti), | |
| parti = ifelse(parti=="Moderate Party(Alliance for Sweden)" | parti == "General Electoral League", | |
| "Moderate Party", parti), | |
| parti = ifelse(parti=="Freeminded People's Party", "People's Party", parti), | |
| parti = ifelse(parti=="Farmers' League", "Centre Party", parti), | |
| pm_num = 1:length(name)) %>% | |
| select(name, born, term_start, term_end, parti, pm_num) %>% | |
| gather(key, year, term_start, term_end) %>% | |
| mutate(age = year - born, | |
| torch = if_else(key == "term_start", pm_num, as.integer(pm_num + 1)) | |
| ) | |
| label_df <- df %>% filter(key == "term_start") | |
| cols <- c("Social Democrats" = "#cb181d","Moderate Party" = "#084594","Centre Party" = "#4daf4a", | |
| "People's Party" = "#2b8cbe", "Liberal Coalition Party" = "#7bccc4", | |
| "Independent" = "#807dba", "National Party" = "#6a3d9a") | |
| breaks <- c("Social Democrats", "Moderate Party", "Centre Party", "People's Party", | |
| "Liberal Coalition Party", "Independent", "National Party") | |
| #labs <- c("Social Democrats" = "Socialdemokraterna","Moderate Party" = "Moderaterna", | |
| # "Centre Party" = "Centerpartiet", "People's Party" = "Folkpartiet", | |
| # "Liberal Coalition Party" = "Liberala Samlingspartiet", "Independent" = "Partilös", | |
| # "National Party" = "Nationella partiet") | |
| ggplot(df, aes(x = year, y = age, group = pm_num)) + | |
| geom_point(aes(color = parti), size = 2) + | |
| geom_line(aes(color = parti), size = 1) + | |
| geom_text_repel(data = label_df, aes(label = name), size = 3, nudge_y = -1) + | |
| scale_color_manual(name ="Party", | |
| values = cols, | |
| breaks = breaks | |
| #labels = labs | |
| ) + | |
| geom_line(aes(group = torch), linetype = "dotted", alpha = 0.5) + | |
| theme_classic() + | |
| ylab("Age")+ | |
| xlab("Year") + | |
| labs(title = "How old was the Swedish Prime Minister?", | |
| subtitle = "Code at: https://gist.github.com/tannenberg", | |
| caption = "Source: en.wikipedia.org/wiki/List_of_Prime_Ministers_of_Sweden") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment