Skip to content

Instantly share code, notes, and snippets.

@tannenberg
Last active November 30, 2018 14:05
Show Gist options
  • Select an option

  • Save tannenberg/3ab9bc8180c0765870c75202769db066 to your computer and use it in GitHub Desktop.

Select an option

Save tannenberg/3ab9bc8180c0765870c75202769db066 to your computer and use it in GitHub Desktop.
Age-tenure graph for all prime ministers of Sweden (more web-scraping with rvest, then mostly tidy but also some base R)
#webscraping wiki to create a age-tenure graph of Swedish PMs
#inspired by https://gist.github.com/acoppock
library(rvest)
library(tidyverse)
library(stringr)
library(lubridate)
library(ggrepel)
library(stringr)
# lets scrape data from wiki
df <- read_html("https://en.wikipedia.org/wiki/List_of_Prime_Ministers_of_Sweden") %>%
html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[2]') %>%
purrr::pluck(1) %>%
html_table(fill= TRUE)
head(df)
#that went so so...
# remove and rename cols, don't know how do to this "tidy"
df[1] <- NULL
colnames(df) = df[1, ] # the first row will be the header
df = df[-1, ]
names(df)[3] <- "term_start"
names(df)[4] <- "term_end"
# tidy from now one
df <- df %>%
mutate(name = sub("\\(.*", "", `Name(Birth–Death)`),
born = as.numeric(sub("\\D*(\\d{4}).*", "\\1", `Name(Birth–Death)`)),
term_start = str_replace_all(term_start, "\\[1]|\\[2]", ""),
#term_end = str_replace_all(term_end, "\\[1]|\\[2]", ""),
term_start = as.numeric(str_sub(term_start, start = -4)),
term_end = lead(term_start),
term_end = if_else(is.na(term_end), year(today()), term_end),
parti = `Political Party`,
parti = ifelse(name=="Olof Palme", "Social Democrats", parti),
parti = ifelse(parti=="Moderate Party(Alliance for Sweden)" | parti == "General Electoral League",
"Moderate Party", parti),
parti = ifelse(parti=="Freeminded People's Party", "People's Party", parti),
parti = ifelse(parti=="Farmers' League", "Centre Party", parti),
pm_num = 1:length(name)) %>%
select(name, born, term_start, term_end, parti, pm_num) %>%
gather(key, year, term_start, term_end) %>%
mutate(age = year - born,
torch = if_else(key == "term_start", pm_num, as.integer(pm_num + 1))
)
label_df <- df %>% filter(key == "term_start")
cols <- c("Social Democrats" = "#cb181d","Moderate Party" = "#084594","Centre Party" = "#4daf4a",
"People's Party" = "#2b8cbe", "Liberal Coalition Party" = "#7bccc4",
"Independent" = "#807dba", "National Party" = "#6a3d9a")
breaks <- c("Social Democrats", "Moderate Party", "Centre Party", "People's Party",
"Liberal Coalition Party", "Independent", "National Party")
#labs <- c("Social Democrats" = "Socialdemokraterna","Moderate Party" = "Moderaterna",
# "Centre Party" = "Centerpartiet", "People's Party" = "Folkpartiet",
# "Liberal Coalition Party" = "Liberala Samlingspartiet", "Independent" = "Partilös",
# "National Party" = "Nationella partiet")
ggplot(df, aes(x = year, y = age, group = pm_num)) +
geom_point(aes(color = parti), size = 2) +
geom_line(aes(color = parti), size = 1) +
geom_text_repel(data = label_df, aes(label = name), size = 3, nudge_y = -1) +
scale_color_manual(name ="Party",
values = cols,
breaks = breaks
#labels = labs
) +
geom_line(aes(group = torch), linetype = "dotted", alpha = 0.5) +
theme_classic() +
ylab("Age")+
xlab("Year") +
labs(title = "How old was the Swedish Prime Minister?",
subtitle = "Code at: https://gist.github.com/tannenberg",
caption = "Source: en.wikipedia.org/wiki/List_of_Prime_Ministers_of_Sweden")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment