Skip to content

Instantly share code, notes, and snippets.

@tannenberg
Last active December 7, 2018 09:57
Show Gist options
  • Select an option

  • Save tannenberg/c94758ab3d3bfe089913b04943cfba21 to your computer and use it in GitHub Desktop.

Select an option

Save tannenberg/c94758ab3d3bfe089913b04943cfba21 to your computer and use it in GitHub Desktop.
First try at web-scraping with rvest to plot age-tenure of the Regents of Sweden
# Web-scraping livrustkammaren to create an age-tenure graph of Swedish regents
#inspired by https://gist.github.com/acoppock
library(rvest)
library(tidyverse)
library(stringr)
library(lubridate)
library(ggrepel)
library(stringr)
# Let's scrape the data from regentlängden
# Regent names: the text of every <h3> heading on the scraped page, with the
# first two characters of each heading dropped, stored in a one-column data
# frame (`regent`). Intermediates live inside local() so that only `name` is
# created in the calling environment.
name <- local({
  page <- read_html("http://livrustkammaren.se/sv/utforska/regentlangden")
  headings <- html_text(html_nodes(page, "h3"))
  data.frame(regent = str_sub(headings, start = 3))
})
# Scrape birth and reign years for every regent and reshape them to one row
# per regent per reign endpoint (key = "term_start" / "term_end").
#
# Each paragraph under #node-349 is reduced to its digits only. The parsing
# below relies on that digit string being laid out in fixed 4-character
# fields: chars 1-4 = birth year, 9-12 = first year of reign, 13-16 = last
# year of reign. Chars 5-8 are not used here (presumably the year of death --
# verify against the page).
#
# Resulting columns: born, regent_num, regent, key, year, age, torch.
df <- read_html("http://livrustkammaren.se/sv/utforska/regentlangden") %>%
  html_nodes("#node-349 p") %>%
  html_text() %>%
  gsub("[^0-9]", "", .) %>%
  # Be explicit so `years` is a character column on R < 4.0 as well.
  data.frame(years = ., stringsAsFactors = FALSE) %>%
  # Drop paragraphs with no digits at all, plus one whose only digits are "23"
  # (presumably a non-regent paragraph -- verify against the page).
  filter(years != "" & years != "23") %>%
  mutate(
    born = as.numeric(str_sub(years, end = 4)),
    term_start = as.numeric(str_sub(years, start = 9, end = 12)),
    term_end = as.numeric(str_sub(years, start = 13, end = 16)),
    # The current king has not died, so his digit string is too short for the
    # fixed-width fields above and both reign years parse as NA. Fill in the
    # known start of his reign and use the current year as a provisional end.
    term_start = coalesce(term_start, 1973),
    term_end = coalesce(term_end, as.numeric(year(today()))),
    # seq_along() is safe when the scrape returns zero rows; 1:length(x) would
    # yield c(1, 0) and fail with a confusing length-mismatch error.
    regent_num = seq_along(term_start)
  ) %>%
  select(-years) %>%
  # Names come from the separately scraped `name` data frame and are matched
  # purely by row position.
  bind_cols(name) %>%
  # gather() is kept (rather than pivot_longer()) so the script still runs on
  # the tidyr version it was written for and row order is unchanged.
  gather(key, year, term_start, term_end) %>%
  mutate(
    age = year - born,
    # `torch` pairs the end of regent n's reign with the start of regent
    # n + 1's reign, so a line grouped by it connects consecutive regents.
    torch = if_else(key == "term_start", regent_num, regent_num + 1L)
  )
# Label data: one row per regent, taken at the start of the reign.
df_lab <- filter(df, key == "term_start")

# Age of each regent over the years of their reign. Each regent is a solid
# segment coloured by age; dotted lines (grouped by `torch`) link the end of
# one reign to the start of the next.
ggplot(data = df, mapping = aes(x = year, y = age, group = regent)) +
  geom_point(mapping = aes(color = age), size = 2) +
  geom_line(mapping = aes(color = age), size = 1) +
  geom_text_repel(
    data = df_lab,
    mapping = aes(label = regent),
    size = 3,
    nudge_x = 10,
    nudge_y = -1
  ) +
  geom_line(mapping = aes(group = torch), linetype = "dotted", alpha = 0.5) +
  scale_color_viridis_c(begin = 0.5) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(
    x = "Year",
    y = "Age",
    title = "How old was the Regent of Sweden?",
    subtitle = "Code at: https://gist.github.com/tannenberg",
    caption = "Source: livrustkammaren.se"
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment