Skip to content

Instantly share code, notes, and snippets.

@gabrielzanlorenssi
Created November 5, 2024 18:23
Show Gist options
  • Select an option

  • Save gabrielzanlorenssi/701d2f58a9eb55eea5582fbf6da4d1f0 to your computer and use it in GitHub Desktop.

Select an option

Save gabrielzanlorenssi/701d2f58a9eb55eea5582fbf6da4d1f0 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(readxl)
library(rvest)
library(httr)
library(xml2)
library(lubridate)
library(sf)
# Scrape API --------------------------------------------------------------
#---- fetch function and its error-capturing wrapper
#' Fetch one alert from the INMET RSS endpoint and return its `info` node.
#'
#' @param id Alert id; it is appended to the endpoint URL.
#' @return The top-level child of the parsed XML document whose name is
#'   `info` (an xml2 node).
#' @details Signals an error when the HTTP request fails, when the body is not
#'   parseable XML, or when the document does not have exactly one top-level
#'   `info` child.
get_inmet_api <- function(id) {
  url <- paste0("https://apiprevmet3.inmet.gov.br/avisos/rss/", id)
  # get content
  response <- GET(url)
  # surface HTTP failures as an explicit error instead of a parse failure
  stop_for_status(response)
  # parse the body once and reuse the document for lookup and extraction
  doc <- read_xml(response)
  # info: locate the child by name, since its position is not fixed
  pos <- which(xml_name(xml_children(doc)) == "info")
  if (length(pos) != 1) {
    stop(
      "expected exactly one <info> element for id ", id,
      ", found ", length(pos),
      call. = FALSE
    )
  }
  xml_child(doc, search = pos)
}
# Wrap the fetcher so a failing id yields list(result = NULL, error = <cond>)
# instead of aborting the whole batch.
safe_get_inmet_api <- safely(get_inmet_api)
#---- ids for scraping
# 35789:45638
# Upper bound of the last batch.
max <- 45712
# Each batch is scraped into its own object, one id range per statement.
scrap35 <- 35789:36500 %>% map(safe_get_inmet_api, .progress = TRUE)
scrap36 <- 36501:37500 %>% map(safe_get_inmet_api, .progress = TRUE)
scrap37 <- 37501:38500 %>% map(safe_get_inmet_api, .progress = TRUE)
scrap38 <- 38501:39500 %>% map(safe_get_inmet_api, .progress = TRUE)
scrap39 <- 39501:40500 %>% map(safe_get_inmet_api, .progress = TRUE)
scrap40 <- 40501:41500 %>% map(safe_get_inmet_api, .progress = TRUE)
scrap41 <- 41501:42500 %>% map(safe_get_inmet_api, .progress = TRUE)
scrap42 <- 42501:43500 %>% map(safe_get_inmet_api, .progress = TRUE)
scrap43 <- 43501:44500 %>% map(safe_get_inmet_api, .progress = TRUE)
scrap44 <- 44501:max %>% map(safe_get_inmet_api, .progress = TRUE)
#--- read info
#' Flatten one `info` node into a one-row tibble.
#'
#' @param info An xml2 node whose children are read by position.
#' @return A tibble with one row and the character columns `evento`,
#'   `urgencia`, `severidade`, `onset`, `expires`, `web`, `municipios`, `area`.
read_info <- function(info) {
  # Column name -> position of the child holding that value.
  # NOTE(review): positions are hard-coded; confirm they match the feed layout.
  child_positions <- c(
    evento = 3,
    urgencia = 5,
    severidade = 6,
    onset = 8,
    expires = 9,
    web = 14,
    municipios = 19,
    area = 21
  )
  # Pull the text of each child; the names carry over as column names.
  field_text <- map(child_positions, function(position) {
    xml_text(xml_child(info, search = position))
  })
  as_tibble(field_text)
}
#---- read all and save
# Output file -> scraped batch. Every batch goes through the same pipeline,
# so the pairs are listed once and processed in a single loop.
batches <- list(
  "scrap35b.rds" = scrap35,
  "scrap36b.rds" = scrap36,
  "scrap37b.rds" = scrap37,
  "scrap38b.rds" = scrap38,
  "scrap39b.rds" = scrap39,
  "scrap40b.rds" = scrap40,
  "scrap41b.rds" = scrap41,
  "scrap42b.rds" = scrap42,
  "scrap43b.rds" = scrap43,
  "scrap44b.rds" = scrap44
)
for (out_file in names(batches)) {
  # Keep only successful fetches ("result" is NULL for failed ids and is
  # dropped by compact()), then flatten each info node into one row.
  x <- batches[[out_file]] %>%
    map("result") %>%
    compact() %>%
    map_df(.f = read_info, .progress = TRUE)
  write_rds(x, out_file)
}
# Analysis ----------------------------------------------------------------
#--- read all
# Anchor the pattern to RDS files whose name starts with "scrap"; the bare
# pattern "scrap" is a regex that also matches unrelated files (for example a
# script with "scrap" in its name), which read_rds() cannot read.
# NOTE(review): this still picks up any other scrap*.rds files in the working
# directory besides the scrapNNb.rds outputs -- confirm that is intended.
data <- map_df(list.files(pattern = "^scrap.*\\.rds$"), read_rds)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment