Skip to content

Instantly share code, notes, and snippets.

@GiulSposito
Last active January 15, 2026 14:05
Show Gist options
  • Select an option

  • Save GiulSposito/c44d4004b091796cbc3e4dd1d95f9d91 to your computer and use it in GitHub Desktop.

Select an option

Save GiulSposito/c44d4004b091796cbc3e4dd1d95f9d91 to your computer and use it in GitHub Desktop.
Registro Civil por mês...
library(httr2)
library(tibble)
library(dplyr)
library(readr)
# Null-coalescing operator: yields `y` when `x` is NULL, otherwise `x`.
# `y` is only evaluated when it is actually needed (lazy evaluation).
`%||%` <- function(x, y) {
  if (is.null(x)) y else x
}
# ---- helper: extract a cookie's value from Set-Cookie header line(s)
#
# Arguments:
#   set_cookie_vec  Raw Set-Cookie header values, one cookie per element.
#                   NULL, zero-length, or all-NA input is tolerated.
#   cookie_name     Cookie name to look for. It is matched literally (no regex
#                   interpretation) and case-sensitively, and must appear at
#                   the very start of an element followed by "=".
#
# Returns:
#   The text between "<cookie_name>=" and the first ";" of the first matching
#   element (may be ""), or NULL when nothing matches.
extract_cookie_from_set_cookie <- function(set_cookie_vec, cookie_name) {
  if (length(set_cookie_vec) == 0 || all(is.na(set_cookie_vec))) {
    return(NULL)
  }

  header_lines <- as.character(set_cookie_vec)
  prefix <- paste0(cookie_name, "=")

  # Literal prefix comparison: characters such as ".", "[" or "(" in the
  # cookie name must not be treated as regular-expression syntax.
  # NA elements yield NA here, which which() silently drops.
  hits <- which(startsWith(header_lines, prefix))
  if (length(hits) == 0) {
    return(NULL)
  }

  # Keep everything after "name=" up to (not including) the first ";".
  remainder <- substring(header_lines[[hits[1]]], nchar(prefix) + 1L)
  semicolon_at <- regexpr(";", remainder, fixed = TRUE)
  if (semicolon_at > 0) {
    substr(remainder, 1L, semicolon_at - 1L)
  } else {
    remainder
  }
}
# Opens a browsing session against the transparency portal.
#
# Performs a GET on `landing_url` while recording cookies in a temporary
# cookie-jar file, then collects the anti-CSRF material that later API calls
# send back as request headers.
#
# Arguments:
#   landing_url  Page requested to obtain cookies and tokens.
#   user_agent   User-Agent string; when NULL one is built from the OS name
#                and the running R version.
#
# Returns a list with:
#   cookiejar       path of the temporary cookie-jar file given to curl
#   xsrf_cookie     raw value of the XSRF-TOKEN cookie, or NULL
#   session_cookie  raw value of the "_session" cookie, or NULL
#   x_xsrf_token    URL-decoded XSRF-TOKEN value, or NULL
#   x_csrf_token    content of <meta name="csrf-token">, or NULL (only
#                   attempted when both xml2 and rvest are installed)
#   user_agent      the User-Agent string that was sent
registrocivil_start_session <- function(
  landing_url = "https://transparencia.registrocivil.org.br/inicio",
  user_agent = NULL
) {
  ua <- user_agent %||% paste0(
    "Mozilla/5.0 (", Sys.info()[["sysname"]], ") R/", getRversion()
  )
  jar <- tempfile(fileext = ".txt")

  resp <- request(landing_url) |>
    req_headers(
      "accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "user-agent" = ua
    ) |>
    req_options(cookiejar = jar, cookiefile = jar) |>
    req_perform()

  # A response may carry several Set-Cookie headers (one per cookie).
  # Extracting by name with `[[` yields a single element, so every header
  # named "set-cookie" (case-insensitively) is gathered explicitly instead.
  hdrs <- resp_headers(resp)
  is_set_cookie <- tolower(names(hdrs)) == "set-cookie"
  set_cookie_vec <- unlist(unclass(hdrs)[is_set_cookie], use.names = FALSE)

  xsrf_cookie <- extract_cookie_from_set_cookie(set_cookie_vec, "XSRF-TOKEN")
  # NOTE(review): the helper only matches header lines that *start* with the
  # given name, so a cookie called e.g. "<app>_session" is not found here.
  # Confirm the cookie name the site really sends.
  session_cookie <- extract_cookie_from_set_cookie(set_cookie_vec, "_session")

  # The header form of the token is the URL-decoded cookie value.
  x_xsrf_token <- if (!is.null(xsrf_cookie) && nzchar(xsrf_cookie)) {
    utils::URLdecode(xsrf_cookie)
  } else {
    NULL
  }

  # Optional: read the csrf-token <meta> tag from the HTML. This is best
  # effort; any failure to read or parse the body leaves the token NULL.
  x_csrf_token <- NULL
  html_txt <- tryCatch(resp_body_string(resp), error = function(e) NULL)
  if (!is.null(html_txt) &&
      requireNamespace("xml2", quietly = TRUE) &&
      requireNamespace("rvest", quietly = TRUE)) {
    doc <- tryCatch(xml2::read_html(html_txt), error = function(e) NULL)
    if (!is.null(doc)) {
      node <- rvest::html_element(doc, 'meta[name="csrf-token"]')
      val <- rvest::html_attr(node, "content")
      if (!is.na(val) && nzchar(val)) x_csrf_token <- val
    }
  }

  list(
    cookiejar = jar,
    xsrf_cookie = xsrf_cookie,
    session_cookie = session_cookie,
    x_xsrf_token = x_xsrf_token,
    x_csrf_token = x_csrf_token,
    user_agent = ua
  )
}
# Queries the "all-name" endpoint using an already-opened session.
#
# Arguments:
#   session     List produced by registrocivil_start_session(); the fields
#               read here are user_agent, cookiejar, x_xsrf_token and
#               x_csrf_token.
#   start_date  Sent as the `start_date` query parameter.
#   end_date    Sent as the `end_date` query parameter.
#   translate   Sent as the `translate` query parameter.
#   base_url    Endpoint URL.
#   ...         Additional query parameters, forwarded to req_url_query().
#   fail_fast   TRUE: an HTTP error status aborts immediately with httr2's
#               own error. FALSE: the response is still read, so that a
#               failure is reported by the payload check below together with
#               an excerpt of the body.
#
# Returns a tibble built from the payload's `data`, with `total` parsed as
# integer and `name` as character.
# Raises an error when the payload is not a JSON object whose `status` is 1.
registrocivil_all_name2 <- function(
  session,
  start_date,
  end_date,
  translate = 1,
  base_url = "https://transparencia.registrocivil.org.br/api/record/all-name",
  ...,
  fail_fast = TRUE
) {
  req <- request(base_url) |>
    req_url_query(
      start_date = start_date,
      end_date = end_date,
      translate = translate,
      ...
    ) |>
    req_headers(
      "accept" = "application/json, text/plain, */*",
      "x-requested-with" = "XMLHttpRequest",
      "referer" = "https://transparencia.registrocivil.org.br/inicio",
      "user-agent" = session$user_agent
    ) |>
    # Re-send the cookies stored when the session was opened.
    req_options(cookiejar = session$cookiejar, cookiefile = session$cookiejar)

  if (!is.null(session$x_xsrf_token)) {
    req <- req |> req_headers("x-xsrf-token" = session$x_xsrf_token)
  }
  if (!is.null(session$x_csrf_token)) {
    req <- req |> req_headers("x-csrf-token" = session$x_csrf_token)
  }

  # req_perform() already raises on HTTP 4xx/5xx by default, which is the
  # fail_fast = TRUE behaviour. To honour fail_fast = FALSE that default has
  # to be switched off explicitly.
  if (!fail_fast) {
    req <- req |> req_error(is_error = function(resp) FALSE)
  }
  resp <- req |> req_perform()

  # A non-JSON or empty body must not hide the diagnostic below, so a parse
  # failure is treated like any other unexpected payload.
  payload <- tryCatch(
    resp |> resp_body_json(simplifyVector = TRUE),
    error = function(e) NULL
  )

  payload_ok <- is.list(payload) &&
    !is.null(payload$status) &&
    isTRUE(payload$status == 1)

  if (!payload_ok) {
    body_excerpt <- tryCatch(
      substr(resp_body_string(resp), 1, 500),
      error = function(e) ""
    )
    stop(
      "Resposta inesperada (status != 1). ",
      "HTTP: ", resp_status(resp), ". ",
      "Corpo (parcial): ",
      body_excerpt,
      call. = FALSE
    )
  }

  # An empty `data` has no columns for mutate() to convert; return an empty,
  # correctly typed tibble instead of failing.
  # NOTE(review): column order for the empty case is a guess - confirm
  # against a real non-empty response.
  if (length(payload$data) == 0) {
    return(tibble(name = character(), total = integer()))
  }

  as_tibble(payload$data) |>
    mutate(
      total = parse_integer(as.character(total)),
      name = as.character(name)
    )
}
# One-shot convenience wrapper: opens a fresh session on `landing_url` and
# immediately runs the all-name query with it.
#
# Arguments:
#   start_date, end_date, translate  Passed through to
#                                    registrocivil_all_name2().
#   ...          Further arguments forwarded to registrocivil_all_name2().
#   landing_url  Page used by registrocivil_start_session() to open the
#                session.
#
# Returns whatever registrocivil_all_name2() returns.
registrocivil_all_name <- function(
  start_date, end_date, translate = 1, ...,
  landing_url = "https://transparencia.registrocivil.org.br/inicio"
) {
  registrocivil_start_session(landing_url = landing_url) |>
    registrocivil_all_name2(start_date, end_date, translate = translate, ...)
}
# Example: query the all-name records between 2025-01-01 and 2025-01-31.
# This runs at top level, so executing the file performs live HTTP requests.
df <- registrocivil_all_name("2025-01-01", "2025-01-31", translate = 1)
# Bare expression: relies on top-level auto-printing to display the result.
df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment