Last active
January 15, 2026 14:05
-
-
Save GiulSposito/c44d4004b091796cbc3e4dd1d95f9d91 to your computer and use it in GitHub Desktop.
Registro Civil por mês...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(httr2) | |
| library(tibble) | |
| library(dplyr) | |
| library(readr) | |
# Null-coalescing operator: evaluates to `x` unless `x` is NULL, in which
# case it evaluates to `y`. `y` is only evaluated when it is needed.
`%||%` <- function(x, y) {
  if (is.null(x)) y else x
}
# ---- helper: extract the value of one cookie from Set-Cookie header line(s)
#
# Args:
#   set_cookie_vec: character vector of raw Set-Cookie header values, one
#     cookie per element. May be NULL, empty, or contain NA entries.
#   cookie_name: name of the cookie to look for. It is matched literally and
#     case-sensitively at the start of each line.
#
# Returns:
#   The text between "<cookie_name>=" and the first ";" of the first matching
#   line (possibly ""), or NULL when no line starts with that cookie.
extract_cookie_from_set_cookie <- function(set_cookie_vec, cookie_name) {
  if (length(set_cookie_vec) == 0 || all(is.na(set_cookie_vec))) {
    return(NULL)
  }

  # Literal prefix match: interpolating the name into a regular expression
  # would let metacharacters such as "." match an unrelated cookie.
  lines <- trimws(as.character(set_cookie_vec), which = "left")
  prefix <- paste0(cookie_name, "=")
  hits <- which(!is.na(lines) & startsWith(lines, prefix))
  if (length(hits) == 0) {
    return(NULL)
  }

  # Drop the "<name>=" prefix, then keep everything before the first ";",
  # which separates the value from attributes such as Path or HttpOnly.
  rest <- substring(lines[hits[1]], nchar(prefix) + 1L)
  semi <- regexpr(";", rest, fixed = TRUE)
  if (semi > 0) substr(rest, 1L, semi - 1L) else rest
}
# Opens a browsing session against the landing page and collects the
# cookies / tokens it hands out.
#
# Args:
#   landing_url: page requested to obtain the cookies.
#   user_agent: User-Agent string to send; when NULL one is built from the
#     OS name and the running R version.
#
# Returns:
#   A list with:
#     cookiejar      path of the temp file used as libcurl cookie jar
#     xsrf_cookie    raw value of the XSRF-TOKEN cookie, or NULL
#     session_cookie raw value of the _session cookie, or NULL
#     x_xsrf_token   URL-decoded XSRF-TOKEN value, or NULL
#     x_csrf_token   content of <meta name="csrf-token">, or NULL (only
#                    looked up when xml2 and rvest are installed)
#     user_agent     the User-Agent string that was sent
registrocivil_start_session <- function(
  landing_url = "https://transparencia.registrocivil.org.br/inicio",
  user_agent = NULL
) {
  ua <- user_agent %||% paste0(
    "Mozilla/5.0 (", Sys.info()[["sysname"]], ") R/", getRversion()
  )

  # The jar file is both written (cookiejar) and read back (cookiefile) so
  # that later requests pointing at the same path reuse the cookies.
  jar <- tempfile(fileext = ".txt")

  resp <- request(landing_url) |>
    req_headers(
      "accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "user-agent" = ua
    ) |>
    req_options(cookiejar = jar, cookiefile = jar) |>
    req_perform()

  # A response normally carries one Set-Cookie header per cookie. Extracting
  # with [["set-cookie"]] yields a single element only, so gather every
  # header with that name (case-insensitively) instead.
  hdrs <- unclass(resp_headers(resp))
  is_set_cookie <- tolower(names(hdrs)) == "set-cookie"
  set_cookie_vec <- unlist(hdrs[is_set_cookie], use.names = FALSE)

  xsrf_cookie <- extract_cookie_from_set_cookie(set_cookie_vec, "XSRF-TOKEN")
  session_cookie <- extract_cookie_from_set_cookie(set_cookie_vec, "_session")

  # The cookie value is percent-encoded; the header form is sent decoded.
  x_xsrf_token <- if (!is.null(xsrf_cookie) && nzchar(xsrf_cookie)) {
    utils::URLdecode(xsrf_cookie)
  } else {
    NULL
  }

  # Optional: read the csrf-token <meta> tag from the HTML. Skipped quietly
  # when the body cannot be read/parsed or the parsing packages are missing.
  x_csrf_token <- NULL
  html_txt <- tryCatch(resp_body_string(resp), error = function(e) NULL)
  if (!is.null(html_txt) &&
    requireNamespace("xml2", quietly = TRUE) &&
    requireNamespace("rvest", quietly = TRUE)) {
    doc <- tryCatch(xml2::read_html(html_txt), error = function(e) NULL)
    if (!is.null(doc)) {
      node <- rvest::html_element(doc, 'meta[name="csrf-token"]')
      val <- rvest::html_attr(node, "content")
      if (!is.na(val) && nzchar(val)) {
        x_csrf_token <- val
      }
    }
  }

  list(
    cookiejar = jar,
    xsrf_cookie = xsrf_cookie,
    session_cookie = session_cookie,
    x_xsrf_token = x_xsrf_token,
    x_csrf_token = x_csrf_token,
    user_agent = ua
  )
}
# Queries the all-name endpoint using an already opened session.
#
# Args:
#   session: list as returned by registrocivil_start_session(); the fields
#     read here are cookiejar, user_agent, x_xsrf_token and x_csrf_token.
#   start_date, end_date: sent as the `start_date` / `end_date` query
#     parameters.
#   translate: sent as the `translate` query parameter.
#   base_url: endpoint URL.
#   ...: additional query parameters, forwarded to req_url_query().
#   fail_fast: when TRUE an HTTP error status raises immediately; when FALSE
#     the response is still inspected and the error below reports the HTTP
#     status together with the start of the body.
#
# Returns:
#   A tibble built from the `data` element of the JSON payload, with `total`
#   parsed as integer and `name` as character. When `data` is empty, an
#   empty tibble with those two columns.
#
# Raises:
#   An error when the body is not JSON or its `status` field is not 1.
registrocivil_all_name2 <- function(
  session,
  start_date,
  end_date,
  translate = 1,
  base_url = "https://transparencia.registrocivil.org.br/api/record/all-name",
  ...,
  fail_fast = TRUE
) {
  req <- request(base_url) |>
    req_url_query(
      start_date = start_date,
      end_date = end_date,
      translate = translate,
      ...
    ) |>
    req_headers(
      "accept" = "application/json, text/plain, */*",
      "x-requested-with" = "XMLHttpRequest",
      "referer" = "https://transparencia.registrocivil.org.br/inicio",
      "user-agent" = session$user_agent
    ) |>
    # Point at the session's jar so its cookies are sent again automatically.
    req_options(cookiejar = session$cookiejar, cookiefile = session$cookiejar)

  if (!is.null(session$x_xsrf_token)) {
    req <- req |> req_headers("x-xsrf-token" = session$x_xsrf_token)
  }
  if (!is.null(session$x_csrf_token)) {
    req <- req |> req_headers("x-csrf-token" = session$x_csrf_token)
  }

  # req_perform() turns HTTP 4xx/5xx into R errors by default, which would
  # make fail_fast = FALSE a no-op; switch that off when it was requested.
  if (!fail_fast) {
    req <- req |> req_error(is_error = function(resp) FALSE)
  }

  resp <- req |> req_perform()
  if (fail_fast) {
    resp_check_status(resp)
  }

  # A non-JSON or empty body is reported through the diagnostic error below
  # rather than as an unrelated parsing error.
  payload <- tryCatch(
    resp_body_json(resp, simplifyVector = TRUE),
    error = function(e) NULL
  )

  if (!is.list(payload) || is.null(payload$status) ||
    !isTRUE(payload$status == 1)) {
    body_txt <- tryCatch(resp_body_string(resp), error = function(e) "")
    stop(
      "Resposta inesperada (status != 1). ",
      "HTTP: ", resp_status(resp), ". ",
      "Corpo (parcial): ",
      substr(body_txt, 1, 500),
      call. = FALSE
    )
  }

  rows <- payload$data
  if (length(rows) == 0) {
    # Without rows there are no columns for mutate() to convert.
    return(tibble(name = character(), total = integer()))
  }

  as_tibble(rows) |>
    mutate(
      total = parse_integer(as.character(total)),
      name = as.character(name)
    )
}
# Convenience wrapper: opens a fresh session on `landing_url` and runs the
# all-name query with it in a single call.
#
# Args:
#   start_date, end_date, translate: passed on to registrocivil_all_name2().
#   ...: further arguments passed on to registrocivil_all_name2().
#   landing_url: page used by registrocivil_start_session() to get cookies.
#
# Returns:
#   Whatever registrocivil_all_name2() returns for the new session.
registrocivil_all_name <- function(
  start_date, end_date, translate = 1, ...,
  landing_url = "https://transparencia.registrocivil.org.br/inicio"
) {
  registrocivil_start_session(landing_url = landing_url) |>
    registrocivil_all_name2(start_date, end_date, translate = translate, ...)
}
| # Example: query 2025-01-01 through 2025-01-31 and display the result. This runs a live HTTP request whenever the file is executed. | |
| df <- registrocivil_all_name("2025-01-01", "2025-01-31", translate = 1) | |
| df |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment