Skip to content

Instantly share code, notes, and snippets.

@maxwellamaral
Last active November 16, 2025 12:23
Show Gist options
  • Select an option

  • Save maxwellamaral/ea326ca55bce95d66e8f64b3c71d2ff1 to your computer and use it in GitHub Desktop.

Select an option

Save maxwellamaral/ea326ca55bce95d66e8f64b3c71d2ff1 to your computer and use it in GitHub Desktop.
Baixar patentes no Google Patents

Código para baixar as patentes do Google Patents

"""
Script para baixar PDFs de patentes do Google Patents.

Este script percorre uma lista de URLs de patentes do Google Patents,
converte cada página HTML em PDF usando wkhtmltopdf via pdfkit,
e salva os arquivos na pasta 'pdfs' com nomes baseados no código da patente.

Requisitos:
- wkhtmltopdf instalado e configurado.
- Bibliotecas Python: pdfkit, tqdm.

Uso:
    python save_google_patents.py

Autor: [Seu Nome ou Automatizado]
Data: 16 de novembro de 2025
"""

import pdfkit  # Biblioteca para converter HTML para PDF usando wkhtmltopdf
import os  # Para operações do sistema de arquivos
from tqdm import tqdm  # Para exibir barra de progresso

# Locate the wkhtmltopdf executable that pdfkit drives for HTML -> PDF conversion.
# Resolution order:
#   1. the WKHTMLTOPDF_PATH environment variable, when it is set and non-empty;
#   2. the standard Windows install location, when that file actually exists;
#   3. an empty path, which makes pdfkit look the binary up on the system PATH.
# If none of these yields an executable, pdfkit.configuration() raises an error
# at startup, exactly as it did with the previously hard-coded path.
_DEFAULT_WKHTMLTOPDF = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
_wkhtmltopdf_path = os.environ.get('WKHTMLTOPDF_PATH') or (
    _DEFAULT_WKHTMLTOPDF if os.path.isfile(_DEFAULT_WKHTMLTOPDF) else ''
)
config = pdfkit.configuration(wkhtmltopdf=_wkhtmltopdf_path)

# Publication numbers of the patents to download, in processing order.
# Each code is expanded below into the URL of its English-language
# Google Patents page.
_patent_codes = (
    "JP2004013885A",
    "US20110216006A1",
    "GB2311888A",
    "JP2003150299A",
    "US20210286441A1",
    "BR102016004580A2",
    "US7898527B1",
    "KR102553516B1",
    "BR102017002857A2",
    "US20230102500A1",
    "US11919159B1",
    "US20240399564A1",
    "WO2005089357A2",
    "AU2020103820A4",
    "US9111545B2",
    "WO2023101159A1",
    "US7965196B2",
    "US8475172B2",
    "JP2001075473A",
    "US8774878B2",
    "US8552983B2",
    "US8494859B2",
    "US20100109918A1",
    "WO2001039375A1",
    "US20100145729A1",
    "US9318029B2",
    "US10601980B1",
    "JP2025141216A",
    "CN108852620A",
    "IT201700014209A1",
    "US10395555B2",
    "CN105105772B",
    "WO2005077092A2",
    "US20120178064A1",
    "CN208255530U",
    "US20130311528A1",
    "US7251605B2",
    "US20130289970A1",
    "DE202020004941U1",
    "KR102142033B1",
    "CN107358955A",
    "US9495351B1",
    "KR20150115436A",
    "AU2019100545A4",
    "US20190051210A1",
    "CN207410509U",
    "US20060286513A1",
    "JP2025042165A",
    "RU198673U1",
    "KR101897202B1",
    "GB2338539A",
    "US7155389B2",
    "US5486112A",
    "US5571020A",
    "US5047952A",
    "US4520501A",
    "US3831296A",
    "BR102022003549A2",
    "US20050106536A1",
    "ES1219550U",
    "US20250138554A1",
    "US20250201147A1",
    "JP2025056787A",
    "DE10029483A1",
    "IT201600087785A1",
    "ES1293499U",
    "WO2001024140A1",
    "JPH09248315A",
    "US20040001734A1",
    "JPS59500604A",
    "AU2021100994A4",
    "KR20230099038A",
    "US20040008871A1",
    "BR102015026178A2",
    "ES1255339U",
    "ES1302772U",
    "US20250078574A1",
    "CA1200011A",
    "RU188793U1",
    "BR102014028249A2",
    "CN107889025A",
    "AU2021102464A4",
    "CN206574091U",
    "BR102018075811A2",
)

# List of patent page URLs to be downloaded; every entry has the form
# https://patents.google.com/patent/{code}/en
urls = [f"https://patents.google.com/patent/{code}/en" for code in _patent_codes]

def _extract_patent_code(url):
    """Return the patent code embedded in a Google Patents URL.

    The code is the path segment that immediately follows ``patent``, so
    ``https://patents.google.com/patent/US7898527B1/en`` yields
    ``US7898527B1``. Looking the segment up by name (instead of by a fixed
    index) also handles URLs written without a scheme, and any query string
    or fragment is dropped so it cannot leak into the output file name.

    Args:
        url: Address of a patent page on Google Patents.

    Returns:
        The patent code as a string.

    Raises:
        ValueError: If the URL has no non-empty segment after ``patent``.
    """
    path = url.split('?', 1)[0].split('#', 1)[0]
    segments = [segment for segment in path.split('/') if segment]
    try:
        return segments[segments.index('patent') + 1]
    except (ValueError, IndexError):
        raise ValueError('código da patente não encontrado na URL') from None


# Make sure the output directory exists before any PDF is written.
output_dir = 'pdfs'
os.makedirs(output_dir, exist_ok=True)

# Main loop: render each patent page to a PDF, showing a tqdm progress bar.
for url in tqdm(urls, desc="Baixando PDFs"):
    try:
        codigo = _extract_patent_code(url)
    except ValueError as e:
        # A malformed URL is reported and skipped so it cannot abort the batch.
        # tqdm.write is used instead of print so the message does not corrupt
        # the progress bar.
        tqdm.write(f'Erro ao baixar {url}: {e}')
        continue

    # The PDF is named after the patent code.
    output_path = os.path.join(output_dir, f'{codigo}.pdf')

    try:
        # pdfkit.from_url fetches the page and converts it to PDF via wkhtmltopdf.
        # No success message is emitted, to keep the progress bar clean.
        pdfkit.from_url(url, output_path, configuration=config)
    except Exception as e:
        # Deliberately broad: this is the batch boundary, and one failed
        # download must not stop the remaining ones. The error is reported
        # through tqdm.write so the progress bar stays intact.
        tqdm.write(f'Erro ao baixar {codigo}: {e}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment