# Code for downloading patents from Google Patents
"""
Script para baixar PDFs de patentes do Google Patents.
Este script percorre uma lista de URLs de patentes do Google Patents,
converte cada página HTML em PDF usando wkhtmltopdf via pdfkit,
e salva os arquivos na pasta 'pdfs' com nomes baseados no código da patente.
Requisitos:
- wkhtmltopdf instalado e configurado.
- Bibliotecas Python: pdfkit, tqdm.
Uso:
python save_google_patents.py
Autor: [Seu Nome ou Automatizado]
Data: 16 de novembro de 2025
"""
import pdfkit # Biblioteca para converter HTML para PDF usando wkhtmltopdf
import os # Para operações do sistema de arquivos
from tqdm import tqdm # Para exibir barra de progresso
# Locate the wkhtmltopdf executable, which pdfkit drives to render HTML as PDF.
# The default Windows install location is tried first so existing setups keep
# working unchanged. When that file is absent, an empty path is passed instead,
# which lets pdfkit search the system PATH for the executable rather than
# failing outright on machines with a different install location.
WKHTMLTOPDF_PATH = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
config = pdfkit.configuration(
    wkhtmltopdf=WKHTMLTOPDF_PATH if os.path.isfile(WKHTMLTOPDF_PATH) else ''
)
# Publication codes of the patents to download, in processing order.
_PATENT_CODES = (
    "JP2004013885A",
    "US20110216006A1",
    "GB2311888A",
    "JP2003150299A",
    "US20210286441A1",
    "BR102016004580A2",
    "US7898527B1",
    "KR102553516B1",
    "BR102017002857A2",
    "US20230102500A1",
    "US11919159B1",
    "US20240399564A1",
    "WO2005089357A2",
    "AU2020103820A4",
    "US9111545B2",
    "WO2023101159A1",
    "US7965196B2",
    "US8475172B2",
    "JP2001075473A",
    "US8774878B2",
    "US8552983B2",
    "US8494859B2",
    "US20100109918A1",
    "WO2001039375A1",
    "US20100145729A1",
    "US9318029B2",
    "US10601980B1",
    "JP2025141216A",
    "CN108852620A",
    "IT201700014209A1",
    "US10395555B2",
    "CN105105772B",
    "WO2005077092A2",
    "US20120178064A1",
    "CN208255530U",
    "US20130311528A1",
    "US7251605B2",
    "US20130289970A1",
    "DE202020004941U1",
    "KR102142033B1",
    "CN107358955A",
    "US9495351B1",
    "KR20150115436A",
    "AU2019100545A4",
    "US20190051210A1",
    "CN207410509U",
    "US20060286513A1",
    "JP2025042165A",
    "RU198673U1",
    "KR101897202B1",
    "GB2338539A",
    "US7155389B2",
    "US5486112A",
    "US5571020A",
    "US5047952A",
    "US4520501A",
    "US3831296A",
    "BR102022003549A2",
    "US20050106536A1",
    "ES1219550U",
    "US20250138554A1",
    "US20250201147A1",
    "JP2025056787A",
    "DE10029483A1",
    "IT201600087785A1",
    "ES1293499U",
    "WO2001024140A1",
    "JPH09248315A",
    "US20040001734A1",
    "JPS59500604A",
    "AU2021100994A4",
    "KR20230099038A",
    "US20040008871A1",
    "BR102015026178A2",
    "ES1255339U",
    "ES1302772U",
    "US20250078574A1",
    "CA1200011A",
    "RU188793U1",
    "BR102014028249A2",
    "CN107889025A",
    "AU2021102464A4",
    "CN206574091U",
    "BR102018075811A2",
)

# Full Google Patents page URL (English view) for each code above.
urls = [
    f"https://patents.google.com/patent/{patent_code}/en"
    for patent_code in _PATENT_CODES
]
# Directory that receives the generated PDFs; created up front (no error if
# it already exists) so every conversion below has somewhere to write.
output_dir = 'pdfs'
os.makedirs(output_dir, exist_ok=True)

# Main loop: render each patent page to a PDF, with tqdm showing progress.
for url in tqdm(urls, desc="Baixando PDFs"):
    # URLs have the form https://patents.google.com/patent/{code}/en, so
    # after splitting on '/' the patent code is the element at index 4.
    codigo = url.split('/')[4]
    output_path = f'{output_dir}/{codigo}.pdf'
    try:
        # pdfkit fetches the page at `url` and writes it out as a PDF.
        # No success message is emitted, to keep the progress bar clean.
        pdfkit.from_url(url, output_path, configuration=config)
    except Exception as e:
        # Best-effort batch: one failed patent must not abort the rest, so
        # the error is reported and the loop moves on. tqdm.write is used
        # instead of print so the message does not garble the progress bar.
        tqdm.write(f'Erro ao baixar {codigo}: {e}')