Skip to content

Instantly share code, notes, and snippets.

@nickfox-taterli
Created January 15, 2026 01:09
Show Gist options
  • Select an option

  • Save nickfox-taterli/a42d7e39fe371c8544dcc1736d0609e9 to your computer and use it in GitHub Desktop.

Select an option

Save nickfox-taterli/a42d7e39fe371c8544dcc1736d0609e9 to your computer and use it in GitHub Desktop.
将PDF文件的每一页转换为高质量图片
#!/usr/bin/env python3
"""
PDF转图片工具
将PDF文件的每一页转换为高质量图片
"""
import os
from pathlib import Path
from pdf2image import convert_from_path
from PIL import Image
def pdf_to_images(pdf_path, output_dir="output", dpi=600, fmt="PNG", batch_size=10):
    """Convert each page of a PDF into an image file, working in page batches.

    Pages are rendered ``batch_size`` at a time so that only one batch of
    images is held in memory at once. Each page is written to
    ``<output_dir>/<pdf stem>_page_<NNNN>.<fmt lowercased>``.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory that receives the images; it is created
            (including missing parent directories) when absent.
        dpi: Rendering resolution passed to ``convert_from_path``.
        fmt: Image format name such as ``"PNG"`` or ``"JPEG"``; ``"JPG"`` and
            ``"TIF"`` are accepted as aliases when saving.
        batch_size: Number of pages rendered per batch; must be >= 1.

    Returns:
        True when every batch was converted and saved; False when any error
        occurred during conversion (the error and traceback are printed).

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist.
    """
    # Fail early, before any output directory is created.
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"PDF文件不存在: {pdf_path}")

    # parents=True so nested targets such as "output/name" work on a first run.
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    print(f"正在处理PDF文件: {pdf_path}")
    print(f"输出目录: {output_path}")
    print(f"分辨率: {dpi} DPI")
    print(f"图片格式: {fmt}")
    print(f"每批处理页数: {batch_size}")
    print("-" * 50)

    try:
        # A non-positive step would either make range() raise or silently
        # produce no batches and report success with zero pages saved.
        if batch_size < 1:
            raise ValueError(f"batch_size 必须为正整数: {batch_size}")

        # Imported here (not at module top) as in the original script, so an
        # import failure is reported through the same error path.
        from pdf2image import pdfinfo_from_path

        total_pages = pdfinfo_from_path(pdf_path)["Pages"]
        print(f"PDF总页数: {total_pages}")
        print("-" * 50)

        pdf_name = Path(pdf_path).stem
        extension = fmt.lower()

        # Pillow registers "JPEG"/"TIFF" as format names; "JPG"/"TIF" are only
        # file extensions, so map them before calling Image.save.
        pil_aliases = {"JPG": "JPEG", "TIF": "TIFF"}
        save_format = pil_aliases.get(fmt.upper(), fmt.upper())
        save_kwargs = {"optimize": True}
        if save_format == "JPEG":
            save_kwargs["quality"] = 100

        saved_count = 0
        for start_page in range(1, total_pages + 1, batch_size):
            end_page = min(start_page + batch_size - 1, total_pages)
            print(f"正在处理第 {start_page}-{end_page} 页...")

            # Render only the pages of the current batch.
            images = convert_from_path(
                pdf_path,
                dpi=dpi,
                first_page=start_page,
                last_page=end_page,
                fmt=fmt,
                thread_count=2,
                use_pdftocairo=True,
                transparent=False,
            )

            # Number files by absolute page number, not by position in batch.
            for page_number, image in enumerate(images, start=start_page):
                output_filename = f"{pdf_name}_page_{page_number:04d}.{extension}"
                image.save(
                    output_path / output_filename,
                    format=save_format,
                    **save_kwargs,
                )
                saved_count += 1
                print(f" 已保存: {output_filename} (尺寸: {image.size[0]}x{image.size[1]})")

            # Drop the batch before rendering the next one to limit memory use.
            del images

        print("-" * 50)
        print(f"转换完成! 共 {saved_count} 页图片已保存到: {output_path}")
        return True
    except Exception as e:
        # Top-level boundary for the conversion: report and signal failure
        # through the return value instead of propagating.
        print(f"转换失败: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Settings: 600 DPI rendering, processed in small batches.
    PDF_FILE = "stm32mp157d.pdf"
    OUTPUT_DIR = "output/stm32mp157d"
    DPI = 600  # rendering resolution
    FORMAT = "PNG"  # PNG is a lossless format
    BATCH_SIZE = 5  # render 5 pages per batch to reduce memory use

    # Run the conversion.
    success = pdf_to_images(
        pdf_path=PDF_FILE,
        output_dir=OUTPUT_DIR,
        dpi=DPI,
        fmt=FORMAT,
        batch_size=BATCH_SIZE,
    )

    if success:
        print("\n✓ 所有操作成功完成!")
    else:
        print("\n✗ 转换过程中出现错误")
        # SystemExit instead of the site-provided exit() helper, which is not
        # guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment