Created
January 15, 2026 01:09
-
-
Save nickfox-taterli/a42d7e39fe371c8544dcc1736d0609e9 to your computer and use it in GitHub Desktop.
将PDF文件的每一页转换为高质量图片
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| PDF转图片工具 | |
| 将PDF文件的每一页转换为高质量图片 | |
| """ | |
| import os | |
| from pathlib import Path | |
| from pdf2image import convert_from_path | |
| from PIL import Image | |
def pdf_to_images(pdf_path, output_dir="output", dpi=600, fmt="PNG", batch_size=10):
    """Convert every page of a PDF into an image file, batch by batch.

    Pages are rendered ``batch_size`` at a time so that only one batch of
    decoded images is held in memory at once.  Each page is written to
    ``output_dir`` as ``<pdf stem>_page_NNNN.<fmt lower-cased>``.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory receiving the images; it is created, together
            with any missing parent directories, when it does not exist.
        dpi: Rendering resolution handed to ``convert_from_path``.
        fmt: Image format name such as "PNG" or "JPEG".  The aliases "JPG"
            and "TIF" are mapped to the names Pillow expects when saving.
        batch_size: Number of pages rendered per batch; must be >= 1.

    Returns:
        True when every page was saved, False when the conversion failed
        (the error message and traceback are printed).

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist.
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Fail fast on a missing input file.
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"PDF文件不存在: {pdf_path}")

    # A zero step makes range() raise and a negative step silently yields no
    # batches (reporting "success" with 0 pages), so reject both up front.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    # parents=True so nested targets such as "output/<name>" work on a clean
    # checkout where the intermediate directory does not exist yet.
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    print(f"正在处理PDF文件: {pdf_path}")
    print(f"输出目录: {output_path}")
    print(f"分辨率: {dpi} DPI")
    print(f"图片格式: {fmt}")
    print(f"每批处理页数: {batch_size}")
    print("-" * 50)

    try:
        # Look up the page count first so the batches can be planned.
        from pdf2image import pdfinfo_from_path

        info = pdfinfo_from_path(pdf_path)
        total_pages = info["Pages"]
        print(f"PDF总页数: {total_pages}")
        print("-" * 50)

        pdf_name = Path(pdf_path).stem
        extension = fmt.lower()

        # Pillow registers writers under canonical names only, so the common
        # extension-style aliases must be translated before calling save().
        canonical = fmt.upper()
        save_format = {"JPG": "JPEG", "TIF": "TIFF"}.get(canonical, canonical)
        save_options = {"optimize": True}
        if save_format == "JPEG":
            # Highest JPEG quality setting.
            save_options["quality"] = 100

        saved_count = 0

        # Render and save one batch of pages at a time.
        for start_page in range(1, total_pages + 1, batch_size):
            end_page = min(start_page + batch_size - 1, total_pages)
            print(f"正在处理第 {start_page}-{end_page} 页...")

            images = convert_from_path(
                pdf_path,
                dpi=dpi,
                first_page=start_page,
                last_page=end_page,
                fmt=fmt,
                thread_count=2,
                use_pdftocairo=True,
                transparent=False,
            )

            # Page numbers in file names are 1-based and continue across
            # batches, hence enumerate() starts at the batch's first page.
            for page_number, image in enumerate(images, start=start_page):
                output_filename = f"{pdf_name}_page_{page_number:04d}.{extension}"
                output_filepath = output_path / output_filename

                image.save(output_filepath, format=save_format, **save_options)
                width, height = image.size
                # Release the decoded pixel data as soon as the page is on disk.
                image.close()

                saved_count += 1
                print(f"  已保存: {output_filename} (尺寸: {width}x{height})")

            # Drop the batch before rendering the next one to cap peak memory.
            del images

        print("-" * 50)
        print(f"转换完成! 共 {saved_count} 页图片已保存到: {output_path}")
        return True
    except Exception as e:
        # Boundary handler: report the failure and signal it through the
        # boolean return value instead of propagating.
        print(f"转换失败: {str(e)}")
        import traceback

        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Conversion settings: 600 DPI output, rendered in small batches.
    PDF_FILE = "stm32mp157d.pdf"
    OUTPUT_DIR = "output/stm32mp157d"
    DPI = 600  # rendering resolution
    FORMAT = "PNG"  # PNG is lossless
    BATCH_SIZE = 5  # pages per batch, kept small to limit memory use

    # Run the conversion.
    success = pdf_to_images(
        pdf_path=PDF_FILE,
        output_dir=OUTPUT_DIR,
        dpi=DPI,
        fmt=FORMAT,
        batch_size=BATCH_SIZE,
    )

    if success:
        print("\n✓ 所有操作成功完成!")
    else:
        print("\n✗ 转换过程中出现错误")
        # The bare exit() helper is injected by the site module for
        # interactive sessions and is missing under `python -S` or in frozen
        # builds; raising SystemExit works everywhere with the same status.
        raise SystemExit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment