Created
December 2, 2025 23:24
-
-
Save farzadhallaji/008bccc967cf6c2ef92f0129a76ee408 to your computer and use it in GitHub Desktop.
Splitting a PDF file into several files by page ranges
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # pip install pypdf | |
| from __future__ import annotations | |
| import argparse | |
| from pathlib import Path | |
| from typing import List, Tuple | |
| from pypdf import PdfReader, PdfWriter | |
def parse_ranges(ranges_str: str, total_pages: int) -> List[Tuple[int, int]]:
    """
    Parse a comma-separated list of page ranges such as "1-62,63-116".

    Page numbers are 1-based and both ends are inclusive. An open-ended
    range like "63-" runs from page 63 to ``total_pages``. Empty items
    (for example from a trailing comma) are skipped.

    Args:
        ranges_str: The range specification, e.g. "1-62,63-116" or "63-".
        total_pages: Number of pages in the document; the largest page
            number a range may reference.

    Returns:
        A list of (start, end) tuples, 1-based inclusive, in the order
        they were given.

    Raises:
        ValueError: If an item has no "-", lacks a start page, contains a
            non-integer page number, lies outside 1..total_pages, has
            end < start, or if any page is covered by more than one range.
    """
    out: List[Tuple[int, int]] = []

    for part in ranges_str.split(","):
        part = part.strip()
        if not part:
            continue

        if "-" not in part:
            raise ValueError(f"Bad range '{part}'. Use like 1-62.")

        # Split on the first dash only, so "1--3" yields end "-3" and is
        # rejected by the end >= start check rather than mis-parsed.
        a, b = (x.strip() for x in part.split("-", 1))
        if a == "":
            raise ValueError(f"Bad range '{part}': start page missing.")

        try:
            start = int(a)
            end = total_pages if b == "" else int(b)
        except ValueError:
            # Re-raise with the offending range instead of int()'s bare
            # "invalid literal" message.
            raise ValueError(
                f"Bad range '{part}': page numbers must be integers."
            ) from None

        if start < 1:
            raise ValueError(f"Bad range '{part}': start must be >= 1.")
        if start > total_pages:
            # Checked before end < start so an open-ended range such as
            # "200-" on a shorter document gets an accurate message.
            raise ValueError(
                f"Bad range '{part}': start ({start}) > total pages ({total_pages})."
            )
        if end < start:
            raise ValueError(f"Bad range '{part}': end must be >= start.")
        if end > total_pages:
            raise ValueError(f"Bad range '{part}': end ({end}) > total pages ({total_pages}).")

        out.append((start, end))

    # Sanity check: every page may belong to at most one range.
    used = set()
    for s, e in out:
        for p in range(s, e + 1):
            if p in used:
                raise ValueError(f"Overlapping ranges: page {p} appears more than once.")
            used.add(p)

    return out
def split_pdf_ranges(input_pdf: str | Path, ranges: List[Tuple[int, int]], output_dir: str | Path) -> List[Path]:
    """
    Write one PDF per page range, copying pages from ``input_pdf``.

    Each output is named ``<input stem>_<start>-<end>.pdf`` (page numbers
    zero-padded to three digits) and placed in ``output_dir``, which is
    created if it does not exist.

    Args:
        input_pdf: Path to the source PDF.
        ranges: (start, end) page ranges, 1-based and inclusive.
        output_dir: Directory that receives the split files.

    Returns:
        Paths of the files written, in the same order as ``ranges``.

    Raises:
        ValueError: If any range does not satisfy
            1 <= start <= end <= number of pages in ``input_pdf``.
    """
    input_pdf = Path(input_pdf)
    output_dir = Path(output_dir)

    reader = PdfReader(str(input_pdf))
    total_pages = len(reader.pages)

    # Validate every range before writing anything. Without this check a
    # start below 1 becomes a negative index, and an end past the last
    # page fails only after earlier outputs were already written.
    for start, end in ranges:
        if not 1 <= start <= end <= total_pages:
            raise ValueError(
                f"Bad range {start}-{end}: must satisfy "
                f"1 <= start <= end <= total pages ({total_pages})."
            )

    output_dir.mkdir(parents=True, exist_ok=True)

    outputs: List[Path] = []
    for start, end in ranges:
        writer = PdfWriter()
        for i in range(start - 1, end):  # to 0-based, end is inclusive
            writer.add_page(reader.pages[i])

        out_path = output_dir / f"{input_pdf.stem}_{start:03d}-{end:03d}.pdf"
        with out_path.open("wb") as f:
            writer.write(f)
        outputs.append(out_path)

    return outputs
def main() -> None:
    """
    Command-line entry point: split a PDF into several PDFs by page ranges.

    Reads the input path, the ``--ranges`` specification and the ``--out``
    directory from the command line, writes the split files and prints the
    path of each one. An invalid or empty ``--ranges`` value is reported
    through argparse (usage message, exit status 2) rather than a traceback.
    """
    ap = argparse.ArgumentParser(description="Split a PDF into multiple PDFs by page ranges.")
    ap.add_argument("input_pdf", help="Path to input PDF")
    ap.add_argument("--ranges", required=True, help='Ranges like "1-62,63-116" (1-based, inclusive)')
    ap.add_argument("--out", default="splits", help="Output directory (default: splits)")
    args = ap.parse_args()

    # The page count is needed to resolve open-ended ranges such as "63-"
    # and to bounds-check the rest.
    total_pages = len(PdfReader(args.input_pdf).pages)

    try:
        ranges = parse_ranges(args.ranges, total_pages)
    except ValueError as exc:
        # ap.error prints the usage line plus the message and exits.
        ap.error(str(exc))

    if not ranges:
        ap.error("--ranges did not contain any page ranges.")

    outputs = split_pdf_ranges(args.input_pdf, ranges, args.out)
    for p in outputs:
        print(p)


if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python split_pdf.py main.pdf --ranges "1-78,79-117" --out out_pdfs