Skip to content

Instantly share code, notes, and snippets.

@farzadhallaji
Created December 2, 2025 23:24
Show Gist options
  • Select an option

  • Save farzadhallaji/008bccc967cf6c2ef92f0129a76ee408 to your computer and use it in GitHub Desktop.

Select an option

Save farzadhallaji/008bccc967cf6c2ef92f0129a76ee408 to your computer and use it in GitHub Desktop.
Splitting a PDF file into several files by page ranges
# pip install pypdf
from __future__ import annotations
import argparse
from pathlib import Path
from typing import List, Tuple
from pypdf import PdfReader, PdfWriter
def parse_ranges(ranges_str: str, total_pages: int) -> List[Tuple[int, int]]:
    """
    Parse a comma-separated list of page ranges such as "1-62,63-116".

    Pages are 1-based and both ends are inclusive. An open-ended range like
    "63-" runs through ``total_pages``. Empty items (for example from a
    trailing comma) are skipped.

    Args:
        ranges_str: The range specification to parse.
        total_pages: Number of pages in the document; the upper bound for
            every range and the implied end of an open-ended range.

    Returns:
        The ranges as ``(start, end)`` tuples, in the order they were given.

    Raises:
        ValueError: If an item is not of the form ``start-end``, a page
            number is not an integer, ``start < 1``, ``end < start``,
            ``end > total_pages``, or two ranges share a page.
    """
    out: List[Tuple[int, int]] = []
    for raw in ranges_str.split(","):
        part = raw.strip()
        if not part:
            continue
        if "-" not in part:
            raise ValueError(f"Bad range '{part}'. Use like 1-62.")
        a, b = (x.strip() for x in part.split("-", 1))
        if a == "":
            raise ValueError(f"Bad range '{part}': start page missing.")
        try:
            start = int(a)
            end = total_pages if b == "" else int(b)
        except ValueError:
            # Report the offending range instead of int()'s bare
            # "invalid literal" message, like every other error here.
            raise ValueError(
                f"Bad range '{part}': page numbers must be integers."
            ) from None
        if start < 1:
            raise ValueError(f"Bad range '{part}': start must be >= 1.")
        if end < start:
            raise ValueError(f"Bad range '{part}': end must be >= start.")
        if end > total_pages:
            raise ValueError(f"Bad range '{part}': end ({end}) > total pages ({total_pages}).")
        out.append((start, end))

    # Sanity check: no page may belong to two ranges. Each range is compared
    # against the ranges before it, so the cost depends on the number of
    # ranges rather than the number of pages. The page reported is the
    # lowest page of the current range that an earlier range already covers.
    for idx, (s, e) in enumerate(out):
        clashes = [max(s, ps) for ps, pe in out[:idx] if s <= pe and ps <= e]
        if clashes:
            raise ValueError(f"Overlapping ranges: page {min(clashes)} appears more than once.")
    return out
def split_pdf_ranges(input_pdf: str | Path, ranges: List[Tuple[int, int]], output_dir: str | Path) -> List[Path]:
    """
    Write one PDF per page range, copying pages from ``input_pdf``.

    Each output file is named ``<input stem>_<start>-<end>.pdf`` (page
    numbers zero-padded to at least three digits) and is placed in
    ``output_dir``, which is created if it does not exist.

    Args:
        input_pdf: Path to the source PDF.
        ranges: ``(start, end)`` page ranges, 1-based and inclusive.
        output_dir: Directory that receives the split files.

    Returns:
        Paths of the files written, in the order of ``ranges``.

    Raises:
        ValueError: If a range starts before page 1.
        IndexError: If a range ends past the last page of the document.
    """
    input_pdf = Path(input_pdf)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    reader = PdfReader(str(input_pdf))
    total_pages = len(reader.pages)

    # Validate every range before writing anything, so a bad range cannot
    # leave a partial set of output files behind. A start below 1 would turn
    # into a negative page index in the copy loop below.
    for start, end in ranges:
        if start < 1:
            raise ValueError(f"Bad range {start}-{end}: start must be >= 1.")
        if end > total_pages:
            raise IndexError(
                f"Bad range {start}-{end}: end ({end}) > total pages ({total_pages})."
            )

    outputs: List[Path] = []
    for start, end in ranges:
        writer = PdfWriter()
        for i in range(start - 1, end):  # to 0-based, end is inclusive
            writer.add_page(reader.pages[i])
        out_path = output_dir / f"{input_pdf.stem}_{start:03d}-{end:03d}.pdf"
        with out_path.open("wb") as f:
            writer.write(f)
        outputs.append(out_path)
    return outputs
def main():
    """
    Command-line entry point: split a PDF into several PDFs by page ranges.

    Reads the input path, the ``--ranges`` specification and the ``--out``
    directory from the command line, writes the split files, and prints the
    path of each file written. An invalid ``--ranges`` value is reported as
    a normal argparse usage error instead of a traceback.
    """
    ap = argparse.ArgumentParser(description="Split a PDF into multiple PDFs by page ranges.")
    ap.add_argument("input_pdf", help="Path to input PDF")
    ap.add_argument("--ranges", required=True, help='Ranges like "1-62,63-116" (1-based, inclusive)')
    ap.add_argument("--out", default="splits", help="Output directory (default: splits)")
    args = ap.parse_args()

    # The page count is needed up front to resolve open-ended ranges ("63-")
    # and to bounds-check the requested ranges.
    reader = PdfReader(args.input_pdf)
    total_pages = len(reader.pages)
    try:
        ranges = parse_ranges(args.ranges, total_pages)
    except ValueError as exc:
        # ap.error() prints the usage line plus the message and exits with
        # status 2.
        ap.error(str(exc))

    outputs = split_pdf_ranges(args.input_pdf, ranges, args.out)
    for p in outputs:
        print(p)


if __name__ == "__main__":
    main()
@farzadhallaji
Copy link
Author

python split_pdf.py main.pdf --ranges "1-78,79-117" --out out_pdfs

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment