Skip to content

Instantly share code, notes, and snippets.

@yunho-c
Created March 1, 2026 19:09
Show Gist options
  • Select an option

  • Save yunho-c/f712ad0e24aef6053039d73acd4dcbc0 to your computer and use it in GitHub Desktop.

Select an option

Save yunho-c/f712ad0e24aef6053039d73acd4dcbc0 to your computer and use it in GitHub Desktop.
Convert PDF to Markdown using `pdf_oxide`
"""Simple CLI example: convert a PDF file to Markdown using pdf_oxide."""
from pathlib import Path
import typer
from pdf_oxide import PdfDocument
# Typer application object that the CLI command below registers itself on.
# Completion support is switched off via add_completion=False.
app = typer.Typer(
    add_completion=False,
    help="Convert a PDF file to Markdown using pdf_oxide (all pages by default).",
)
@app.command()
def convert(
    pdf_path: Path = typer.Argument(..., help="Path to the input PDF file."),
    output: Path | None = typer.Option(
        None,
        "--output",
        "-o",
        help="Output Markdown file path. Defaults to <input_stem>.md in current directory.",
    ),
    detect_headings: bool = typer.Option(
        True,
        "--detect-headings/--no-detect-headings",
        help="Detect headings based on font size.",
    ),
    include_images: bool = typer.Option(
        False,
        "--include-images/--no-include-images",
        help="Include images in Markdown output.",
    ),
    preserve_layout: bool = typer.Option(
        False,
        "--preserve-layout/--no-preserve-layout",
        help="Preserve visual layout in conversion.",
    ),
) -> None:
    """Convert a specified PDF file into Markdown."""
    # NOTE: Typer displays the docstring above as the command's help text, so it
    # is kept to one line and the details are documented here instead.
    #
    # Flow:
    #   1. Validate that ``pdf_path`` exists and is a regular file (a suffix
    #      other than ".pdf" only produces a warning).
    #   2. Resolve the output path (``<input_stem>.md`` in the current working
    #      directory when ``--output`` is not given).
    #   3. Refuse to run if the output would overwrite the input PDF.
    #   4. Convert all pages with ``PdfDocument.to_markdown_all`` and write the
    #      result as UTF-8, creating parent directories as needed.
    #
    # Every failure path prints a red message to stderr and raises
    # ``typer.Exit(code=1)``.

    # --- Input validation -------------------------------------------------
    if not pdf_path.exists():
        typer.secho(f"Input file not found: {pdf_path}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1)
    if not pdf_path.is_file():
        typer.secho(f"Input path is not a file: {pdf_path}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1)
    if pdf_path.suffix.lower() != ".pdf":
        # Only a warning: the file may still be a valid PDF under another name.
        typer.secho(
            f"Warning: input file does not end with .pdf: {pdf_path.name}",
            fg=typer.colors.YELLOW,
            err=True,
        )

    output_path = output if output is not None else Path.cwd() / f"{pdf_path.stem}.md"

    # --- Guard against clobbering the source PDF --------------------------
    # samefile() compares the underlying files, so it also catches symlinks,
    # hard links and differently spelled paths that point at the input.
    try:
        overwrites_input = output_path.samefile(pdf_path)
    except OSError:
        # The output does not exist yet (or cannot be stat'ed), so it cannot be
        # the input file; any real access problem surfaces when writing below.
        overwrites_input = False
    if overwrites_input:
        typer.secho(
            f"Output path must differ from the input file: {output_path}",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(code=1)

    # --- Conversion -------------------------------------------------------
    try:
        doc = PdfDocument(str(pdf_path))
        markdown = doc.to_markdown_all(
            preserve_layout=preserve_layout,
            detect_headings=detect_headings,
            include_images=include_images,
        )
    except (OSError, RuntimeError) as exc:
        typer.secho(f"Conversion failed: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    # --- Output -----------------------------------------------------------
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(markdown, encoding="utf-8")
    except OSError as exc:
        typer.secho(f"Failed to write output file: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    typer.secho(f"Wrote Markdown to: {output_path}", fg=typer.colors.GREEN)
    typer.echo(f"Characters written: {len(markdown)}")
if __name__ == "__main__":
    # Run the Typer app only when executed as a script, not when imported.
    app()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment