Skip to content

Instantly share code, notes, and snippets.

@thefranke
Created January 9, 2026 21:59
Show Gist options
  • Select an option

  • Save thefranke/e7b80eca835275f355fd2f0dbe080e7b to your computer and use it in GitHub Desktop.

Select an option

Save thefranke/e7b80eca835275f355fd2f0dbe080e7b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#
# Convert (flagged) articles in bulletty to PDF
#
import pypandoc
import toml
import frontmatter
import re
from urllib.parse import urljoin, urlparse
from pathlib import Path
from optparse import OptionParser
from appdirs import AppDirs
def replace_relative_urls(text, base_url):
    """Rewrite the targets of inline Markdown links and images as absolute URLs.

    Every ``[label](target)`` or ``[label](target "title")`` construct in
    *text* (including the bracketed part of ``![alt](...)`` images) has its
    target resolved against *base_url* with ``urllib.parse.urljoin``. An
    optional double-quoted title is preserved.

    Args:
        text: Markdown source to rewrite.
        base_url: URL that link targets are resolved against.

    Returns:
        The Markdown text with the rewritten link targets.
    """
    # group 1: label, group 2: target, group 3: optional "title".
    # The title is matched with [^"]* so it stops at its own closing quote;
    # a greedy ".*" here would run to the last quote on the line and swallow
    # every link in between.
    pattern = r'\[([^\]]*)\]\((.*?)\s*("[^"]*")?\s*\)'

    def absolutize(match):
        label, target, title = match.groups()
        suffix = f' {title}' if title else ''
        return f'[{label}]({urljoin(base_url, target)}{suffix})'

    return re.sub(pattern, absolutize, text)
def read_md(file_path: Path) -> str:
    """Read an article file and rewrite its front matter separators.

    Lines between the first pair of ``---`` delimiters are treated as front
    matter; on each of them the first `` =`` is replaced by ``:`` (so
    ``key = value`` becomes ``key: value``) -- presumably so the block can be
    parsed as YAML by ``frontmatter``; confirm against bulletty's file format.
    Any later ``---`` line is left alone and does not reopen the front matter.

    Every line is stripped of leading and trailing whitespace before being
    emitted, and a single ``\\n`` is appended to each.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        The processed file content as one string.
    """
    in_front_matter = False
    front_matter_done = False
    lines = []
    # Explicit encoding so the result does not depend on the platform locale.
    with open(file_path, encoding="utf-8") as f:
        for raw in f:
            line = raw.strip()
            if not front_matter_done and line == "---":
                if in_front_matter:
                    # This is the closing delimiter.
                    front_matter_done = True
                in_front_matter = not in_front_matter
            if in_front_matter:
                # Only the key/value separator: a value such as
                # 'title = "a = b"' must keep its own " =" untouched.
                line = line.replace(' =', ':', 1)
            lines.append(line + '\n')
    return ''.join(lines)
def postprocess_md(text: str):
    """Add PDF-oriented metadata to an article and absolutize its links.

    The text is parsed with ``frontmatter``; ``urlcolor`` is set to
    ``"Maroon"``, and when the metadata contains a ``url`` entry a
    ``subtitle`` linking back to that URL is added and the link targets in the
    body are resolved against it.

    Args:
        text: Markdown document with a front matter block.

    Returns:
        The document serialized again by ``frontmatter.dumps``.
    """
    fm = frontmatter.loads(text)
    # better formatting
    fm["urlcolor"] = "Maroon"

    url = fm.metadata.get("url")
    if url:
        # link back to original URL
        fm["subtitle"] = "[%s](%s)" % (url, url)
        # fix relative image URLs
        # urljoin() already discards the last path segment of its base, so the
        # article URL is passed as-is; trimming it beforehand resolved relative
        # targets one directory too high.
        fm.content = replace_relative_urls(fm.content, url)
    return frontmatter.dumps(fm)
def make_pdf(file_path: Path, out_path: Path, regenerate = True):
    """Convert one Markdown article to a PDF in *out_path*.

    The PDF is named after the source file with its suffix replaced by
    ``.pdf``. Conversion is done by pandoc (through ``pypandoc``) using the
    ``xelatex`` PDF engine. A ``RuntimeError`` raised by the conversion is
    printed and otherwise ignored, so one bad article does not stop a batch.

    Args:
        file_path: Markdown file to convert.
        out_path: Directory receiving the PDF (str or Path); created if
            it does not exist yet.
        regenerate: When false, an already existing PDF is left untouched.
    """
    out_dir = Path(out_path)
    # with_suffix() swaps only the trailing extension; str.replace() would
    # also rewrite a ".md" occurring earlier in the file name.
    pdf_path = out_dir / Path(file_path.name).with_suffix('.pdf').name
    if not regenerate and pdf_path.exists():
        return

    text = postprocess_md(read_md(file_path))
    print("Converting %s" % (file_path.name))
    out_dir.mkdir(parents=True, exist_ok=True)
    try:
        pypandoc.convert_text(text, 'pdf', format='md', outputfile=str(pdf_path), extra_args=["--pdf-engine=xelatex"])
    except RuntimeError as e:
        print("Error, skipping")
        print(e)
def convert_cache(cache_path: Path, out_path: Path, flagged_only = True, regenerate = False):
    """Convert the articles of a bulletty cache directory to PDF.

    Args:
        cache_path: Root of the bulletty cache.
        out_path: Directory receiving the PDFs.
        flagged_only: When true, only the entries listed under ``read_later``
            in ``<cache_path>/.later.toml`` are converted, each taken relative
            to ``<cache_path>/categories``. When false, every ``*.md`` file
            found recursively below *cache_path* is converted.
        regenerate: Passed on to ``make_pdf``; forces existing PDFs to be
            rebuilt.
    """
    cache_path = Path(cache_path)
    if flagged_only:
        # Use the parameter, not a module-level global, so the function also
        # works when this file is imported rather than run as a script.
        later = toml.load(cache_path / '.later.toml')
        md_files = [cache_path / 'categories' / entry for entry in later.get('read_later', [])]
    else:
        md_files = cache_path.rglob('*.md')

    for md_file in md_files:
        make_pdf(md_file, out_path, regenerate)
if __name__ == '__main__':
    # Command-line entry point: parse options, locate the cache, convert.
    parser = OptionParser()
    # Adding boolean options
    parser.add_option("-f", "--flaggedonly", action="store_true", default=True, help="Only convert read-later flagged articles")
    # --flaggedonly defaults to True and store_true can never clear it, so
    # without an opt-out the "convert everything" mode was unreachable.
    parser.add_option("-a", "--all", action="store_false", dest="flaggedonly", help="Convert every article in the cache, not only flagged ones")
    parser.add_option("-r", "--regenerate", action="store_true", default=False, help="Force regeneration articles")
    parser.add_option("-o", "--outdir", help="Specify path for PDF output")
    parser.add_option("-c", "--cachedir", help="Specify bulletty cache directory (leave empty to detect automatically)")
    (options, args) = parser.parse_args()

    # Fall back to the per-user data directory reported by appdirs.
    cache_dir = options.cachedir if options.cachedir else AppDirs("bulletty").user_data_dir
    bulletty_cache = Path(cache_dir)

    if not options.outdir:
        print("Error: Please specify an output directory!\n")
        parser.print_help()
        # Non-zero status so shells and scripts can detect the failure.
        raise SystemExit(1)

    convert_cache(bulletty_cache, Path(options.outdir), options.flaggedonly, options.regenerate)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment