Created
January 9, 2026 21:59
-
-
Save thefranke/e7b80eca835275f355fd2f0dbe080e7b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # | |
| # Convert (flagged) articles in bulletty to PDF | |
| # | |
| import pypandoc | |
| import toml | |
| import frontmatter | |
| import re | |
| from urllib.parse import urljoin, urlparse | |
| from pathlib import Path | |
| from optparse import OptionParser | |
| from appdirs import AppDirs | |
# Inline markdown link/image target: [text](url "optional title").
# The title is matched with [^"]* so one match can never run across several
# links that happen to sit on the same line.
_MD_LINK_RE = re.compile(r'\[([^\]]*)\]\((.*?)\s*("[^"]*")?\s*\)')


def replace_relative_urls(text, base_url):
    """Resolve every inline markdown link target in *text* against *base_url*.

    Each ``[label](target "title")`` occurrence (including the bracket part of
    ``![alt](src)`` images) is rewritten so that ``target`` becomes
    ``urljoin(base_url, target)``. The label and an optional double-quoted
    title are kept unchanged.

    Args:
        text: Markdown source to process.
        base_url: URL that relative targets are resolved against.

    Returns:
        The markdown text with rewritten link targets.
    """
    def _rewrite(match):
        label, target, title = match.group(1), match.group(2), match.group(3)
        resolved = urljoin(base_url, target)
        # Re-attach the title (if any) instead of silently discarding it.
        suffix = f" {title}" if title else ""
        return f"[{label}]({resolved}{suffix})"

    return _MD_LINK_RE.sub(_rewrite, text)
def read_md(file_path: Path):
    """Read a markdown file and rewrite its front matter separators.

    Lines between the first pair of ``---`` delimiters have their first
    ``" ="`` replaced by ``":"`` (so ``key = value`` becomes ``key: value``;
    presumably to turn TOML-style front matter into YAML that
    python-frontmatter can parse -- confirm against the cache format).
    Every line is stripped of surrounding whitespace and re-terminated with a
    single newline.

    Args:
        file_path: Path of the markdown file to read.

    Returns:
        The processed file content as one string.
    """
    in_front_matter = False
    front_matter_done = False
    lines = []
    # Explicit encoding so the result does not depend on the platform locale.
    with open(file_path, encoding="utf-8") as f:
        for raw in f:
            # NOTE(review): strip() also removes indentation from body lines
            # (indented code blocks, nested lists) -- confirm this is intended.
            line = raw.strip()
            if not front_matter_done and line == "---":
                if in_front_matter:
                    # Closing delimiter: later '---' lines are ordinary content.
                    front_matter_done = True
                in_front_matter = not in_front_matter
            if in_front_matter:
                # Only the key/value separator is rewritten; a ' =' inside the
                # value itself (titles, URLs) must stay untouched.
                line = line.replace(' =', ':', 1)
            lines.append(line + '\n')
    return "".join(lines)
def postprocess_md(text: str):
    """Add PDF-related front matter and absolutize links in an article.

    Sets the ``urlcolor`` metadata entry and, when the article has a ``url``
    entry, adds a ``subtitle`` linking back to it and resolves the links in
    the body against that URL.

    Args:
        text: Markdown document including its front matter.

    Returns:
        The document re-serialized by ``frontmatter.dumps``.
    """
    fm = frontmatter.loads(text)

    # better formatting
    fm["urlcolor"] = "Maroon"

    url = fm.get("url")
    if url:
        # link back to original URL
        fm["subtitle"] = "[%s](%s)" % (url, url)

        # fix relative image URLs: urljoin already discards the last path
        # segment of the base, so the article URL is passed as-is. Trimming a
        # segment beforehand would resolve links one directory too high.
        fm.content = replace_relative_urls(fm.content, url)

    return frontmatter.dumps(fm)
def make_pdf(file_path: Path, out_path: Path, regenerate=True):
    """Convert one markdown article to a PDF in *out_path*.

    The PDF is named after the source file with its final suffix replaced by
    ``.pdf``. Failures while reading or converting are reported on stdout and
    the article is skipped.

    Args:
        file_path: Markdown file to convert.
        out_path: Directory receiving the PDF (created if missing).
        regenerate: When false, an already existing PDF is left untouched.
    """
    # with_suffix swaps only the trailing extension; a plain str.replace would
    # also rewrite '.md' occurrences in the middle of the file name.
    pdf_path = Path(out_path) / Path(file_path.name).with_suffix('.pdf')
    if not regenerate and pdf_path.exists():
        return

    try:
        text = read_md(file_path)
    except OSError as e:
        # e.g. a flagged entry whose file no longer exists in the cache
        print("Error, skipping")
        print(e)
        return
    text = postprocess_md(text)

    print("Converting %s" % (file_path.name))
    pdf_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        pypandoc.convert_text(text, 'pdf', format='md', outputfile=str(pdf_path), extra_args=["--pdf-engine=xelatex"])
    except RuntimeError as e:
        print("Error, skipping")
        print(e)
def convert_cache(cache_path: Path, out_path: Path, flagged_only=True, regenerate=False):
    """Convert cached articles below *cache_path* to PDFs in *out_path*.

    Args:
        cache_path: Root of the cache directory.
        out_path: Directory receiving the generated PDFs.
        flagged_only: When true, convert only the entries listed under
            ``read_later`` in ``<cache_path>/.later.toml`` (each entry is
            taken relative to ``<cache_path>/categories``); otherwise convert
            every ``*.md`` file found recursively below *cache_path*.
        regenerate: Passed through to ``make_pdf``.
    """
    cache_path = Path(cache_path)
    if flagged_only:
        # Use the cache_path argument rather than a module-level global so the
        # function also works when imported or called with another directory.
        later = toml.load(cache_path / '.later.toml')
        for entry in later.get('read_later', []):
            md_file = cache_path / 'categories' / entry
            make_pdf(md_file, out_path, regenerate)
    else:
        for md_file in cache_path.rglob('*.md'):
            make_pdf(md_file, out_path, regenerate)
if __name__ == '__main__':
    # Command-line entry point: parse options, locate the cache directory and
    # convert its articles into the requested output directory.
    import sys

    parser = OptionParser()

    # Adding boolean options
    parser.add_option("-f", "--flaggedonly", action="store_true", dest="flaggedonly", default=True, help="Only convert read-later flagged articles")
    # --flaggedonly defaults to True and can only set True, so a separate
    # switch is needed to reach the convert-everything mode.
    parser.add_option("-a", "--all", action="store_false", dest="flaggedonly", help="Convert all cached articles, not only read-later flagged ones")
    parser.add_option("-r", "--regenerate", action="store_true", default=False, help="Force regeneration articles")
    parser.add_option("-o", "--outdir", help="Specify path for PDF output")
    parser.add_option("-c", "--cachedir", help="Specify bulletty cache directory (leave empty to detect automatically)")

    (options, args) = parser.parse_args()

    # Fall back to the per-user data directory reported by appdirs.
    cache_dir = options.cachedir if options.cachedir else AppDirs("bulletty").user_data_dir
    bulletty_cache = Path(cache_dir)

    if not options.outdir:
        print("Error: Please specify an output directory!\n")
        parser.print_help()
        # Non-zero status signals the usage error; sys.exit does not depend on
        # the interactive-only `exit` helper installed by the site module.
        sys.exit(1)

    convert_cache(bulletty_cache, Path(options.outdir), options.flaggedonly, options.regenerate)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment