Skip to content

Instantly share code, notes, and snippets.

@Mlawrence95
Created January 4, 2026 18:42
Show Gist options
  • Select an option

  • Save Mlawrence95/8e786f169bfd87adf7be701163c6aede to your computer and use it in GitHub Desktop.

Select an option

Save Mlawrence95/8e786f169bfd87adf7be701163c6aede to your computer and use it in GitHub Desktop.
Fetches a Goodreads RSS feed, converts it to a list of dictionaries, and dumps the result to a YAML file. On subsequent runs, books already present in the old YAML file are skipped if its path is provided.
import feedparser # via conda install anaconda::feedparser
import yaml
from bs4 import BeautifulSoup
# RSS feed URL handed to feedparser. The <XXXX...> segments are placeholders and must be
# replaced with real values before running (presumably the Goodreads user id, feed key
# and shelf name -- confirm against your own feed link).
_GOODREADS_RSS_STREAM_URL = "https://www.goodreads.com/review/list_rss/<XXXXXXXXXX>?key=<XXXXXXXXXXXXXX>&shelf=<XXXX>"
# Old yaml lives here. We'll use it to ensure our new dump has unique values.
_EXISTING_YAML_PATH = "docs/_data/books.yml"
# The filtered (not previously saved) books are written to this path.
_NEW_YAML_PATH = "books.yaml"
def parse_entry(entry: dict) -> dict:
    """Build a book record from a single Goodreads RSS entry.

    The title, author, cover URL and page count are copied straight from the
    entry, the description is reduced to its text content, and the remaining
    fields are set to placeholder values meant to be filled in later.
    """
    return {
        "title": entry["title"],
        "author": entry["author_name"],
        "cover": entry["book_large_image_url"],
        "pages": entry["num_pages"],
        # The description sometimes carries HTML markup; keep only the text.
        "description": BeautifulSoup(entry["book_description"], "html.parser").text,
        # Placeholders -- fill in later.
        "rating": 0,
        "genre": "unspecified",
        "status": "Read",
        "review": "Pending",
    }
def filter_existing_books(all_parsed: list[dict], existing_path: str | None) -> list[dict]:
"""Loads previous results, then removes them from the new list of results if the book titles match."""
if not existing_path:
return all_parsed
with open(existing_path, "r", encoding="utf-8") as f:
existing_books = yaml.safe_load(f)
existing_book_keys = set(b["title"].lower() for b in existing_books)
return [e for e in all_parsed if e["title"].lower() not in existing_book_keys]
if __name__ == "__main__":
    # Fetch the feed and turn every RSS entry into a book record.
    feed = feedparser.parse(_GOODREADS_RSS_STREAM_URL)
    parsed_entries = [parse_entry(item) for item in feed.entries]

    # Keep only the books that are not already in the previous dump.
    filtered_values = filter_existing_books(parsed_entries, _EXISTING_YAML_PATH)

    # Preserve key order, use block style, and write non-ASCII text as-is.
    dump_options = {
        "sort_keys": False,
        "default_flow_style": False,
        "allow_unicode": True,
    }
    with open(_NEW_YAML_PATH, "w", encoding="utf-8") as out:
        yaml.dump(filtered_values, out, **dump_options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment