Created
January 4, 2026 18:42
-
-
Save Mlawrence95/8e786f169bfd87adf7be701163c6aede to your computer and use it in GitHub Desktop.
Fetches a Goodreads RSS feed, converts its entries to a list of dictionaries, and dumps them to a YAML file. On subsequent runs, books already present in the old YAML file are deduplicated if its path is provided.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import feedparser # via conda install anaconda::feedparser | |
| import yaml | |
| from bs4 import BeautifulSoup | |
| _GOODREADS_RSS_STREAM_URL = "https://www.goodreads.com/review/list_rss/<XXXXXXXXXX>?key=<XXXXXXXXXXXXXX>&shelf=<XXXX>" | |
| # YAML written by an earlier run lives here. Titles already listed in it are filtered out of the new dump (see filter_existing_books). | |
| _EXISTING_YAML_PATH = "docs/_data/books.yml" | |
| _NEW_YAML_PATH = "books.yaml" | |
def parse_entry(entry: dict) -> dict:
    """Build a book record from one Goodreads RSS entry.

    Copies the title, author, cover image URL and page count from the entry,
    reduces the description to plain text, and adds placeholder values for
    the fields that are meant to be filled in later.
    """
    record = {
        "title": entry["title"],
        "author": entry["author_name"],
        "cover": entry["book_large_image_url"],
        "pages": entry["num_pages"],
        # The description sometimes contains HTML; keep only its text.
        "description": BeautifulSoup(entry["book_description"], "html.parser").text,
    }
    # Placeholders, to be filled in later.
    record.update(rating=0, genre="unspecified", status="Read", review="Pending")
    return record
| def filter_existing_books(all_parsed: list[dict], existing_path: str | None) -> list[dict]: | |
| """Loads previous results, then removes them from the new list of results if the book titles match.""" | |
| if not existing_path: | |
| return all_parsed | |
| with open(existing_path, "r", encoding="utf-8") as f: | |
| existing_books = yaml.safe_load(f) | |
| existing_book_keys = set(b["title"].lower() for b in existing_books) | |
| return [e for e in all_parsed if e["title"].lower() not in existing_book_keys] | |
if __name__ == "__main__":
    # Fetch the feed and turn every entry into a book record.
    rss = feedparser.parse(_GOODREADS_RSS_STREAM_URL)
    books = [parse_entry(item) for item in rss.entries]

    # Drop the books whose titles are already in the existing YAML file.
    new_books = filter_existing_books(books, _EXISTING_YAML_PATH)

    with open(_NEW_YAML_PATH, "w", encoding="utf-8") as out:
        yaml.dump(
            new_books,
            out,
            allow_unicode=True,
            default_flow_style=False,
            sort_keys=False,
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment