Extract AI-generated summaries from Granola meeting notes cache (macOS)
#!/usr/bin/env python3
"""
Extract AI-generated summaries from Granola meeting notes cache.

Granola stores meeting data in a local cache file. The AI-generated summaries
are stored in documentPanels (NOT in documents.notes_*). This script extracts
those summaries and exports them as markdown files with a JSON index.

Usage:
    python extract_granola_summaries.py list              # List all documents with summaries
    python extract_granola_summaries.py extract           # Extract to markdown files
    python extract_granola_summaries.py extract -o ./out  # Specify output directory

Requirements:
    - macOS with Granola installed
    - Python 3.9+

Author: Dan Malone
License: MIT
"""

import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Any

# Default paths - modify DEFAULT_OUTPUT_DIR as needed for your system
GRANOLA_CACHE_PATH = Path.home() / "Library/Application Support/Granola/cache-v3.json"
DEFAULT_OUTPUT_DIR = Path.home() / "granola-summaries"

def extract_text_from_prosemirror(node: Any) -> str:
    """
    Recursively extract text from a ProseMirror document structure.

    Granola uses ProseMirror for rich text storage. This function converts
    the nested document structure into plain markdown-ish text.
    """
    if not isinstance(node, dict):
        return ""

    text_parts = []
    node_type = node.get("type", "")

    # Emit a markdown prefix for the node itself, then recurse into children.
    if node_type == "text":
        text_parts.append(node.get("text", ""))
    elif node_type == "heading":
        text_parts.append("\n## ")
    elif node_type == "listItem":
        text_parts.append("\n- ")
    elif node_type == "paragraph":
        text_parts.append("\n")

    for child in node.get("content", []):
        text_parts.append(extract_text_from_prosemirror(child))

    return "".join(text_parts)

def load_granola_cache(cache_path: Path) -> dict:
    """
    Load the Granola cache file.

    The cache file has a nested structure where the actual cache data
    is a JSON string inside the top-level 'cache' key.
    """
    if not cache_path.exists():
        print(f"Error: Granola cache not found at {cache_path}", file=sys.stderr)
        print("Make sure Granola is installed and has recorded at least one meeting.", file=sys.stderr)
        sys.exit(1)

    with open(cache_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # The actual cache is a JSON string inside the 'cache' key
    cache_str = data.get("cache", "")
    if isinstance(cache_str, str) and cache_str:
        return json.loads(cache_str)
    return data

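
# Assumed on-disk layout of cache-v3.json (double-encoded JSON, inferred
# from the parsing above rather than from any documented Granola format):
#
#   {"cache": "{\"state\": {\"documents\": {...}, \"documentPanels\": {...}}}"}
#
# json.load() yields {"cache": "<json string>"}; json.loads() on that string
# yields the real {"state": {...}} object that extract_summaries() walks.
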
def extract_summaries(cache: dict) -> list[dict]:
    """
    Extract AI-generated summaries from documentPanels.

    Returns a list of dicts with:
        - doc_id: Document ID
        - title: Document title
        - created_at: Document creation date (ISO format)
        - summary: Extracted summary text
    """
    state = cache.get("state", {})
    documents = state.get("documents", {})
    document_panels = state.get("documentPanels", {})

    summaries = []
    for doc_id, panels in document_panels.items():
        # Skip empty or invalid doc_ids
        if not doc_id or not doc_id.strip():
            continue

        # Find the Summary panel
        summary_panel = None
        for panel in panels.values():
            if panel.get("title") == "Summary":
                summary_panel = panel
                break
        if not summary_panel:
            continue

        # Extract text from ProseMirror content
        content = summary_panel.get("content", {})
        summary_text = extract_text_from_prosemirror(content).strip()
        if not summary_text:
            continue

        # Get document metadata; guard against null fields in the cache
        doc = documents.get(doc_id, {})
        title = doc.get("title") or "Untitled"
        created_at = doc.get("created_at") or ""

        summaries.append({
            "doc_id": doc_id,
            "title": title,
            "created_at": created_at,
            "summary": summary_text,
        })

    # Sort by created_at descending (newest first)
    summaries.sort(key=lambda x: x.get("created_at", ""), reverse=True)
    return summaries

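
# Assumed shape of state["documentPanels"] (inferred from the lookups above;
# field names beyond "title" and "content" are not relied on):
#
#   {"<doc_id>": {
#       "<panel_id>": {"title": "Summary", "content": {<ProseMirror doc>}},
#       ...
#   }, ...}
#
# A document may carry several panels; only the one titled "Summary" holds
# the AI-generated summary this script extracts.
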
def format_date(iso_date: str) -> str:
    """Format an ISO date string as a human-readable date."""
    if not iso_date:
        return "Unknown date"
    try:
        # Python 3.9's fromisoformat() does not accept a trailing "Z",
        # so normalize it to an explicit UTC offset first.
        dt = datetime.fromisoformat(iso_date.replace("Z", "+00:00"))
        return dt.strftime("%Y-%m-%d %H:%M")
    except (ValueError, AttributeError):
        return iso_date

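
# Example: format_date("2026-01-20T09:42:00Z") -> "2026-01-20 09:42"
# (rendered in UTC; unparseable strings are returned unchanged).
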
def cmd_list(args: argparse.Namespace) -> None:
    """List all documents with summaries."""
    cache = load_granola_cache(GRANOLA_CACHE_PATH)
    summaries = extract_summaries(cache)
    if not summaries:
        print("No documents with summaries found.")
        return

    print(f"Found {len(summaries)} documents with AI-generated summaries:\n")
    print(f"{'ID':<11} {'DATE':<18} {'TITLE'}")
    print("-" * 80)
    for summary in summaries:
        date_str = format_date(summary["created_at"])
        # The shortened ID is 11 characters (8 chars + "..."), so pad the
        # column to 11 to keep rows aligned with the header.
        doc_id_short = summary["doc_id"][:8] + "..."
        title_truncated = summary["title"][:45] + ("..." if len(summary["title"]) > 45 else "")
        print(f"{doc_id_short:<11} {date_str:<18} {title_truncated}")
    print(f"\nTotal: {len(summaries)} documents")

def cmd_extract(args: argparse.Namespace) -> None:
    """Extract all summaries to markdown files."""
    cache = load_granola_cache(GRANOLA_CACHE_PATH)
    summaries = extract_summaries(cache)
    if not summaries:
        print("No documents with summaries found.")
        return

    output_dir = Path(args.output) if args.output else DEFAULT_OUTPUT_DIR
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Extracting {len(summaries)} summaries to {output_dir}\n")

    # Write individual markdown files
    for summary in summaries:
        doc_id = summary["doc_id"]
        title = summary["title"]
        created_at = summary["created_at"]
        summary_text = summary["summary"]
        date_str = format_date(created_at)

        md_content = f"""# {title}

**Date:** {date_str}
**Document ID:** {doc_id}

---

{summary_text}
"""
        output_path = output_dir / f"{doc_id}.md"
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(md_content)
        print(f" Wrote: {output_path.name}")

    # Write JSON index
    index_path = output_dir / "index.json"
    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(summaries, f, indent=2, ensure_ascii=False)
    print(f"\n Wrote index: {index_path}")
    print(f"\nTotal: {len(summaries)} summaries extracted to {output_dir}")

def main() -> None:
    parser = argparse.ArgumentParser(
        description="Extract AI-generated summaries from Granola meeting notes cache",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s list                     List all meetings with summaries
  %(prog)s extract                  Extract to ~/granola-summaries/
  %(prog)s extract -o ./summaries   Extract to custom directory

Note: This script reads from Granola's local cache file. The AI-generated
summaries are in documentPanels, not in documents.notes_* (which contains
user-entered notes only).
""",
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # list command
    list_parser = subparsers.add_parser("list", help="List all documents with summaries")
    list_parser.set_defaults(func=cmd_list)

    # extract command
    extract_parser = subparsers.add_parser(
        "extract", help="Extract all summaries to markdown files"
    )
    extract_parser.add_argument(
        "-o", "--output",
        help=f"Output directory (default: {DEFAULT_OUTPUT_DIR})"
    )
    extract_parser.set_defaults(func=cmd_extract)

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        sys.exit(1)
    args.func(args)


if __name__ == "__main__":
    main()
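
The script can also be reused as a small library from other Python code instead of via the CLI. A minimal sketch, assuming the file is saved as extract_granola_summaries.py somewhere on your import path:

    from extract_granola_summaries import (
        GRANOLA_CACHE_PATH,
        extract_summaries,
        load_granola_cache,
    )

    # Load the cache and print the most recent summary, if any.
    # extract_summaries() returns the list sorted newest-first.
    cache = load_granola_cache(GRANOLA_CACHE_PATH)
    summaries = extract_summaries(cache)
    if summaries:
        latest = summaries[0]
        print(latest["title"])
        print(latest["summary"])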