Extract AI-generated summaries from Granola meeting notes cache (macOS)
#!/usr/bin/env python3
"""
Extract AI-generated summaries from Granola meeting notes cache.

Granola stores meeting data in a local cache file. The AI-generated summaries
are stored in documentPanels (NOT in documents.notes_*). This script extracts
those summaries and exports them as markdown files with a JSON index.

Usage:
    python extract_granola_summaries.py list            # List all documents with summaries
    python extract_granola_summaries.py extract         # Extract to markdown files
    python extract_granola_summaries.py extract -o ./out  # Specify output directory

Requirements:
    - macOS with Granola installed
    - Python 3.9+

Author: Dan Malone
License: MIT
"""
import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Any

# Default paths - modify OUTPUT_DIR as needed for your system
GRANOLA_CACHE_PATH = Path.home() / "Library/Application Support/Granola/cache-v3.json"
DEFAULT_OUTPUT_DIR = Path.home() / "granola-summaries"

def extract_text_from_prosemirror(node: Any) -> str:
    """
    Recursively extract text from a ProseMirror document structure.

    Granola uses ProseMirror for rich text storage. This function converts
    the nested document structure into plain markdown-ish text.
    """
    if not isinstance(node, dict):
        return ""

    text_parts = []
    node_type = node.get("type", "")

    if node_type == "text":
        text_parts.append(node.get("text", ""))
    elif node_type == "heading":
        text_parts.append("\n## ")
    elif node_type == "listItem":
        text_parts.append("\n- ")
    elif node_type == "paragraph":
        text_parts.append("\n")

    # Recurse into child nodes and concatenate their text
    for child in node.get("content", []):
        text_parts.append(extract_text_from_prosemirror(child))

    return "".join(text_parts)

def load_granola_cache(cache_path: Path) -> dict:
    """
    Load the Granola cache file.

    The cache file has a nested structure where the actual cache data
    is a JSON string inside the top-level 'cache' key.
    """
    if not cache_path.exists():
        print(f"Error: Granola cache not found at {cache_path}", file=sys.stderr)
        print("Make sure Granola is installed and has recorded at least one meeting.", file=sys.stderr)
        sys.exit(1)

    with open(cache_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # The actual cache is a JSON string inside the 'cache' key
    cache_str = data.get("cache", "")
    if isinstance(cache_str, str) and cache_str:
        return json.loads(cache_str)
    return data
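
# Sketch of the double-encoded layout this function unwraps (simplified; only
# the 'cache' and 'state' keys are relied on by this script):
#
#   cache-v3.json:  {"cache": "<JSON-encoded string>"}
#   decoded string: {"state": {"documents": {...}, "documentPanels": {...}}}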

def extract_summaries(cache: dict) -> list[dict]:
    """
    Extract AI-generated summaries from documentPanels.

    Returns a list of dicts with:
    - doc_id: Document ID
    - title: Document title
    - created_at: Document creation date (ISO format)
    - summary: Extracted summary text
    """
    state = cache.get("state", {})
    documents = state.get("documents", {})
    document_panels = state.get("documentPanels", {})

    summaries = []
    for doc_id, panels in document_panels.items():
        # Skip empty or invalid doc_ids
        if not doc_id or not doc_id.strip():
            continue

        # Find the Summary panel
        summary_panel = None
        for panel in panels.values():
            if panel.get("title") == "Summary":
                summary_panel = panel
                break
        if not summary_panel:
            continue

        # Extract text from ProseMirror content
        content = summary_panel.get("content", {})
        summary_text = extract_text_from_prosemirror(content).strip()
        if not summary_text:
            continue

        # Get document metadata
        doc = documents.get(doc_id, {})
        title = doc.get("title", "Untitled")
        created_at = doc.get("created_at", "")

        summaries.append({
            "doc_id": doc_id,
            "title": title,
            "created_at": created_at,
            "summary": summary_text,
        })

    # Sort by created_at descending (newest first)
    summaries.sort(key=lambda x: x.get("created_at", ""), reverse=True)
    return summaries
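
# Assumed shape of a documentPanels entry, inferred from the lookups above
# (panel ids are illustrative):
#
#   "documentPanels": {
#       "<doc_id>": {
#           "<panel_id>": {"title": "Summary", "content": {<ProseMirror doc>}},
#           ...
#       }
#   }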

def format_date(iso_date: str) -> str:
    """Format an ISO date string as a human-readable date."""
    if not iso_date:
        return "Unknown date"
    try:
        # fromisoformat() in Python 3.9 does not accept a trailing 'Z',
        # so normalize it to an explicit UTC offset first
        dt = datetime.fromisoformat(iso_date.replace("Z", "+00:00"))
        return dt.strftime("%Y-%m-%d %H:%M")
    except (ValueError, AttributeError):
        return iso_date

def cmd_list(args: argparse.Namespace) -> None:
    """List all documents with summaries."""
    cache = load_granola_cache(GRANOLA_CACHE_PATH)
    summaries = extract_summaries(cache)
    if not summaries:
        print("No documents with summaries found.")
        return

    print(f"Found {len(summaries)} documents with AI-generated summaries:\n")
    print(f"{'ID':<12} {'DATE':<18} {'TITLE'}")
    print("-" * 80)
    for summary in summaries:
        date_str = format_date(summary["created_at"])
        # Truncated ID is 11 chars (8 + "..."), so pad its column to 12
        doc_id_short = summary["doc_id"][:8] + "..."
        title_truncated = summary["title"][:45] + ("..." if len(summary["title"]) > 45 else "")
        print(f"{doc_id_short:<12} {date_str:<18} {title_truncated}")
    print(f"\nTotal: {len(summaries)} documents")

def cmd_extract(args: argparse.Namespace) -> None:
    """Extract all summaries to markdown files."""
    cache = load_granola_cache(GRANOLA_CACHE_PATH)
    summaries = extract_summaries(cache)
    if not summaries:
        print("No documents with summaries found.")
        return

    output_dir = Path(args.output) if args.output else DEFAULT_OUTPUT_DIR
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Extracting {len(summaries)} summaries to {output_dir}\n")

    # Write individual markdown files
    for summary in summaries:
        doc_id = summary["doc_id"]
        title = summary["title"]
        created_at = summary["created_at"]
        summary_text = summary["summary"]
        date_str = format_date(created_at)

        md_content = f"""# {title}

**Date:** {date_str}
**Document ID:** {doc_id}

---

{summary_text}
"""
        output_path = output_dir / f"{doc_id}.md"
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(md_content)
        print(f"  Wrote: {output_path.name}")

    # Write JSON index
    index_path = output_dir / "index.json"
    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(summaries, f, indent=2, ensure_ascii=False)
    print(f"\n  Wrote index: {index_path}")

    print(f"\nTotal: {len(summaries)} summaries extracted to {output_dir}")

def main() -> None:
    parser = argparse.ArgumentParser(
        description="Extract AI-generated summaries from Granola meeting notes cache",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s list                    List all meetings with summaries
  %(prog)s extract                 Extract to ~/granola-summaries/
  %(prog)s extract -o ./summaries  Extract to a custom directory

Note: This script reads from Granola's local cache file. The AI-generated
summaries are in documentPanels, not in documents.notes_* (which contains
user-entered notes only).
""",
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # list command
    list_parser = subparsers.add_parser("list", help="List all documents with summaries")
    list_parser.set_defaults(func=cmd_list)

    # extract command
    extract_parser = subparsers.add_parser(
        "extract", help="Extract all summaries to markdown files"
    )
    extract_parser.add_argument(
        "-o", "--output",
        help=f"Output directory (default: {DEFAULT_OUTPUT_DIR})",
    )
    extract_parser.set_defaults(func=cmd_extract)

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        sys.exit(1)
    args.func(args)


if __name__ == "__main__":
    main()