@lizthegrey
Last active January 6, 2026 00:45
Bluesky MCP Analysis Skill - Parse and analyse Bluesky timeline posts and notifications
**parse_timeline.py**

```python
#!/usr/bin/env python3
"""Parse Bluesky timeline data from MCP tool results.

This script extracts author handles from post opening tags
to avoid misattribution bugs from repost tags.
"""
import argparse
import json
import re

# Technology keywords for filtering
TECH_KEYWORDS = [
    # Observability/SRE
    'observability', 'honeycomb', 'opentelemetry', 'otel', 'monitoring', 'tracing',
    'metrics', 'sre', 'reliability', 'incident', 'postmortem', 'on-call',
    # Infrastructure
    'kubernetes', 'k8s', 'docker', 'aws', 'gcp', 'azure', 'cloud', 'infrastructure',
    'devops', 'distributed', 'microservices', 'performance', 'latency', 'scale',
    # Databases
    'database', 'postgres', 'postgresql', 'mysql', 'redis', 'kafka', 'mongodb',
    'cassandra', 'elasticsearch', 'sql', 'nosql',
    # Programming
    'python', 'rust', 'golang', 'go', 'typescript', 'javascript', 'java', 'c++',
    'programming', 'compiler', 'debugging', 'testing', 'ci/cd', 'deployment',
    # Security
    'security', 'encryption', 'pgp', 'gpg', 'cryptography', 'tls', 'ssl',
    'authentication', 'authorization', 'oauth', 'vulnerability',
    # AI/ML
    'ai', 'llm', 'claude', 'anthropic', 'openai', 'chatgpt', 'gpt',
    'machine learning', 'neural network', 'transformer', 'mcp', 'model context protocol',
    # Development
    'api', 'sdk', 'cli', 'git', 'github', 'gitlab', 'bitbucket', 'vcs',
]


def parse_timeline(filepath, filter_type='all', max_posts=50, min_likes=0):
    """Parse timeline JSON file and extract posts."""
    with open(filepath, 'r') as f:
        data = json.load(f)
    if not data or not isinstance(data, list) or 'text' not in data[0]:
        print("Error: Invalid timeline data format")
        return []
    text_content = data[0]['text']
    # CRITICAL: extract author_handle from the opening <post> tag, not from
    # text after </content>, which may belong to a following <repost> tag.
    pattern = (r'<post[^>]*author_handle="([^"]*)"[^>]*>'
               r'.*?<content>\s*(.*?)\s*</content>.*?Engagement: (\d+) likes')
    matches = re.findall(pattern, text_content, re.DOTALL)
    posts = []
    for author, content, likes in matches:
        likes_num = int(likes)
        # Apply filters
        if min_likes > 0 and likes_num < min_likes:
            continue
        if filter_type == 'tech':
            content_lower = content.lower()
            if not any(keyword in content_lower for keyword in TECH_KEYWORDS):
                continue
        posts.append({
            'author': author,
            'content': content.strip(),
            'likes': likes_num,
        })
    return posts[:max_posts]


def main():
    parser = argparse.ArgumentParser(
        description='Parse Bluesky timeline data from MCP tool results'
    )
    parser.add_argument('filepath', help='Path to timeline JSON file')
    parser.add_argument(
        '--filter',
        choices=['all', 'tech'],
        default='all',
        help='Filter posts by type (default: all)'
    )
    parser.add_argument(
        '--max',
        type=int,
        default=50,
        help='Maximum posts to display (default: 50)'
    )
    parser.add_argument(
        '--min-likes',
        type=int,
        default=0,
        help='Minimum likes to display (default: 0)'
    )
    args = parser.parse_args()
    posts = parse_timeline(args.filepath, args.filter, args.max, args.min_likes)
    print("=== PARSED POSTS ===")
    print(f"Filter: {args.filter}, Max: {args.max}, Min likes: {args.min_likes}")
    print(f"Found {len(posts)} matching posts\n")
    for i, post in enumerate(posts, 1):
        print(f"{i}. @{post['author']} ({post['likes']} likes):")
        print(f"   {post['content'][:300]}")
        if len(post['content']) > 300:
            print(f"   [...{len(post['content']) - 300} more characters]")
        print()


if __name__ == '__main__':
    main()
```
```bash
#!/bin/bash
# Repackage gist files into bluesky-mcp.skill format
set -e

echo "Creating directory structure..."
mkdir -p bluesky-mcp/scripts

echo "Moving files into correct locations..."
mv SKILL.md bluesky-mcp/
mv parse_timeline.py bluesky-mcp/scripts/

echo "Creating .skill file (ZIP archive)..."
zip -r bluesky-mcp.skill bluesky-mcp/

echo "Done! Created bluesky-mcp.skill"
echo ""
echo "To use: import bluesky-mcp.skill into Claude Code"
```
**SKILL.md**

---
name: bluesky-mcp
description: Parse and analyse Bluesky timeline posts and notifications using the Bluesky MCP server. Use when users want to summarise their Bluesky timeline, find technology-relevant posts, extract specific topics from timeline data, or analyse reply/quote notifications. Handles large timeline dumps efficiently and filters for work-relevant content.
---

# Bluesky MCP Analysis

Efficient workflows for analysing Bluesky timeline data and notifications.

## Getting Timeline Data

Use `bluesky:get-timeline-posts` to retrieve posts from your timeline. The tool supports two modes:

```
# Get last N posts
bluesky:get-timeline-posts(count=500, type="posts")

# Get posts from the last N hours
bluesky:get-timeline-posts(count=24, type="hours")
```

**Important:** Large timeline results are stored in `/mnt/user-data/tool_results/` as JSON files. The tool reports the filename.
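When several dumps have accumulated in that directory, a small helper can pick out the newest one. This is a sketch, not part of the skill's scripts; it assumes the result files match the `bluesky_*.json` pattern used in the workflow examples in this document.

```python
import glob
import os

def latest_timeline_dump(results_dir='/mnt/user-data/tool_results'):
    """Return the most recently modified bluesky_*.json file, or None.

    The bluesky_*.json pattern is an assumption borrowed from the
    workflow examples; adjust it if your MCP server names files differently.
    """
    files = glob.glob(os.path.join(results_dir, 'bluesky_*.json'))
    return max(files, key=os.path.getmtime) if files else None
```

The returned path can be passed straight to `scripts/parse_timeline.py`, avoiding shell globs that might expand to more than one file.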

## Getting Notifications

Use `bluesky:get-notifications` to retrieve reply and quote notifications:

```
bluesky:get-notifications(limit=500, reasons=["reply", "quote"])
```

## Parsing Timeline Data

When timeline data is stored in a tool results file, use the parsing script to extract the relevant content:

```bash
python3 scripts/parse_timeline.py /path/to/tool_result.json --filter tech --max 50
```

Options:

- `--filter tech`: show only technology-related posts (observability, programming, AI, infrastructure, etc.)
- `--filter all`: show all posts (no filtering)
- `--max N`: limit output to N posts (default 50)
- `--min-likes N`: only show posts with at least N likes (default 0)

The script extracts author handles from the opening `<post>` tag to avoid the common bug of matching author handles from subsequent `<repost>` tags.

## Common Workflows

### Summarise recent timeline

```
# 1. Get the last 8 hours of timeline
bluesky:get-timeline-posts(count=8, type="hours")

# 2. Parse for tech posts
python3 scripts/parse_timeline.py /mnt/user-data/tool_results/bluesky_*.json --filter tech --max 30
```

### Find high-engagement tech posts worth commenting on

```bash
python3 scripts/parse_timeline.py /path/to/result.json --filter tech --min-likes 50
```

### Check notification threads

```
# 1. Get recent notifications
bluesky:get-notifications(limit=100, reasons=["reply", "quote"])

# 2. Follow up on specific threads using bluesky:get-post-thread with URIs from the notifications
```

## Critical Parsing Bug to Avoid

When parsing the XML structure returned by the timeline tools, always extract `author_handle` from the opening `<post>` tag, never from the text after `</content>`.

Incorrect pattern (matches the author from the next repost tag):

```python
r'<post[^>]*>.*?<content>(.*?)</content>.*?author_handle="([^"]*)"'
```

Correct pattern (extracts from the post opening tag):

```python
r'<post[^>]*author_handle="([^"]*)"[^>]*>.*?<content>(.*?)</content>'
```

The incorrect pattern matches the first `author_handle` after `</content>`, which may come from a `<repost author_handle="...">` tag that follows, causing misattribution.
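The misattribution is easy to reproduce. The sample XML below is invented for illustration (the handles and `uri` attribute are hypothetical), but it follows the post-then-repost shape described above:

```python
import re

# Hypothetical sample: a post by alice.example followed by a repost tag.
sample = (
    '<post uri="at://1" author_handle="alice.example">'
    '<content>Original post</content>'
    '</post>'
    '<repost author_handle="bob.example"></repost>'
)

bad = r'<post[^>]*>.*?<content>(.*?)</content>.*?author_handle="([^"]*)"'
good = r'<post[^>]*author_handle="([^"]*)"[^>]*>.*?<content>(.*?)</content>'

# The incorrect pattern scans past </content> and grabs the reposter's handle.
print(re.search(bad, sample, re.DOTALL).group(2))   # bob.example (wrong)
print(re.search(good, sample, re.DOTALL).group(1))  # alice.example (correct)
```

Because `[^>]*` in the incorrect pattern silently swallows the `author_handle` attribute inside the opening tag, the first capturable handle is whatever appears next in the stream.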

## Technology Keywords Reference

The parsing script recognises technology-related terms including:

**Observability/SRE:** observability, honeycomb, opentelemetry, otel, monitoring, tracing, metrics, sre, reliability, incident, postmortem, on-call

**Infrastructure:** kubernetes, docker, aws, cloud, infrastructure, devops, distributed, microservices, performance, latency, scale

**Databases:** database, postgres, postgresql, mysql, redis, kafka, mongodb, sql

**Programming:** python, rust, golang, go, typescript, javascript, java, c++, programming, compiler, debugging, testing

**Security:** security, encryption, pgp, gpg, cryptography, tls, ssl, authentication

**AI/ML:** ai, llm, claude, anthropic, openai, chatgpt, machine learning, neural network, transformer, mcp, model context protocol

**Development:** api, sdk, cli, git, github, gitlab, ci/cd, deployment

Customise these keywords in the script for your specific filtering requirements.
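The filter is a plain case-insensitive substring check, so customising it is just a matter of appending strings. A minimal sketch of that logic (the list here is trimmed for brevity, and `ebpf`/`wireguard` are illustrative additions, not part of the stock list):

```python
# Trimmed stand-in for the script's TECH_KEYWORDS list.
TECH_KEYWORDS = ['observability', 'kubernetes', 'postgres']
# Illustrative user additions.
TECH_KEYWORDS += ['ebpf', 'wireguard']

def is_tech(content):
    """Mirror of the script's filter: case-insensitive substring match."""
    content_lower = content.lower()
    return any(keyword in content_lower for keyword in TECH_KEYWORDS)

print(is_tech('Debugging with eBPF uprobes'))  # True
print(is_tech('Pictures of my cat'))           # False
```

One caveat of substring matching: short stock keywords such as `go` and `ai` will also match inside unrelated words ("going", "air"), so word-boundary regexes are a possible refinement if you see false positives.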
